In [2]:
import tensorflow as tf
from keras.layers import *
from keras.models import Model
from keras.datasets import imdb

Load the Dataset

In [3]:
vocab_size=20000
(X_train,y_train),(X_test,y_test) = imdb.load_data(num_words=vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [4]:
X_train.shape,X_test.shape

((25000,), (25000,))

Define Hyperparameters

In [5]:
max_len=200
embed_dim=32
num_head=2
ff_dim=32

Data Preprocessing

In [6]:
from keras.preprocessing.sequence import pad_sequences

In [8]:
X_train=pad_sequences(X_train,maxlen=max_len)
X_test=pad_sequences(X_test,maxlen=max_len)

In [9]:
X_train.shape

(25000, 200)

In [10]:
X_train[0]

array([    5,    25,   100,    43,   838,   112,    50,   670,     2,
           9,    35,   480,   284,     5,   150,     4,   172,   112,
         167,     2,   336,   385,    39,     4,   172,  4536,  1111,
          17,   546,    38,    13,   447,     4,   192,    50,    16,
           6,   147,  2025,    19,    14,    22,     4,  1920,  4613,
         469,     4,    22,    71,    87,    12,    16,    43,   530,
          38,    76,    15,    13,  1247,     4,    22,    17,   515,
          17,    12,    16,   626,    18, 19193,     5,    62,   386,
          12,     8,   316,     8,   106,     5,     4,  2223,  5244,
          16,   480,    66,  3785,    33,     4,   130,    12,    16,
          38,   619,     5,    25,   124,    51,    36,   135,    48,
          25,  1415,    33,     6,    22,    12,   215,    28,    77,
          52,     5,    14,   407,    16,    82, 10311,     8,     4,
         107,   117,  5952,    15,   256,     4,     2,     7,  3766,
           5,   723,

Build the Model

In [11]:
#Input Layer
inputs=Input(shape=(max_len,))

#Token Embedding
token_emb_layer=Embedding(input_dim=vocab_size,output_dim=embed_dim)
x=token_emb_layer(inputs)

#Position Embedding Layer
positions=tf.range(0,max_len)
pos_emb_layer=Embedding(input_dim=max_len,output_dim=embed_dim)
positions_emb=pos_emb_layer(positions)

#Add the token + position embedding
x=x+positions_emb

#Add transformer block
#1. Multi-head self Attention
attention_output=MultiHeadAttention(num_heads=num_head,key_dim=embed_dim)(x,x)

attention_output=Dropout(0.1)(attention_output)

#Residual connections
x1=LayerNormalization()(x+attention_output)
x1=Dense(ff_dim,activation="relu")(x1)

#Feed-forward neural network
ffn=Dense(ff_dim,activation="relu")(x1)
ffn=Dense(embed_dim)(ffn)
ffn=Dropout(0.1)(ffn)

#Residual connections(add+norm)
x2=LayerNormalization()(x+ffn)

#Classification Head
x3=GlobalAveragePooling1D()(x2)
x3=Dropout(0.1)(x3)
x3=Dense(20,activation="relu")(x3)
x3=Dropout(0.1)(x3)

#Output Layer
outputs=Dense(1,activation="sigmoid")(x3)

In [12]:
#Create object of model
model=Model(inputs=inputs,outputs=outputs)
model.compile(loss="binary_crossentropy",optimizer="adam",metrics=["accuracy"])

In [15]:
print(model.summary())

None


In [16]:
from keras.utils import plot_model

In [17]:
plot_model(model, show_layer_names=True, show_layer_activations=True , show_shapes=True)

You must install graphviz (see instructions at https://graphviz.gitlab.io/download/) for `plot_model` to work.


Train the model

In [18]:
history=model.fit(X_train,y_train,batch_size=32,epochs=5,validation_data=(X_test,y_test))

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 42ms/step - accuracy: 0.7907 - loss: 0.4135 - val_accuracy: 0.8727 - val_loss: 0.2957
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 43ms/step - accuracy: 0.9234 - loss: 0.2005 - val_accuracy: 0.8668 - val_loss: 0.3211
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 43ms/step - accuracy: 0.9574 - loss: 0.1265 - val_accuracy: 0.8520 - val_loss: 0.3965
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 43ms/step - accuracy: 0.9728 - loss: 0.0839 - val_accuracy: 0.8484 - val_loss: 0.5233
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 43ms/step - accuracy: 0.9833 - loss: 0.0551 - val_accuracy: 0.8382 - val_loss: 0.6536


In [19]:
new=X_test[100]
new

array([    0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     1,     6,  1605,  7334,    33,     4,   863,
           7,    14,    22,    62,   306,     8,  7147,    43,   160,
         995, 12125, 12112,     2,  8462,  3340,     4,   552,   121,
         383,  9538,   341,     5,     4,   156,    26,    32,   616,
        2118,    25,   165,   181,     8,   783,   190,  1545,  5779,
          53,    10,    10,   247,    74,  1152,    23,  3883,  2120,
           5,   599,   151,    50,     9,     6,   117,     7,   257,
          57,   383,

In [20]:
import numpy as np
new=np.reshape(new,(1,max_len))

In [21]:
new.shape

(1, 200)

In [22]:
model.predict(new)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266ms/step


array([[0.01692115]], dtype=float32)