In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import *
import tensorflow.keras
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.optimizers import Adam,SGD
from tensorflow.math import l2_normalize
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
(xtrain,ytrain),(xtest,ytest)=imdb.load_data(num_words=5000) 

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
word_idx=imdb.get_word_index() #getting vocab from imdb data

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [None]:
idx_word=dict([(value,key) for (key,value) in word_idx.items()])
actual_review=[idx_word.get(idx-3,'#') for idx in xtrain[0]] #if key is not present # will be used
actual_review=' '.join(actual_review)
print(actual_review)

# this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert # is an amazing actor and now the same being director # father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for # and would recommend it to everyone to watch and the fly # was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also # to the two little # that played the # of norman and paul they were just brilliant children are often left out of the # list i think because the stars that play them all grown up are such a big # for the whole film but these children are amazing and should be # for what they have done don't you think the whole story was

In [None]:
maxlen=500
xtrain=pad_sequences(xtrain,maxlen=maxlen,padding='post')
xtest=pad_sequences(xtest,maxlen=maxlen,padding='post')

In [None]:
xtrain=np.asarray(xtrain).astype('float32')
xtest=np.asarray(xtest).astype('float32')
ytrain=np.asarray(ytrain).astype('float32')
ytest=np.asarray(ytest).astype('float32')

train=tf.data.Dataset.from_tensor_slices((xtrain,ytrain)).batch(128)
test=tf.data.Dataset.from_tensor_slices((xtest,ytest)).batch(128)

In [None]:
vocab_size,emb_dimension=5000,32

# Model Architecture

In [None]:
class MyModel(Model):

  def __init__(self,vocab_size,emb_dimension):
    super(MyModel, self).__init__()
    self.emb=Embedding(vocab_size,emb_dimension,trainable=True)
    self.conv1=Conv1D(filters=64, kernel_size=5,activation='elu',kernel_regularizer=tf.keras.regularizers.l2(l=0.01))
    self.pool1=MaxPooling1D(pool_size=2)
    self.bn1=BatchNormalization()
    # convLayer2
    self.conv2=Conv1D(filters=128, kernel_size=6,activation='elu',kernel_regularizer=tf.keras.regularizers.l2(l=0.01))
    self.pool2=MaxPooling1D(pool_size=4)
    self.bn2=BatchNormalization()
    # convLayer3
    self.conv3=Conv1D(filters=128, kernel_size=7,activation='elu',kernel_regularizer=tf.keras.regularizers.l2(l=0.01))
    self.pool3=MaxPooling1D(pool_size=8)
    self.bn3=BatchNormalization()

    self.lstm1=GRU(128)

    self.y=MaxPooling1D(pool_size=2)
    self.lstm2=GRU(128)

    # self.concat=concatenate([x,y])

    self.dense=Dense(1, activation='sigmoid')

  def call(self,input):
    a=self.emb(input)
    x=self.conv1(a)
    x=self.pool1(x)
    x=self.bn1(x)
    # convLayer2
    x=self.conv2(x)
    x=self.pool2(x)
    x=self.bn2(x)
    # convLayer3
    x=self.conv3(x)
    x=self.pool3(x)
    x=self.bn3(x)

    x=self.lstm1(x)

    z=self.y(a)
    z=self.lstm2(z)

    x=concatenate([x,z])
    
    return self.dense(x)

In [None]:
model=MyModel(vocab_size=vocab_size,emb_dimension=emb_dimension)

In [None]:
train_acc_metric = tf.keras.metrics.BinaryAccuracy()
val_acc_metric = tf.keras.metrics.BinaryAccuracy()

In [None]:
loss=tf.keras.losses.BinaryCrossentropy()
optimizer=RMSprop(learning_rate=0.005)
epoch_losses = []
total_epochs=8

# Training

In [None]:
def model_training():
  for epoch in range(total_epochs): 
    batch_losses=[] 
    for inputs, outputs in train:
      with tf.GradientTape() as tape:
        current_loss = loss(model(inputs), outputs) 
        grads=tape.gradient(current_loss, model.trainable_variables) 
      optimizer.apply_gradients(zip(grads, model.trainable_variables))
      batch_losses.append(current_loss) 
    
      train_acc_metric.update_state(outputs,model(inputs))
    
    for inputs, outputs in test:
      val_acc_metric.update_state(outputs,model(inputs))
  
    train_acc = train_acc_metric.result().numpy()
    train_acc_metric.reset_states()

    val_acc = val_acc_metric.result().numpy()
    val_acc_metric.reset_states()

    print("epoch ",epoch,", Training acc : " , train_acc,end="")
    print("  Validation acc: ",val_acc)

In [None]:
model_training()

epoch  0 , Training acc :  0.49964926  Validation acc:  0.49973693
epoch  1 , Training acc :  0.497441  Validation acc:  0.50026304
epoch  2 , Training acc :  0.57916933  Validation acc:  0.77940047
epoch  3 , Training acc :  0.8206872  Validation acc:  0.8013074
epoch  4 , Training acc :  0.7348693  Validation acc:  0.7589684
epoch  5 , Training acc :  0.88599336  Validation acc:  0.88128185
epoch  6 , Training acc :  0.91432554  Validation acc:  0.8415258
epoch  7 , Training acc :  0.9282446  Validation acc:  0.8892379


In [None]:
optimizer=RMSprop(learning_rate=0.0005)
total_epochs=5
model_training()

epoch  0 , Training acc :  0.94375  Validation acc:  0.88999516
epoch  1 , Training acc :  0.9470185  Validation acc:  0.89243466
epoch  2 , Training acc :  0.94968915  Validation acc:  0.89371014
epoch  3 , Training acc :  0.9513234  Validation acc:  0.8929448
epoch  4 , Training acc :  0.95319676  Validation acc:  0.89398915


In [None]:
model.save_weights('drive/My Drive/Colab_files/model_101.h5')

Tried many learning rates with RMSprop but couldn't improve accuracy irrespective of how big or small is the learning rate so changed the optimiser.

In [None]:
optimizer=SGD(learning_rate=0.007)
total_epochs=5
model_training()

epoch  0 , Training acc :  0.9576212  Validation acc:  0.8879066
epoch  1 , Training acc :  0.9596142  Validation acc:  0.8940529
epoch  2 , Training acc :  0.9609296  Validation acc:  0.89308834
epoch  3 , Training acc :  0.96104914  Validation acc:  0.89373404
epoch  4 , Training acc :  0.96192604  Validation acc:  0.89393336


changing the optimizer also didn't help.

In [None]:
model.save_weights('drive/My Drive/Colab_files/model_101.h5')

In [None]:
model(xtrain[:2,:])
model.load_weights('drive/My Drive/Colab_files/model_101.h5')

In [None]:
model.summary()

Model: "my_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  160000    
_________________________________________________________________
conv1d (Conv1D)              multiple                  10304     
_________________________________________________________________
max_pooling1d (MaxPooling1D) multiple                  0         
_________________________________________________________________
batch_normalization (BatchNo multiple                  256       
_________________________________________________________________
conv1d_1 (Conv1D)            multiple                  49280     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 multiple                  0         
_________________________________________________________________
batch_normalization_1 (Batch multiple                  512

In [None]:
metric = tf.keras.metrics.AUC()
for inputs, outputs in test:
  metric.update_state(outputs,model(inputs))
auc=metric.result().numpy()

metric = tf.keras.metrics.BinaryAccuracy()
for inputs, outputs in test:
  metric.update_state(outputs,model(inputs))
acc=metric.result().numpy()

metric = tf.keras.metrics.Precision()
for inputs, outputs in test:
  metric.update_state(outputs,model(inputs))
pre=metric.result().numpy()

metric = tf.keras.metrics.Recall()
for inputs, outputs in test:
  metric.update_state(outputs,model(inputs))
recall=metric.result().numpy()

print('AUC: ',auc)
print('Accuracy: ',acc)
print('Precision: ',pre)
print('Recall: ',recall)

AUC:  0.9039802
Accuracy:  0.89393336
Precision:  0.89814067
Recall:  0.8888
