In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import *
import tensorflow.keras
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.math import l2_normalize
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# Mount Google Drive at /content/drive; later cells read and write files
# under 'drive/My Drive/...'.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Loading Dataset

In [None]:
# Load the IMDB train/test splits with num_words=5000; this value has to stay
# in sync with `vocab_size`, which sizes the embedding table further down.
(xtrain,ytrain),(xtest,ytest)=imdb.load_data(num_words=5000) 

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
word_idx=imdb.get_word_index() # word -> integer index mapping of the IMDB vocabulary (iterated as (word, index) pairs when building the embedding matrix)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [None]:
# Sequence length, vocabulary size and embedding width used by the rest of the notebook.
maxlen, vocab_size, emb_dimension = 500, 5000, 300

# Bring both splits to `maxlen` token ids per review, padding at the end ('post').
xtrain, xtest = (
    pad_sequences(split, maxlen=maxlen, padding='post')
    for split in (xtrain, xtest)
)

# Loading Google's pretrained word2vec model

In [None]:
# Fetch the GoogleNews word2vec archive into ./download (wget -P sets the target
# directory, -c continues a partially downloaded file).
!wget -P download -c "https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz"
from gensim.models import KeyedVectors
# Load the binary word2vec vectors from the downloaded archive; the result is
# the lookup used to fill the embedding matrix below.
word2vec = KeyedVectors.load_word2vec_format('download/GoogleNews-vectors-negative300.bin.gz', binary=True)

# Embedding Layer

In [None]:
# Build the (vocab_size, emb_dimension) table that initialises the Embedding layer.
#
# imdb.load_data() shifts every word id by `index_from` (default 3) because ids
# 0, 1 and 2 are reserved for padding, start-of-sequence and out-of-vocabulary,
# whereas imdb.get_word_index() returns the *unshifted* frequency ranks.
# The row for a word is therefore rank + index_from; without the shift every
# token in xtrain/xtest would look up the vector of a different word.
# Rows without a pretrained vector (reserved ids, unknown words) stay all-zero.
# NOTE: an emb.npy cached from the earlier, unshifted version of this cell is
# misaligned and must be regenerated by re-running the save cell.
index_from = 3
embedding_matrix = np.zeros((vocab_size,emb_dimension))
for word, i in word_idx.items():
    row = i + index_from
    # `in` / `[]` are supported by KeyedVectors in both gensim 3.x and 4.x,
    # unlike the `.vocab` / `.word_vec` accessors.
    if row < vocab_size and word in word2vec:
        embedding_matrix[row] = word2vec[word]

In [None]:
# Persist the embedding matrix to Drive (presumably so a later session can skip
# the word2vec download and lookup — see the np.load cell that follows).
np.save('drive/My Drive/imdb/emb.npy',embedding_matrix)

In [None]:
# Reload the embedding matrix from the file written by the np.save cell above.
embedding_matrix=np.load('drive/My Drive/imdb/emb.npy')

In [None]:
# Token ids are categorical indices into the embedding table, so keep them as
# integers instead of round-tripping them through float32; only the binary
# labels need to be float32 to match the sigmoid output in the loss.
xtrain=np.asarray(xtrain).astype('int32')
xtest=np.asarray(xtest).astype('int32')
ytrain=np.asarray(ytrain).astype('float32')
ytest=np.asarray(ytest).astype('float32')

# One (token_ids, label) element per review.
train=tf.data.Dataset.from_tensor_slices((xtrain,ytrain))
test=tf.data.Dataset.from_tensor_slices((xtest,ytest))

In [None]:
batch_size=256
# Rebuild both pipelines from the arrays instead of re-wrapping `train`/`test`:
# `train=train.batch(256)` is not idempotent, so running the cell a second time
# would batch already-batched data and yield rank-3 inputs.
# The training split is reshuffled on every pass (tf.data's default for
# shuffle) so that the batch composition differs between epochs; the test split
# keeps its order.
train=(tf.data.Dataset.from_tensor_slices((xtrain,ytrain))
       .shuffle(buffer_size=len(xtrain))
       .batch(batch_size))
test=tf.data.Dataset.from_tensor_slices((xtest,ytest)).batch(batch_size)

# Model Architecture

In [None]:
class MyModel(Model):
  """Two-channel Conv1D + GRU binary classifier on top of a shared embedding.

  Both channels read the same embedded sequence and apply
  Conv1D -> Dropout -> BatchNormalization -> GRU(128) with their own filter
  count and kernel width. The two GRU outputs are concatenated, passed through
  dropout and fed to a single sigmoid unit.

  Args:
    vocab_size: number of rows of the embedding table.
    emb_dimension: width of each embedding vector.
    embedding_matrix: initial embedding weights; the layer stays trainable.
    filters: two-element sequence, Conv1D filter counts for channel 1 and 2.
    kernel_size: two-element sequence, Conv1D kernel widths for channel 1 and 2.
  """

  def __init__(self,vocab_size,emb_dimension,embedding_matrix,filters,kernel_size):
    super(MyModel, self).__init__()
    # Layers are created in the same order and under the same attribute names
    # as before so previously saved weight files keep loading.
    self.emb=Embedding(vocab_size,emb_dimension,weights=[embedding_matrix],trainable=True)
    #Channel 1
    # The L2 factor is passed positionally: the keyword was `l` in older
    # TensorFlow releases and `l2` in newer ones, positional works with both.
    self.conv1=Conv1D(filters=filters[0], kernel_size=kernel_size[0],activation='relu',kernel_regularizer=tf.keras.regularizers.l2(0.01))
    self.drop1=Dropout(0.5)
    self.bn1=BatchNormalization()
    self.lstm1=GRU(128)  # a GRU despite the attribute name
    #Channel 2
    self.conv2=Conv1D(filters=filters[1], kernel_size=kernel_size[1],activation='relu',kernel_regularizer=tf.keras.regularizers.l2(0.01))
    self.drop2=Dropout(0.5)
    self.bn2=BatchNormalization()
    self.lstm2=GRU(128)  # a GRU despite the attribute name

    self.drop3=Dropout(0.5)
    #Classification Layer
    self.dense=Dense(1,activation='sigmoid')

  def call(self,input,training=None):
    """Run the forward pass.

    Args:
      input: batch of token-id sequences fed to the embedding layer.
      training: forwarded to every Dropout / BatchNormalization layer so their
        mode is set explicitly rather than through implicit propagation.
        None leaves the decision to Keras.

    Returns:
      Tensor with one sigmoid output per example (last dimension of size 1).
    """
    a=self.emb(input)

    #Channel 1
    x=self.conv1(a)
    x=self.drop1(x,training=training)
    x=self.bn1(x,training=training)
    x=self.lstm1(x)

    #Channel 2
    y=self.conv2(a)
    y=self.drop2(y,training=training)
    y=self.bn2(y,training=training)
    y=self.lstm2(y)

    x=concatenate([x,y])
    x=self.drop3(x,training=training)
    x=self.dense(x)
    return x

In [None]:
# Instantiate the two-channel classifier: channel 1 uses 128 filters of width 7,
# channel 2 uses 64 filters of width 5.
model = MyModel(
    vocab_size=vocab_size,
    emb_dimension=emb_dimension,
    embedding_matrix=embedding_matrix,
    filters=[128, 64],
    kernel_size=[7, 5],
)

In [None]:
# Streaming accuracy metrics; model_training() updates them batch by batch and
# resets them at the end of every epoch.
train_acc_metric = tf.keras.metrics.BinaryAccuracy()
val_acc_metric = tf.keras.metrics.BinaryAccuracy()

In [None]:
# Globals read by model_training(): the loss function, the optimizer, a list
# intended for per-epoch loss values, and the number of epochs per call.
loss=tf.keras.losses.BinaryCrossentropy()
optimizer=RMSprop(learning_rate=0.01)
epoch_losses = []
total_epochs=12

# Training

In [None]:
def model_training():
  """Train the global `model` for `total_epochs` epochs and print accuracies.

  Reads the notebook-level globals `model`, `train`, `test`, `loss`,
  `optimizer`, `train_acc_metric`, `val_acc_metric` and `total_epochs`.
  After each epoch the mean training loss is appended to the global
  `epoch_losses` list and the training / validation accuracy is printed.
  `test` is used as the validation set.

  Training accuracy is accumulated from the same forward pass that produced
  the gradients (i.e. with dropout active), so no second pass is needed.
  """
  for epoch in range(total_epochs):
    batch_losses=[]
    for inputs, outputs in train:
      with tf.GradientTape() as tape:
        # training=True turns on Dropout and makes BatchNormalization use and
        # update batch statistics; without it the model runs in inference mode.
        predictions = model(inputs, training=True)
        # Give the labels the same (batch, 1) shape as the predictions.
        labels = tf.reshape(outputs, tf.shape(predictions))
        # Keras losses take (y_true, y_pred) in that order.
        current_loss = loss(labels, predictions)
        # Layer regularizers (the L2 penalties on the Conv1D kernels) are only
        # collected in model.losses; a custom loop has to add them itself.
        if model.losses:
          current_loss += tf.add_n(model.losses)
      # Differentiate after leaving the tape context so the gradient
      # computation itself is not recorded.
      grads=tape.gradient(current_loss, model.trainable_variables)
      optimizer.apply_gradients(zip(grads, model.trainable_variables))
      batch_losses.append(float(current_loss))

      train_acc_metric.update_state(labels,predictions)

    for inputs, outputs in test:
      val_acc_metric.update_state(outputs,model(inputs, training=False))

    if batch_losses:
      epoch_losses.append(sum(batch_losses)/len(batch_losses))

    train_acc = train_acc_metric.result().numpy()
    train_acc_metric.reset_states()

    val_acc = val_acc_metric.result().numpy()
    val_acc_metric.reset_states()

    print("epoch ",epoch,", Training acc : " , train_acc,end="")
    print("  Validation acc: ",val_acc)

In [None]:
# First training run, using whatever `optimizer` and `total_epochs` are
# currently bound at notebook level.
model_training()

epoch  0 , Training acc :  0.50083894  Validation acc:  0.5000418
epoch  1 , Training acc :  0.5000418  Validation acc:  0.5000418
epoch  2 , Training acc :  0.5014729  Validation acc:  0.49995825
epoch  3 , Training acc :  0.7116588  Validation acc:  0.74720794
epoch  4 , Training acc :  0.84542793  Validation acc:  0.8225769
epoch  5 , Training acc :  0.8708242  Validation acc:  0.8172908
epoch  6 , Training acc :  0.8874229  Validation acc:  0.8576629
epoch  7 , Training acc :  0.9010284  Validation acc:  0.7985605
epoch  8 , Training acc :  0.90652716  Validation acc:  0.8479733
epoch  9 , Training acc :  0.9176707  Validation acc:  0.8322059
epoch  10 , Training acc :  0.91993135  Validation acc:  0.86240435
epoch  11 , Training acc :  0.92489296  Validation acc:  0.86148757


In [None]:
# Continue training the same model for 5 more epochs at a lower learning rate.
# model_training() reads these globals, so rebinding them here changes its
# behaviour; a new RMSprop instance is created for this run.
optimizer=RMSprop(learning_rate=0.005)
total_epochs=5
model_training()

epoch  0 , Training acc :  0.9573425  Validation acc:  0.87347203
epoch  1 , Training acc :  0.9610115  Validation acc:  0.8764236
epoch  2 , Training acc :  0.9653373  Validation acc:  0.8772378
epoch  3 , Training acc :  0.966273  Validation acc:  0.87119436
epoch  4 , Training acc :  0.9665122  Validation acc:  0.87695694


In [None]:
# Another 5 epochs at the same learning rate. Note that the optimizer is
# re-created here rather than reused from the previous cell.
optimizer=RMSprop(learning_rate=0.005)
total_epochs=5
model_training()

epoch  0 , Training acc :  0.97364706  Validation acc:  0.8765185
epoch  1 , Training acc :  0.9742051  Validation acc:  0.8771543
epoch  2 , Training acc :  0.97564006  Validation acc:  0.8772568
epoch  3 , Training acc :  0.9764372  Validation acc:  0.87787753
epoch  4 , Training acc :  0.9760196  Validation acc:  0.8771999


In [None]:
# Store the trained weights on Drive (.h5 file); only weights are written,
# the model has to be re-created in code before loading them.
model.save_weights('drive/My Drive/Colab_files/model_100_2.h5')

In [None]:
# Print the layer list and parameter counts of the model.
model.summary()

Model: "my_model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      multiple                  1500000   
_________________________________________________________________
conv1d_2 (Conv1D)            multiple                  268928    
_________________________________________________________________
dropout_3 (Dropout)          multiple                  0         
_________________________________________________________________
batch_normalization_2 (Batch multiple                  512       
_________________________________________________________________
gru_2 (GRU)                  multiple                  99072     
_________________________________________________________________
conv1d_3 (Conv1D)            multiple                  96064     
_________________________________________________________________
dropout_4 (Dropout)          multiple                  0

In [None]:
# Forward pass on the first two training reviews.
# NOTE(review): presumably this call is here to build the variables of a
# freshly constructed model before load_weights() in the next cell — confirm.
model(xtrain[:2,:])

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.5],
       [0.5]], dtype=float32)>

In [None]:
# Restore the weights written by the save_weights cell above.
model.load_weights('drive/My Drive/Colab_files/model_100_2.h5')

In [None]:
# Evaluate on the test set. All four metrics are updated from a single forward
# pass per batch instead of running inference over the whole test set once per
# metric.
eval_metrics = (
    tf.keras.metrics.AUC(),
    tf.keras.metrics.BinaryAccuracy(),
    tf.keras.metrics.Precision(),
    tf.keras.metrics.Recall(),
)

for inputs, outputs in test:
  predictions = model(inputs, training=False)  # inference mode: no dropout
  for metric in eval_metrics:
    metric.update_state(outputs,predictions)

# Same order as in eval_metrics.
auc, acc, pre, recall = (m.result().numpy() for m in eval_metrics)

print('AUC: ',auc)
print('Accuracy: ',acc)
print('Precision: ',pre)
print('Recall: ',recall)

AUC:  0.8973719
Accuracy:  0.8771999
Precision:  0.8849232
Recall:  0.8668
