In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import *
import tensorflow.keras
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.math import l2_normalize
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# Mount Google Drive; later cells read and write files under 'drive/My Drive/...'.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


# Loading Dataset

In [None]:
# Load the IMDB train/test splits with the vocabulary capped via num_words=5000.
train_split, test_split = imdb.load_data(num_words=5000)
xtrain, ytrain = train_split
xtest, ytest = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
# Word -> index vocabulary shipped with the IMDB dataset; used further down to
# fill the embedding matrix.
word_idx = imdb.get_word_index()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [None]:
# Hyper-parameters shared by preprocessing and the model.
maxlen = 500          # fixed sequence length passed to pad_sequences
vocab_size = 5000     # number of rows of the embedding matrix
emb_dimension = 300   # width of each embedding vector

# Bring both splits to the same fixed length; padding goes at the end ('post').
pad_options = dict(maxlen=maxlen, padding='post')
xtrain = pad_sequences(xtrain, **pad_options)
xtest = pad_sequences(xtest, **pad_options)

# Loading Google's pretrained word2vec model

In [None]:
!wget -P download -c "https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz"
from gensim.models import KeyedVectors
word2vec = KeyedVectors.load_word2vec_format('download/GoogleNews-vectors-negative300.bin.gz', binary=True)

--2020-11-24 01:51:25--  https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.142.102
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.142.102|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1647046227 (1.5G) [application/x-gzip]
Saving to: ‘download/GoogleNews-vectors-negative300.bin.gz’


2020-11-24 01:51:48 (67.7 MB/s) - ‘download/GoogleNews-vectors-negative300.bin.gz’ saved [1647046227/1647046227]



# Embedding Layer

In [None]:
# Build the initial embedding matrix: row t holds the word2vec vector of the
# word that appears as token id t in xtrain/xtest. Rows without a pretrained
# vector stay all-zero.
#
# imdb.load_data() shifts every raw word index by `index_from` (default 3; the
# ids 0/1/2 are reserved for padding / start / out-of-vocabulary), whereas
# imdb.get_word_index() returns the *unshifted* indices. The same offset has to
# be applied here, otherwise every token would be initialised with the vector
# of a different word.
index_from = 3  # must match the index_from used by imdb.load_data (default 3)
embedding_matrix = np.zeros((vocab_size, emb_dimension))
for word, raw_index in word_idx.items():
    token_id = raw_index + index_from
    if token_id < vocab_size and word in word2vec.vocab:
        embedding_matrix[token_id] = word2vec.word_vec(word)

In [None]:
# Persist the embedding matrix to Drive.
emb_cache_path = 'drive/My Drive/imdb/emb.npy'
np.save(emb_cache_path, embedding_matrix)

In [None]:
# Reload the embedding matrix from the file stored on Drive.
emb_cache_path = 'drive/My Drive/imdb/emb.npy'
embedding_matrix = np.load(emb_cache_path)

In [None]:
# Token ids are integer indices into the embedding table, so they are kept as
# int32 instead of being converted to float32 (which only forces the Embedding
# layer to cast them back to integers).
xtrain = np.asarray(xtrain, dtype='int32')
xtest = np.asarray(xtest, dtype='int32')

# Labels are compared with the sigmoid output of the model, hence float32.
ytrain = np.asarray(ytrain, dtype='float32')
ytest = np.asarray(ytest, dtype='float32')

# Pair every padded review with its label.
train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
test = tf.data.Dataset.from_tensor_slices((xtest, ytest))

In [None]:
# Group both datasets into mini-batches of 128 examples.
# NOTE: `train`/`test` are rebound in place, so executing this cell a second
# time would batch the already-batched datasets again.
BATCH_SIZE = 128
train, test = train.batch(BATCH_SIZE), test.batch(BATCH_SIZE)

# Model Architecture

In [None]:
class MyModel(Model):
  """Binary classifier: Embedding -> Conv1D -> Dropout -> BatchNorm -> GRU -> Dense(1).

  Args:
    vocab_size: number of rows of the embedding table.
    emb_dimension: size of each embedding vector.
    embedding_matrix: initial embedding weights, passed to the Embedding layer
      via `weights=[...]`; the embeddings remain trainable.
    filters: number of output channels of the Conv1D layer.
    kernel_size: window width of the Conv1D layer.
  """

  def __init__(self,vocab_size,emb_dimension,embedding_matrix,filters,kernel_size):
    super().__init__()
    self.emb=Embedding(vocab_size,emb_dimension,weights=[embedding_matrix],trainable=True)
    self.conv=Conv1D(filters=filters, kernel_size=kernel_size,activation='relu',kernel_regularizer=tf.keras.regularizers.l2(l=0.01))
    self.drop=Dropout(0.5)
    self.bn=BatchNormalization()
    # NOTE: despite the attribute name this is a GRU layer; the name is kept so
    # that existing references to `model.lstm` keep working.
    self.lstm=GRU(128)
    self.dense=Dense(1,activation='sigmoid')

  def call(self,input,training=None):
    """Run the forward pass.

    Args:
      input: batch of padded token-id sequences. (The name shadows the builtin
        but is kept for backward compatibility with keyword callers.)
      training: forwarded explicitly to Dropout and BatchNormalization so they
        switch between training and inference behaviour. `None` lets Keras
        decide from the surrounding call context.

    Returns:
      The sigmoid output of the final Dense(1) layer, one value per example.
    """
    x=self.emb(input)
    x=self.conv(x)
    x=self.drop(x,training=training)
    x=self.bn(x,training=training)
    x=self.lstm(x)
    return self.dense(x)

In [None]:
# Instantiate the classifier with 128 convolution filters of width 7.
model = MyModel(
    vocab_size=vocab_size,
    emb_dimension=emb_dimension,
    embedding_matrix=embedding_matrix,
    filters=128,
    kernel_size=7,
)

In [None]:
# Two independent accuracy trackers: one for the training data, one for the
# validation data.
train_acc_metric, val_acc_metric = (
    tf.keras.metrics.BinaryAccuracy() for _ in range(2)
)

In [None]:
# Training configuration; model_training() reads these module-level names.
total_epochs = 8
epoch_losses = []  # list reserved for per-epoch loss values
loss = tf.keras.losses.BinaryCrossentropy()
optimizer = RMSprop(learning_rate=0.01)

In [None]:
def model_training():
  """Train the global `model` on `train` for `total_epochs` epochs.

  Reads the module-level `model`, `train`, `test`, `loss`, `optimizer`,
  `total_epochs`, `train_acc_metric` and `val_acc_metric`. After every epoch
  it appends the mean batch loss to `epoch_losses` and prints the training and
  validation accuracy.
  """
  for epoch in range(total_epochs):
    batch_losses=[]
    for inputs, outputs in train:
      with tf.GradientTape() as tape:
        # training=True activates Dropout and lets BatchNormalization use (and
        # update) batch statistics during optimisation.
        predictions = model(inputs, training=True)
        # Keras losses expect (y_true, y_pred) in exactly this order.
        current_loss = loss(outputs, predictions)
        # Layer regularisers (the Conv1D L2 penalty) are only collected in
        # model.losses; a hand-written loop has to add them itself.
        regularization_losses = model.losses
        if regularization_losses:
          current_loss += tf.add_n(regularization_losses)
      # The backward pass is computed after leaving the tape context so that it
      # is not recorded by the tape.
      trainable_vars = model.trainable_variables
      grads = tape.gradient(current_loss, trainable_vars)
      optimizer.apply_gradients(zip(grads, trainable_vars))
      batch_losses.append(current_loss)

      # Reuse the predictions of the training step instead of running a second
      # forward pass per batch.
      train_acc_metric.update_state(outputs, predictions)

    for inputs, outputs in test:
      val_acc_metric.update_state(outputs, model(inputs, training=False))

    # Record the average loss of this epoch.
    epoch_losses.append(float(tf.reduce_mean(batch_losses)))

    train_acc = train_acc_metric.result().numpy()
    train_acc_metric.reset_states()

    val_acc = val_acc_metric.result().numpy()
    val_acc_metric.reset_states()

    print("epoch ",epoch,", Training acc : " , train_acc,end="")
    print("  Validation acc: ",val_acc)

In [None]:
# Run training with the current global `total_epochs` and `optimizer`.
model_training()

epoch  0 , Training acc :  0.50762916  Validation acc:  0.5050303
epoch  1 , Training acc :  0.5069675  Validation acc:  0.5002232
epoch  2 , Training acc :  0.6597099  Validation acc:  0.7083705
epoch  3 , Training acc :  0.85416937  Validation acc:  0.7364716
epoch  4 , Training acc :  0.8882494  Validation acc:  0.82190686
epoch  5 , Training acc :  0.91719544  Validation acc:  0.8573262
epoch  6 , Training acc :  0.93346626  Validation acc:  0.8605787
epoch  7 , Training acc :  0.93669486  Validation acc:  0.8657206


In [None]:
# Second stage: 5 more epochs with a new RMSprop instance at half the initial
# learning rate (0.01 -> 0.005).
total_epochs = 5
optimizer = RMSprop(learning_rate=0.005)
model_training()

epoch  0 , Training acc :  0.9466598  Validation acc:  0.8736926
epoch  1 , Training acc :  0.9531171  Validation acc:  0.875279
epoch  2 , Training acc :  0.9588967  Validation acc:  0.87608415
epoch  3 , Training acc :  0.96104914  Validation acc:  0.86260366
epoch  4 , Training acc :  0.9637596  Validation acc:  0.8741789


In [None]:
# Third stage: 5 more epochs with a new RMSprop instance at learning rate 0.0001.
total_epochs = 5
optimizer = RMSprop(learning_rate=0.0001)
model_training()

epoch  0 , Training acc :  0.9755084  Validation acc:  0.87727994
epoch  1 , Training acc :  0.9758769  Validation acc:  0.8777184
epoch  2 , Training acc :  0.9761161  Validation acc:  0.87779814
epoch  3 , Training acc :  0.9762357  Validation acc:  0.87807715
epoch  4 , Training acc :  0.9764748  Validation acc:  0.8782366


In [None]:
# Store the trained weights on Drive.
weights_path = 'drive/My Drive/Colab_files/model_100.h5'
model.save_weights(weights_path)

In [None]:
# Print the layer list and parameter counts of the trained model.
model.summary()

Model: "my_model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      multiple                  1500000   
_________________________________________________________________
conv1d_2 (Conv1D)            multiple                  268928    
_________________________________________________________________
dropout (Dropout)            multiple                  0         
_________________________________________________________________
batch_normalization_2 (Batch multiple                  512       
_________________________________________________________________
gru_2 (GRU)                  multiple                  99072     
_________________________________________________________________
dense_2 (Dense)              multiple                  129       
Total params: 1,868,641
Trainable params: 1,868,385
Non-trainable params: 256
____________________________________________

In [None]:
# Run two samples through the model before restoring the weights -- presumably
# so that the model's variables exist when load_weights() is called.
sample_batch = xtrain[:2, :]
model(sample_batch)
weights_path = 'drive/My Drive/Colab_files/model_100.h5'
model.load_weights(weights_path)

In [None]:
# Evaluate on the test set. All four metrics are updated from one forward pass
# per batch instead of iterating over the whole test set once per metric.
eval_metrics = {
    'auc': tf.keras.metrics.AUC(),
    'acc': tf.keras.metrics.BinaryAccuracy(),
    'pre': tf.keras.metrics.Precision(),
    'recall': tf.keras.metrics.Recall(),
}

for inputs, outputs in test:
  # Inference mode: no dropout, BatchNormalization uses its moving statistics.
  predictions = model(inputs, training=False)
  for metric in eval_metrics.values():
    metric.update_state(outputs, predictions)

auc = eval_metrics['auc'].result().numpy()
acc = eval_metrics['acc'].result().numpy()
pre = eval_metrics['pre'].result().numpy()
recall = eval_metrics['recall'].result().numpy()

print('AUC: ',auc)
print('Accuracy: ',acc)
print('Precision: ',pre)
print('Recall: ',recall)

AUC:  0.9112447
Accuracy:  0.87815684
Precision:  0.88131356
Recall:  0.87384
