In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import *
import tensorflow.keras
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.math import l2_normalize
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Loading Dataset

In [None]:
(xtrain,ytrain),(xtest,ytest)=imdb.load_data(num_words=5000) 

In [None]:
word_idx=imdb.get_word_index() #getting vocab from imdb data

In [None]:
maxlen=500
vocab_size=5000
emb_dimension=300
xtrain=pad_sequences(xtrain,maxlen=maxlen,padding='post')
xtest=pad_sequences(xtest,maxlen=maxlen,padding='post')

# Loading Google's pretrained word2vec model

In [None]:
!wget -P download -c "https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz"
from gensim.models import KeyedVectors
word2vec = KeyedVectors.load_word2vec_format('download/GoogleNews-vectors-negative300.bin.gz', binary=True)

--2020-11-23 16:43:18--  https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.47.118
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.47.118|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1647046227 (1.5G) [application/x-gzip]
Saving to: ‘download/GoogleNews-vectors-negative300.bin.gz’


2020-11-23 16:43:44 (60.3 MB/s) - ‘download/GoogleNews-vectors-negative300.bin.gz’ saved [1647046227/1647046227]



# Embedding Layer

In [None]:
embedding_matrix = np.zeros((vocab_size,emb_dimension))
for word, i in word_idx.items():
    if word in word2vec.vocab and i<vocab_size:
        embedding_matrix[i] = word2vec.word_vec(word)

In [None]:
np.save('drive/My Drive/imdb/emb.npy',embedding_matrix)

In [None]:
embedding_matrix=np.load('drive/My Drive/imdb/emb.npy')

In [None]:
xtrain=np.asarray(xtrain).astype('float32')
xtest=np.asarray(xtest).astype('float32')
ytrain=np.asarray(ytrain).astype('float32')
ytest=np.asarray(ytest).astype('float32')

train=tf.data.Dataset.from_tensor_slices((xtrain,ytrain))
test=tf.data.Dataset.from_tensor_slices((xtest,ytest))   

In [None]:
train=train.batch(128)
test=test.batch(128)

# Model Architecture

In [None]:
class MyModel(Model):

  def __init__(self,vocab_size,emb_dimension,embedding_matrix,filters,kernel_size):
    super(MyModel, self).__init__()
    self.emb=Embedding(vocab_size,emb_dimension,weights=[embedding_matrix],trainable=True)
    self.conv1=Conv1D(filters=filters[0], kernel_size=kernel_size[0],activation='relu',kernel_regularizer=tf.keras.regularizers.l2(l=0.01))
    self.conv2=Conv1D(filters=filters[1], kernel_size=kernel_size[1],activation='relu',kernel_regularizer=tf.keras.regularizers.l2(l=0.01))
    self.drop=Dropout(0.5)
    self.bn=BatchNormalization()
    
    self.lstm=GRU(128)
    self.dense=Dense(1,activation='sigmoid')

  def call(self,input):
    x=self.emb(input)
    x=self.conv1(x)
    x=self.conv2(x)
    x=self.drop(x)
    x=self.bn(x)
    x=self.lstm(x)
    x=self.dense(x)
    return x

In [None]:
model=MyModel(vocab_size=vocab_size,emb_dimension=emb_dimension,embedding_matrix=embedding_matrix,filters=[128,64],kernel_size=[7,5])

In [None]:
train_acc_metric = tf.keras.metrics.BinaryAccuracy()
val_acc_metric = tf.keras.metrics.BinaryAccuracy()

In [None]:
loss=tf.keras.losses.BinaryCrossentropy()
optimizer=RMSprop(learning_rate=0.01)
epoch_losses = []
total_epochs=12

In [None]:
def model_training():
  for epoch in range(total_epochs): 
    batch_losses=[] 
    for inputs, outputs in train:
      with tf.GradientTape() as tape:
        current_loss = loss(model(inputs), outputs) 
        grads=tape.gradient(current_loss, model.trainable_variables) 
      optimizer.apply_gradients(zip(grads, model.trainable_variables))
      batch_losses.append(current_loss) 
    
      train_acc_metric.update_state(outputs,model(inputs))
    
    for inputs, outputs in test:
      val_acc_metric.update_state(outputs,model(inputs))
  
    train_acc = train_acc_metric.result().numpy()
    train_acc_metric.reset_states()

    val_acc = val_acc_metric.result().numpy()
    val_acc_metric.reset_states()

    print("epoch ",epoch,", Training acc : " , train_acc,end="")
    print("  Validation acc: ",val_acc)

In [None]:
model_training()

epoch  0 , Training acc :  0.49964926  Validation acc:  0.49973693
epoch  1 , Training acc :  0.49648437  Validation acc:  0.50026304
epoch  2 , Training acc :  0.5020249  Validation acc:  0.50026304
epoch  3 , Training acc :  0.71426976  Validation acc:  0.7214684
epoch  4 , Training acc :  0.74866074  Validation acc:  0.728683
epoch  5 , Training acc :  0.7147321  Validation acc:  0.75685585
epoch  6 , Training acc :  0.7682956  Validation acc:  0.7571508
epoch  7 , Training acc :  0.7925622  Validation acc:  0.7684949
epoch  8 , Training acc :  0.79846936  Validation acc:  0.7612724
epoch  9 , Training acc :  0.79870856  Validation acc:  0.77154016
epoch  10 , Training acc :  0.80789226  Validation acc:  0.77265626
epoch  11 , Training acc :  0.8101881  Validation acc:  0.77566963


In [None]:
optimizer=RMSprop(learning_rate=0.01)
total_epochs=10
model_training()

epoch  0 , Training acc :  0.79961735  Validation acc:  0.7688217
epoch  1 , Training acc :  0.6842873  Validation acc:  0.52149236
epoch  2 , Training acc :  0.5352997  Validation acc:  0.5208466
epoch  3 , Training acc :  0.64019454  Validation acc:  0.73656726
epoch  4 , Training acc :  0.7894372  Validation acc:  0.75754946
epoch  5 , Training acc :  0.78058034  Validation acc:  0.768487
epoch  6 , Training acc :  0.79493785  Validation acc:  0.77020884
epoch  7 , Training acc :  0.80153066  Validation acc:  0.77299905
epoch  8 , Training acc :  0.8006617  Validation acc:  0.7618383
epoch  9 , Training acc :  0.79204404  Validation acc:  0.7495217


In [None]:
model.save_weights('drive/My Drive/Colab_files/model_100_3.h5')

In [None]:
model.summary()

Model: "my_model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      multiple                  1500000   
_________________________________________________________________
conv1d_4 (Conv1D)            multiple                  268928    
_________________________________________________________________
conv1d_5 (Conv1D)            multiple                  41024     
_________________________________________________________________
dropout_2 (Dropout)          multiple                  0         
_________________________________________________________________
batch_normalization_2 (Batch multiple                  256       
_________________________________________________________________
gru_2 (GRU)                  multiple                  74496     
_________________________________________________________________
dense_2 (Dense)              multiple                  1

In [None]:
model(xtrain[:2,:])

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.5],
       [0.5]], dtype=float32)>

In [None]:
model.load_weights('drive/My Drive/Colab_files/model_100_3.h5')

In [None]:
metric = tf.keras.metrics.AUC()
for inputs, outputs in test:
  metric.update_state(outputs,model(inputs))
auc=metric.result().numpy()

metric = tf.keras.metrics.BinaryAccuracy()
for inputs, outputs in test:
  metric.update_state(outputs,model(inputs))
acc=metric.result().numpy()

metric = tf.keras.metrics.Precision()
for inputs, outputs in test:
  metric.update_state(outputs,model(inputs))
pre=metric.result().numpy()

metric = tf.keras.metrics.Recall()
for inputs, outputs in test:
  metric.update_state(outputs,model(inputs))
recall=metric.result().numpy()

print('AUC: ',auc)
print('Accuracy: ',acc)
print('Precision: ',pre)
print('Recall: ',recall)

AUC:  0.7787463
Accuracy:  0.7495217
Precision:  0.7967085
Recall:  0.67
