In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,SimpleRNN,Dense

In [None]:
## Load the IMDB Dataset
from tensorflow.keras.datasets import imdb

In [None]:
## Loading IMDB Dataset
voc_size=10000
(x_train, y_train), (x_test, y_test)=imdb.load_data(num_words=voc_size)

In [None]:
(x_train.shape, y_train.shape), (x_test.shape, y_test.shape)

In [69]:
## Inspect a sample review and sample label
sample_review=x_train[0]
sample_label=y_train[0]
print(f"Sample review (as integers):{sample_review}") ## 1 represents start of sequence, 2 represents -Out of vocabulary , >=3 represents the actual word indices.
print(f'Sample label: {sample_label}')


Sample review (as integers):[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
Sample label: 1


In [70]:
len(sample_review)

218

In [76]:
## Understand the actual sentence based on word index  (Not an ideal way)
list=[]
for key,items in imdb.get_word_index().items():
  for i in range(len(sample_review)):
    if items==sample_review[i]-3:
      list.append(key)
      sent=' '.join(list)

In [77]:
sent ## (Not an ideal way, doesnt seem to give the actual text)

"much being being with with could shared robert lovely father profile watch have have story story i i i life an as as as myself imagine location it it it it it it real is if same same throughout but end of of everyone fishing were were came boy's fly up us should after true suited actor such remarks think think done part little all all all scottish played played soon really really great this this this paul two you you you you a a a praised children children at at everyone's don't also connection now stars so so so so someone's amazing amazing amazing would director there there list out bought for for for direction play whole whole be them they they they they just just just just the the the the the the the the the the the the the the the recommend island from been was was was was was was was was was was was and and and and and and and and and cried brilliant brilliant brilliant to to to cry fact these film film film film film film released what what witty sad say scenery that that that 

In [None]:
## Lets try the other way
word_index=imdb.get_word_index()
#word_index
reverse_word_index = {value: key for key, value in word_index.items()}
reverse_word_index

In [None]:
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in sample_review])
decoded_review

In [None]:
# pad sequences
from tensorflow.keras.utils import pad_sequences
x_train_padded=pad_sequences(x_train, maxlen=500,padding='pre')
x_test_padded=pad_sequences(x_test, maxlen=500,padding='pre')

In [None]:
x_train_padded[0]

In [None]:
# Embedding
voc_size=10000
dim=10
from tensorflow.keras.models import Sequential
model=Sequential()
model.add(Embedding(voc_size,dim))
model.add(SimpleRNN(128,activation='relu'))
model.add(Dense(1,activation='sigmoid'))


In [78]:
model.summary()

In [79]:
model.compile('adam','binary_crossentropy',metrics=['accuracy'])

In [80]:
## Create an instance of early stopping callback
from tensorflow.keras.callbacks import EarlyStopping
early_stopping=EarlyStopping(monitor='val_loss',patience=5,restore_best_weights=True)

In [82]:
## Train the model with Earlystopping
model.fit(x_train_padded,y_train,epochs=10,batch_size=32,validation_split=0.2,callbacks=[early_stopping])

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 143ms/step - accuracy: 0.5563 - loss: 0.7619 - val_accuracy: 0.6016 - val_loss: 0.6595
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 138ms/step - accuracy: 0.6363 - loss: 0.6916 - val_accuracy: 0.6422 - val_loss: 0.6289
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 145ms/step - accuracy: 0.6804 - loss: 0.6417 - val_accuracy: 0.6762 - val_loss: 0.6063
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 145ms/step - accuracy: 0.6721 - loss: 393916482322432.0000 - val_accuracy: 0.5766 - val_loss: 0.6718
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 138ms/step - accuracy: 0.7485 - loss: 0.5166 - val_accuracy: 0.7216 - val_loss: 0.5841
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 138ms/step - accuracy: 0.7735 - loss: 806832320.0000 - val_accuracy: 0.7634 - val

<keras.src.callbacks.history.History at 0x79ecd0f61be0>

In [83]:
# Save model file
model.save('simple_rnn_imdb.h5')

