In [9]:
import numpy as np
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Dense, Embedding, Input, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Load the IMDB reviews, keeping only the `max_features` most frequent words
# in the vocabulary.
max_features = 10000
train_split, test_split = imdb.load_data(num_words=max_features)
x_train, y_train = train_split
x_test, y_test = test_split

# Report the array shapes of both splits as a quick sanity check
print(
    f"x_train shape: {x_train.shape}, y_train shape: {y_train.shape}",
    f"x_test shape: {x_test.shape}, y_test shape: {y_test.shape}",
    sep="\n",
)

In [None]:
# Look at the first training example.
# Each review is stored as a sequence of integer word indices (not one-hot
# vectors); the vocabulary was capped at max_features words when loading.
sample_review, sample_label = x_train[0], y_train[0]
print(
    f"Sample review (word indices): {sample_review}",
    f"Sample label: {sample_label}",
    sep="\n",
)

In [None]:
# Build an index -> word lookup by inverting Keras' word -> index vocabulary
word_index = imdb.get_word_index()
reverse_word_index = {index: word for word, index in word_index.items()}

# Preview only a handful of entries; echoing the entire vocabulary dict would
# flood the cell output and bloat the saved notebook.
list(reverse_word_index.items())[:10]

In [None]:
# Turn the sample review back into text.
# The loaded indices are shifted relative to the vocabulary returned by
# get_word_index() (load_data's default index_from=3 reserves the lowest ids
# for special tokens), so subtract the offset before the lookup; anything
# without a vocabulary entry is rendered as '?'.
index_offset = 3
decoded_tokens = (
    reverse_word_index.get(token_id - index_offset, '?')
    for token_id in sample_review
)
decode_review = ' '.join(decoded_tokens)
decode_review

In [None]:
# Give every review the same length so the data can be fed to the network as
# a rectangular array; padding is added at the front ('pre') of each review.
max_len = 500  # number of tokens per review after padding
x_train, x_test = (
    pad_sequences(split, maxlen=max_len, padding='pre')
    for split in (x_train, x_test)
)

In [19]:
# Build the RNN classifier (training happens in a later cell):
#   Input -> Embedding -> SimpleRNN -> Dense(sigmoid)
model = Sequential()

# Declare the input shape with an explicit Input layer instead of the
# Embedding `input_length` argument: that argument is deprecated in Keras 3
# (it is ignored with a warning), which would leave the model unbuilt until
# fit() and make model.summary() unable to report shapes or parameter counts.
model.add(Input(shape=(max_len,)))

# Map each of the max_features word indices to a 128-dimensional dense vector
model.add(Embedding(max_features, 128))

# Recurrent layer with 128 units; its output is the 128-dim final hidden state
model.add(SimpleRNN(128, activation='tanh'))

# Single sigmoid unit -> a value in (0, 1) for the binary label
model.add(Dense(1, activation='sigmoid'))



In [20]:
# Print the layer-by-layer summary of the model built above
model.summary()

In [21]:
# Early-stopping callback: stop training once the validation loss has failed
# to improve for 5 consecutive epochs, and restore the weights from the epoch
# with the best validation loss.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)


In [22]:
# Configure training: Adam optimizer, binary cross-entropy loss (matches the
# single sigmoid output), and accuracy reported as the tracked metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [23]:
# Fit the model for up to 10 epochs in mini-batches of 32, holding out 20% of
# the training data for validation; the early-stopping callback may end the
# run sooner. The returned History object is kept for later inspection.
history = model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=10,
    callbacks=[early_stopping],
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 127ms/step - accuracy: 0.5851 - loss: 0.6621 - val_accuracy: 0.6314 - val_loss: 0.6295
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 126ms/step - accuracy: 0.7406 - loss: 0.5302 - val_accuracy: 0.7274 - val_loss: 0.5402
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 131ms/step - accuracy: 0.7624 - loss: 0.4955 - val_accuracy: 0.7608 - val_loss: 0.5270
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 131ms/step - accuracy: 0.7456 - loss: 0.5149 - val_accuracy: 0.7358 - val_loss: 0.5632
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 121ms/step - accuracy: 0.7707 - loss: 0.4789 - val_accuracy: 0.6380 - val_loss: 0.6446
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 127ms/step - accuracy: 0.7928 - loss: 0.4443 - val_accuracy: 0.6566 - val_loss: 0.6526
Epoch 7/10

In [24]:
# Persist the trained model to disk.
# NOTE(review): the .h5 extension selects the HDF5 format, which recent Keras
# versions flag as legacy; the filename is kept unchanged here because other
# code may load the model by this exact path -- confirm before renaming.
model.save(filepath='simple_rnn_imdb_model.h5')

