## IMDB Dataset

In [None]:
#  Import Libraries
import numpy as np
import tensorflow as tf

from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [6]:
# Load the IMDB dataset
max_features = 10000  # vocqabulary size
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# Print the shape of the data
print(f"Training data shape: {X_train.shape}, Labels shape: {y_train.shape}")
print(f"Test data shape: {X_test.shape}, labels shape: {y_test.shape}")

Training data shape: (25000,), Labels shape: (25000,)
Test data shape: (25000,), labels shape: (25000,)


In [10]:
# Explore the reviews
sample_review = X_train[0]
sample_label = y_train[0]

# Decode the review
print(f"Sample review (encoded): {sample_review}")
print(f"Sample label (encoded): {sample_label}")

Sample review (encoded): [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
Sample label (encoded): 1


In [16]:
# Mapping of words index back to words
word_index = imdb.get_word_index()
reverse_index = {value: key for key, value in word_index.items()}

# Decode the sample review
decoded_review = " ".join([reverse_index.get(i - 3, "?") for i in sample_review])
print(f"Decoded sample review: {decoded_review}")

Decoded sample review: ? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they 

In [24]:
# Import libraries
from tensorflow.keras.preprocessing import sequence

max_length = 500  # maximum length of sentences

# List of train and test data
train_test = [X_train, X_test]
for data in train_test:
  # Pad sequences to the same length
  data = sequence.pad_sequences(data, maxlen=max_length)
  print(f"Padded data shape: {data.shape}")

Padded data shape: (25000, 500)
Padded data shape: (25000, 500)


### Train RNN Model

In [27]:
# Create the RNN model
model = Sequential()
model.add(Embedding(max_features, 128, input_length=max_length)) # Embedding layer
model.add(SimpleRNN(128, activation="relu"))
model.add(Dense(1, activation="sigmoid"))  # Output layer

# Compile the model
model.compile(
  optimizer="adam",
  loss="binary_crossentropy",
  metrics=["accuracy"]
)

# Model summary
model.summary()



In [28]:
# Create an instance of EarlyStopping
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(
  monitor="val_loss",
  patience=5,
  restore_best_weights=True
)

In [29]:
# Train the model with early stopping
history = model.fit(
  X_train, y_train,
  epochs=10,
  batch_size=32,
  validation_split=0.2,
  callbacks=[early_stop]
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 85ms/step - accuracy: 0.5708 - loss: 22292.3340 - val_accuracy: 0.6338 - val_loss: 0.6316
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 83ms/step - accuracy: 0.7055 - loss: 0.5980 - val_accuracy: 0.6750 - val_loss: 0.6019
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 82ms/step - accuracy: 0.6835 - loss: 48450.1133 - val_accuracy: 0.6700 - val_loss: 0.6065
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 81ms/step - accuracy: 0.7772 - loss: 0.5413 - val_accuracy: 0.7100 - val_loss: 0.5832
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 83ms/step - accuracy: 0.8196 - loss: 0.5033 - val_accuracy: 0.7198 - val_loss: 0.5684
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 84ms/step - accuracy: 0.8461 - loss: 0.4626 - val_accuracy: 0.7272 - val_loss: 0.5531
Epoch 7/

In [30]:
# Save model file
model.save("../model/rnn_imdb.h5")

