<a href="https://colab.research.google.com/github/bhargav23/AIML-DL-Lab/blob/main/10_RNN_for_IMDB_Review_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**10. Implement a Recurrent Neural Network for IMDB movie review classification problem.**

In [1]:
# Import necessary libraries from TensorFlow and Keras
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [2]:
# --- 1. Load and Preprocess the Data ---
# Set parameters for the dataset
max_features = 10000  # Vocabulary size (number of most frequent words to keep)
maxlen = 500          # Max length of a review (number of words)
batch_size = 32

# Load the IMDB dataset. The data is already pre-tokenized, where each word
# is replaced by an integer.
print("Loading data...")
(input_train, y_train), (input_test, y_test) = imdb.load_data(num_words=max_features)
print(len(input_train), 'train sequences')
print(len(input_test), 'test sequences')

# Preprocessing: Pad sequences to the same length.
# Reviews have different lengths, but neural networks require inputs of a fixed size.
# We pad shorter reviews with zeros and truncate longer ones.
print("Pad sequences (samples x time)")
input_train = sequence.pad_sequences(input_train, maxlen=maxlen)
input_test = sequence.pad_sequences(input_test, maxlen=maxlen)
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)

Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
input_train shape: (25000, 500)
input_test shape: (25000, 500)


In [3]:
# --- 2. Build the RNN Model ---

print("\nBuilding the RNN model...")
model = Sequential()

# The Embedding layer turns positive integers (word indices) into dense vectors of fixed size.
# This is the first layer in our model.
# Input: (batch_size, maxlen) -> Output: (batch_size, maxlen, 32)
model.add(Embedding(max_features, 32))

# The SimpleRNN layer processes the sequence of word vectors.
# It maintains a hidden state that captures information from previous timesteps.
# Input: (batch_size, maxlen, 32) -> Output: (batch_size, 32)
model.add(SimpleRNN(32))

# The Dense layer is a standard fully connected layer.
# We use a 'sigmoid' activation function for binary classification (positive/negative).
# Output: A single probability score between 0 and 1.
model.add(Dense(1, activation='sigmoid'))

# Explicitly build the model to get a proper summary before training
# The input shape is (batch_size, sequence_length), where None is a placeholder for the batch size.
model.build(input_shape=(None, maxlen))

# Display a summary of the model's architecture
model.summary()


Building the RNN model...


In [4]:
# --- 3. Compile the Model ---

print("\nCompiling the model...")
# We configure the model for training.
# 'adam' is an efficient optimizer.
# 'binary_crossentropy' is the standard loss function for binary classification.
# 'accuracy' is the metric we want to monitor.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])



Compiling the model...


In [5]:
# --- 4. Train the Model ---

print("\nTraining the model...")
# We fit the model to the training data.
# An epoch is one full pass over the entire training dataset.
# 'validation_split' reserves a portion of the training data to evaluate the
# model's performance on data it hasn't seen during training at the end of each epoch.
history = model.fit(input_train, y_train,
                    epochs=10,
                    batch_size=batch_size,
                    validation_split=0.2)


Training the model...
Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 35ms/step - accuracy: 0.6003 - loss: 0.6523 - val_accuracy: 0.7606 - val_loss: 0.5015
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 33ms/step - accuracy: 0.8406 - loss: 0.3697 - val_accuracy: 0.8366 - val_loss: 0.4122
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 33ms/step - accuracy: 0.9173 - loss: 0.2221 - val_accuracy: 0.8010 - val_loss: 0.4639
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 35ms/step - accuracy: 0.9588 - loss: 0.1255 - val_accuracy: 0.7548 - val_loss: 0.6077
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 35ms/step - accuracy: 0.9799 - loss: 0.0651 - val_accuracy: 0.7888 - val_loss: 0.6387
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 35ms/step - accuracy: 0.9942 - loss: 0.0253 - val_accuracy: 0.7622 - val_loss: 

In [6]:
# --- 5. Evaluate the Model ---

print("\nEvaluating the model on the test set...")
# After training, we evaluate the final model's performance on the unseen test data.
loss, accuracy = model.evaluate(input_test, y_test, batch_size=batch_size)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")



Evaluating the model on the test set...
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.7088 - loss: 0.7091
Test Loss: 0.6959
Test Accuracy: 0.7120
