In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout

# 1️⃣ Load dataset
max_features = 10000   # number of words to consider
maxlen = 100           # sequence length

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 2️⃣ Pad sequences
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)

# 3️⃣ Build DEEP RNN (Stacked GRUs)
model = Sequential([
    Embedding(max_features, 128, input_length=maxlen),

    # First GRU layer (returns sequences for stacking)
    GRU(64, return_sequences=True, dropout=0.3, recurrent_dropout=0.3),

    # Second GRU layer
    GRU(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.3),

    # Fully connected output layer
    Dense(64, activation='relu'),
    Dropout(0.5),

    Dense(1, activation='sigmoid')
])

# 4️⃣ Compile
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# 5️⃣ Train
history = model.fit(X_train, y_train,
                    epochs=5,
                    batch_size=64,
                    validation_data=(X_test, y_test))

# 6️⃣ Evaluate
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.3f}")

Epoch 1/5




[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 50ms/step - accuracy: 0.7296 - loss: 0.5284 - val_accuracy: 0.8030 - val_loss: 0.4301
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 49ms/step - accuracy: 0.8434 - loss: 0.3774 - val_accuracy: 0.7746 - val_loss: 0.4916
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 49ms/step - accuracy: 0.8815 - loss: 0.3013 - val_accuracy: 0.8468 - val_loss: 0.3758
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 49ms/step - accuracy: 0.9113 - loss: 0.2348 - val_accuracy: 0.8496 - val_loss: 0.3584
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 50ms/step - accuracy: 0.9316 - loss: 0.1831 - val_accuracy: 0.8472 - val_loss: 0.4024
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.8472 - loss: 0.4024
Test Accuracy: 0.847


> Performance Improvements

> Stacking multiple GRU layers → learns hierarchical sequence patterns
> Dropout → reduces overfitting
> ReLU dense layer → adds nonlinearity
> Adam optimizer → faster convergence

 Deep SimpleRNN Model (Stacked RNN)
 

In [2]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout

# 1️⃣ Load dataset
max_features = 10000   # vocabulary size
maxlen = 100           # sequence length

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 2️⃣ Pad sequences (same length)
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)

# 3️⃣ Build DEEP RNN Model
model = Sequential([
    Embedding(max_features, 128, input_length=maxlen),

    # First RNN layer (returns sequence for stacking)
    SimpleRNN(64, return_sequences=True, dropout=0.3, recurrent_dropout=0.3),

    # Second RNN layer
    SimpleRNN(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.3),

    # Fully connected layer
    Dense(64, activation='relu'),
    Dropout(0.5),

    # Output layer
    Dense(1, activation='sigmoid')
])

# 4️⃣ Compile
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# 5️⃣ Train
history = model.fit(X_train, y_train,
                    epochs=5,
                    batch_size=64,
                    validation_data=(X_test, y_test))

# 6️⃣ Evaluate
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.3f}")

Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 19ms/step - accuracy: 0.5056 - loss: 0.7233 - val_accuracy: 0.5006 - val_loss: 0.6937
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 18ms/step - accuracy: 0.4999 - loss: 0.6999 - val_accuracy: 0.5000 - val_loss: 0.6933
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 19ms/step - accuracy: 0.5021 - loss: 0.6951 - val_accuracy: 0.5000 - val_loss: 0.6933
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 19ms/step - accuracy: 0.4994 - loss: 0.6946 - val_accuracy: 0.4991 - val_loss: 0.6984
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 19ms/step - accuracy: 0.5033 - loss: 0.6955 - val_accuracy: 0.5000 - val_loss: 0.6932
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.5000 - loss: 0.6932
Test Accuracy: 0.500


# conclusion 

The Deep SimpleRNN model shows lower validation accuracy because it overfits and cannot capture long-term dependencies.

Switching to Deep GRU or LSTM provides better generalization, higher validation performance, and more stable training.