# 1. Implementing a Basic RNN Model

In [2]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable when the notebook is
# re-run from a fresh kernel (GPU kernels may still add small differences).
SEED = 42
set_random_seed(SEED)

# Loading the IMDB dataset
max_features = 10000  # Vocabulary size: keep only the 10,000 most frequent words
# Every review is padded/truncated to this many tokens. pad_sequences pads and
# truncates at the FRONT by default, so for longer reviews the LAST 500 tokens
# are kept (not the first 500).
max_len = 500

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Pad sequences to ensure uniform input length
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)

# Building the RNN model: embedding -> single SimpleRNN -> sigmoid classifier
model = Sequential()
model.add(Embedding(max_features, 32))  # Maps each word index to a 32-d vector
model.add(SimpleRNN(32))  # Returns only the final hidden state (32 units)
model.add(Dense(1, activation='sigmoid'))  # Single probability output

# Compiling the model for binary classification
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training the model; 20% of the training data is held out for validation
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

# Evaluating the model on the untouched test split
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 0us/step
Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 134ms/step - accuracy: 0.5785 - loss: 0.6656 - val_accuracy: 0.7794 - val_loss: 0.4800
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 127ms/step - accuracy: 0.8339 - loss: 0.3813 - val_accuracy: 0.8334 - val_loss: 0.3859
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 112ms/step - accuracy: 0.9186 - loss: 0.2170 - val_accuracy: 0.8400 - val_loss: 0.3750
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 117ms/step - accuracy: 0.9718 - loss: 0.0996 - val_accuracy: 0.8248 - val_loss: 0.4397
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 102ms/step - accuracy: 0.9893 - loss: 0.0478 - val_accuracy: 0.8372 - val_loss: 0.4900
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 22ms/step - accuracy: 0.8305

Explanation:

* We are using the IMDB dataset for sentiment analysis.
* The Embedding layer converts the words into vectors.
* The SimpleRNN layer processes the sequence of word embeddings.
* The Dense layer with a sigmoid activation function outputs the probability that the review is positive (values close to 0 indicate a negative review).

# 2. Stacking RNN Layers and Bi-directional RNNs

* (A) Stacked RNN

In [3]:
from tensorflow.keras.layers import SimpleRNN

# Stacked RNN: embedding -> two SimpleRNN layers -> sigmoid classifier.
# The first recurrent layer returns its full output sequence so that the
# second recurrent layer receives one vector per time step.
model = Sequential([
    Embedding(max_features, 32),
    SimpleRNN(32, return_sequences=True),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])

# Binary classification setup: Adam optimiser, cross-entropy loss, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit for 5 epochs with 20% of the training data held out for validation.
model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
)

# Score the trained model on the test split and report both metrics.
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 195ms/step - accuracy: 0.6261 - loss: 0.6152 - val_accuracy: 0.8076 - val_loss: 0.4286
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 192ms/step - accuracy: 0.8463 - loss: 0.3731 - val_accuracy: 0.8644 - val_loss: 0.3405
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 253ms/step - accuracy: 0.9062 - loss: 0.2403 - val_accuracy: 0.8292 - val_loss: 0.4081
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 224ms/step - accuracy: 0.9550 - loss: 0.1345 - val_accuracy: 0.8062 - val_loss: 0.5056
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 172ms/step - accuracy: 0.9820 - loss: 0.0617 - val_accuracy: 0.8192 - val_loss: 0.6122
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 35ms/step - accuracy: 0.8135 - loss: 0.6572
Test Loss: 0.6360751390457153, Test Accuracy: 0.8187599778175354


* (B) Bi-Directional RNN

In [4]:
from tensorflow.keras.layers import Bidirectional

# Bi-directional RNN: embedding -> SimpleRNN wrapped in Bidirectional ->
# sigmoid classifier. The wrapper runs the recurrent layer over the sequence
# in both directions.
model = Sequential([
    Embedding(max_features, 32),
    Bidirectional(SimpleRNN(32)),
    Dense(1, activation='sigmoid'),
])

# Binary classification setup: Adam optimiser, cross-entropy loss, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit for 5 epochs with 20% of the training data held out for validation.
model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
)

# Score the trained model on the test split and report both metrics.
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 120ms/step - accuracy: 0.5537 - loss: 0.6799 - val_accuracy: 0.7282 - val_loss: 0.5614
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 126ms/step - accuracy: 0.8022 - loss: 0.4568 - val_accuracy: 0.8074 - val_loss: 0.4322
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 127ms/step - accuracy: 0.8877 - loss: 0.2881 - val_accuracy: 0.8204 - val_loss: 0.4224
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 141ms/step - accuracy: 0.9476 - loss: 0.1534 - val_accuracy: 0.8248 - val_loss: 0.4542
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 123ms/step - accuracy: 0.9814 - loss: 0.0722 - val_accuracy: 0.8154 - val_loss: 0.5442
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 28ms/step - accuracy: 0.8088 - loss: 0.5571
Test Loss: 0.5470931529998779, Test Accuracy: 0.8123999834060669


Explanation:

* Stacked RNN: This model has two RNN layers stacked on top of each other, which helps in capturing more complex patterns in the sequences.
* Bi-Directional RNN: This model uses a bi-directional RNN to process the sequence from both directions (past and future), which helps in better understanding the context of each word.

# 3. Exploring Hybrid Architectures

In [5]:
from tensorflow.keras.layers import Conv1D, MaxPooling1D

# Hybrid CNN + RNN: embedding -> Conv1D/MaxPooling1D feature extractor ->
# SimpleRNN -> sigmoid classifier.
model = Sequential([
    Embedding(max_features, 32),
    # Convolution over windows of 3 consecutive embeddings, 64 filters.
    Conv1D(64, kernel_size=3, activation='relu'),
    # Pooling with window 2 shortens the sequence passed to the RNN.
    MaxPooling1D(pool_size=2),
    # Recurrent layer models the order of the pooled features.
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])

# Binary classification setup: Adam optimiser, cross-entropy loss, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit for 5 epochs with 20% of the training data held out for validation.
model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
)

# Score the trained model on the test split and report both metrics.
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 66ms/step - accuracy: 0.5597 - loss: 0.6676 - val_accuracy: 0.8142 - val_loss: 0.4251
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 68ms/step - accuracy: 0.8528 - loss: 0.3504 - val_accuracy: 0.8578 - val_loss: 0.3480
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 64ms/step - accuracy: 0.9116 - loss: 0.2291 - val_accuracy: 0.8426 - val_loss: 0.3585
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 63ms/step - accuracy: 0.9457 - loss: 0.1563 - val_accuracy: 0.8668 - val_loss: 0.3991
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 64ms/step - accuracy: 0.9658 - loss: 0.1014 - val_accuracy: 0.8742 - val_loss: 0.3800
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 13ms/step - accuracy: 0.8566 - loss: 0.4122
Test Loss: 0.41303208470344543, Test Accuracy: 0.8555200099945068


Explanation:

* The CNN layer extracts local features from the sequences.
* The MaxPooling1D layer halves the sequence length by keeping only the strongest activation in each window of two time steps, so the RNN has fewer steps to process.
* The RNN layer processes the sequence over time.
* This hybrid architecture can capture both local and temporal patterns in the data.

---

# Report:

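* Basic RNN: The basic RNN model performed reasonably well on the IMDB dataset (test accuracy ≈ 0.83) but has limitations in capturing long-term dependencies, because simple RNNs struggle with vanishing gradients on long sequences. Training accuracy climbed to ≈ 0.99 while validation accuracy stayed near 0.84, which indicates overfitting after the first few epochs.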

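* Stacked RNN: Stacking a second RNN layer can in principle capture more complex patterns, but in this run it did not improve performance: test accuracy was ≈ 0.819, slightly below the basic RNN (≈ 0.83). Validation accuracy peaked at 0.864 in epoch 2 and then declined as the model overfit. Adding the extra layer also increased training time (roughly 54–79 s per epoch versus 32–44 s for the basic model).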

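* Bi-Directional RNN: The bi-directional RNN processes the sequences in both directions (past and future), which can lead to better context understanding. In this run, however, it did not yield a performance boost: test accuracy was ≈ 0.812, slightly below both the basic and the stacked RNN, and it showed the same overfitting pattern (training accuracy ≈ 0.98 versus validation accuracy ≈ 0.82).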

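* Hybrid CNN + RNN: This model outperformed the previous ones (test accuracy ≈ 0.856, test loss ≈ 0.413) by combining the feature extraction power of CNNs with the sequential modeling capabilities of RNNs. It was also the fastest to train (about 20 s per epoch), because pooling halves the sequence length seen by the RNN. Its performance depends on hyperparameters such as the kernel size and pooling window, which were not tuned here and would need careful tuning for optimal results.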