# **PART A: Implementation of Sequence Models on the IMDB Dataset**

**Step 1**: Load and Preprocess the Dataset

In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
#Import the Tokenizer and pad_sequences modules from tensorflow.keras.preprocessing
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the dataset
file_path = '/content/drive/MyDrive/Colab Notebooks/DL LAB/IMDB Dataset.csv'
df = pd.read_csv(file_path)

# Check the first few rows of the dataset.
# A bare `df.head()` in the middle of a cell is discarded (only the last
# expression of a cell is rendered), so print it explicitly.
print(df.head())

# Check for null values
print(df.isnull().sum())

# Encode sentiment labels to binary (positive: 1, negative: 0)
label_map = {'positive': 1, 'negative': 0}
encoded = df['sentiment'].map(label_map)

# Series.map turns every value that is not a key of label_map into NaN.
# The null check above runs before the mapping, so it cannot catch this;
# fail here instead of feeding NaN labels to the models.
if encoded.isnull().any():
    unexpected = df.loc[encoded.isnull(), 'sentiment'].unique().tolist()
    raise ValueError(f"Unexpected sentiment labels: {unexpected}")
df['sentiment'] = encoded

# Split the dataset into training and testing sets (80% / 20%)
X = df['review'].values
y = df['sentiment'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenize and pad sequences
max_words = 5000  # vocabulary size kept by the tokenizer
max_len = 500     # every review is padded/truncated to this many tokens
tokenizer = Tokenizer(num_words=max_words)
# Fit on the training texts only so the vocabulary is not built from test reviews.
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

review       0
sentiment    0
dtype: int64


**Explanation:**

---


In this code, we load the IMDB dataset from the provided CSV file and preprocess the data. We encode the target variable (sentiment) into binary format (1 for positive and 0 for negative). The dataset is split into training and testing sets (80% / 20%), and the text reviews are tokenized, keeping only the 5,000 most frequent words, and converted into sequences of integers. Finally, we pad (or truncate) the sequences so that they all have the same length of 500 tokens.



**Step 2:** Simple LSTM for Sentiment Analysis

In [3]:
# Import necessary Keras libraries
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
import numpy as np

# Set a random seed for reproducibility.
# Seeding NumPy alone does not seed the Python/backend generators Keras uses
# for weight initialisation and shuffling; set_random_seed seeds Python,
# NumPy and the backend in one call.
from keras.utils import set_random_seed
set_random_seed(42)

# Define a fraction of the dataset to use
fraction = 0.1  # Use only 10% of the dataset

# Sample a smaller portion of the training and test sets
train_size = int(len(X_train_pad) * fraction)
test_size = int(len(X_test_pad) * fraction)

# Take the leading rows of each split.
X_train_sample = X_train_pad[:train_size]
y_train_sample = y_train[:train_size]
X_test_sample = X_test_pad[:test_size]
y_test_sample = y_test[:test_size]

# Build the LSTM model: embedding -> LSTM(100) -> sigmoid output
model = Sequential()
model.add(Embedding(max_words, 32, input_length=max_len))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model on the smaller dataset.
# NOTE: validation_data is the same sample that is evaluated below, so the
# final accuracy equals the last epoch's val_accuracy and is not measured on
# data unseen during model selection.
model.fit(X_train_sample, y_train_sample, epochs=3, batch_size=64, validation_data=(X_test_sample, y_test_sample))

# Evaluate the model
loss, accuracy = model.evaluate(X_test_sample, y_test_sample, verbose=0)
print(f"Accuracy: {accuracy * 100:.2f}%")




Epoch 1/3
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 722ms/step - accuracy: 0.5317 - loss: 0.6925 - val_accuracy: 0.6100 - val_loss: 0.6559
Epoch 2/3
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 713ms/step - accuracy: 0.7659 - loss: 0.5361 - val_accuracy: 0.7870 - val_loss: 0.4706
Epoch 3/3
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 690ms/step - accuracy: 0.8754 - loss: 0.3361 - val_accuracy: 0.8070 - val_loss: 0.4412
Accuracy: 80.70%


**Explanation:**


---


In this section, we build a simple LSTM model for sentiment analysis. The model consists of an embedding layer (which converts token indices into dense vectors), followed by an LSTM layer with 100 units. The final output layer is a dense layer with a sigmoid activation function to predict binary sentiment (positive or negative). The model is compiled with binary cross-entropy loss and the Adam optimizer, and we train it for 3 epochs on a 10% sample of the training data. The accuracy of the model is then evaluated on a 10% sample of the test data.



**Step 3:** Fine-tuned Custom LSTM Model with Additional Layers


In [5]:
# Import additional layers
from keras.layers import Dropout
import numpy as np

# Set a random seed for reproducibility.
# Seeding NumPy alone does not seed the Python/backend generators Keras uses
# for weight initialisation and shuffling; set_random_seed seeds Python,
# NumPy and the backend in one call.
from keras.utils import set_random_seed
set_random_seed(42)

# Define a fraction of the dataset to use
fraction = 0.1  # Use only 10% of the dataset

# Sample a smaller portion of the training and test sets
train_size = int(len(X_train_pad) * fraction)
test_size = int(len(X_test_pad) * fraction)

# Take the leading rows of each split.
X_train_sample = X_train_pad[:train_size]
y_train_sample = y_train[:train_size]
X_test_sample = X_test_pad[:test_size]
y_test_sample = y_test[:test_size]

# Build the custom LSTM model with dropout and additional LSTM layers
model_custom = Sequential()
model_custom.add(Embedding(max_words, 64, input_length=max_len))
# return_sequences=True emits the full sequence so a second LSTM can be stacked on top.
model_custom.add(LSTM(128, return_sequences=True))
model_custom.add(Dropout(0.3))
model_custom.add(LSTM(64))
model_custom.add(Dense(1, activation='sigmoid'))

# Compile the model
model_custom.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the custom model on the smaller dataset.
# NOTE: validation_data is the same sample that is evaluated below, so the
# final accuracy equals the last epoch's val_accuracy.
model_custom.fit(X_train_sample, y_train_sample, epochs=5, batch_size=64, validation_data=(X_test_sample, y_test_sample))

# Evaluate the custom model
loss_custom, accuracy_custom = model_custom.evaluate(X_test_sample, y_test_sample, verbose=0)
print(f"Custom Model Accuracy: {accuracy_custom * 100:.2f}%")


Epoch 1/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 2s/step - accuracy: 0.5525 - loss: 0.6808 - val_accuracy: 0.6990 - val_loss: 0.5936
Epoch 2/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 2s/step - accuracy: 0.8121 - loss: 0.4250 - val_accuracy: 0.7660 - val_loss: 0.4986
Epoch 3/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 2s/step - accuracy: 0.8968 - loss: 0.2590 - val_accuracy: 0.7910 - val_loss: 0.4697
Epoch 4/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 2s/step - accuracy: 0.9473 - loss: 0.1684 - val_accuracy: 0.7930 - val_loss: 0.5197
Epoch 5/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 2s/step - accuracy: 0.9602 - loss: 0.1205 - val_accuracy: 0.7820 - val_loss: 0.6268
Custom Model Accuracy: 78.20%


**Explanation:**

---


Here, we enhance the LSTM architecture by stacking two LSTM layers (128 units followed by 64 units) with a Dropout layer between them to reduce overfitting. The embedding size is increased to 64, and a dropout rate of 0.3 is applied between the two LSTM layers to improve generalization. The model is trained for 5 epochs and evaluated for performance on the test set.

**Step 4:** Bidirectional LSTM Model

In [6]:
# Import the Bidirectional wrapper
from keras.layers import Bidirectional
import numpy as np

# Set a random seed for reproducibility.
# Seeding NumPy alone does not seed the Python/backend generators Keras uses
# for weight initialisation and shuffling; set_random_seed seeds Python,
# NumPy and the backend in one call.
from keras.utils import set_random_seed
set_random_seed(42)

# Define a fraction of the dataset to use
fraction = 0.1  # Use only 10% of the dataset

# Sample a smaller portion of the training and test sets
train_size = int(len(X_train_pad) * fraction)
test_size = int(len(X_test_pad) * fraction)

# Take the leading rows of each split.
X_train_sample = X_train_pad[:train_size]
y_train_sample = y_train[:train_size]
X_test_sample = X_test_pad[:test_size]
y_test_sample = y_test[:test_size]

# Build a Bidirectional LSTM model: embedding -> Bidirectional(LSTM(100)) -> sigmoid output
model_blstm = Sequential()
model_blstm.add(Embedding(max_words, 64, input_length=max_len))
model_blstm.add(Bidirectional(LSTM(100)))
model_blstm.add(Dense(1, activation='sigmoid'))

# Compile the model
model_blstm.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the Bidirectional LSTM model on the smaller dataset.
# NOTE: validation_data is the same sample that is evaluated below, so the
# final accuracy equals the last epoch's val_accuracy.
model_blstm.fit(X_train_sample, y_train_sample, epochs=5, batch_size=64, validation_data=(X_test_sample, y_test_sample))

# Evaluate the Bidirectional LSTM model
loss_blstm, accuracy_blstm = model_blstm.evaluate(X_test_sample, y_test_sample, verbose=0)
print(f"Bidirectional LSTM Model Accuracy: {accuracy_blstm * 100:.2f}%")


Epoch 1/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 2s/step - accuracy: 0.5466 - loss: 0.6781 - val_accuracy: 0.7120 - val_loss: 0.6152
Epoch 2/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 2s/step - accuracy: 0.7567 - loss: 0.5677 - val_accuracy: 0.8110 - val_loss: 0.4267
Epoch 3/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 2s/step - accuracy: 0.8713 - loss: 0.3219 - val_accuracy: 0.8050 - val_loss: 0.4248
Epoch 4/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 2s/step - accuracy: 0.9334 - loss: 0.2049 - val_accuracy: 0.8010 - val_loss: 0.5207
Epoch 5/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 2s/step - accuracy: 0.9563 - loss: 0.1404 - val_accuracy: 0.8200 - val_loss: 0.4672
Bidirectional LSTM Model Accuracy: 82.00%


**Explanation:**

---

This section demonstrates the implementation of a Bidirectional LSTM model. Bidirectional layers allow the model to process the sequence in both forward and backward directions, which can improve the model’s ability to capture the context of words in a sentence. The rest of the model structure remains similar to the previous ones.

**Step 5:** GRU and Bidirectional GRU Model

In [7]:
# Import GRU layer
from keras.layers import GRU
import numpy as np

# Set a random seed for reproducibility.
# Seeding NumPy alone does not seed the Python/backend generators Keras uses
# for weight initialisation and shuffling; set_random_seed seeds Python,
# NumPy and the backend in one call.
from keras.utils import set_random_seed
set_random_seed(42)

# Define a fraction of the dataset to use
fraction = 0.1  # Use only 10% of the dataset

# Sample a smaller portion of the training and test sets
train_size = int(len(X_train_pad) * fraction)
test_size = int(len(X_test_pad) * fraction)

# Take the leading rows of each split.
X_train_sample = X_train_pad[:train_size]
y_train_sample = y_train[:train_size]
X_test_sample = X_test_pad[:test_size]
y_test_sample = y_test[:test_size]

# Build a GRU model: embedding -> GRU(100) -> sigmoid output
model_gru = Sequential()
model_gru.add(Embedding(max_words, 64, input_length=max_len))
model_gru.add(GRU(100))
model_gru.add(Dense(1, activation='sigmoid'))

# Compile the model
model_gru.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the GRU model on the smaller dataset.
# NOTE: validation_data is the same sample that is evaluated below, so the
# final accuracy equals the last epoch's val_accuracy.
model_gru.fit(X_train_sample, y_train_sample, epochs=5, batch_size=64, validation_data=(X_test_sample, y_test_sample))

# Evaluate the GRU model
loss_gru, accuracy_gru = model_gru.evaluate(X_test_sample, y_test_sample, verbose=0)
print(f"GRU Model Accuracy: {accuracy_gru * 100:.2f}%")


Epoch 1/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 940ms/step - accuracy: 0.5541 - loss: 0.6878 - val_accuracy: 0.6540 - val_loss: 0.6451
Epoch 2/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 861ms/step - accuracy: 0.7287 - loss: 0.6005 - val_accuracy: 0.6980 - val_loss: 0.5731
Epoch 3/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 824ms/step - accuracy: 0.8298 - loss: 0.4205 - val_accuracy: 0.7760 - val_loss: 0.4621
Epoch 4/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 823ms/step - accuracy: 0.9250 - loss: 0.2060 - val_accuracy: 0.7770 - val_loss: 0.5140
Epoch 5/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 828ms/step - accuracy: 0.9503 - loss: 0.1436 - val_accuracy: 0.8010 - val_loss: 0.5866
GRU Model Accuracy: 80.10%


In [8]:
# Build a Bidirectional GRU model
from keras.layers import Bidirectional, GRU
import numpy as np

# Set a random seed for reproducibility.
# Seeding NumPy alone does not seed the Python/backend generators Keras uses
# for weight initialisation and shuffling; set_random_seed seeds Python,
# NumPy and the backend in one call.
from keras.utils import set_random_seed
set_random_seed(42)

# Define a fraction of the dataset to use
fraction = 0.1  # Use only 10% of the dataset

# Sample a smaller portion of the training and test sets
train_size = int(len(X_train_pad) * fraction)
test_size = int(len(X_test_pad) * fraction)

# Take the leading rows of each split.
X_train_sample = X_train_pad[:train_size]
y_train_sample = y_train[:train_size]
X_test_sample = X_test_pad[:test_size]
y_test_sample = y_test[:test_size]

# Build a Bidirectional GRU model: embedding -> Bidirectional(GRU(100)) -> sigmoid output
model_bgru = Sequential()
model_bgru.add(Embedding(max_words, 64, input_length=max_len))
model_bgru.add(Bidirectional(GRU(100)))
model_bgru.add(Dense(1, activation='sigmoid'))

# Compile the model
model_bgru.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the Bidirectional GRU model on the smaller dataset.
# NOTE: validation_data is the same sample that is evaluated below, so the
# final accuracy equals the last epoch's val_accuracy.
model_bgru.fit(X_train_sample, y_train_sample, epochs=5, batch_size=64, validation_data=(X_test_sample, y_test_sample))

# Evaluate the Bidirectional GRU model
loss_bgru, accuracy_bgru = model_bgru.evaluate(X_test_sample, y_test_sample, verbose=0)
print(f"Bidirectional GRU Model Accuracy: {accuracy_bgru * 100:.2f}%")


Epoch 1/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 2s/step - accuracy: 0.5372 - loss: 0.6871 - val_accuracy: 0.6710 - val_loss: 0.5893
Epoch 2/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 2s/step - accuracy: 0.8016 - loss: 0.4525 - val_accuracy: 0.7920 - val_loss: 0.4231
Epoch 3/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 2s/step - accuracy: 0.9040 - loss: 0.2530 - val_accuracy: 0.7940 - val_loss: 0.4674
Epoch 4/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 2s/step - accuracy: 0.9402 - loss: 0.1824 - val_accuracy: 0.7810 - val_loss: 0.5203
Epoch 5/5
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 2s/step - accuracy: 0.9469 - loss: 0.1490 - val_accuracy: 0.7890 - val_loss: 0.6176
Bidirectional GRU Model Accuracy: 78.90%


**Explanation:**

---


In this section, we implement both GRU and Bidirectional GRU models. GRU (Gated Recurrent Unit) is another type of RNN that is similar to LSTM but has fewer parameters, which makes it faster to train while often achieving comparable results. Both unidirectional and bidirectional versions of GRU are implemented here, and their accuracy on the test data is evaluated.

**Step 6**: Evaluation of Models (LSTM, BLSTM, GRU, BGRU) in Terms of Precision, Recall, and F1-Score

In [9]:
# Import necessary libraries for performance metrics
from sklearn.metrics import classification_report, precision_recall_fscore_support

# Function to evaluate models and print precision, recall, f1
def evaluate_model(model, X_test_pad, y_test, model_name, threshold=0.5):
    """Print a classification report for a model and return its Positive-class metrics.

    Args:
        model: Fitted model whose ``predict`` returns one probability per input
            row (the models in this notebook end in a single sigmoid unit).
        X_test_pad: Inputs passed to ``model.predict``.
        y_test: Ground-truth binary labels (0 = Negative, 1 = Positive).
        model_name: Name shown in the printed header.
        threshold: Probabilities strictly greater than this are labelled 1.
            Defaults to 0.5.

    Returns:
        Tuple ``(precision, recall, f1-score)`` for the Positive class.
    """
    class_ids = [0, 1]
    class_names = ['Negative', 'Positive']

    probabilities = model.predict(X_test_pad)
    # Threshold the probabilities and flatten the (n, 1) output to 1-D so it
    # has the same shape as y_test.
    y_pred = (probabilities > threshold).astype(int).ravel()

    # Passing `labels` ties each target name to its class id explicitly
    # instead of relying on whichever labels happen to occur in the data.
    print(f"Performance metrics for {model_name}:\n")
    print(classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))

    # Same report as a dict, so the metrics can be read by name.
    report = classification_report(
        y_test, y_pred, labels=class_ids, target_names=class_names, output_dict=True
    )

    # Return the main metrics (precision, recall, f1) for Positive class
    positive = report['Positive']
    return positive['precision'], positive['recall'], positive['f1-score']

# Evaluate all models and tabulate results
models = {
    "LSTM": model,
    "Bidirectional LSTM": model_blstm,
    "GRU": model_gru,
    "Bidirectional GRU": model_bgru
}

performance_metrics = {}

# The loop variable must not be named `model`: that would rebind the global
# `model` (the simple LSTM) to the last dict entry, so re-running this cell
# would report the Bidirectional GRU under the "LSTM" label.
for name, trained_model in models.items():
    precision, recall, f1 = evaluate_model(trained_model, X_test_pad, y_test, name)
    performance_metrics[name] = [precision, recall, f1]

# Display results in a tabular format using pandas (one row per model)
performance_df = pd.DataFrame.from_dict(
    performance_metrics,
    orient='index',
    columns=['Precision', 'Recall', 'F1-score'],
)
print(performance_df)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 101ms/step
Performance metrics for LSTM:

              precision    recall  f1-score   support

    Negative       0.85      0.77      0.80      4961
    Positive       0.79      0.86      0.82      5039

    accuracy                           0.81     10000
   macro avg       0.82      0.81      0.81     10000
weighted avg       0.82      0.81      0.81     10000

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 187ms/step
Performance metrics for Bidirectional LSTM:

              precision    recall  f1-score   support

    Negative       0.79      0.87      0.83      4961
    Positive       0.86      0.77      0.81      5039

    accuracy                           0.82     10000
   macro avg       0.82      0.82      0.82     10000
weighted avg       0.82      0.82      0.82     10000

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 102ms/step
Performance metrics for GRU:

      

**Explanation:**

---


*   The evaluate_model function is used to predict the sentiment using the trained model and compute the Precision, Recall, and F1-Score for each model.
*   The predictions are made on the test data (X_test_pad), and we convert probabilities to class labels (0 for negative and 1 for positive).

*  The classification_report function from sklearn provides a detailed breakdown of performance metrics for both classes (Negative and Positive). We focus on the metrics for the Positive class (since it's a binary classification problem).
*   We loop through all models (LSTM, BLSTM, GRU, BGRU) to generate the metrics and store them in a dictionary. Finally, we tabulate the results using pandas.



