<a href="https://colab.research.google.com/github/Uma29-10/TNSIF_AIML_DBIT/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ==============================================
# 📘 IMDB Sentiment Classification using LSTM
# ==============================================

# -----------------------------
# 1. Import required libraries
# -----------------------------
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# -----------------------------
# Dataset Preparation
# -----------------------------
# Vocabulary is restricted to the 5,000 most frequent words
max_features = 5000
# Every review is brought to this many tokens
max_len = 500

print("Loading IMDB dataset...")
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

print(f"Training samples: {len(x_train)}")
print(f"Testing samples: {len(x_test)}")

# Pad both splits to a common width of max_len in a single pass
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=max_len) for split in (x_train, x_test)
)

# Report the resulting array shapes
print(f"x_train shape: {x_train.shape}")
print(f"x_test shape: {x_test.shape}")

# -----------------------------
# Model Building
# -----------------------------
# Baseline network: Embedding -> LSTM(100) -> Dense(1, sigmoid)
model = Sequential()

# Embedding table: one 128-dimensional vector per vocabulary index.
# The `input_length` argument is intentionally not passed: it is deprecated
# in Keras 3, where it is ignored and only triggers a warning.
model.add(Embedding(input_dim=max_features, output_dim=128))

# Recurrent layer with 100 units
model.add(LSTM(100))

# Single sigmoid unit producing the positive-sentiment probability
model.add(Dense(1, activation='sigmoid'))

# Build explicitly so that summary() below can report output shapes and
# parameter counts instead of describing an unbuilt model.
model.build(input_shape=(None, max_len))

# -----------------------------
# Compilation
# -----------------------------
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print model summary
model.summary()

# -----------------------------
# Training
# -----------------------------
# Fit the baseline model for two epochs in mini-batches of 64.
# NOTE(review): the test split is also used as validation data here, so the
# val_* metrics are not independent of the final test score — confirm this
# is intended.
print(f"Number of training samples: {len(x_train)}")
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    epochs=2,
    batch_size=64,
)
print(f"Number of test samples: {len(x_test)}")

# -----------------------------
# Evaluation
# -----------------------------
# Score the trained model on the test split (verbose=0: no progress output)
eval_metrics = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_acc = eval_metrics
print(f"\nTest Accuracy: {test_acc:.4f}")

# -----------------------------
# Prediction on a Single Review
# -----------------------------
# Slicing keeps the leading batch axis, so the first test review is passed
# to predict() as a batch containing one sequence.
review = x_test[:1]
pred_prob = model.predict(review)[0, 0]
print(f"\nPredicted Probability of Positive Sentiment: {pred_prob:.4f}")
print(f"Actual Label: {y_test[0]}")

# ==============================================
# 🔁 Experiments / Modifications
# ==============================================

def train_lstm_classifier(train_data, val_data, vocab_size, seq_len,
                          lstm_units=100, epochs=2, show_summary=False):
    """Build, compile and fit one Embedding -> LSTM -> Dense(sigmoid) model.

    All experiments below share this architecture and differ only in the
    arguments passed here, so the model code lives in one place.

    Args:
        train_data: ``(x, y)`` pair used for fitting.
        val_data: ``(x, y)`` pair passed to ``fit`` as ``validation_data``.
        vocab_size: ``input_dim`` of the embedding layer.
        seq_len: number of tokens per padded review.
        lstm_units: number of units in the LSTM layer.
        epochs: number of training epochs.
        show_summary: when true, print ``model.summary()`` before training.

    Returns:
        ``(model, history)``: the fitted model and the object returned by
        ``model.fit``.
    """
    # `input_length` is deliberately not passed to Embedding: it is deprecated
    # in Keras 3, where it is ignored and only triggers a warning.
    net = Sequential([
        Embedding(vocab_size, 128),
        LSTM(lstm_units),
        Dense(1, activation='sigmoid'),
    ])
    # Build up front so that summary() can report shapes and parameter counts.
    net.build(input_shape=(None, seq_len))
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    if show_summary:
        net.summary()

    x_fit, y_fit = train_data
    run_history = net.fit(
        x_fit, y_fit,
        batch_size=64,
        epochs=epochs,
        validation_data=val_data,
    )
    return net, run_history


# Experiment 1: change max_features to 10,000
print("\n=== Experiment 1: max_features = 10,000 ===")
max_features = 10000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)

# NOTE: max_features and the data arrays are overwritten above, so every
# experiment from here on runs on the 10,000-word data, not the 5,000-word
# baseline data.
experiment_data = dict(
    train_data=(x_train, y_train),
    val_data=(x_test, y_test),
    vocab_size=max_features,
    seq_len=max_len,
)

model2, history2 = train_lstm_classifier(**experiment_data, show_summary=True)

# Experiment 2: increase epochs to 5
print("\n=== Experiment 2: Increasing epochs to 5 ===")
model3, history3 = train_lstm_classifier(**experiment_data, epochs=5)

# Experiment 3: change the number of LSTM units (50, 150)
print("\n=== Experiment 3A: LSTM Units = 50 ===")
model4, history4 = train_lstm_classifier(**experiment_data, lstm_units=50)

print("\n=== Experiment 3B: LSTM Units = 150 ===")
# Keeping the history in a variable (rather than ending the cell on a bare
# fit() call) retains the metrics and avoids a stray History repr in the output.
model5, history5 = train_lstm_classifier(**experiment_data, lstm_units=150)


Loading IMDB dataset...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training samples: 25000
Testing samples: 25000
x_train shape: (25000, 500)
x_test shape: (25000, 500)




Number of training samples: 25000
Epoch 1/2
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m533s[0m 1s/step - accuracy: 0.7326 - loss: 0.5085 - val_accuracy: 0.7353 - val_loss: 0.5189
Epoch 2/2
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m545s[0m 1s/step - accuracy: 0.8422 - loss: 0.3623 - val_accuracy: 0.8187 - val_loss: 0.3918
Number of test samples: 25000

Test Accuracy: 0.8187
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 354ms/step

Predicted Probability of Positive Sentiment: 0.2627
Actual Label: 0

=== Experiment 1: max_features = 10,000 ===


Epoch 1/2
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m514s[0m 1s/step - accuracy: 0.7307 - loss: 0.5134 - val_accuracy: 0.8635 - val_loss: 0.3283
Epoch 2/2
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m554s[0m 1s/step - accuracy: 0.8878 - loss: 0.2840 - val_accuracy: 0.8650 - val_loss: 0.3282

=== Experiment 2: Increasing epochs to 5 ===
Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m536s[0m 1s/step - accuracy: 0.6941 - loss: 0.5593 - val_accuracy: 0.8045 - val_loss: 0.4358
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m555s[0m 1s/step - accuracy: 0.8704 - loss: 0.3167 - val_accuracy: 0.8474 - val_loss: 0.3552
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m523s[0m 1s/step - accuracy: 0.9099 - loss: 0.2342 - val_accuracy: 0.8628 - val_loss: 0.3614
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m553s[0m 1s/step - accuracy: 0.9370 - loss: 0.1747 - val_accuracy: 0.8604 -

<keras.src.callbacks.history.History at 0x7debe7d7a030>