# NLP Sentiment Analysis on IMDB Movie Reviews
### Using RNN | LSTM | GRU | BiLSTM | BiGRU | Best LSTM (KerasTuner)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

## Load & Preprocess IMDB Dataset

In [None]:
# Load the IMDB reviews, keeping only the `max_features` most frequent words,
# then pad/truncate every review to exactly `maxlen` token ids.
max_features = 10000
maxlen = 500

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)

print(f"Train shape: {x_train.shape}, Test shape: {x_test.shape}")

### Simple RNN

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

# Baseline recurrent classifier: 32-dim learned embeddings -> SimpleRNN(32)
# -> one sigmoid unit for the binary sentiment label.
model_rnn = Sequential()
model_rnn.add(Embedding(input_dim=max_features, output_dim=32))
model_rnn.add(SimpleRNN(units=32))
model_rnn.add(Dense(units=1, activation='sigmoid'))

model_rnn.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# 20% of the training data is held out for validation.
model_rnn.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=3,
    validation_split=0.2,
)

### LSTM

In [None]:
from tensorflow.keras.layers import LSTM

# LSTM classifier: 64-dim learned embeddings -> LSTM(64) -> sigmoid output.
model_lstm = Sequential()
model_lstm.add(Embedding(input_dim=max_features, output_dim=64))
model_lstm.add(LSTM(units=64))
model_lstm.add(Dense(units=1, activation='sigmoid'))

model_lstm.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# 20% of the training data is held out for validation.
model_lstm.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=3,
    validation_split=0.2,
)

### GRU

In [None]:
from tensorflow.keras.layers import GRU

# GRU classifier: 64-dim learned embeddings -> GRU(64) -> sigmoid output.
model_gru = Sequential()
model_gru.add(Embedding(input_dim=max_features, output_dim=64))
model_gru.add(GRU(units=64))
model_gru.add(Dense(units=1, activation='sigmoid'))

model_gru.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# 20% of the training data is held out for validation.
model_gru.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=3,
    validation_split=0.2,
)


### Bidirectional LSTM with GloVe

In [None]:
from tensorflow.keras.layers import Bidirectional

# Load GloVe embeddings: each line of the file is a token followed by its
# `embedding_dim` float components.
embedding_dim = 100
embeddings_index = {}
glove_path = "data/glove.6B.100d.txt"
with open(glove_path, encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

# Prepare embedding matrix.
# imdb.load_data() shifts every word id by `index_from` (default 3) because the
# low ids are reserved (0 = padding, 1 = start-of-sequence, 2 = out-of-vocab),
# whereas imdb.get_word_index() returns the unshifted frequency ranks. The
# GloVe vector of a word therefore belongs in row `rank + 3`; writing it to
# row `rank` would make every token in x_train/x_test pick up the vector of a
# different word.
imdb_index_from = 3  # must match the `index_from` used by imdb.load_data
word_index = imdb.get_word_index()
embedding_matrix = np.zeros((max_features, embedding_dim))
for word, rank in word_index.items():
    token_id = rank + imdb_index_from
    # Ids >= max_features never occur in the data (load_data maps them to OOV).
    if token_id < max_features:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # Words missing from GloVe, and the reserved ids, keep all-zero rows.
            embedding_matrix[token_id] = embedding_vector

# Bidirectional LSTM on top of the frozen, pre-trained GloVe embeddings.
model_bilstm = Sequential([
    Embedding(input_dim=max_features,
              output_dim=embedding_dim,
              weights=[embedding_matrix],
              input_length=maxlen,
              trainable=False),
    Bidirectional(LSTM(64)),
    Dense(1, activation='sigmoid')
])
model_bilstm.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_bilstm.fit(x_train, y_train, epochs=3, batch_size=64, validation_split=0.2)

### Bidirectional GRU with GloVe

In [None]:
# Bidirectional GRU over the frozen GloVe embedding matrix prepared in the
# Bidirectional LSTM cell (`embedding_matrix`, `embedding_dim`).
model_bigru = Sequential()
model_bigru.add(
    Embedding(
        input_dim=max_features,
        output_dim=embedding_dim,
        weights=[embedding_matrix],
        input_length=maxlen,
        trainable=False,
    )
)
model_bigru.add(Bidirectional(GRU(units=64)))
model_bigru.add(Dense(units=1, activation='sigmoid'))

model_bigru.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# 20% of the training data is held out for validation.
model_bigru.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=3,
    validation_split=0.2,
)

### Best LSTM using KerasTuner

In [None]:
import keras_tuner as kt

def model_builder(hp):
    """Build and compile an Embedding -> LSTM -> sigmoid classifier for tuning.

    Args:
        hp: Hyperparameter container supplied by KerasTuner. Two integer
            hyperparameters are drawn from it, ``embedding_output`` and
            ``lstm_units``, each ranging over 32..128 in steps of 32.

    Returns:
        The compiled model (Adam optimizer, binary cross-entropy loss,
        accuracy metric).
    """
    # Sample in the same order as the layers are stacked.
    embed_size = hp.Int('embedding_output', min_value=32, max_value=128, step=32)
    lstm_width = hp.Int('lstm_units', min_value=32, max_value=128, step=32)

    model = Sequential([
        Embedding(max_features, embed_size),
        LSTM(lstm_width),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

# Hyperband search over the space declared in `model_builder`, ranked by
# validation accuracy.
tuner = kt.Hyperband(
    hypermodel=model_builder,
    objective='val_accuracy',
    max_epochs=5,
    factor=3,
    directory='kerastuner_dir',
    project_name='lstm_tuning',
)

tuner.search(
    x_train,
    y_train,
    epochs=5,
    validation_split=0.2,
)

# Report the single best hyperparameter set found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best embedding output: {best_hps.get('embedding_output')}")
print(f"Best LSTM units: {best_hps.get('lstm_units')}")

# Rebuild a fresh model from the winning hyperparameters and train it.
model_best_lstm = tuner.hypermodel.build(best_hps)
model_best_lstm.fit(
    x_train,
    y_train,
    epochs=2,
    validation_split=0.2,
)

## Model Evaluation & Visualization

In [None]:
# Evaluation template (commented out).
# Uncomment the lines below and replace `model` with one of the trained models above, e.g. `model_lstm`.

# y_pred = (model.predict(x_test) > 0.5).astype("int32")
# print(classification_report(y_test, y_pred))

# cm = confusion_matrix(y_test, y_pred)
# disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Negative", "Positive"])
# disp.plot(cmap='Blues')
# plt.title("Confusion Matrix")
# plt.show()


## Future Enhancements
- Integrate BERT via Hugging Face
- Add explainability with SHAP/LIME
- Deploy with Streamlit & Docker