<a href="https://colab.research.google.com/github/ashaduzzaman-sarker/Text-classification-Sentiment-Analysis/blob/main/Bidirectional_LSTM_on_the_IMDB_movie_review_sentiment_classification_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Train a 2-layer bidirectional LSTM on the IMDB movie review sentiment classification dataset

## Imports

In [None]:
!pip install --upgrade keras tensorflow

In [2]:
import numpy as np
import keras
from keras import layers

# Vocabulary size: only the top 20k words are considered. Used both as
# `num_words` when loading the dataset and as the Embedding input dimension.
max_features = 20000
# Length (in tokens) that every review is padded or truncated to by
# pad_sequences. NOTE(review): pad_sequences is called with its default
# truncating mode, which per the Keras docs drops tokens from the *start* of
# longer reviews, so the last 200 tokens are kept — confirm this is intended.
maxlen = 200

## Build the Bidirectional Model

In [3]:
# Integer token ids go in; the sequence axis is left as None so the model
# accepts sequences of any length.
inputs = keras.Input(shape=(None,), dtype="int32")

# Look up a 128-dimensional vector for every token id
x = layers.Embedding(input_dim=max_features, output_dim=128)(inputs)

# Two stacked bidirectional LSTMs: the first returns the full sequence so the
# second has per-timestep inputs; the second returns only its final output.
x = layers.Bidirectional(layers.LSTM(units=64, return_sequences=True))(x)
x = layers.Bidirectional(layers.LSTM(units=64))(x)

# Classifier head: a single sigmoid unit
outputs = layers.Dense(units=1, activation="sigmoid")(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()

## Load the IMDB movie review sentiment analysis dataset

In [4]:
# load_data returns two (sequences, labels) pairs, restricted here to the
# `max_features` vocabulary. The second pair is used as validation data in
# the rest of this notebook.
train_split, val_split = keras.datasets.imdb.load_data(num_words=max_features)
x_train, y_train = train_split
x_val, y_val = val_split

print(f"{len(x_train)} Training sequences")
print(f"{len(x_val)} Validation sequences")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
25000 Training sequences
25000 Validation sequences


In [5]:
# Use pad_sequences to bring every review to exactly `maxlen` tokens.
# The padding/truncating arguments spell out the library defaults ("pre"):
# shorter reviews get zeros prepended, and longer reviews are cut at the
# front so that their last `maxlen` tokens are kept.
pad_kwargs = dict(maxlen=maxlen, padding="pre", truncating="pre")

x_train = keras.utils.pad_sequences(x_train, **pad_kwargs)
x_val = keras.utils.pad_sequences(x_val, **pad_kwargs)

## Train and Evaluate the Model

In [6]:
# The model ends in a single sigmoid unit, so binary cross-entropy is the loss;
# accuracy is tracked as the reported metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Keep the History object returned by fit(): assigning it stops the cell from
# echoing a bare `<...History at 0x...>` repr and leaves the per-epoch
# loss/accuracy values available in `history.history` for later inspection.
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=5,
    validation_data=(x_val, y_val),
)

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m477s[0m 600ms/step - accuracy: 0.7271 - loss: 0.5156 - val_accuracy: 0.8421 - val_loss: 0.3474
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m455s[0m 582ms/step - accuracy: 0.9019 - loss: 0.2441 - val_accuracy: 0.8662 - val_loss: 0.3224
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m454s[0m 581ms/step - accuracy: 0.9418 - loss: 0.1601 - val_accuracy: 0.8648 - val_loss: 0.3600
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m455s[0m 582ms/step - accuracy: 0.9707 - loss: 0.0869 - val_accuracy: 0.8580 - val_loss: 0.3910
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m504s[0m 585ms/step - accuracy: 0.9834 - loss: 0.0522 - val_accuracy: 0.8591 - val_loss: 0.4516


<keras.src.callbacks.history.History at 0x7ca7f665b520>