# LSTM IMDB Classifier

In [9]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense
from keras.preprocessing import sequence
from keras.datasets import imdb
import matplotlib.pyplot as plt

### Dataset

In [2]:
# Load the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=5000)

# Pad sequences to a maximum length
max_review_length = 500
x_train = sequence.pad_sequences(x_train, maxlen=max_review_length)
x_test = sequence.pad_sequences(x_test, maxlen=max_review_length)

### LSTM

#### Embedding Layer

- The model has an embedding layer with a vocabulary size of 5000 words.
- Each word is represented by a dense vector of length 32.
- Input sequences (reviews) are expected to have a length of 'max_review_length'.

In [3]:
vocabulary_length = 5000
embedding_length = 32

In [4]:
# Define the LSTM model
model = Sequential()
model.add(Embedding(vocabulary_length, embedding_length, input_length=max_review_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))

In [6]:
# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

### Train and Evaluate

In [7]:
# Train the model
history = model.fit(x_train, y_train, epochs=3, batch_size=64, validation_data=(x_test, y_test))

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [8]:
# Evaluate the model
scores = model.evaluate(x_test, y_test, verbose=0)
print("Test Accuracy: %.2f%%" % (scores[1]*100))

Test Accuracy: 83.51%
