In [10]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, SpatialDropout1D
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Fetch the IMDb review dataset bundled with Keras.
# max_words caps the vocabulary passed to the loader as num_words;
# maxlen is the fixed review length used when sequences are padded later.
max_words = 10000
maxlen = 200
train_split, test_split = imdb.load_data(num_words=max_words)
X_train, y_train = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [12]:
# Pad sequences to ensure uniform length.
# `max_length` is derived from `maxlen` (set in the data-loading cell) instead
# of repeating the literal 200, so the padded width and the value later given
# to the Embedding layer's `input_length` cannot drift apart.
max_length = maxlen
X_train = pad_sequences(X_train, maxlen=max_length)
X_test = pad_sequences(X_test, maxlen=max_length)

In [13]:
# Assemble the classifier layer by layer:
# token embedding -> spatial dropout -> LSTM -> single sigmoid output unit.
embedding_size = 128
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=embedding_size, input_length=max_length))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))

In [14]:
# Configure training: binary cross-entropy objective (matches the single
# sigmoid output), Adam optimizer, accuracy reported alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [15]:
# Train the model.
# The History object returned by fit() is kept in `history` so the per-epoch
# loss/accuracy values remain available for inspection or plotting, and so the
# cell no longer echoes a bare `<History at 0x...>` repr as its output.
# NOTE(review): the test split is also passed as validation_data, so the
# val_* figures of the last epoch are the same numbers model.evaluate() reports
# on the test set. If these curves are ever used to pick epochs or
# hyperparameters, carve a validation split out of the training data instead.
batch_size = 64
epochs = 5
history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
    verbose=2,
)

Epoch 1/5
391/391 - 182s - loss: 0.3892 - accuracy: 0.8230 - val_loss: 0.3022 - val_accuracy: 0.8738 - 182s/epoch - 466ms/step
Epoch 2/5
391/391 - 219s - loss: 0.2374 - accuracy: 0.9089 - val_loss: 0.3159 - val_accuracy: 0.8689 - 219s/epoch - 559ms/step
Epoch 3/5
391/391 - 218s - loss: 0.1934 - accuracy: 0.9280 - val_loss: 0.3942 - val_accuracy: 0.8171 - 218s/epoch - 556ms/step
Epoch 4/5
391/391 - 218s - loss: 0.1382 - accuracy: 0.9508 - val_loss: 0.3781 - val_accuracy: 0.8526 - 218s/epoch - 556ms/step
Epoch 5/5
391/391 - 218s - loss: 0.1237 - accuracy: 0.9556 - val_loss: 0.4808 - val_accuracy: 0.8494 - 218s/epoch - 557ms/step


<keras.src.callbacks.History at 0x7dd317bb7850>

In [17]:
# Score the trained model on the test split without a progress bar, then
# unpack the result into the loss and the accuracy metric set at compile time.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
loss, accuracy = test_metrics
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 0.4808140993118286
Test Accuracy: 0.8493599891662598


In [8]:
# Run inference over the padded test reviews. The network ends in a single
# sigmoid unit, so each prediction is a score between 0 and 1.
predictions = model.predict(x=X_test)

<module 'keras.api._v2.keras.datasets.imdb' from '/usr/local/lib/python3.10/dist-packages/keras/api/_v2/keras/datasets/imdb/__init__.py'>

In [None]:
# Turn the sigmoid scores into hard 0/1 class labels: round to the nearest
# integer, cast to int, and collapse the result to a 1-D vector.
predicted_labels = np.rint(predictions).astype(int).reshape(-1)

In [None]:
# Show a short side-by-side preview of predicted vs. ground-truth labels.
n_preview = 10
print("Predicted Labels:", predicted_labels[:n_preview])
print("True Labels:", y_test[:n_preview])