In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Set hyperparameters
max_features = 10000  # Number of words to consider as features
maxlen = 100  # Cut texts after this number of words (padding to this length)
batch_size = 32
embedding_dim = 128

# Load IMDb dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Pad sequences to ensure consistent input size
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# Build LSTM Model
model = Sequential([
    Embedding(input_dim=max_features, output_dim=embedding_dim, input_length=maxlen),
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')  # Binary classification (positive/negative sentiment)
])

# Compile Model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train Model
model.fit(x_train, y_train, batch_size=batch_size, epochs=5, validation_data=(x_test, y_test))

# Evaluate Model
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print(f"Test Loss: {score}")
print(f"Test Accuracy: {acc}")

# Predict Sentiment on a Sample Review
sample_review = "The movie was absolutely terrible! The performances were disappointing."

# Convert review to IMDb format (tokenized)
from tensorflow.keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts([""])  # Empty fit just to instantiate

# Tokenize and pad sample review
sample_seq = tokenizer.texts_to_sequences([sample_review])
sample_seq_padded = pad_sequences(sample_seq, maxlen=maxlen)

# Predict Sentiment
prediction = model.predict(sample_seq_padded)
print("Predicted Sentiment: Positive" if prediction[0][0] > 0.5 else "Predicted Sentiment: Negative")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 2us/step




Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 210ms/step - accuracy: 0.7183 - loss: 0.5370 - val_accuracy: 0.8367 - val_loss: 0.3770
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 209ms/step - accuracy: 0.8693 - loss: 0.3240 - val_accuracy: 0.8428 - val_loss: 0.3602
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 177ms/step - accuracy: 0.9028 - loss: 0.2525 - val_accuracy: 0.8456 - val_loss: 0.3640
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 118ms/step - accuracy: 0.9236 - loss: 0.2030 - val_accuracy: 0.8291 - val_loss: 0.4067
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 147ms/step - accuracy: 0.9436 - loss: 0.1526 - val_accuracy: 0.8417 - val_loss: 0.4420
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 28ms/step - accuracy: 0.8399 - loss: 0.4496
Test Loss: 0.44202712178230286
Test Accuracy: 0.8417199850082397
