In [18]:
# imports
from tensorflow.keras.utils import set_random_seed
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence

In [3]:
# load the dataset but only keep the top n words, zero the rest
top_words = 5000
max_review_length = 500
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=top_words)

# truncate and pad input sequences
X_train = sequence.pad_sequences(x_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(x_test, maxlen=max_review_length)

In [4]:
print(x_train.shape, X_train.shape)

(25000,) (25000, 500)


In [16]:
# Printing the number of dimensions in each word vector before and after padding
print(len(x_train[0]), len(x_train[-1]))
print(len(X_train[0]), len(X_train[-1]))

218 153


In [20]:
# create the model
set_random_seed(1)
embedding_vecor_length = 32

model = Sequential()
model.add(Embedding(input_dim=top_words, 
                    output_dim=embedding_vecor_length, 
                    input_length=max_review_length))

model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=3, batch_size=64, verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x1d497ed1c50>

In [25]:
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=1)
print(f"Accuracy: {(scores[1]*100):.2f}")
print(f"Loss: {(scores[0]*100):.2f}")

Accuracy: 87.44
Loss: 31.04
