In [1]:
import numpy as np
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from gensim.models import KeyedVectors
from gensim import downloader

# Fix NumPy's global RNG so the random fallback embeddings drawn later are repeatable
SEED = 75
np.random.seed(SEED)

In [2]:
# Vocabulary cap passed to the loader, and the fixed per-review length in tokens
max_words = 10000
maxlen = 100

# Fetch the IMDb reviews (integer-encoded) as (inputs, labels) pairs per split
train_split, test_split = imdb.load_data(num_words=max_words)
train_dataset_x, train_dataset_y = train_split
test_dataset_x, test_dataset_y = test_split

# Pad / truncate every review so all sequences share the same length
train_dataset_x = pad_sequences(train_dataset_x, maxlen=maxlen)
test_dataset_x = pad_sequences(test_dataset_x, maxlen=maxlen)

# Sanity check: number of reviews in each split
print(len(train_dataset_x), len(test_dataset_x))

25000 25000


In [3]:
# Obtain the pretrained Google News word2vec file through gensim's downloader
# (fetched only if it is not already present); return_path=True yields the
# file location rather than a loaded model.
model_path = downloader.load(
    'word2vec-google-news-300',
    return_path=True,
)

# Read the vectors stored at that location (binary word2vec format)
word2vec = KeyedVectors.load_word2vec_format(
    model_path,
    binary=True,
)

In [4]:


# Build the (max_words x vector_size) matrix used to initialise the Embedding layer.
#
# imdb.get_word_index() maps each word to its raw frequency rank (1 = most
# frequent), but imdb.load_data() shifts every token by `index_from` (default 3)
# because ids 0, 1 and 2 are reserved for padding, start-of-sequence and
# out-of-vocabulary. The row for a word must therefore be `rank + INDEX_FROM`;
# using the raw rank would pair every token in the dataset with the vector of a
# different word.
INDEX_FROM = 3  # must match the `index_from` used by imdb.load_data (default)

# Rows 0-2 (pad / start / OOV) are intentionally left as zero vectors.
embedding_matrix = np.zeros((max_words, word2vec.vector_size))
for word, rank in imdb.get_word_index().items():
    index = rank + INDEX_FROM  # token id as it actually appears in the padded data
    if index < max_words:
        try:
            embedding_matrix[index] = word2vec[word]
        except KeyError:
            # Word is missing from word2vec: fall back to a random vector
            embedding_matrix[index] = np.random.normal(0, 1, word2vec.vector_size)


In [5]:

# Frozen embedding layer initialised from the pretrained word2vec matrix
embedding_layer = Embedding(
    max_words,
    word2vec.vector_size,
    weights=[embedding_matrix],
    input_length=maxlen,
    trainable=False,
)

# Two stacked LSTMs followed by a single sigmoid unit for binary sentiment
model = Sequential([
    embedding_layer,
    # return_sequences=True so the second LSTM receives the full sequence
    LSTM(256, dropout=0.2, recurrent_dropout=0.2, return_sequences=True),
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the padded training reviews
model.fit(
    train_dataset_x,
    train_dataset_y,
    epochs=30,
    batch_size=32,
)

Epoch 1/30
  8/782 [..............................] - ETA: 6:43 - loss: 0.7240 - accuracy: 0.4414

KeyboardInterrupt: 

In [None]:
# Score the trained model on the held-out test reviews; evaluate() yields the
# loss followed by the compiled metric (accuracy)
test_metrics = model.evaluate(test_dataset_x, test_dataset_y)
loss, accuracy = test_metrics
print(loss, accuracy)