In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import numpy as np
from keras.datasets import imdb


In [None]:
# Data preparation: IMDB reviews encoded as sequences of integer word indices.

# Upper bound on the number of words, handed to the loader as `num_words`
max_num_words = 10000

# Common length that every review sequence is brought to
max_sequence_length = 500

# Fetch the train and test splits of the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_num_words)

# Pad both feature splits to the fixed length (train first, then test)
x_train, x_test = (
    pad_sequences(reviews, maxlen=max_sequence_length)
    for reviews in (x_train, x_test)
)

# Report the shape of each array, one per line
print('\n'.join(
    f'{caption} {array.shape}'
    for caption, array in (
        ('x_train shape:', x_train),
        ('y_train shape:', y_train),
        ('x_test shape:', x_test),
        ('y_test shape:', y_test),
    )
))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
x_train shape: (25000, 500)
y_train shape: (25000,)
x_test shape: (25000, 500)
y_test shape: (25000,)


In [None]:
# Build the sentiment classifier: Input -> Embedding -> LSTM -> sigmoid Dense.

# Seed NumPy and TensorFlow before any layer weights are created so that
# repeated runs start from the same initialisation (best effort: some GPU
# kernels may still be non-deterministic).
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

# Define input shape
# Derived from the padding length used when the data was prepared, so the
# model cannot drift out of sync if max_sequence_length is changed there.
input_shape = (max_sequence_length,)

# Define input layer
inputs = Input(shape=input_shape)

# Define embedding layer
embedding_size = 128
# Must match the `num_words` cap passed to imdb.load_data: word indices are
# kept below that value, so it is also the number of rows the embedding needs.
vocab_size = max_num_words
embeddings = Embedding(input_dim=vocab_size, output_dim=embedding_size)(inputs)

# Define LSTM layer
# Only the final hidden state is returned (return_sequences is left at its
# default), giving one vector of size lstm_units per review.
lstm_units = 64
lstm_out = LSTM(units=lstm_units)(embeddings)

# Define output layer
# A single sigmoid unit for the binary label.
outputs = Dense(1, activation='sigmoid')(lstm_out)

# Define model
model = Model(inputs=inputs, outputs=outputs)


In [None]:
# Compile model
# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# print the model summary
model.summary()

# train the model
# Stop as soon as the validation loss has not improved for `patience` epochs
# and roll back to the best weights seen. `epochs` is therefore an upper
# bound rather than a fixed count.
# NOTE(review): the previously recorded run ended with a test loss of ~0.60
# at ~0.86 accuracy, which looks like overfitting from training all 10 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

# The last 20% of the training data is held out for validation.
# The History object is kept so the learning curves can be inspected later.
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1,
)

# Evaluate model
# Evaluate in batches: batch_size=1 would run one forward pass per test
# review, which is far slower and reports the same aggregate metrics.
eval_batch_size = 128
loss, accuracy = model.evaluate(x_test, y_test, batch_size=eval_batch_size)
print(f'Test loss: {loss}, Test accuracy: {accuracy}')


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 500)]             0         
                                                                 
 embedding (Embedding)       (None, 500, 128)          1280000   
                                                                 
 lstm (LSTM)                 (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 1,329,473
Trainable params: 1,329,473
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.5998098254203796, Test accuracy: 0.8612800240516663
