In [1]:
# importing required libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence

# Fix the random seeds for reproducibility.
# Seeding NumPy alone is not enough: Keras draws initial weights and dropout
# masks from TensorFlow's own generator, so that one is seeded as well.
# NOTE(review): some GPU kernels may still be non-deterministic -- confirm if
# bit-exact reruns are required.
np.random.seed(7)
tf.random.set_seed(7)

In [2]:
# Load the IMDB dataset, keeping only the `top_words` (5000) most frequent words.
# NOTE(review): per the Keras docs the remaining, rarer words are not zeroed;
# they are replaced by the out-of-vocabulary index (`oov_char`, default 2) --
# confirm against the installed Keras version.

top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

In [3]:
# Cap the review length at 500 word indices
max_review_length = 500

# Bring every review in both splits to exactly `max_review_length` entries:
# longer reviews are truncated, shorter ones are padded.
X_train, X_test = (
    sequence.pad_sequences(reviews, maxlen=max_review_length)
    for reviews in (X_train, X_test)
)

In [4]:
# Baseline LSTM classifier
#
# Layers, in order:
#   1. Embedding - represents each of the `top_words` word indices as a vector of length 32.
#   2. LSTM      - 100 memory units; every other argument keeps its Keras default.
#   3. Dense     - a single sigmoid unit producing the binary prediction.

embedding_vector_length = 32
model = Sequential()
model.add(Embedding(input_dim=top_words, output_dim=embedding_vector_length, input_length=max_review_length))
model.add(LSTM(units=100))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the cell output.
model.summary()

# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* metrics are computed on the same data as the final evaluation.
# The History object is kept in a variable so the per-epoch metrics stay
# available and its repr does not become the cell output.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 500, 32)           160000    
_________________________________________________________________
lstm (LSTM)                  (None, 100)               53200     
_________________________________________________________________
dense (Dense)                (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x17b0199cd90>

In [5]:
# Score the baseline model on the test split.
# The model was compiled with metrics=['accuracy'], so index 1 of the
# returned list is the accuracy (index 0 is the loss).

scores = model.evaluate(x=X_test, y=y_test, verbose=1)
print("Accuracy: %.2f%%" % (100 * scores[1]))

Accuracy: 83.96%


# Improvements

## LSTM for Sequence Classification with Dropouts

In [6]:
# LSTM classifier with Dropout layers
#
# Same architecture as the baseline, with a Dropout(0.2) layer placed after the
# Embedding layer and another after the LSTM layer. Dropout is imported with
# the other Keras layers in the notebook's import cell.

model_1 = Sequential()
model_1.add(Embedding(input_dim=top_words, output_dim=embedding_vector_length, input_length=max_review_length))
model_1.add(Dropout(rate=0.2))
model_1.add(LSTM(units=100))
model_1.add(Dropout(rate=0.2))
model_1.add(Dense(units=1, activation='sigmoid'))
model_1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the cell output.
model_1.summary()

# Keep the History object so the per-epoch metrics stay available and its
# repr does not become the cell output.
history_1 = model_1.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
dropout (Dropout)            (None, 500, 32)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x17b042dec70>

In [7]:
# Score the dropout model (model_1) on the test split; verbose=0 is passed to
# evaluate(), and only the accuracy line below is printed.

scores = model_1.evaluate(x=X_test, y=y_test, verbose=0)
print("Accuracy: %.2f%%" % (100 * scores[1]))

Accuracy: 86.56%


## LSTM for Sequence Classification with Recurrent Dropouts

In [None]:
# LSTM classifier with recurrent dropout
#
# Instead of separate Dropout layers, dropout is configured on the LSTM layer
# itself through its `dropout` and `recurrent_dropout` arguments (both 0.2),
# so no standalone Dropout layer is used in this cell.
# NOTE(review): the Keras LSTM docs list recurrent_dropout == 0 as a
# requirement for the cuDNN kernel, so this model is expected to train
# noticeably slower on GPU -- confirm on the target hardware.

model_2 = Sequential()
model_2.add(Embedding(input_dim=top_words, output_dim=embedding_vector_length, input_length=max_review_length))
model_2.add(LSTM(units=100, dropout=0.2, recurrent_dropout=0.2))
model_2.add(Dense(units=1, activation='sigmoid'))
model_2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the cell output.
model_2.summary()

# This model is trained for 6 epochs, whereas the two earlier models use 3.
# Keep the History object so the per-epoch metrics stay available and its
# repr does not become the cell output.
history_2 = model_2.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=6, batch_size=64)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_2 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/6

In [None]:
# Score the recurrent-dropout model (model_2) on the test split and report
# its accuracy as a percentage.

scores = model_2.evaluate(x=X_test, y=y_test, verbose=1)
print("Accuracy: %.2f%%" % (100 * scores[1]))