In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Bidirectional
from tensorflow.keras.layers import LSTM, SimpleRNN
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Dropout,Flatten
from tensorflow.keras.layers import Attention, BatchNormalization
from tensorflow.keras.preprocessing import sequence
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

import matplotlib.pyplot as plt
# Seed Python's `random`, NumPy and TensorFlow in one call; seeding only
# TensorFlow leaves the other generators unseeded between kernel restarts.
tf.keras.utils.set_random_seed(7)

## IMDB movie review sentiment classification dataset
This is a dataset of 25,000 movie reviews from IMDB, labeled by sentiment (positive/negative). Reviews have been preprocessed, and each review is encoded as a list of word indexes (integers). For convenience, words are indexed by overall frequency in the dataset, so that, for instance, the integer "3" encodes the 3rd most frequent word in the data. This allows for quick filtering operations such as: "only consider the top 10,000 most common words, but eliminate the top 20 most common words".

[Keras API reference / Built-in small datasets / IMDB movie review sentiment classification dataset](https://keras.io/api/datasets/imdb/)

In [None]:
# Load IMDB with a vocabulary cap: only the `top_words` most frequent words
# keep their own index (per the Keras docs, rarer words are replaced by the
# shared out-of-vocabulary index rather than by zero).
top_words = 5000
train_split, test_split = imdb.load_data(num_words=top_words)
X_train, y_train = train_split
X_test, y_test = test_split

In [None]:
# Bring every review to exactly `max_review_length` tokens: longer reviews
# are truncated and shorter ones padded (library-default side and pad value).
max_review_length = 500
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_review_length)
    for split in (X_train, X_test)
)

In [None]:
# Shapes of the padded train and test matrices, in that order.
tuple(split.shape for split in (X_train, X_test))

In [None]:
# Peek at the first five padded reviews.
X_train[:5]

In [None]:
def plot_history(history, metrics, model_name:str):
    """Plot per-epoch training curves, plus validation curves when present.

    Args:
        history: Object exposing a ``history`` mapping from metric name to a
            per-epoch list of values (e.g. the return value of ``model.fit``).
        metrics: A metric name, or a sequence of metric names, drawn together
            on a single figure.
        model_name: Label prepended to the figure title; may be empty.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(metrics, str):
        metrics = [metrics]
    records = history.history
    plt.figure(figsize=[8, 4])
    for metric in metrics:
        plt.plot(records[metric], label='train ' + metric)
        val_key = 'val_' + metric
        # Draw the validation series only if one was recorded, instead of
        # failing with a KeyError when training ran without validation data.
        if val_key in records:
            plt.plot(records[val_key], label='val ' + metric)
    plt.legend()
    plt.xlabel('epoch')
    plt.ylabel(" ".join(metrics))
    # Tick every second epoch to keep the axis readable.
    plt.xticks(list(range(0, len(records[metrics[0]]), 2)))
    plt.grid(visible=True)
    heading = "training history [" + " ".join(metrics) + "]"
    # Skip the prefix (and its separating space) when no model name is given.
    plt.title((model_name + " " + heading) if model_name else heading)

## Model 1 
- 3 stacked LSTM layers

In [None]:
# Size of each word's embedding vector (second argument to Embedding below).
# NOTE(review): "vecor" is a typo for "vector"; the name is kept because the
# model cells reference it.
embedding_vecor_length = 32

In [None]:
# Model 1: embedding -> three stacked LSTMs -> dense classifier head.

model1 = Sequential([
    Embedding(top_words, embedding_vecor_length, input_length=max_review_length),
    Dropout(0.2),
    # Each LSTM returns the full sequence so the next layer sees every timestep.
    LSTM(100, return_sequences=True),
    LSTM(100, return_sequences=True),
    LSTM(100, return_sequences=True),

    Flatten(),
    # ReLU on the hidden Dense layers: without a non-linearity the stacked
    # Dense layers compose into a single linear map.
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(1, activation='sigmoid')
])


model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line.
model1.summary()

In [None]:
# Both callbacks watch validation loss. The learning-rate reduction needs a
# shorter patience than early stopping; with equal patience both trigger on
# the same epoch and training stops before the lower rate is ever used.
# The callbacks are taken from tf.keras so they come from the same Keras
# implementation as the models they are attached to.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    patience=2
)
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)
callbacks = [reduce_lr, early_stop]

In [None]:
# Train model 1, holding out 20% of the training data for validation.
fit_options1 = dict(
    validation_split=0.2,
    epochs=15,
    batch_size=64,
    callbacks=callbacks,
)
history1 = model1.fit(X_train, y_train, **fit_options1)

In [None]:
# One figure per metric; the model name goes into the title so the figures
# stay identifiable when the notebook is skimmed.
for metric in ("accuracy", "loss"):
    plot_history(history=history1, metrics=[metric], model_name="Model 1 (stacked LSTM)")

In [None]:
# Final evaluation of the model on the test split.
# scores[0] is the loss and scores[1] the accuracy metric set in compile().
scores = model1.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
# Binary cross-entropy is not a percentage, so it is reported as a raw value.
print("Loss: %.4f" % scores[0])

## Model 2
- LSTM with Attention

In [None]:
# Model 2: the stacked-LSTM architecture with self-attention after every LSTM.
# Keras' Attention layer must be called on a [query, value] list, which a
# Sequential stack cannot supply, so the model is wired with the functional API.

review_tokens = tf.keras.Input(shape=(max_review_length,), dtype="int32")
features = Embedding(top_words, embedding_vecor_length)(review_tokens)
features = Dropout(0.2)(features)
for _ in range(3):
    features = LSTM(100, return_sequences=True)(features)
    # Self-attention: the LSTM output is used as both query and value.
    features = Attention()([features, features])

features = Flatten()(features)
# ReLU on the hidden Dense layers: without a non-linearity the stacked Dense
# layers compose into a single linear map.
for units, drop_rate in ((64, 0.2), (32, 0.2), (16, 0.1)):
    features = Dense(units, activation='relu')(features)
    features = BatchNormalization()(features)
    features = Dropout(drop_rate)(features)

sentiment = Dense(1, activation='sigmoid')(features)
model2 = tf.keras.Model(inputs=review_tokens, outputs=sentiment)


model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line.
model2.summary()

In [None]:
# Train model 2, holding out 20% of the training data for validation.
fit_options2 = dict(
    validation_split=0.2,
    epochs=15,
    batch_size=64,
    callbacks=callbacks,
)
history2 = model2.fit(X_train, y_train, **fit_options2)

In [None]:
# Final evaluation of the model on the test split.
# scores2[0] is the loss and scores2[1] the accuracy metric set in compile().
scores2 = model2.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores2[1]*100))
# Report the loss as well (raw cross-entropy value, not a percentage) so the
# two models can be compared on both numbers.
print("Loss: %.4f" % scores2[0])

In [None]:
# One figure per metric; the model name goes into the title so the figures
# stay identifiable when the notebook is skimmed.
for metric in ("accuracy", "loss"):
    plot_history(history=history2, metrics=[metric], model_name="Model 2 (LSTM + attention)")