In [None]:
import numpy as np
from math import sqrt
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from tensorflow.keras.datasets import imdb

In [None]:
# Vocabulary cap handed to the IMDB loader as `num_words`
# (only the most common words up to this count are kept).
max_features = 20000
# Texts are cut after this number of words.
maxlen = 200

print('Loading data...')
train_split, test_split = imdb.load_data(num_words=max_features)
x_train, y_train = train_split
x_test, y_test = test_split

# Report how many sequences each split contains.
for split, caption in ((x_train, 'train sequences'), (x_test, 'test sequences')):
    print(len(split), caption)

In [None]:
# Summary statistics (min / max / mean / standard deviation) of the
# sequence lengths in x_train.
seq_lengths = [len(seq) for seq in x_train]
num_seqs = len(seq_lengths)

imin = min(seq_lengths)
imax = max(seq_lengths)
iavg = sum(seq_lengths) / num_seqs

# Population standard deviation: each deviation must be squared *before*
# summing. Summing the raw deviations first and squaring the total always
# yields ~0, because deviations from the mean cancel each other out.
std = sqrt(sum((n - iavg) ** 2 for n in seq_lengths) / num_seqs)

print("max: {}, min: {}, avg: {}, std: {:.15f}".format(imax, imin, iavg, std))

In [None]:
print('Pad sequences (samples x time)')
# Bring every sequence in both splits to exactly `maxlen` entries.
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
# Labels as NumPy arrays.
y_train, y_test = np.array(y_train), np.array(y_test)

# Embedding -> two stacked bidirectional LSTMs -> dropout -> sigmoid output.
model = Sequential([
    Embedding(max_features, 256, input_length=maxlen),
    # The first recurrent layer returns the full sequence so that the
    # second recurrent layer can consume it.
    Bidirectional(LSTM(128, return_sequences=True)),
    Bidirectional(LSTM(128)),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Other optimizers / optimizer configurations are worth trying here.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Define Callbacks
# The model is compiled with metrics=['accuracy'], so TF 2.x tf.keras logs the
# validation metric as 'val_accuracy'. Monitoring the old 'val_acc' key means
# the checkpoint is never written and early stopping never triggers (both
# callbacks only emit a warning about the missing metric).
monitored_metric = 'val_accuracy'

# Keep only the best model (highest validation accuracy) on disk.
# NOTE(review): newer Keras releases may require a '.keras' or '.h5' suffix
# for this path -- confirm against the installed version.
filepath = "imdb-best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor=monitored_metric, verbose=1,
                             save_best_only=True, mode='max')
# Stop training once validation accuracy has not improved for 50 epochs.
early_stop = EarlyStopping(monitor=monitored_metric, mode='max', verbose=1, patience=50)
# Per-epoch metrics are written to a CSV file, overwritten on every run.
csv_history = 'imdb-history.csv'
csv_logger = CSVLogger(csv_history, append=False)
callbacks = [checkpoint, early_stop, csv_logger]

In [None]:
# Train the model, evaluating on the test split after every epoch and
# running the callbacks assembled in `callbacks`.
print('Train...')
batch_size = 1024
num_epochs = 150
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=num_epochs,
          # Must be a tuple (x_val, y_val): some tf.keras versions treat a
          # list here as a multi-input `x` with no targets and fail.
          validation_data=(x_test, y_test),
          callbacks=callbacks)