In [1]:
# import essential packages

import tensorflow as tf
from tensorflow.keras import datasets, layers, models, preprocessing

import os 
import math 
import numpy as np

In [2]:
# Global settings shared by the cells below.

# Data / vocabulary
max_len = 200          # passed as `maxlen` when padding the sequences
n_words = 10000        # passed as `num_words` to the IMDB loader; also the Embedding input size

# Model
dim_embedding = 256    # output size of the Embedding layer

# Training
EPOCHS = 20            # passed as `epochs` to model.fit
BATCH_SIZE = 500       # batch size for both model.fit and model.evaluate

def load_data():
    """Load the Keras IMDB dataset and pad every sequence to ``max_len``.

    Reads the module-level ``n_words`` (forwarded as ``num_words`` to the
    dataset loader) and ``max_len`` (forwarded as ``maxlen`` to
    ``pad_sequences``).

    Returns:
        ``(X_train, y_train), (X_test, y_test)`` where both ``X`` arrays have
        been passed through ``pad_sequences`` and the labels are returned as
        loaded.
    """
    train_split, test_split = datasets.imdb.load_data(num_words=n_words)

    def _pad(split):
        # Pad only the sequences of an (X, y) pair; labels pass through untouched.
        sequences, labels = split
        return preprocessing.sequence.pad_sequences(sequences, maxlen=max_len), labels

    return _pad(train_split), _pad(test_split)

In [3]:
# build model
def build_model():
    """Build the (uncompiled) embedding + global-max-pooling classifier.

    Uses the module-level ``max_len``, ``n_words`` and ``dim_embedding``
    settings. Layer stack, in order:

    1. ``Input``                – integer matrix of shape (batch, max_len)
    2. ``Embedding``            – (batch, max_len, dim_embedding)
    3. ``Dropout(0.3)``
    4. ``GlobalMaxPooling1D``   – (batch, dim_embedding)
    5. ``Dense(128, relu)``
    6. ``Dropout(0.5)``
    7. ``Dense(1, sigmoid)``    – one output in [0, 1] per sample

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    model = models.Sequential()

    # Declare the input shape with an explicit Input layer instead of the
    # Embedding `input_length=` argument, which newer Keras releases deprecate.
    # The resulting architecture and summary are the same.
    model.add(layers.Input(shape=(max_len,)))

    # Embedding maps each integer token id to a dim_embedding-sized vector:
    # (batch, max_len) -> (batch, max_len, dim_embedding).
    # Token ids must be strictly less than n_words (the vocabulary size).
    model.add(layers.Embedding(n_words, dim_embedding))
    model.add(layers.Dropout(0.3))

    # For each of the dim_embedding features, keep its maximum value across
    # the max_len sequence positions: -> (batch, dim_embedding).
    model.add(layers.GlobalMaxPooling1D())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    # Single sigmoid unit for the binary label.
    model.add(layers.Dense(1, activation='sigmoid'))

    return model

In [4]:
# Load the padded train/test splits and build the (not yet compiled) network.
(X_train, y_train), (X_test, y_test) = load_data()
model = build_model()
# Print the layer-by-layer summary of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 200, 256)          2560000   
_________________________________________________________________
dropout (Dropout)            (None, 200, 256)          0         
_________________________________________________________________
global_max_pooling1d (Global (None, 256)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               32896     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 129       
Total params: 2,593,025
Trainable params: 2,593,025
Non-trainable params: 0
______________________________________________

In [6]:
# run model
# One sigmoid output -> binary cross-entropy loss; accuracy is the only extra metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Keep the object returned by fit() under its own name so the evaluate()
# result below does not overwrite it.
# NOTE(review): the test split is also passed as validation data, so no data
# is held out separately from the per-epoch monitoring -- consider
# `validation_split` on the training data instead; confirm intent.
history = model.fit(X_train, y_train,
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    validation_data=(X_test, y_test))

# With one compiled metric, evaluate() yields [loss, accuracy].
score = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
print('Test score: {:1.5f} | Test accuracy: {:1.5f}\n'.format(score[0], score[1]))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test score: 0.85977 | Test accuracy: 0.83648

