# IMDB: recurrent neural networks

## Data preprocessing

### Required imports

In [1]:
from tensorflow import keras
from keras.datasets import imdb
from keras.preprocessing import sequence
import numpy as np
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


### Processing

Load the training and test data.  To limit computation time, we restrict the number of words to 5,000.

In [2]:
# Cap on the vocabulary size; also reused later as the embedding input dimension.
num_words = 5_000

# load_data returns a (train, test) pair, each of which is an (inputs, labels) pair.
train_split, test_split = imdb.load_data(num_words=num_words)
x_train, y_train = train_split
x_test, y_test = test_split

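Since the reviews vary in length, and we prefer to limit the computation time, we will base the classification on at most 100 tokens of each input sequence. Note that with its default settings (`padding='pre'`, `truncating='pre'`), `pad_sequences` keeps the *last* 100 tokens of longer reviews and left-pads shorter reviews with zeros.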

In [3]:
# Fixed sequence length fed to the network.
feature_length = 100

# The 'pre' values are the pad_sequences defaults, spelled out here so it is
# visible that long reviews lose their *beginning* and short reviews are
# zero-padded on the left.
pad_options = dict(maxlen=feature_length, padding='pre', truncating='pre')
x_train = sequence.pad_sequences(x_train, **pad_options)
x_test = sequence.pad_sequences(x_test, **pad_options)

Now the training and test inputs are 2D arrays. We split the training set into a subset for actual training, and one for validation.  First we seed the random number generator to ensure reproducibility. In this case, we will use part of the 25000 training examples as validation data.

In [4]:
# Seed NumPy's global random number generator so that code drawing from it
# gives the same results on every Restart & Run All.
np.random.seed(1234)

In [5]:
# Hold out part of the training data for validation (default test_size, i.e.
# a 75% / 25% split).
# random_state is passed explicitly so the split does not depend on whatever
# state the global NumPy generator happens to be in when this cell runs.
# NOTE: this cell overwrites x_train / y_train, so running it more than once
# splits the already reduced training set again; re-run from the data loading
# cell instead.
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train,
                                                  random_state=1234)

## GRU

### Required imports & model definition

In [6]:
from keras.layers import Activation, Dense, Dropout
from keras.layers import Embedding
from keras.layers import GRU
from keras.models import Sequential
from keras.optimizers import Adam

Again, to limit training times, we restrict ourselves to a small embedding dimension and a small number of recurrent units.

In [8]:
# Size of each word embedding vector and of the GRU state.
vector_length = 64
num_units = 64

# Layer stack handed to the constructor as a list rather than via .add().
# mask_zero=True makes the embedding treat index 0 (the padding value) as
# masked for the recurrent layer that follows.
model = Sequential([
    Embedding(num_words, vector_length, mask_zero=True,
              input_length=feature_length),
    GRU(num_units),
    Dropout(rate=0.5),
    Dense(1),
    Activation('sigmoid'),
])

In [9]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 100, 64)           320000    
_________________________________________________________________
gru_2 (GRU)                  (None, 64)                24768     
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
_________________________________________________________________
activation_2 (Activation)    (None, 1)                 0         
Total params: 344,833
Trainable params: 344,833
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Binary classification: cross-entropy loss on the sigmoid output, Adam with
# its default settings, accuracy reported alongside the loss.
optimizer = Adam()
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

###    Training

In [11]:
# Train for 10 epochs in mini-batches of 64, scoring the held-out validation
# split after every epoch; the per-epoch metrics are kept in `history`.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=10,
    batch_size=64,
)

Instructions for updating:
Use tf.cast instead.
Train on 18750 samples, validate on 6250 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


The training accuracy is much better than the validation accuracy, so the model is likely heavily overfitting the training data.

### Testing

In [12]:
# Score the trained model on the untouched test set; the result is the loss
# followed by the metrics requested at compile time (here: accuracy).
model.evaluate(x_test, y_test)



[0.6901801983165741, 0.82452]

## LSTM

### Required imports & model definition

In [13]:
from keras.layers import LSTM

Again, to limit training times, we restrict ourselves to a small embedding dimension and a small number of recurrent units.

In [14]:
# Size of each word embedding vector and of the LSTM state.
vector_length = 64
num_units = 64

# Layer stack handed to the constructor as a list rather than via .add().
# mask_zero=True makes the embedding treat index 0 (the padding value) as
# masked for the recurrent layer that follows.
# NOTE: this rebinds `model`; the previously trained network is no longer
# reachable under that name.
model = Sequential([
    Embedding(num_words, vector_length, mask_zero=True,
              input_length=feature_length),
    LSTM(num_units),
    Dropout(rate=0.5),
    Dense(1),
    Activation('sigmoid'),
])

In [15]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 100, 64)           320000    
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
_________________________________________________________________
activation_3 (Activation)    (None, 1)                 0         
Total params: 353,089
Trainable params: 353,089
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Binary classification: cross-entropy loss on the sigmoid output, Adam with
# its default settings, accuracy reported alongside the loss.
optimizer = Adam()
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

###    Training

In [17]:
# Train for 10 epochs in mini-batches of 64, scoring the held-out validation
# split after every epoch; the per-epoch metrics are kept in `history`.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=10,
    batch_size=64,
)

Train on 18750 samples, validate on 6250 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


The training accuracy is much better than the validation accuracy, so the model is likely heavily overfitting the training data.

### Testing

In [18]:
# Score the trained model on the untouched test set; the result is the loss
# followed by the metrics requested at compile time (here: accuracy).
model.evaluate(x_test, y_test)



[0.6544363550662995, 0.81168]