The following experiments will help convince you that the architecture choices you’ve made are all fairly reasonable, although there’s still room for improvement:

* You used two hidden layers. Try using one or three hidden layers, and see how doing so affects validation and test accuracy.
* Try using layers with more hidden units or fewer hidden units: 32 units, 64 units, and so on.
* Try using the `mse` loss function instead of `binary_crossentropy`.
* Try using the `tanh` activation (an activation that was popular in the early days of neural networks) instead of `relu`.

In [3]:
import numpy as np
from pickle import dump, load
from keras import models
from keras import layers
from keras import optimizers

Download the IMDB sentiment dataset

In [1]:
from keras.datasets import imdb

# Fetch the IMDB reviews with the vocabulary capped via num_words=10000
# (per the Keras docs, only the most frequent words are kept).
imdb_splits = imdb.load_data(num_words=10000)
(train_data, train_labels), (test_data, test_labels) = imdb_splits

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz


In [8]:
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode a collection of integer index sequences.

    Args:
        sequences: Sized iterable whose items are lists of integer indices,
            each index expected to be smaller than ``dimension``.
        dimension: Width of every encoded row.

    Returns:
        A float NumPy array of shape ``(len(sequences), dimension)`` in which
        entry ``[i, j]`` is 1.0 when index ``j`` occurs in ``sequences[i]``
        and 0.0 otherwise.
    """
    shape = (len(sequences), dimension)
    multi_hot = np.zeros(shape)
    for row, word_indices in enumerate(sequences):
        # Fancy indexing switches on every listed column of this row at once;
        # repeated indices simply set the same cell again.
        multi_hot[row, word_indices] = 1.0
    return multi_hot

# Multi-hot encode the reviews of both splits (train first, then test).
x_train, x_test = (
    vectorize_sequences(reviews) for reviews in (train_data, test_data)
)

# Convert the labels to float32 arrays.
y_train, y_test = (
    np.asarray(labels).astype('float32')
    for labels in (train_labels, test_labels)
)

1 hidden layer

In [10]:
# One hidden layer of 16 ReLU units feeding a single sigmoid output.
model1 = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(1, activation='sigmoid'),
])

3 hidden layers

In [12]:
# Three hidden layers of 16 ReLU units each, then a single sigmoid output.
model3 = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

Validation

In [16]:
# Hold out the first 10,000 training samples for validation; the remainder
# is what the models are actually fitted on.
x_val, partial_x_train = x_train[:10000], x_train[10000:]
y_val, partial_y_train = y_train[:10000], y_train[10000:]

In [22]:
# Sanity check: number of samples held out for validation.
len(x_val)

10000

In [20]:
# Sanity check: number of samples left for training after the split.
len(partial_x_train)

15000

In [24]:
# Train the one-hidden-layer model; validation metrics are computed on
# (x_val, y_val) after every epoch and recorded in history1.
model1.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

history1 = model1.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_val, y_val),
)

Train on 15000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [25]:
# Train the three-hidden-layer model with the same optimizer, loss, epochs
# and batch size as model1 so that only the depth differs.
model3.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

history3 = model3.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_val, y_val),
)

Train on 15000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [27]:
# Score both depth variants on the test set; each result is [loss, acc].
results1 = model1.evaluate(x=x_test, y=y_test)
print(results1)

results3 = model3.evaluate(x=x_test, y=y_test)
print(results3)

[0.5237779613924026, 0.8562]
[0.9000247318983078, 0.84484]


Change the number of units per hidden layer

In [31]:
# Baseline width: two hidden layers of 16 ReLU units, sigmoid output.
model_16un = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [48]:
# Wider variant: two hidden layers of 32 ReLU units, sigmoid output.
model_32un = models.Sequential([
    layers.Dense(32, activation='relu', input_shape=(10000,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [33]:
# Widest variant: two hidden layers of 64 ReLU units, sigmoid output.
model_64un = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [37]:
# Train the 16-unit model; per-epoch validation metrics go into history_16un.
model_16un.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

history_16un = model_16un.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_val, y_val),
)

Train on 15000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [49]:
# Train the 32-unit model; per-epoch validation metrics go into history_32un.
model_32un.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

history_32un = model_32un.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_val, y_val),
)

Train on 15000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [50]:
# Train the 64-unit model; per-epoch validation metrics go into history_64un.
model_64un.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

history_64un = model_64un.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_val, y_val),
)

Train on 15000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [51]:
# Score each width variant on the test set; each result is [loss, acc].
results_16un = model_16un.evaluate(x=x_test, y=y_test)
print(results_16un)

results_32un = model_32un.evaluate(x=x_test, y=y_test)
print(results_32un)

results_64un = model_64un.evaluate(x=x_test, y=y_test)
print(results_64un)

[0.756811765446663, 0.84924]
[0.8425805935013294, 0.85188]
[1.3210625355386734, 0.85376]


MSE vs. binary crossentropy

In [53]:
# Same layer stack as model_32un; only the loss chosen at compile time differs.
model_mse = models.Sequential([
    layers.Dense(32, activation='relu', input_shape=(10000,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [54]:
# Train with mean squared error as the loss, keeping every other setting
# identical to the binary_crossentropy runs.
model_mse.compile(
    loss='mse',
    optimizer='rmsprop',
    metrics=['acc'],
)

history_mse = model_mse.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_val, y_val),
)

Train on 15000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [55]:
# Reprint the 32-unit binary_crossentropy baseline, then the MSE-trained model.
# The first entry of each result is that model's own loss (different loss
# functions), so only the second entry, accuracy, is directly comparable.
print(results_32un)

results_mse = model_mse.evaluate(x=x_test, y=y_test)
print(results_mse)

[0.8425805935013294, 0.85188]
[0.12283033714711666, 0.8532]


Activation functions

In [62]:
# Build the tanh variant in this cell: no other cell in the notebook defines
# model_tanh, so without this the cell raises NameError on a fresh kernel.
# NOTE(review): 32 units per hidden layer is assumed because the result is
# compared against results_32un below; confirm against the original experiment.
model_tanh = models.Sequential()
model_tanh.add(layers.Dense(32, activation='tanh', input_shape=(10000,)))
model_tanh.add(layers.Dense(32, activation='tanh'))
model_tanh.add(layers.Dense(1, activation='sigmoid'))

# Same optimizer, loss, epochs and batch size as the ReLU runs so that only
# the hidden-layer activation differs.
model_tanh.compile(optimizer='rmsprop',
                   loss='binary_crossentropy',
                   metrics=['acc'])

history_tanh = model_tanh.fit(partial_x_train,
                              partial_y_train,
                              epochs=20,
                              batch_size=512,
                              validation_data=(x_val, y_val))

Train on 15000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [63]:
# Reprint the 32-unit ReLU baseline, then score the tanh model on the test set
# for a side-by-side [loss, acc] comparison.
print(results_32un)

results_tanh = model_tanh.evaluate(x=x_test, y=y_test)
print(results_tanh)

[0.8425805935013294, 0.85188]
[1.0302472790443897, 0.84496]
