In [1]:
import numpy as np
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.preprocessing.text import Tokenizer
import matplotlib.pyplot as plt

# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Seed NumPy's global random number generator so NumPy-driven randomness repeats across runs.
np.random.seed(42)

Using TensorFlow backend.


## Loading the data

The dataset comes preloaded in Keras, which means I don't need to open or read any files manually: one simple command gets us the training and testing data. The command that loads the data also splits the reviews into training and testing sets, each with its labels. The `num_words` parameter controls how many of the most frequent words are kept; I am setting it to 1000.

In [2]:
# Load the IMDB reviews that ship with Keras, restricted to a 1000-word vocabulary.
# load_data returns one (features, labels) pair for training and another for testing.
train_split, test_split = imdb.load_data(num_words=1000)
x_train, y_train = train_split
x_test, y_test = test_split

# Show how many reviews each split holds.
for reviews in (x_train, x_test):
    print(reviews.shape)

(25000,)
(25000,)


## Examining the data
Notice that the data has already been pre-processed: every word has been replaced by a number, and each review comes in as a list of the numbers of the words it contains, in order. For example, if the word 'the' is number 4 in our dictionary, then a review that contains the word 'the' has a 4 in its list. Words that fall outside the 1000 most frequent ones we kept are all replaced by the number 2, which is why so many 2's appear below.

The output comes as a vector of 1's and 0's, where 1 is a positive sentiment for the review, and 0 is negative.

In [3]:
# Show the first training review (a list of word indices) followed by its label.
print(x_train[0], y_train[0], sep='\n')

[1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 2, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2, 19, 14, 22, 4, 2, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 2, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2, 2, 16, 480, 66, 2, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 2, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 2, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 2, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32]
1


## One-hot encoding the data
Now, let's turn the input sequences into (0,1)-vectors. For example, if the pre-processed sequence contains the number 14, then in the processed vector, the entry at index 14 (counting from 0) will be 1.

In [4]:
# Encode every review as a fixed-length 0/1 vector of length 1000:
# position i holds 1 when word index i occurs anywhere in the review.
tokenizer = Tokenizer(num_words=1000)
x_train, x_test = [
    tokenizer.sequences_to_matrix(sequences, mode='binary')
    for sequences in (x_train, x_test)
]
print(x_train[0])

[0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0.
 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 1. 0.
 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0.
 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0.
 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

And we'll also one-hot encode the output.

In [5]:
# Turn the integer class labels into one-hot rows with one column per class.
num_classes = 2
y_train, y_test = [
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
]

# Confirm that both label arrays now have one column per class.
for encoded in (y_train, y_test):
    print(encoded.shape)

(25000, 2)
(25000, 2)


## Building the model architecture

In [6]:
# Build a feed-forward classifier: one hidden layer of 512 ReLU units,
# dropout of 0.5, and a softmax output with one unit per class.
model = Sequential([
    Dense(512, activation='relu', input_dim=1000),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])
model.summary()

# Configure training: categorical cross-entropy loss, RMSprop optimizer, accuracy metric.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

W0719 18:29:40.188429  6340 deprecation_wrapper.py:119] From c:\users\aditya\appdata\local\programs\python\python37\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0719 18:29:40.672129  6340 deprecation_wrapper.py:119] From c:\users\aditya\appdata\local\programs\python\python37\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0719 18:29:40.812438  6340 deprecation_wrapper.py:119] From c:\users\aditya\appdata\local\programs\python\python37\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0719 18:29:40.968436  6340 deprecation_wrapper.py:119] From c:\users\aditya\appdata\local\programs\python\python37\lib\site-packages\keras\backend\tensorflow_backend.py:133: The name tf.placeholder_with_default 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               512512    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 1026      
Total params: 513,538
Trainable params: 513,538
Non-trainable params: 0
_________________________________________________________________


## Training the model

In [7]:
# Fit for 10 epochs with mini-batches of 32, printing one summary line per epoch.
# NOTE(review): the test split is passed as validation data, so the val_* numbers
# are measured on the same samples that the evaluation cell scores afterwards.
fit_options = dict(
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
    verbose=2,
)
hist = model.fit(x_train, y_train, **fit_options)

W0719 18:56:13.029227  6340 deprecation.py:323] From c:\users\aditya\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 25000 samples, validate on 25000 samples
Epoch 1/10
 - 33s - loss: 0.3982 - acc: 0.8268 - val_loss: 0.3549 - val_acc: 0.8478
Epoch 2/10
 - 29s - loss: 0.3335 - acc: 0.8668 - val_loss: 0.3393 - val_acc: 0.8611
Epoch 3/10
 - 30s - loss: 0.3218 - acc: 0.8750 - val_loss: 0.3571 - val_acc: 0.8621
Epoch 4/10
 - 30s - loss: 0.3126 - acc: 0.8838 - val_loss: 0.4341 - val_acc: 0.8425
Epoch 5/10
 - 30s - loss: 0.3058 - acc: 0.8897 - val_loss: 0.4027 - val_acc: 0.8563
Epoch 6/10
 - 26s - loss: 0.2948 - acc: 0.8960 - val_loss: 0.4026 - val_acc: 0.8604
Epoch 7/10
 - 22s - loss: 0.2905 - acc: 0.9009 - val_loss: 0.4245 - val_acc: 0.8589
Epoch 8/10
 - 23s - loss: 0.2766 - acc: 0.9088 - val_loss: 0.4318 - val_acc: 0.8593
Epoch 9/10
 - 25s - loss: 0.2673 - acc: 0.9151 - val_loss: 0.4607 - val_acc: 0.8585
Epoch 10/10
 - 22s - loss: 0.2514 - acc: 0.9210 - val_loss: 0.5081 - val_acc: 0.8553


## Evaluating the model

In [8]:
# Score the trained model on the test split. evaluate returns the loss followed
# by the compiled metrics, so the accuracy is the second value.
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score
print("Accuracy: ", test_accuracy)

Accuracy:  0.85532


The trained model has a test accuracy of 85.53%. Let's make some changes to the model architecture to try to improve it. Adding one more hidden layer, together with dropout to reduce overfitting, might help. Let's explore.

In [9]:
# Build a deeper classifier: hidden layers of 512 and 256 ReLU units, each
# followed by dropout (0.5, then 0.3), and a softmax output with one unit per class.
model = Sequential([
    Dense(512, activation='relu', input_dim=1000),
    Dropout(0.5),
    Dense(256, activation='relu'),   # second hidden layer, new in this experiment
    Dropout(.3),                     # dropout of 0.3 after the new hidden layer
    Dense(num_classes, activation='softmax'),
])
model.summary()

# Configure training: categorical cross-entropy loss, RMSprop optimizer, accuracy metric.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 512)               512512    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_3 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 514       
Total params: 644,354
Trainable params: 644,354
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Fit for 10 epochs with mini-batches of 32, printing one summary line per epoch.
# NOTE(review): the test split is passed as validation data, so the val_* numbers
# are measured on the same samples that the evaluation cell scores afterwards.
fit_options = dict(
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
    verbose=2,
)
hist = model.fit(x_train, y_train, **fit_options)

Train on 25000 samples, validate on 25000 samples
Epoch 1/10
 - 29s - loss: 0.4207 - acc: 0.8124 - val_loss: 0.3674 - val_acc: 0.8474
Epoch 2/10
 - 26s - loss: 0.3571 - acc: 0.8575 - val_loss: 0.3489 - val_acc: 0.8595
Epoch 3/10
 - 26s - loss: 0.3342 - acc: 0.8680 - val_loss: 0.3371 - val_acc: 0.8624
Epoch 4/10
 - 27s - loss: 0.3219 - acc: 0.8742 - val_loss: 0.3659 - val_acc: 0.8626
Epoch 5/10
 - 29s - loss: 0.3154 - acc: 0.8789 - val_loss: 0.3689 - val_acc: 0.8623
Epoch 6/10
 - 29s - loss: 0.3092 - acc: 0.8860 - val_loss: 0.3564 - val_acc: 0.8591
Epoch 7/10
 - 30s - loss: 0.3009 - acc: 0.8921 - val_loss: 0.3561 - val_acc: 0.8608
Epoch 8/10
 - 28s - loss: 0.2891 - acc: 0.9012 - val_loss: 0.4220 - val_acc: 0.8618
Epoch 9/10
 - 29s - loss: 0.2762 - acc: 0.9055 - val_loss: 0.3971 - val_acc: 0.8592
Epoch 10/10
 - 30s - loss: 0.2587 - acc: 0.9150 - val_loss: 0.4278 - val_acc: 0.8574


In [11]:
# Score the trained model on the test split. evaluate returns the loss followed
# by the compiled metrics, so the accuracy is the second value.
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score
print("Accuracy: ", test_accuracy)

Accuracy:  0.85736


Although the gain is small, the new model with an extra hidden layer and an extra dropout layer reaches an accuracy of 85.74%, which is higher than the previous model's 85.53%.

Let's experiment with applying reduced dropout of 0.2 and 0.1 to the corresponding dropout layers.

In [14]:
# Rebuild the two-hidden-layer classifier (512 and 256 ReLU units) with
# lighter dropout, followed by a softmax output with one unit per class.
model = Sequential([
    Dense(512, activation='relu', input_dim=1000),
    Dropout(0.2),                    # first dropout rate lowered from 0.5 to 0.2
    Dense(256, activation='relu'),   # second hidden layer
    Dropout(0.1),                    # second dropout rate lowered from 0.3 to 0.1
    Dense(num_classes, activation='softmax'),
])
model.summary()

# Configure training: categorical cross-entropy loss, RMSprop optimizer, accuracy metric.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 512)               512512    
_________________________________________________________________
dropout_6 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 256)               131328    
_________________________________________________________________
dropout_7 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 2)                 514       
Total params: 644,354
Trainable params: 644,354
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Fit for 10 epochs with mini-batches of 32, printing one summary line per epoch.
# NOTE(review): the test split is passed as validation data, so the val_* numbers
# are measured on the same samples that the evaluation cell scores afterwards.
fit_options = dict(
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
    verbose=2,
)
hist = model.fit(x_train, y_train, **fit_options)

Train on 25000 samples, validate on 25000 samples
Epoch 1/10
 - 28s - loss: 0.3951 - acc: 0.8311 - val_loss: 0.4281 - val_acc: 0.8294
Epoch 2/10
 - 26s - loss: 0.3339 - acc: 0.8664 - val_loss: 0.3739 - val_acc: 0.8537
Epoch 3/10
 - 29s - loss: 0.3001 - acc: 0.8839 - val_loss: 0.3587 - val_acc: 0.8608
Epoch 4/10
 - 29s - loss: 0.2608 - acc: 0.9058 - val_loss: 0.4144 - val_acc: 0.8500
Epoch 5/10
 - 29s - loss: 0.2155 - acc: 0.9285 - val_loss: 0.4353 - val_acc: 0.8513
Epoch 6/10
 - 27s - loss: 0.1708 - acc: 0.9478 - val_loss: 0.5275 - val_acc: 0.8510
Epoch 7/10
 - 27s - loss: 0.1335 - acc: 0.9652 - val_loss: 0.6446 - val_acc: 0.8369
Epoch 8/10
 - 28s - loss: 0.1184 - acc: 0.9730 - val_loss: 0.8396 - val_acc: 0.8350
Epoch 9/10
 - 32s - loss: 0.1054 - acc: 0.9782 - val_loss: 0.7660 - val_acc: 0.8441
Epoch 10/10
 - 29s - loss: 0.0916 - acc: 0.9817 - val_loss: 0.9427 - val_acc: 0.8448


In [16]:
# Score the trained model on the test split. evaluate returns the loss followed
# by the compiled metrics, so the accuracy is the second value.
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score
print("Accuracy: ", test_accuracy)

Accuracy:  0.8448


It can be observed that the accuracy takes a hit, dropping from 85.74% to 84.48%, when the dropout rate is changed from 0.5 to 0.2 for the first dropout layer and from 0.3 to 0.1 for the second dropout layer. For the chosen network configuration, reducing the dropout rate in the hidden layers did not lift performance. In fact, accuracy was worse than that of the single-hidden-layer baseline (85.53%).