In [25]:
from theano.sandbox import cuda
from __future__ import print_function
import numpy as np
np.random.seed(1337)

In [9]:
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Activation, Embedding
from keras.layers import LSTM
from keras.datasets import imdb

In [10]:
# Vocabulary cap: passed as nb_words when loading the dataset and as the
# input dimension of the Embedding layer.
max_features = 20000
# Every review is padded/truncated to this many timesteps by pad_sequences.
maxlen = 80
# Number of samples per gradient update (also used for evaluation batches).
batch_size = 32

In [11]:
print('Loading Data...')
# Fetch the IMDB train/test splits, limiting the vocabulary to max_features
# words via nb_words.
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
# Report how many sequences each split contains.
for n_sequences, split_name in ((len(X_train), 'train'), (len(X_test), 'test')):
    print(n_sequences, split_name + ' sequences')

Loading Data...
25000 train sequences
25000 test sequences


In [28]:
# Sanity check: max_features is a plain int (the vocabulary cap), not a
# sequence, so it cannot be indexed.
print(type(max_features))

<type 'int'>


In [18]:
print('Padding sequences (samples x time)')
# Bring every sequence in both splits to exactly maxlen (80) timesteps.
# pad_sequences returns a 2D numpy array of shape (nb_samples, nb_timesteps),
# so each split becomes a (25000, 80) matrix.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (X_train, X_test)
)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

Padding sequences (samples x time)
X_train shape: (25000, 80)
X_test shape: (25000, 80)


In [20]:
print('Build model...')
# Sequential stacks the layers in list order, which is equivalent to calling
# model.add() on each of them in turn.
model = Sequential([
    # Embedding can only be the first layer of a model. It maps each integer
    # word index (vocabulary capped at max_features = 20000) to a trainable
    # 128-dimensional dense vector; dropout=0.2 drops 20% of the embeddings.
    Embedding(max_features, 128, dropout=0.2),
    # LSTM with a 128-dimensional output. dropout_W is applied to the input
    # connections, dropout_U to the recurrent connections.
    LSTM(128, dropout_W=0.2, dropout_U=0.2),
    # Fully connected layer with a single output unit.
    Dense(1),
    # Sigmoid maps that single unit to a value in (0, 1).
    Activation('sigmoid'),
])

Build model...


In [30]:
# Print a per-layer table of output shapes, parameter counts and connections,
# followed by the total number of parameters.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_2 (Embedding)          (None, None, 128)     2560000     embedding_input_2[0][0]          
____________________________________________________________________________________________________
lstm_1 (LSTM)                    (None, 128)           131584      embedding_2[0][0]                
____________________________________________________________________________________________________
dense_3 (Dense)                  (None, 1)             129         lstm_1[0][0]                     
____________________________________________________________________________________________________
activation_3 (Activation)        (None, 1)             0           dense_3[0][0]                    
Total params: 2691713
_____________________________________________________________________

In [22]:
# compile() configures the model for training with three settings:
#   loss      - the function the model tries to minimise; binary
#               cross-entropy matches the single sigmoid output unit.
#   optimizer - chosen here by its string name ('adam').
#   metrics   - quantities reported alongside the loss to judge how well the
#               model is doing; here, accuracy.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [26]:
print('Training Model...')
# fit() trains the model for a fixed number of epochs:
#   X_train / y_train - input data and target labels
#   batch_size        - number of samples per gradient update
#   nb_epoch          - number of passes over the training data
#   validation_data   - data the model is validated on after each epoch
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation metrics and the final evaluation below are computed on
# the same samples.
model.fit(
    X_train, y_train,
    batch_size=batch_size,
    nb_epoch=10,
    validation_data=(X_test, y_test)
)

# evaluate() computes the loss (and the compiled accuracy metric) on the
# given data, batch by batch.
score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)

print('Test Score:', score)
print('Test Accuracy:', acc)

Training Model...
Train on 25000 samples, validate on 25000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Score: 0.660858399563
Test Accuracy: 0.80808
