In [39]:
import numpy 
from keras.datasets import imdb
from keras.preprocessing import sequence
from matplotlib import pyplot

# Size of the vocabulary requested from the IMDB loader.
top_words = 10000
# Fixed sequence length every review is padded/truncated to before training.
max_words = 1000

# `num_words` is the Keras 2 name of the legacy `nb_words` keyword; the old
# spelling only works through a deprecation shim that emits a warning.
(xTrain, yTrain), (xTest, yTest) = imdb.load_data(num_words=top_words)

# Pool both splits *before* padding so the exploratory cells below can look at
# the raw, variable-length sequences and the full label set.
X = numpy.concatenate((xTrain, xTest), axis=0)
y = numpy.concatenate((yTrain, yTest), axis=0)

# Bring every sequence to exactly `max_words` entries, matching the
# `input_length` the Embedding layer is built with.
xTrain = sequence.pad_sequences(xTrain, maxlen=max_words)
xTest = sequence.pad_sequences(xTest, maxlen=max_words)



In [40]:
# Shape of the pooled (train + test) array of review sequences.
X.shape

(50000,)

In [41]:
# Shape of the pooled (train + test) label array; should match X along axis 0.
y.shape

(50000,)

In [42]:
# Distinct label values present in the pooled label array.
numpy.unique(y)

array([0, 1])

In [43]:
# Flatten all review sequences into one array and count the distinct entries.
distinct_tokens = numpy.unique(numpy.hstack(X))
print("Number of words: ", len(distinct_tokens))

Number of words:  9998


In [0]:
# CNN for the IMDB problem
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Convolution1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.layers import Dropout
from keras.optimizers import SGD
# Fix NumPy's global random seed so repeated runs start from the same state.
# NOTE(review): only NumPy's generator is seeded here; whether the Keras
# backend draws all of its randomness from it is not visible in this notebook —
# confirm before relying on bit-exact reproducibility.
seed = 7
numpy.random.seed(seed)

In [62]:

# 1-D convolutional classifier over padded review sequences:
# embedding -> conv -> max-pool -> dropout -> dense head with a sigmoid output.
model = Sequential()
model.add(Embedding(top_words, 32, input_length=max_words))
# Keras 2 keyword names (filters / kernel_size / padding / pool_size) replace
# the legacy nb_filter / filter_length / border_mode / pool_length spellings,
# which only work through a deprecation shim that emits warnings.
model.add(Convolution1D(filters=64, kernel_size=3, padding='same',
                        activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(250, activation='relu'))
# Single sigmoid unit: binary classification, paired with binary cross-entropy.
model.add(Dense(1, activation='sigmoid'))

# SGD hyperparameters; the decay is tied to the planned number of epochs.
learning_rate = 0.1
epochs = 20
decay_rate = learning_rate / epochs
momentum = 0.8

sgd = SGD(lr=learning_rate, momentum=momentum, decay=decay_rate, nesterov=False)

model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_15 (Embedding)     (None, 1000, 32)          320000    
_________________________________________________________________
conv1d_21 (Conv1D)           (None, 1000, 64)          6208      
_________________________________________________________________
max_pooling1d_21 (MaxPooling (None, 500, 64)           0         
_________________________________________________________________
dropout_23 (Dropout)         (None, 500, 64)           0         
_________________________________________________________________
flatten_14 (Flatten)         (None, 32000)             0         
_________________________________________________________________
dense_27 (Dense)             (None, 250)               8000250   
_________________________________________________________________
dense_28 (Dense)             (None, 1)               

  """
  


In [63]:
from keras.callbacks import ModelCheckpoint

# Persist the weights only when validation accuracy improves, so the file
# always holds the best epoch seen so far.
# NOTE(review): the recorded log shows this Keras version reports the metric as
# 'val_acc'; a release that logs 'val_accuracy' instead would need the monitor
# key updated, otherwise no checkpoint is ever written.
filepath = 'weights.best.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                             save_best_only=True, mode='max')
callbacks_list = [checkpoint]

# NOTE(review): the test split doubles as validation data here, so the
# checkpoint is selected on the same data used for the final score; consider a
# separate validation split if an unbiased test estimate is needed.
# `epochs` is the Keras 2 name of the deprecated `nb_epoch` keyword.
model.fit(xTrain, yTrain, validation_data=(xTest, yTest), epochs=epochs,
          batch_size=128, verbose=0, callbacks=callbacks_list)

  import sys



Epoch 00001: val_acc improved from -inf to 0.50000, saving model to weights.best.hdf5

Epoch 00002: val_acc improved from 0.50000 to 0.53596, saving model to weights.best.hdf5

Epoch 00003: val_acc improved from 0.53596 to 0.61688, saving model to weights.best.hdf5

Epoch 00004: val_acc improved from 0.61688 to 0.67504, saving model to weights.best.hdf5

Epoch 00005: val_acc did not improve from 0.67504

Epoch 00006: val_acc improved from 0.67504 to 0.77596, saving model to weights.best.hdf5

Epoch 00007: val_acc did not improve from 0.77596

Epoch 00008: val_acc improved from 0.77596 to 0.83424, saving model to weights.best.hdf5

Epoch 00009: val_acc improved from 0.83424 to 0.85960, saving model to weights.best.hdf5

Epoch 00010: val_acc did not improve from 0.85960

Epoch 00011: val_acc did not improve from 0.85960

Epoch 00012: val_acc improved from 0.85960 to 0.87236, saving model to weights.best.hdf5

Epoch 00013: val_acc improved from 0.87236 to 0.87300, saving model to weights

<keras.callbacks.History at 0x7f73cfd25cf8>

In [64]:
# Score the trained model on the padded test split. The model was compiled with
# a single 'accuracy' metric, so index 1 of the result is read as accuracy.
scores = model.evaluate(xTest, yTest, verbose=0)
accuracy_percent = scores[1] * 100
print("Accuracy: %.2f%%" % accuracy_percent)

Accuracy: 87.63%
