In [3]:
from keras.datasets import imdb

#loading the data
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

#preparing the data
#Encoding the integer sequences present in each review of dataset into a binary matrix
import numpy as np

#defining a function to binarise/vectorise the integer reviews
#function takes no of reviews(sequences) and 10000 element in each review as I/p matrix (???)
def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension)) #creates an all zero matrix of same i/p matrix size
    for i, sequence in enumerate(sequences):        #for loop counts by enumerating over each review/sequences to check sequence/integer inside it
        results[i, sequence] = 1.                   #inside result matrix it sets specific indices corresponding to the integer/sequence inside i/sequences to 1
    return results                                  #all other indices in result will be zero => a binary matrix

x_train = vectorize_sequences(train_data)           #calling fn to binarise the train_data
x_test = vectorize_sequences(test_data)             #calling fn to binarise the test_data

y_train = np.asarray(train_labels).astype('float32')#vectorising train data/review labels
y_test = np.asarray(test_labels).astype('float32')  #vectorising test data/review labels

#setting aside validation data set for monitoring the accuracy of the model on data it has never seen before while the training is ongoing
x_val = x_train[:10000]                            #takes first 10000 sequences/reviews for validation
partial_x_train = x_train[10000:]                  #takes the remaining sequences/revies for training 

y_val = y_train[:10000]                            #takes first 10000 targets for validation
partial_y_train = y_train[10000:]                  #takes the remaining targets for training


In [15]:
#building the neural network/ defining the network layers
#we use 3 Dense type layer, first 2 with o/p size 16(no of neurons) having relu fn and the last one 
#with o/p size 1 having sigmoid fn giving a probability info btwn 0 and 1 i.e, -ve and +ve review probability
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,))) #i/p shape is 10000 since we created the binary input with each review vector(results) having 10000 elements
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

#introducing KerasCallbacks - earlystopping and modelceckpoint
import keras

callbacks_list = [                                              #describes a list of callbacks 
    keras.callbacks.EarlyStopping(                              #earlystopping stops   
        monitor='val_acc',                                          #when the metric(acc here) being observed
        patience=1),                                            #stops improving for more than 2 epochs i.e, 3
    keras.callbacks.ModelCheckpoint(                            #saving the model and its weights
        filepath='new_model.h5',                                 #in this filepath
        monitor='val_loss',                                     #will not update the saved model if the val_loss 
        save_best_only=True)                                    #is not improving
                                        
]

#configuring learning process with loss function-rmsprop, optimiser-bin crossentropy and metrics-accuracy
model.compile(optimizer='rmsprop',
             loss='binary_crossentropy',
             metrics=['acc'])

#network training- iterate on the traning data by calling the fit method of the model
#training is done with 20 epochs/iterations over all samples in partial x&y train datas and
# in mini-batches of 512 samples at the same time training is validated by checking
#loss and accuracy on 10000 samples set aside as validation data set
history = model.fit(partial_x_train,
                   partial_y_train,
                   epochs=20,
                   batch_size=512,
                   callbacks=callbacks_list, 
                   validation_data=(x_val, y_val))

Train on 15000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


In [13]:
#we asked the model to run 20 epochs but as soon as the val_loss stops improving the training 
#stopped because of keras callback early stopping
#then the model with the best weights are stored in the my_model.h5 file
#now loading the saved model and evaluating it to see if it gives good results and its a cross check here to see if the model with best weights are saved

from keras.models import load_model

saved_model = load_model('new_model.h5')                          #loading the saved model
saved_model.summary()


Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 16)                160016    
_________________________________________________________________
dense_14 (Dense)             (None, 16)                272       
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 17        
Total params: 160,305
Trainable params: 160,305
Non-trainable params: 0
_________________________________________________________________


In [14]:
test_loss, test_acc = model.evaluate(x_test, y_test)
print('test_loss: {} \n test_acc: {}'.format(test_loss,test_acc*100))

test_loss: 0.294358599811 
 test_acc: 88.1839990616
