In [1]:
# code for loading the format for the notebook
import os

# path : store the current path to convert back to it later
# (a later cell also calls os.chdir(path), so the name must stay defined)
path = os.getcwd()
try:
    # the `formats` module is imported from ../notebook_format, so we
    # temporarily switch into that directory
    # NOTE(review): presumably load_style also reads its style files
    # relative to this directory -- confirm against formats.py
    os.chdir('../notebook_format')
    from formats import load_style
    notebook_style = load_style()
finally:
    # always switch back, even when the import or load_style fails;
    # otherwise re-running this cell would record the wrong directory
    # in `path` and every later relative path would break
    os.chdir(path)

# keep the return value as the last expression so that it is still
# rendered as the cell's output
notebook_style

In [2]:
# switch back to the working directory that was stored in `path`
# before the notebook style was loaded
os.chdir(path)
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential 
from keras.optimizers import SGD, RMSprop
from keras.layers.core import Dense, Dropout, Activation

Using Theano backend.


# Keras Basics

Basic Keras API to build a simple multi-layer neural network.

In [3]:
n_classes = 10    # digits 0 - 9
n_features = 784  # number of pixels per image once it is flattened

# load the dataset and some preprocessing step that can be skipped
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# flatten every image into a single row of n_features values;
# -1 lets numpy infer the number of samples instead of hard-coding
# 60000 / 10000, so the cell keeps working on a subset of the data
X_train = X_train.reshape(-1, n_features)
X_test = X_test.reshape(-1, n_features)

# convert to float32 and divide by 255 to rescale the pixel values
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

60000 train samples
10000 test samples


In [4]:
# one-hot encode the labels: each integer class label becomes a binary
# row vector of length n_classes.
# note: you HAVE to do this step, the model below is compiled with the
# `categorical_crossentropy` loss, which works on one-hot targets
Y_train, Y_test = (
    np_utils.to_categorical( labels, n_classes )
    for labels in ( y_train, y_test )
)

Basics of training a model:

You use `Sequential` to build models in Keras. As the name suggests, the network is defined as a linear sequence of layers: we will later use the `.add()` method to stack layers one after another to build up our network.

- You start with `Dense` (fully-connected layers), where you specify how many nodes you wish to have for the layer. Since the first layer that we add is the one that receives the input, we have to make sure that its `input_dim` parameter matches the number of features (columns) in the training set. After the first layer, you don't need to specify the size of the input anymore, because it is inferred from the output size of the previous layer.
- Then you specify the `Activation` function for that layer, and add a `Dropout` layer if you wish.
- For the last `Dense` and `Activation` layer you need to specify the number of classes as the output size and `softmax` as the activation, so that the network outputs a predicted probability for each class.

In [5]:
# define the architecture (nothing is trained yet):
# two hidden blocks of Dense -> relu -> Dropout, followed by a
# softmax output layer with one node per class
layers = [
    # only the first layer needs to be told the size of the input
    Dense( 512, input_dim = n_features ),
    Activation('relu'),
    Dropout(0.2),

    Dense(512),
    Activation('relu'),
    Dropout(0.2),

    # output layer
    Dense(n_classes),
    Activation('softmax')
]

# stack the layers in order with .add()
model = Sequential()
for layer in layers:
    model.add(layer)

In [6]:
# print a layer-by-layer summary of the model to check
# each layer's output shape and its number of parameters
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_1 (Dense)                  (None, 512)           401920      dense_input_1[0][0]              
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 512)           0           dense_1[0][0]                    
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 512)           0           activation_1[0][0]               
____________________________________________________________________________________________________
dense_2 (Dense)                  (None, 512)           262656      dropout_1[0][0]                  
___________________________________________________________________________________________

Once your model looks good, configure its learning process with `.compile()`, where you need to specify which `optimizer` to use, and the `loss` function ( `categorical_crossentropy` is the typical one for multi-class classification) and the `metrics` to track. 

Finally, `.fit()` the model by passing in the training set, the validation set, the number of epochs and the batch size. The batch size is conventionally chosen to be a power of 2 (e.g. 32, 64, 128), since such sizes tend to fit memory and hardware well; this is a rule of thumb rather than a strict requirement.

In [7]:
# configure the learning process: loss to minimize, optimizer to use
# and the metric to report next to the loss
model.compile(
    optimizer = RMSprop(),
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)

# training settings
batch_size = 128
n_epoch = 10

# the test set is passed as validation data, so the loss and accuracy
# on it are reported after every epoch
history = model.fit(
    X_train, Y_train,
    validation_data = ( X_test, Y_test ),
    nb_epoch = n_epoch,
    batch_size = batch_size,
    verbose = 1 # set it to 0 if you do not want to have progress bars
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# the history attribute is a dictionary that stores, for every epoch,
# the training score and loss ('acc', 'loss') and the validation
# score and loss ('val_acc', 'val_loss')
history.history

{'acc': [0.92478333330154416,
  0.96976666663487754,
  0.97771666663487755,
  0.98285,
  0.98548333330154414,
  0.98580000003178914,
  0.98874999996821089,
  0.98983333333333334,
  0.99070000000000003,
  0.99243333333333328],
 'loss': [0.24438897920648256,
  0.10114821036259333,
  0.074991628624002132,
  0.059861840716004373,
  0.051351515497515599,
  0.045534527353631955,
  0.038285859880285961,
  0.034420892811256147,
  0.03123444435711329,
  0.027620144065003842],
 'val_acc': [0.96860000000000002,
  0.9738,
  0.97819999999999996,
  0.98260000000000003,
  0.98250000000000004,
  0.98170000000000002,
  0.98180000000000001,
  0.98040000000000005,
  0.98309999999999997,
  0.98119999999999996],
 'val_loss': [0.10000225012749434,
  0.086733379594981674,
  0.076576751895993944,
  0.071907446014927698,
  0.072575151316914707,
  0.07798802139485779,
  0.080525846969275158,
  0.087841106514598866,
  0.081288466346391947,
  0.09558340725901944]}

In [9]:
# .evaluate gives the loss and metric evaluation score (acc) for the dataset
# as a list [loss, acc]; here the result matches the last epoch of the
# validation set's history above, since the same data was used for validation
score = model.evaluate( X_test, Y_test, verbose = 0 )
score

[0.095583408100687217, 0.98119999999999996]

In [10]:
# get_weights returns the model's parameters as a list of arrays;
# the length is 6 because we have 3 dense layers and each one
# contributes a weight matrix plus its associated bias vector
weights = model.get_weights()
print( len(weights) )

# unpack the list layer by layer. W1 should have shape (784, 512):
# 784 feature columns by the 512 dense nodes that we've specified,
# and b1 holds one bias term per node
W1, b1, W2, b2, W3, b3 = weights
print( W1.shape, b1.shape, sep = '\n' )

6
(784, 512)
(512,)


In [11]:
# predict the class of every test sample and compute the accuracy,
# i.e. the fraction of predictions that match the true labels
y_pred = model.predict_classes( X_test, verbose = 0 )
accuracy = np.mean( y_pred == y_test )
print( 'valid accuracy: %.2f%%' % ( 100 * accuracy ) )

valid accuracy: 98.12%


## Saving and loading the models

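It is not recommended to use pickle or cPickle to save a Keras model. Instead, save the model's architecture (as JSON) and its learned weights (as an HDF5 file) separately, as shown below.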

In [16]:
from keras.models import model_from_json

# 1. to_json saves the model's architecture
# 2. save_weights, save the weights learned by the model to a .h5 file
json_string = model.to_json()

# use `with` so the file is flushed and closed as soon as it is written,
# instead of leaving an open handle behind
with open('my_model_architecture.json', 'w') as json_file:
    json_file.write(json_string)
model.save_weights('my_model_weights.h5')

# reload the model, weights that corresponds to the same architecture:
# first rebuild the architecture from the json file, then load the
# saved weights into it
with open('my_model_architecture.json') as json_file:
    model1 = model_from_json( json_file.read() )
model1.load_weights('my_model_weights.h5')

In [19]:
# the reloaded model still has to be compiled again before it is used.
# this time we use another kind of optimizer; instead of an optimizer
# object you can also pass in a string, e.g. 'sgd', in which case the
# default parameters will be used.
# the choice of optimizer doesn't affect anything here, since we're
# only compiling the model and not training it any further
sgd = SGD( lr = 0.001, momentum = 0.9, decay = 1e-7 )
model1.compile(
    optimizer = sgd,
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)

In [20]:
# testing: predict the accuracy using the loaded model
# NOTE(review): model1 uses the weights saved from `model`, so when the
# notebook is run top to bottom this should presumably equal the
# accuracy printed for `model` -- confirm with a Restart & Run All
y_pred = model1.predict_classes( X_test, verbose = 0 )
accuracy = np.mean( y_pred == y_test )
print( 'valid accuracy: %.2f%%' % ( 100 * accuracy ) )

valid accuracy: 98.51%


## Reference

- [Keras Documentation](http://keras.io/) 
- [mnist_mlp example](https://github.com/fchollet/keras/blob/master/examples/mnist_mlp.py)
- [Saving Keras Model](http://keras.io/getting-started/faq/#how-can-i-save-a-keras-model)