In [3]:
import numpy
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
# Select the Theano-style ('th') image layout, i.e. channels first:
# each image is (channels, rows, cols), matching the (1, 28, 28)
# input_shape given to the convolution layers in this notebook.
K.set_image_dim_ordering('th')

%pylab inline

Using Theano backend.


Populating the interactive namespace from numpy and matplotlib


In [4]:
# Seed NumPy's global random number generator so that repeated runs
# draw the same sequence of random numbers.
seed = 9103
numpy.random.seed(seed)

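With the Theano (`'th'`) dimension ordering selected above, Keras's two-dimensional convolution layers expect each image with the dimensions [channels][rows][columns]. MNIST digits are grayscale, so there is a single channel, and a batch of images has the shape [samples][1][28][28].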

In [5]:
# Fetch the MNIST train/test split through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Convert to float32 and lay the images out as
# [samples][channels][rows][cols] with a single channel.
X_train = X_train.astype('float32').reshape(len(X_train), 1, 28, 28)
X_test = X_test.astype('float32').reshape(len(X_test), 1, 28, 28)

In [6]:
# NOTE: this cell rebinds X_*/y_* in place, so running it a second time
# without reloading the data rescales and re-encodes the arrays again.

# Scale the inputs from the 0-255 range down to 0-1.
X_train, X_test = X_train / 255, X_test / 255

# One-hot encode the labels.
y_train, y_test = [np_utils.to_categorical(labels) for labels in (y_train, y_test)]

# The width of the encoded label matrix is the number of output classes.
num_classes = y_test.shape[1]

In [12]:
def cnn_model(n_filters=32, kernel_size=10, pool_size=(2, 2),
              dropout_rate=0.2, hidden_units=(128, 40),
              input_shape=(1, 28, 28), n_classes=None):
    """Build and compile a single-convolution CNN classifier.

    Called with no arguments this builds the same network as before:
    one 32-filter 10x10 'valid' convolution, 2x2 max-pooling, dropout
    of 0.2, then ReLU dense layers of 128 and 40 units and a softmax
    output layer. The keyword arguments expose those hard-coded values
    so variations can be tried without editing the function.

    Parameters
    ----------
    n_filters : int
        Number of convolution filters.
    kernel_size : int
        Side length of the square convolution kernel.
    pool_size : tuple
        Pooling window passed to ``MaxPooling2D``.
    dropout_rate : float
        Rate passed to the ``Dropout`` layer that follows the pooling.
    hidden_units : sequence of int
        Width of each ReLU ``Dense`` layer, in order, placed before the
        output layer.
    input_shape : tuple
        Shape of one input image, given to the convolution layer.
    n_classes : int or None
        Size of the softmax output layer. ``None`` uses the
        notebook-level ``num_classes``.

    Returns
    -------
    The compiled ``Sequential`` model (categorical cross-entropy loss,
    'adam' optimizer, accuracy metric).
    """
    if n_classes is None:
        # Fall back to the value derived from the one-hot encoded labels.
        n_classes = num_classes

    model = Sequential()

    # NOTE(review): no activation is passed to the convolution layer, so it
    # relies on the layer's default -- confirm this is intended.
    model.add(Convolution2D(
        n_filters, kernel_size, kernel_size,
        border_mode='valid',
        input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Dropout(dropout_rate))
    model.add(Flatten())

    # Fully connected stack: one ReLU layer per entry in hidden_units.
    for units in hidden_units:
        model.add(Dense(units, activation='relu'))

    model.add(Dense(n_classes, activation='softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])

    return model

In [13]:
# Create a freshly initialised network.
model = cnn_model()

# Train for 10 epochs in mini-batches of 200.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* figures and the final score come from the same samples.
model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    nb_epoch=10,
    batch_size=200,
    verbose=2)

# The model is compiled with the accuracy metric, so scores[1] is the
# accuracy; report its complement as an error percentage.
scores = model.evaluate(X_test, y_test, verbose=0)
print("CNN Error: %.2f%%" % (100 - scores[1] * 100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
39s - loss: 0.2950 - acc: 0.9134 - val_loss: 0.1001 - val_acc: 0.9687
Epoch 2/10
37s - loss: 0.0963 - acc: 0.9713 - val_loss: 0.0704 - val_acc: 0.9777
Epoch 3/10
34s - loss: 0.0695 - acc: 0.9784 - val_loss: 0.0581 - val_acc: 0.9814
Epoch 4/10
35s - loss: 0.0541 - acc: 0.9830 - val_loss: 0.0548 - val_acc: 0.9817
Epoch 5/10
35s - loss: 0.0432 - acc: 0.9863 - val_loss: 0.0561 - val_acc: 0.9823
Epoch 6/10
38s - loss: 0.0354 - acc: 0.9888 - val_loss: 0.0505 - val_acc: 0.9855
Epoch 7/10
34s - loss: 0.0295 - acc: 0.9905 - val_loss: 0.0521 - val_acc: 0.9844
Epoch 8/10
34s - loss: 0.0243 - acc: 0.9921 - val_loss: 0.0478 - val_acc: 0.9858
Epoch 9/10
34s - loss: 0.0225 - acc: 0.9924 - val_loss: 0.0581 - val_acc: 0.9828
Epoch 10/10
34s - loss: 0.0197 - acc: 0.9932 - val_loss: 0.0502 - val_acc: 0.9846
CNN Error: 1.54%


In [14]:
def cnn_model_jh(n_filters=32, kernel_size=10, pool_size=(2, 2),
                 dropout_rate=0.2, hidden_units=(128,),
                 input_shape=(1, 28, 28), n_classes=None):
    """Build and compile the single-hidden-layer CNN variant.

    Called with no arguments this builds the same network as before:
    one 32-filter 10x10 'valid' convolution, 2x2 max-pooling, dropout
    of 0.2, a 128-unit ReLU dense layer and a softmax output layer.
    The keyword arguments expose those hard-coded values so that kernel
    size, dropout rate and similar settings can be varied from the call
    site instead of by editing the function.

    Parameters
    ----------
    n_filters : int
        Number of convolution filters.
    kernel_size : int
        Side length of the square convolution kernel.
    pool_size : tuple
        Pooling window passed to ``MaxPooling2D``.
    dropout_rate : float
        Rate passed to the ``Dropout`` layer that follows the pooling.
    hidden_units : sequence of int
        Width of each ReLU ``Dense`` layer, in order, placed before the
        output layer.
    input_shape : tuple
        Shape of one input image, given to the convolution layer.
    n_classes : int or None
        Size of the softmax output layer. ``None`` uses the
        notebook-level ``num_classes``.

    Returns
    -------
    The compiled ``Sequential`` model (categorical cross-entropy loss,
    'adam' optimizer, accuracy metric).
    """
    if n_classes is None:
        # Fall back to the value derived from the one-hot encoded labels.
        n_classes = num_classes

    model = Sequential()

    # Convolutional feature extractor.
    # NOTE(review): no activation is passed to the convolution layer, so it
    # relies on the layer's default -- confirm this is intended.
    model.add(Convolution2D(
        n_filters, kernel_size, kernel_size,
        border_mode='valid',
        input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=pool_size))

    model.add(Dropout(dropout_rate))
    model.add(Flatten())

    # Fully connected stack: one ReLU layer per entry in hidden_units.
    for units in hidden_units:
        model.add(Dense(units, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])

    return model

In [15]:
# Create a freshly initialised instance of the variant network.
model_jh = cnn_model_jh()

# Train for 16 epochs in mini-batches of 50.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* figures and the final score come from the same samples.
model_jh.fit(X_train, y_train,
             validation_data=(X_test, y_test),
             nb_epoch=16,
             batch_size=50,
             verbose=2)

# The model is compiled with the accuracy metric, so scores[1] is the
# accuracy; report its complement as an error percentage.
scores = model_jh.evaluate(X_test, y_test, verbose=0)
print("Improved CNN Error: %.2f%%" % (100 - scores[1] * 100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/16
45s - loss: 0.1824 - acc: 0.9455 - val_loss: 0.0745 - val_acc: 0.9771
Epoch 2/16
66s - loss: 0.0760 - acc: 0.9767 - val_loss: 0.0596 - val_acc: 0.9821
Epoch 3/16
60s - loss: 0.0545 - acc: 0.9830 - val_loss: 0.0600 - val_acc: 0.9823
Epoch 4/16
60s - loss: 0.0435 - acc: 0.9865 - val_loss: 0.0473 - val_acc: 0.9864
Epoch 5/16
60s - loss: 0.0340 - acc: 0.9894 - val_loss: 0.0550 - val_acc: 0.9844
Epoch 6/16
59s - loss: 0.0296 - acc: 0.9902 - val_loss: 0.0556 - val_acc: 0.9850
Epoch 7/16
60s - loss: 0.0269 - acc: 0.9915 - val_loss: 0.0728 - val_acc: 0.9818
Epoch 8/16
60s - loss: 0.0257 - acc: 0.9918 - val_loss: 0.0706 - val_acc: 0.9826
Epoch 9/16
59s - loss: 0.0227 - acc: 0.9926 - val_loss: 0.0727 - val_acc: 0.9843
Epoch 10/16
61s - loss: 0.0194 - acc: 0.9939 - val_loss: 0.0612 - val_acc: 0.9862
Epoch 11/16
65s - loss: 0.0197 - acc: 0.9936 - val_loss: 0.0857 - val_acc: 0.9824
Epoch 12/16
63s - loss: 0.0187 - acc: 0.9940 - val_loss: 

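Error rates from the configurations tried:

- 16 epochs, batch size 200 - 1.39%
- 10 epochs, 10 x 10 kernel - 1.38%
- 10 epochs, 20 x 20 kernel - 1.85%
- two conv layers, 10 x 10 and 3 x 3 kernels - 2.16%
- two conv layers, 10 x 10 and 8 x 8 kernels - 1.79%
- 10 x 10 kernel, Dropout = 0.8 - (no result recorded)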