In [3]:
"""
keras_mnist_cnn_val.py

Demonstrating convolutional neural networks
using Keras with a TensorFlow backend. Keras
is a high-level machine learning package
which supports convolutional, recurrent, and
standard neural networks, as well as allowing
you to define your own layers.
"""
import numpy as np

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
from tensorflow.examples.tutorials.mnist import input_data

import matplotlib.pyplot as plt

# Batch size passed to model.fit in convolution()
batch_size = 128
# Number of units in the final softmax layer (one per digit class)
num_classes = 10
# Maximum number of epochs passed to model.fit in convolution()
epochs = 10

# Number of examples get_data() keeps from the front of the training split
train_ex = 1000
# Image height and width used when reshaping the image arrays
img_h, img_w = 28, 28

def get_data():
    """
    Load MNIST and return the train, tune (validation) and test splits.

    Labels are loaded one-hot encoded. The training split is truncated
    to its first ``train_ex`` examples; when ``train_ex`` is None the
    whole training split is used. Every image array is reshaped to
    (num_examples, img_h, img_w, 1), the layout the first Conv2D layer
    is given as its input shape.

    Returns:
        Tuple ``(x_train, y_train, x_tune, y_tune, x_test, y_test)``.

    Raises:
        ValueError: if ``train_ex`` is larger than the number of
            training examples available.
    """
    mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)

    # Start from the full training split so that train_ex=None is valid
    # and x_train / y_train are always bound.
    x_train = mnist.train.images
    y_train = mnist.train.labels

    if train_ex is not None:
        # Explicit check rather than assert: asserts vanish under -O, and
        # slicing past the end would silently return fewer examples.
        if x_train.shape[0] < train_ex:
            raise ValueError('Cannot train with more examples than you have')
        x_train = x_train[:train_ex]
        y_train = y_train[:train_ex]

    x_tune = mnist.validation.images
    x_test = mnist.test.images

    # Add the trailing single-channel (grayscale) dimension
    x_train = x_train.reshape(x_train.shape[0], img_h, img_w, 1)
    x_tune = x_tune.reshape(x_tune.shape[0], img_h, img_w, 1)
    x_test = x_test.reshape(x_test.shape[0], img_h, img_w, 1)

    y_tune = mnist.validation.labels
    y_test = mnist.test.labels

    return x_train, y_train, x_tune, y_tune, x_test, y_test


def _build_model():
    """
    Build and compile the CNN trained by convolution().

    Keras follows the layers principle, where each layer
    is independent and can be stacked and merged together.
    The Sequential model assumes that there is one long
    stack, with no branching.

    Returns:
        A compiled Sequential model taking (img_h, img_w, 1) images and
        producing ``num_classes`` softmax outputs.
    """
    model = Sequential()

    # filters is the number of filters in the layer; the more filters
    # we have, the more information we can learn.
    # kernel_size is the size of the convolution filter.
    # activation is the activation function on each node; we use relu,
    # sigmoid would also work.
    # input_shape is the shape of one image. The trailing 1 means
    # grayscale; a colour (RGB) image would have 3 there.
    model.add(Conv2D(filters=32, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=(img_h, img_w, 1)))

    # MaxPooling takes an NxM rectangle, keeps its maximum value and
    # discards the rest. 2x2 pooling therefore halves the height and
    # width of the image.
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Sets a random 25% of nodes to 0 to prevent overfitting
    model.add(Dropout(0.25))

    # No input shape is needed between the first and second layer,
    # Keras figures that out for us.
    model.add(Conv2D(32, kernel_size=(2, 2),
                     activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # For 28x28 inputs we have 6x6x32 here; flattening gives 1152 values
    model.add(Flatten())

    # Standard fully connected NN layers
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    # Stochastic Gradient Descent
    sgd = SGD(lr=0.01, momentum=0.9)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=sgd,
                  metrics=['accuracy'])
    return model


def _plot_predictions(model, x_test, count=5):
    """
    Draw the first ``count`` test images in one row, titled with the
    class the model predicts for each.

    Args:
        model: trained model exposing ``predict``.
        x_test: test images shaped (n, img_h, img_w, 1).
        count: number of images to show.
    """
    plt.figure('Predictions on MNIST', facecolor='gray')
    plt.set_cmap('gray')

    shown = min(count, len(x_test))
    # Only predict on the images actually displayed instead of running
    # the whole test set through the network.
    predictions = model.predict(x_test[:shown], verbose=0)

    for i in range(shown):
        subplt = plt.subplot(1, shown, i + 1)
        hot_index = np.argmax(predictions[i])
        subplt.set_title('Prediction: {0}'.format(hot_index))
        # Axis ticks carry no information for an image, so hide them
        subplt.axis('off')
        subplt.matshow(np.reshape(x_test[i], [img_h, img_w]))

    # One redraw after all subplots exist is enough
    plt.draw()


def convolution():
    """
    Train the CNN on MNIST, report test metrics and plot predictions.

    Loads the data with get_data(), fits the model with early stopping
    on the validation loss, prints the test loss and accuracy, and
    builds a figure showing predictions for the first five test images.
    The figure is not shown here; the caller is expected to call
    plt.show().
    """
    x_train, y_train, x_tune, y_tune, x_test, y_test = get_data()

    model = _build_model()

    es = EarlyStopping(monitor='val_loss',
                       patience=5,  # epochs to wait after min loss
                       min_delta=0.0001)  # anything less than this counts as no change

    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_tune, y_tune),
              callbacks=[es])

    score = model.evaluate(x_test, y_test)
    print('Test loss: {0}'.format(score[0]))
    print('Test accuracy: {0}'.format(score[1]))

    _plot_predictions(model, x_test)

# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    convolution()
    # convolution() only builds the predictions figure; display it here.
    plt.show()


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Train on 1000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

KeyboardInterrupt: ignored