# MNIST digit classification using Convolutional Neural Networks

In [9]:
import keras
from keras.datasets import mnist

## Load the training and testing dataset

In [2]:
# mnist.load_data() hands back a (train, test) pair, each of which is an
# (images, labels) tuple; unpack them one level at a time.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [3]:
# Report the array shape of the training and testing image sets
for label, images in (("x_train.shape = ", x_train), ("x_test.shape = ", x_test)):
    print(label, images.shape)

x_train.shape =  (60000, 28, 28)
x_test.shape =  (10000, 28, 28)


In [10]:
# sample counts come from the leading axis of each image array
num_training_samples = len(x_train)
num_testing_samples = len(x_test)

# every image is a 28x28 pixel grid (matches the shapes printed above)
img_height = img_width = 28

# one class per digit, 0 through 9
num_classes = 10

#### Depending on the backend (Theano or TensorFlow), x_train and x_test have to be reshaped accordingly

In [7]:
from keras import backend as K

# The images are grayscale, so each pixel carries a single channel
# (an RGB image would have 3 channels: R, G and B)
num_channels = 1

# The per-image layout depends on the backend's image data format:
#   'channels_last'  (TensorFlow) -> (height, width, channels)
#   'channels_first' (Theano)     -> (channels, height, width)
if K.image_data_format() == 'channels_last':
    input_shape = (img_height, img_width, num_channels)
else:
    input_shape = (num_channels, img_height, img_width)

# Give every image an explicit channel axis and convert the pixels to float32
x_train = x_train.reshape((num_training_samples,) + input_shape).astype('float32')
x_test = x_test.reshape((num_testing_samples,) + input_shape).astype('float32')

## Normalize the pixel values

In [8]:
# The max value of a pixel is 255, so dividing every pixel by 255 maps the
# values into the range 0 to 1.
# The division happens in place, so running this cell a second time would
# silently shrink the data by another factor of 255. The max() check skips
# any array that has already been scaled (its largest value is then <= 1).
if x_train.max() > 1:
    x_train /= 255
if x_test.max() > 1:
    x_test /= 255

## Convert labels (y_train, y_test) to one-hot encoded vectors

In [11]:
# Turn each integer digit label into a one-hot vector of length num_classes.
# The encoded result is stored back into y_train / y_test, so only arrays that
# are still 1-D are converted; this keeps a re-run of the cell from encoding
# labels that are already one-hot.
# NOTE(review): assumes the raw labels from mnist.load_data() are 1-D - confirm.
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes)
if y_test.ndim == 1:
    y_test = keras.utils.to_categorical(y_test, num_classes)

## Prepare the model

In [13]:
from keras.models import Sequential
from keras.layers import Dense, Convolution2D, Flatten, MaxPooling2D

In [14]:
# Network layout: two 3x3 convolutions -> 2x2 max pooling -> flatten
# -> two 128-unit dense layers -> softmax over the digit classes
model = Sequential([
    # 'valid' padding: the filter is applied only where it fits entirely
    # inside the image, i.e. it does not go over the border
    Convolution2D(filters=32, kernel_size=(3, 3), padding='valid',
                  activation='relu', input_shape=input_shape),
    Convolution2D(filters=64, kernel_size=(3, 3), activation='relu'),
    # max pooling summarizes the features extracted so far
    MaxPooling2D(pool_size=(2, 2)),
    # flatten the feature maps for the 1D (dense) layers
    Flatten(),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    # one output per digit class
    Dense(num_classes, activation='softmax'),
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
Total para

In [15]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (the labels are one-hot vectors), and accuracy reported as a metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## Train the model

In [16]:
# Train for 5 epochs in batches of 256, holding out 20% of the training
# samples for validation.
# fit() returns a History object; bind it to a name so it stays available to
# later cells instead of being reachable only through `_` / `Out[...]`.
history = model.fit(x_train, y_train, batch_size=256, epochs=5, validation_split=0.2)
history

Train on 48000 samples, validate on 12000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1e14cd74cf8>

## Evaluate model on testing set

In [18]:
# Evaluate on the held-out test set; the first two entries of `score` are the
# loss and the accuracy metric requested in compile()
score = model.evaluate(x_test, y_test)
test_loss, test_accuracy = score[0], score[1]

# blank line, presumably to separate the results from evaluate()'s own output
print()
print("Test loss = ", test_loss)
print("Test accuracy = ", test_accuracy)

Test loss =  0.0367503705916
Test accuracy =  0.988
