### Import the libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
!pip install mnist

Collecting mnist
  Downloading https://files.pythonhosted.org/packages/c6/c4/5db3bfe009f8d71f1d532bbadbd0ec203764bba3a469e4703a889db8e5e0/mnist-0.2.2-py2.py3-none-any.whl
Installing collected packages: mnist
Successfully installed mnist-0.2.2


In [4]:
import keras
import mnist

### Load and prepare data 

In [5]:
# Fetch the MNIST training split (images and their digit labels) through the mnist package.
train_images, train_labels = mnist.train_images(), mnist.train_labels()

In [6]:
# Sanity check: the image and label arrays should share the same leading dimension.
print(train_images.shape, train_labels.shape, sep="\n")

(60000, 28, 28)
(60000,)


In [7]:
import numpy as np
import mnist

# Load both MNIST splits: the training split used for fitting, and the test split
# used later for validation and for the sample predictions.
train_images, train_labels = mnist.train_images(), mnist.train_labels()
test_images, test_labels = mnist.test_images(), mnist.test_labels()

In [8]:
# Rescale pixel values from [0, 255] to [-0.5, 0.5]; the smaller, centred range makes the network easier to train.
# NOTE: this cell rebinds the arrays it reads, so run it exactly once after loading the data.
train_images, test_images = (
    pixels / 255 - 0.5 for pixels in (train_images, test_images)
)

In [9]:
# Append a trailing channel axis so each image goes from (28, 28) to (28, 28, 1),
# matching the input_shape the Conv2D layer is declared with.
train_images = train_images[:, :, :, np.newaxis]
test_images = test_images[:, :, :, np.newaxis]

In [10]:
# Confirm that both image arrays now carry the extra channel axis.
print(train_images.shape, test_images.shape, sep="\n")

(60000, 28, 28, 1)
(10000, 28, 28, 1)


### Building the model

In [16]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten


In [17]:
# Hyperparameters for the convolution and pooling layers.

# How many filters the convolution layer uses.
num_filters = 8
# Side length of each square filter (3x3).
filter_size = 3
# Side length of the square pooling window (2x2).
pool_size = 2

In [18]:
# Small CNN built layer by layer: one convolution, one max-pool, then a flattened
# 10-way softmax classifier (one output node per digit).
model = Sequential()
model.add(Conv2D(filters=num_filters, kernel_size=filter_size, input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Flatten())
model.add(Dense(units=10, activation='softmax'))

In [19]:
# Configure training: Adam optimizer, cross-entropy loss over one-hot targets,
# and accuracy reported alongside the loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [20]:
from keras.utils import to_categorical

In [21]:
# The softmax output layer has 10 nodes, so Keras expects each training target to be a 10-dimensional vector.
# to_categorical turns the array of class integers into one-hot vectors,
# e.g. 2 becomes [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] (classes are zero-indexed).
# num_classes is passed explicitly so the one-hot width always matches the output layer
# instead of being inferred from the largest label present in the array.
# The test split is supplied as validation data, so the per-epoch validation metrics are measured on it.
model.fit(
    train_images,
    to_categorical(train_labels, num_classes=10),
    epochs=3,
    validation_data=(test_images, to_categorical(test_labels, num_classes=10)),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.callbacks.History at 0x7fe4a0366ef0>

In [22]:
# Write the trained model's weights to cnn.h5 (relative to the current working directory).
model.save_weights(filepath='cnn.h5')

In [23]:
# Run the trained network on the first five test images.
predictions = model.predict(x=test_images[:5])

In [25]:
# Each row of `predictions` holds 10 softmax probabilities; the index of the
# largest one in a row is the predicted digit.
print(predictions.argmax(axis=1))


[7 2 1 0 4]


In [26]:
# Ground-truth labels for the same five test images, for comparison with the predicted digits.
print(test_labels[0:5])

[7 2 1 0 4]


In [27]:
# A few other experiments to try:
# What happens if we add or remove Convolutional layers?
# What if we tried adding Dropout layers, which are commonly used to prevent overfitting?
# What if we add fully-connected layers between the Convolutional outputs and the final Softmax layer?
# What if we play with the Conv2D parameters? 