# MNIST Data

1. Imports

In [1]:
import keras # top-level Keras deep learning API

# data loading/prep: the MNIST loader, and np_utils for to_categorical (one-hot labels)
# NOTE(review): newer Keras releases appear to expose this only as keras.utils.to_categorical -- confirm against the installed version
from keras.datasets import mnist
from keras.utils import np_utils

# model building
from keras.models import Sequential # container for a plain linear stack of layers, added one after another
from keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense # layer types used by the LeNet-5-like network

2. Seed the random number generators for reproducibility

In [2]:
# Set PYTHONHASHSEED in the kernel's own environment.
# A `!` shell line runs in a throwaway subshell, so a variable assigned there is
# gone as soon as that subshell exits and never reaches this Python process.
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this only
# affects processes launched from here on; to pin hashing for this kernel itself,
# export the variable before starting Jupyter -- confirm whether that is needed.
import os

os.environ["PYTHONHASHSEED"] = "0"

In [3]:
import random as python_random

import numpy as np
import tensorflow as tf

# One shared seed for every random number generator seeded in this notebook.
SEED = 12345

# Seed, in order: NumPy's global RNG, Python's built-in `random` module,
# and TensorFlow's global random seed.
for seed_rng in (np.random.seed, python_random.seed, tf.random.set_seed):
    seed_rng(SEED)

3. Load and format MNIST data

In [4]:
# Load the MNIST images and their integer digit labels, already split into a
# training set and a held-out set (called "valid" throughout this notebook).
(x_train, y_train), (x_valid, y_valid) = mnist.load_data()

# Rescale the pixel intensities by 1/255 so the network sees values in [0, 1].
x_train = x_train/255
x_valid = x_valid/255

# One-hot encode the labels so they are treated as categories.
# The number of classes is derived once, from the training labels, and passed
# explicitly to both calls: left to infer it, each call would size its output
# from the largest label it happens to see, so the two label matrices could end
# up with different widths if the held-out set lacked the highest class.
num_classes = int(y_train.max()) + 1
y_train = np_utils.to_categorical(y_train, num_classes)
y_valid = np_utils.to_categorical(y_valid, num_classes)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


4. Examine data

In [5]:
# Render the first training image as text: one row of scaled pixel intensities
# per line, two decimals each, with lines wide enough to avoid wrapping.
first_image_text = np.array2string(x_train[0], max_line_width=150, precision=2)

# Image grid, a blank separator line, then the one-hot label of the same example.
print(first_image_text, end="\n\n")
print(y_train[0])

[[0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.01 0.07 0.07 0.07 0.49 0.53 0.69 0.1  0.65 1.   0.97 0.5  0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.12 0.14 0.37 0.6  0.67 0.99 0.99 0.99 0.99 0.99 0.88 0.67 0.99 0.95 0.76 0.25 0.   0.   0.   0.  ]

# Build Model

5. Initialize a sequential Keras model

In [6]:
# Start from an empty Sequential container; the layers are appended to it
# one at a time with model.add() in the next cell.
model = Sequential()

6. Define a smaller version of LeNet-5 that uses the standard 28x28 MNIST images

In [7]:
from keras.backend import tanh
from keras.activations import sigmoid
# NOTE: the two names imported above are not referenced in this cell; the
# activations below are selected by their string names instead.

# Size arithmetic used in the comments below:
#   conv layers keep the Keras defaults (stride 1, no padding):
#       output = (input - kernel)/1 + 1
#   pooling layers use pool_size=2 with the Keras defaults (stride = pool_size = 2, no padding):
#       output = floor((input - 2)/2) + 1, so a trailing odd row/column is dropped

# C1
# 1 image in, 6 feature maps out (1 image represented *in each of* the 6 feature maps)
# 28 x 28 x 1 inputs
# 1 image goes into 6 filters with (1 x 2 x 2 + 1) = 5 weights each, for 30 total weights in C1 layer
# resulting in a ((28 - 2)/1) + 1 = 27, i.e. 27 x 27 x 6 output volume
model.add(Conv2D(filters=6, kernel_size=(2,2), input_shape=(28,28,1)))

# S2
# 6 feature maps in, 6 activation maps out
# 27 x 27 x 6 input volume
# pooling has 0 weights
# 2 x 2 max-pooling window, stride 2, no padding
# resulting in a floor((27 - 2)/2) + 1 = 13, i.e. 13 x 13 x 6 output volume
# the sigmoid is applied to the pooled maps (it comes after the pooling layer)
model.add(MaxPooling2D(pool_size=2))
model.add(Activation('sigmoid')) # REQUIRES STUDENT INPUT

# C3
# 6 activation maps in, 16 feature maps out (6 activation maps represented *in each of* the 16 feature maps)
# 13 x 13 x 6 input volume
# 6 activation maps go into 16 filters with (6 x 5 x 5 + 1) = 151 weights each, for a total of 2416 weights in C3 layer
# resulting in a ((13 - 5)/1) + 1 = 9, i.e. 9 x 9 x 16 output volume
model.add(Conv2D(filters=16, kernel_size=(5,5)))

# S4
# 16 feature maps in, 16 activation maps out
# 9 x 9 x 16 input volume
# pooling has 0 weights
# 2 x 2 max-pooling window, stride 2, no padding
# resulting in a floor((9 - 2)/2) + 1 = 4, i.e. 4 x 4 x 16 output volume
model.add(MaxPooling2D(pool_size=2))
model.add(Activation('sigmoid')) # REQUIRES STUDENT INPUT

# C5
# 16 activation maps in, 120 feature maps out (16 activation maps represented *in each of* the 120 feature maps)
# 4 x 4 x 16 input volume
# 16 activation maps go into 120 filters with (16 x 4 x 4 + 1) = 257 weights each, for a total of 30,840 weights in C5 layer
# resulting in a ((4 - 4)/1) + 1 = 1, i.e. 1 x 1 x 120 output volume
# NOTE(review): no activation layer sits between C5 and F6, so the two layers
# compose into one linear map -- confirm that this is intended.
model.add(Conv2D(filters=120, kernel_size=(4,4))) # REQUIRES STUDENT INPUT

# F6
# the 1 x 1 x 120 volume is flattened to 120 values and fully connected to 84 units with biases
# resulting in 120 x 84 + 84 = 10,164 weights in F6 and 84 outputs, passed through tanh
model.add(Flatten())
model.add(Dense(84)) # REQUIRES STUDENT INPUT
model.add(Activation('tanh')) # REQUIRES STUDENT INPUT

# Output
# each of the 84 units in F6 feeds into one softmax output unit per class
# the unit count comes from num_classes (set in the data-loading cell; 10 for the MNIST digits)
# so the output layer always matches the width of the one-hot labels
# with 10 classes there are 84 x 10 + 10 = 850 weights in the output layer and 10 outputs
# output unit with highest output "probability" is the prediction for the image
model.add(Dense(num_classes)) # REQUIRES STUDENT INPUT
model.add(Activation('softmax'))

7. View a summary of the model architecture

In [8]:
# Print one row per layer with its output shape and parameter count, as a check
# against the sizes worked out in the comments of the model-definition cell.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 27, 27, 6)         30        
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 6)        0         
 )                                                               
                                                                 
 activation (Activation)     (None, 13, 13, 6)         0         
                                                                 
 conv2d_1 (Conv2D)           (None, 9, 9, 16)          2416      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 4, 4, 16)         0         
 2D)                                                             
                                                                 
 activation_1 (Activation)   (None, 4, 4, 16)          0

8. Compile the model (set the loss, optimizer, and metrics used for training)

In [9]:
# Configure training: plain SGD as the optimizer, categorical cross-entropy as
# the loss (the labels are one-hot encoded), and accuracy reported as a metric.
model.compile(
    optimizer="sgd",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

9. Train model

In [10]:
# Train for 50 epochs in mini-batches of 128 images.
# Restart the kernel and run the whole notebook for reproducible results:
# re-running only this cell keeps training the already-fitted model, so the
# numbers will no longer match a fresh run.
# The object returned by fit() is kept under a descriptive name instead of `_`:
# assigning to `_` shadows IPython's "last output" variable for the rest of the
# session, and a named result stays available for later inspection.
history = model.fit(x_train, y_train, epochs=50, batch_size=128)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


10. Evaluate model performance on validation data

In [11]:
# Score the trained network on the held-out images. As the last expression in
# the cell, the returned [loss, accuracy] pair is displayed as the cell output.
model.evaluate(
    x=x_valid,
    y=y_valid,
    batch_size=128,
)



[0.09812403470277786, 0.9702000021934509]