In [5]:
# note: using Tensorflow version 1.15
import keras
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
import datetime
import numpy as np

In [6]:
# initialize variables
epochs = 10
batch_size = 50

In [7]:
# input image dimensions
img_rows, img_cols = 28, 28

# Import data
# Gives 60000 training images, 10000 test images
# https://keras.io/datasets/#mnist-database-of-handwritten-digits
(x_train, y_train), (x_test, y_test) = mnist.load_data()


print(y_train[0])

# Split train into 80% training, 20% validation
# https://medium.com/@mjbhobe/mnist-digits-classification-with-keras-ed6c2374bd0e
val_percent = 0.2
val_count = int(val_percent * x_train.shape[0])

(x_val, y_val) = (x_train[:val_count], y_train[:val_count])
(x_train, y_train) = (x_train[val_count:], y_train[val_count:])

# Reshape the data
x_train = x_train.reshape(48000,img_rows, img_cols,1)
x_val = x_val.reshape(val_count,img_rows, img_cols,1)
x_test = x_test.reshape(10000,img_rows, img_cols,1)

print(x_train[0].dtype)
print(x_train.shape, 'train samples')
print(x_val.shape[0], 'validation samples')
print(x_test.shape[0], 'test samples')

5
uint8
(48000, 28, 28, 1) train samples
12000 validation samples
10000 test samples


In [8]:
# one-hot encode the labels - we have 10 output classes (0,1,2,...,9)
num_classes = 10

y_train = keras.utils.np_utils.to_categorical(y_train, num_classes)
y_val = keras.utils.np_utils.to_categorical(y_val, num_classes)
y_test = keras.utils.np_utils.to_categorical(y_test, num_classes)
# print(y_train[0:5]) 

[[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


# Model 1

In [9]:
# Build the Convolutional Neural Network
# https://medium.com/@mjbhobe/mnist-digits-classification-with-keras-ed6c2374bd0e\

def build_model_1():
    model = Sequential()
    #Convolutions
    # use 32 5x5 filters
    model.add(Conv2D(32, kernel_size=(5, 5), activation='relu', input_shape=(img_rows,img_cols,1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # use 64 5x5 filters
    model.add(Conv2D(64, (5, 5), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    #Fully Connected Layer
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))

    #Output layer
    model.add(Dense(num_classes, activation='softmax'))

    #Adam optimizer, crossentropy as loss function
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer='adam',metrics=['accuracy'])
    return model

# Kept getting error messages without this line
import keras.backend as K
K.clear_session()

model_1 = build_model_1()  

# Make Tensorboard logs
# https://www.tensorflow.org/tensorboard/get_started
log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

#Save best models
model_checkpoint = keras.callbacks.ModelCheckpoint('best_mnist_model.hdf5', monitor='val_loss', save_best_only=True, period=1)

results_1 = model_1.fit(x_train, y_train, 
                        batch_size=batch_size, 
                        epochs=epochs, 
                        verbose=1, 
                        validation_data=(x_val, y_val),
                        callbacks=[tensorboard_callback, model_checkpoint])


score = model_1.evaluate(x_test, y_test, verbose=0)




2022-01-23 21:04:23.268838: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
print('Training accuracy = ', results_1.history['accuracy'][epochs-1])
print('Validation accuracy = ', results_1.history['val_accuracy'][epochs-1])
print('Testing accuracy = ', score[1])

Training accuracy =  0.9860000014305115
Validation accuracy =  0.9855833053588867
Testing accuracy =  0.988099992275238


Minimizing the cross-entropy loss using the Adam optimizer, we find the weights in the Network using the training data, and check hyperparameters on the validation data. Since 10 EPOCH's was enough to get the accuracies to converge, we stopped the training. In the last EPOCH we get:
\begin{align*}
    \text{Training Accuracy} &= 98.6\% \\
    \text{Validation Accuracy} &= 98.5\% \\
    \text{Testing Accuracy} &= 98.8\%
\end{align*}

# Model 2

In [63]:
# Build the network
# https://medium.com/@mjbhobe/mnist-digits-classification-with-keras-ed6c2374bd0e
# https://medium.com/datadriveninvestor/image-processing-for-mnist-using-keras-f9a1021f6ef0

def build_model_2():
    model = Sequential()
    #Convolutions
    # use 32 5x5 filters
    model.add(Conv2D(32, kernel_size=(5, 5), activation='relu', padding='same', input_shape=(img_rows,img_cols,1)))
    BatchNormalization(axis=-1)
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # use 64 5x5 filters
    model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
    BatchNormalization(axis=-1)
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # use 32 3x3 filters
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    BatchNormalization(axis=-1)
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # use 32 3x3 filters
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    BatchNormalization(axis=-1)
    model.add(MaxPooling2D(pool_size=(2, 2)))

    #Fully connected layer
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))

    #Output layer
    model.add(Dense(num_classes, activation='softmax'))

    # Adam optimizer, crossentropy loss
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer='adam',metrics=['accuracy'])
    return model


K.clear_session()

model_2 = build_model_2()


#Make tensorboard logs
log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)


#Save best models
model_checkpoint = keras.callbacks.ModelCheckpoint('best_mnist_model_2.hdf5', monitor='val_loss', save_best_only=True, period=1)


results_2 = model_2.fit(x_train, y_train, 
                        batch_size=batch_size, 
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_val, y_val), 
                        callbacks=[tensorboard_callback, model_checkpoint])


score = model_2.evaluate(x_test, y_test, verbose=0)


Train on 48000 samples, validate on 12000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [64]:
print('Training accuracy = ', results_2.history['acc'][epochs-1])
print('Validation accuracy = ', results_2.history['val_acc'][epochs-1])
print('Testing accuracy = ', score[1])
# Note these are the accuracies in the final EPOCH

Training accuracy =  0.9877500037352244
Validation accuracy =  0.9832500040531158
Testing accuracy =  0.9882


We find on the last EPOCH:
\begin{align*}
    \text{Training Accuracy} &= 98.8\% \\
    \text{Validation Accuracy} &= 98.3\% \\
    \text{Testing Accuracy} &= 98.8\%
\end{align*}
These results show our Network will be extremely accurate, and fairly similar to the results of Model 1.

We'll now use Tensorboard to plot the training and validation accuracies with respect to the number of epochs. 

In [14]:
# https://www.tensorflow.org/tensorboard/tensorboard_in_notebooks
# https://theffork.com/how-to-use-tensorboard-in-jupyter-notebook/

# Run the following in command line (without #):
# pip install jupyter-tensorboard
# Load TENSORBOARD
%load_ext tensorboard
# Start TENSORBOARD
%tensorboard --logdir=./logs/

# Alternate method
# On a command line, run: tensorboard --logdir=ENTERLOGFOLDERPATH --host localhost --port 6066
# Then open http://localhost:6066 in your browser

Using Tensorboard we are able to generate Figures loss2.png and val_loss2.png to represent the accuracies and losses throughout training. 
Comparing them side-by-side we can see the training loss for Model 2 was consistently lower than Model 1. However, we also see the validation loss for Model 2 was worse than Model 1.

Comparing the models, Model 1 starts off with a much higher training loss than validation loss, meaning we were underfitting the data. However, around the 5th EPOCH they become equal, and then the training loss is less than validation. 
Model 2 had the training loss and validation loss being fairly close, indicating that our data is being more properly fit. Around the 4th EPOCH our losses are equal, and afterwards the training loss is lower than validation loss. 
To avoid the risk of overfitting in both models we should stop training around when the losses are equal, or the validation starts increasing. 