<h2><center>MNIST classification using <i>LeNet5</i></center></h2>

In [None]:
# As a first step, we may want to switch to a GPU-accelerated VM
# In the menu: Runtime->Change runtime type->Hardware Accelerator->GPU.
#
# This will test if we have a GPU-equipped VM and return some useful system-level information
#!nvidia-smi

# Which GNU/Linux distribution is installed on our VM ?
#!lsb_release -a

# Which version of the Linux kernel our VM has ?
#!uname -a

# How much free memory our VM has ?
#!free -h

# Which storage facilities our VM has ?
#!mount

# Which python version our VM has installed ?
#!python --version

# Importing Keras

In [None]:
# Bring in the tensorflow backend and the Keras front-end, then report which
# versions this runtime provides (handy when comparing results across VMs)
import tensorflow as tf
import keras
print("Using tensorflow version %s" % tf.__version__)
print("Using keras version %s" % keras.__version__)

# Loading and preparing the MNIST dataset

In [None]:
#@title
# Keras ships a loader for the MNIST dataset that returns it as in-memory arrays
# Warning: you cannot do that for larger databases (e.g., ImageNet)
from keras.datasets import mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Report the shape of each of the four arrays that were just loaded
for description, array in (
    ("train images", train_images),
    ("train labels", train_labels),
    ("test images", test_images),
    ("test labels", test_labels),
):
    print("Shape of the %s is " % description, array.shape)

In [None]:
# Let us visualize one training sample using the matplotlib library.
# Change imageIndex to inspect a different sample: both the printed label and
# the displayed image follow it.
from matplotlib import pyplot as plt
imageIndex = 0
print("Label for " + str(imageIndex) + "-th train image is: " + str(train_labels[imageIndex]))
plt.imshow(train_images[imageIndex])

In [None]:
# The ground truth labels need to be converted to the one-hot encoding format via to_categorical.
# to_categorical is imported from its public location, keras.utils; the private
# keras.utils.np_utils module is not available in recent Keras releases.
from keras.utils import to_categorical
imageIndex = 0
#print("This is the native " + str(imageIndex) + "-th train label: " + str(train_labels[imageIndex]))
# Only labels that are still a 1-D vector of class ids get encoded, so that
# re-running this cell does not one-hot encode already encoded labels again.
if train_labels.ndim == 1:
    train_labels = to_categorical(train_labels)
#print("This is the one-hot encoding of the " + str(imageIndex) + "-th train label: " + str(train_labels[imageIndex]))
if test_labels.ndim == 1:
    test_labels = to_categorical(test_labels)

In [None]:
# The images need an explicit trailing axis for their single color channel,
# following the tensorflow backend NHWC scheme (samples, height, width, channels).
# Pixels are cast from uint8 to float32 in the same step to allow normalization.
img_rows, img_cols = train_images.shape[1:3]
train_images = train_images.reshape((-1, img_rows, img_cols, 1)).astype('float32')
test_images = test_images.reshape((-1, img_rows, img_cols, 1)).astype('float32')
print('train_images shape:', train_images.shape)
print('test_images shape:', test_images.shape)

# Mean and standard deviation of the pixel intensities, computed over the
# training set only
train_mean = train_images.mean()
train_std = train_images.std()


def _standardize(images):
    """Shift and scale `images` with the training-set mean and standard deviation."""
    return (images - train_mean) / train_std


# Both splits are normalized with the training-set statistics
train_images = _standardize(train_images)
test_images = _standardize(test_images)


# Defining the neural network architecture (i.e., the network model)
Create a LeNet5-like convolutional neural network that takes the images as input (as matrices of pixels) and classifies each image into one of 10 different classes.

In [None]:
# Sequential is a container that chains layers one after the other, i.e. it
# describes a loop-less NN architecture
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D

# Shape of one input sample and number of output classes
input_shape = (img_rows, img_cols, 1)
output_shape = 10

# Hyper-parameters of the convolve-and-pool blocks
pool_size = (2, 2)                 # size of pooling area for max pooling
kernel_size = (5, 5)               # convolution kernel size
num_kernel_first_conv_layer = 6    # number of filters in first convolutional layer
num_kernel_second_conv_layer = 16  # number of filters in second convolutional layer

# The layers are listed in the order in which the data flows through them
model = Sequential([
    # First convolve-and-pool block (the only layer that is told the input shape)
    Convolution2D(num_kernel_first_conv_layer, kernel_size, input_shape=input_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=pool_size),

    # Second convolve-and-pool block
    Convolution2D(num_kernel_second_conv_layer, kernel_size),
    Activation('relu'),
    MaxPooling2D(pool_size=pool_size),

    # Turns the stack of featuremaps into a linear array of features
    Flatten(),

    # First fully connected hidden layer with 120 neurons
    Dense(120),
    Activation('relu'),

    # Second fully connected hidden layer with 84 neurons
    Dense(84),
    Activation('relu'),

    # Output layer: one softmax score per class
    Dense(output_shape),
    Activation('softmax'),
])

Instantiate an SGD optimizer with a tentative learning rate (LR) of $10^{-4}$, then compile the model with this optimizer and the appropriate loss function.

In [None]:
# The optimizers module provides a number of optimization algorithms for updating
# a network's parameters according to the computed error gradients
from keras import optimizers

# Defining our optimizer as the standard stochastic gradient optimizer.
# The learning rate is passed as `learning_rate`, the documented argument name;
# the older `lr` alias is deprecated.
optimizer = optimizers.SGD(learning_rate=1e-4)

# This attaches the SGD optimizer above, the categorical cross-entropy loss and
# the accuracy metric to the LeNet5 model
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Let us have a look at the model topology after compiling the model
model.summary()

# Training the network

In [None]:
# This is where the actual training-testing happens
# Dimension of the batch size (number of images over which error gradients are averaged)
batch_size = 64
# Number of epochs we want to train
epochs = 10

# This structure holds the training history for later plotting
history = {'loss': [], 'val_loss': [], 'acc': [], 'val_acc': []}

# Creating a batch preprocessor for feeding the train and test data in batches
from keras.preprocessing.image import ImageDataGenerator
myDatagen = ImageDataGenerator()
# fit() computes the statistics used by the generator's feature-wise
# normalization options. None of them is enabled on this default generator
# (the images were already normalized with train_mean / train_std), so the call
# is kept only so that switching such an option on later keeps working.
myDatagen.fit(train_images)


def _run_epoch(step_fn, images, labels):
    """Apply `step_fn` once to every sample, batch by batch.

    step_fn: model.train_on_batch or model.test_on_batch; called as
        step_fn(images_batch, labels_batch), its result is indexed as
        [loss, accuracy].
    images, labels: the samples to visit and their labels.

    Returns (mean loss, mean accuracy) over the visited samples. Each batch is
    weighted by its number of samples, so a smaller last batch does not count
    as much as a full one.
    """
    total_samples = len(images)
    loss_sum = 0.0
    accuracy_sum = 0.0
    samples_seen = 0
    for images_batch, labels_batch in myDatagen.flow(images, labels, batch_size=batch_size):
        batch_history = step_fn(images_batch, labels_batch)
        samples_in_batch = len(images_batch)
        loss_sum += batch_history[0] * samples_in_batch
        accuracy_sum += batch_history[1] * samples_in_batch
        samples_seen += samples_in_batch
        # break the loop or generator loops indefinitely
        if samples_seen >= total_samples:
            break
    return loss_sum / samples_seen, accuracy_sum / samples_seen


# Cycling through the epochs
for e in range(epochs):
    # Training over the training samples, batch by batch
    lossTrain, accuracyTrain = _run_epoch(model.train_on_batch, train_images, train_labels)

    # Testing over the test samples, batch by batch
    lossTest, accuracyTest = _run_epoch(model.test_on_batch, test_images, test_labels)

    print('Epoch %d / %d lossTrain %.3f lossTest %.3f accuracyTrain %.3f accuracyTest %.3f' % (e, epochs, lossTrain, lossTest, accuracyTrain, accuracyTest))
    history['loss'].append(lossTrain)
    history['val_loss'].append(lossTest)
    history['acc'].append(accuracyTrain)
    history['val_acc'].append(accuracyTest)

# Visualizing the network performance

In [None]:
# We now want to plot the train and validation loss functions and accuracy curves
from matplotlib import pyplot as plt

# One entry per figure:
# (train series key, test series key, figure title, y-axis label, legend position)
figure_specs = [
    ('loss', 'val_loss', 'model loss', 'loss', 'upper right'),
    ('acc', 'val_acc', 'model accuracy', 'accuracy', 'lower right'),
]

# Each figure overlays the per-epoch train and test curves of one quantity
for train_key, test_key, figure_title, y_label, legend_position in figure_specs:
    plt.plot(history[train_key])
    plt.plot(history[test_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc=legend_position)
    plt.show()


# Experiments

Note down the performance of the trained network in terms of training and validation accuracy as a reference. Then, experiment as follows and compare the performance with the reference scenario.

*   **Filter size**: experiment with square filters of different size and compare performance with reference scenario.
*   **Number of filters**: experiment increasing the number of filters in the first and second layer and find the maximum number of filters the network can tolerate before overfitting to the training samples.
* **Padding**: experiment with narrow and wide convolutions: what changes in terms of featuremap size?
*  **Pooling layers**: experiment with different pooling layers (maxpooling and avgpooling): which one yields the best performance?
What happens if the pooling layers are removed altogether in terms of complexity-performance tradeoff?
* **Pooling-less architectures**: Modify the network architecture to obtain a twofold reduction of each featuremap without resorting to pooling layers (hint: take inspiration from the ResNet architecture).
* **Confusion analysis**: Using the proper metric from sklearn, check which character is most frequently confused with which: can you explain why?





In [None]:
# We now want to compute and print the confusion matrix using sklearn.metrics
from sklearn.metrics import confusion_matrix
predictions = model.predict(test_images)
# Mind that confusion_matrix is given one class index per sample here: argmax
# collapses both the one-hot ground truth and the per-class prediction scores
true_classes = test_labels.argmax(axis=1)
predicted_classes = predictions.argmax(axis=1)
matrix = confusion_matrix(true_classes, predicted_classes)
print(matrix)

# Saving the training results

Save the best trained model (topology, parameters), and all the related side information required to deploy the trained model later on.


In [None]:
# Create a directory for saving both the trained model and side information
import os
save_dir = os.path.join(os.getcwd(), 'trained_lenet5_mnist')
# exist_ok makes this a no-op when the directory is already there
os.makedirs(save_dir, exist_ok=True)

# Save model and weights
model_name = 'model.h5'
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

# Saving mean and standard deviation information as a CSV file.
# These are the training-set statistics the images were normalized with, so
# they are needed to preprocess inputs the same way when the model is deployed.
import csv
stats_name = 'std_dev.csv'
stats_path = os.path.join(save_dir, stats_name)
normalization_stats = {'mean': train_mean, 'std': train_std}
# The with-block closes (and therefore flushes) the file once all rows are
# written; newline='' is what the csv module asks for on files it writes to
with open(stats_path, "w", newline='') as stats_file:
    w = csv.writer(stats_file)
    for key, val in normalization_stats.items():
        w.writerow([key, val])
print('Saved side information at %s ' % stats_path)