# 5.6 Training a Deep Neural Network for classification of grayscale images of handwritten digits
This example illustrates two DNN models in Tensorflow/Keras, trained to classify handwritten digits from their grayscale images. It uses the well-known MNIST dataset. The second and more powerful DNN model has **Normalization Layers** that are helpful for the stability of the learning process.

In [None]:
# Mount GDrive, change directory and check contents of folder.

import os
from google.colab import drive
from google.colab import files

# Folder on the mounted Google Drive that becomes the working directory of this notebook.
PROJECT_FOLDER = "/content/gdrive/My Drive/Colab Notebooks/CS345_SP22/5. DNN"

# Mount Google Drive under /content/gdrive/ (must happen before changing into the folder).
drive.mount('/content/gdrive/')
# Change into the project folder; the model folder used later is a relative path
# and is therefore resolved against this directory.
os.chdir(PROJECT_FOLDER)
print("Current dir: ", os.getcwd())

# Settings and Basic Package Imports

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from mllib.utils import RandomSeed

# __________ | Settings | __________
# When True, the first validation images are previewed with matplotlib after loading.
IS_PLOTING_DATA         = True
# When True, the model is switched to eager execution (run_eagerly) before training.
IS_DEBUGABLE            = False
# When True, the model is trained even if a saved model folder already exists.
IS_RETRAINING           = True
# Project helper from mllib.utils; presumably seeds the random number generators
# for reproducible runs -- TODO confirm which libraries it seeds.
RandomSeed(2022)

# Hyperparameters
For each training experiment, we define all the model/training hyperparameters inside a Python dictionary.

In [None]:
# Hyperparameter set of the shallow experiment.
# "DNN.LayerNeurons" lists the neuron count per layer; CDNNWithNormalization
# reads the last entry as the output layer and the preceding ones as hidden layers.
CONFIG_SHALLOW = {
    "ModelName": "MNIST1_SHALLOW",
    "DNN.InputFeatures": 28 * 28,
    "DNN.LayerNeurons": [512, 10],
    "DNN.Classes": 10,
    "Training.MaxEpoch": 10,
    "Training.BatchSize": 512,
    "Training.LearningRate": 0.2,
}

We choose the hyperparameter set for the current model training experiment

In [None]:
# Select the active hyperparameter set. This is an alias (not a copy) of the
# chosen dictionary; the cells below read their settings through CONFIG.
CONFIG = CONFIG_SHALLOW

# MNIST
The [MNIST dataset](http://yann.lecun.com/exdb/mnist/), which dates back to 1998, has become a standard toy dataset for understanding the image classification task. It contains 70000 grayscale images of 28x28 pixels of the handwritten digits 0, 1, ..., 9.
It is already split into a training set of 60000 images, while the remaining 10000 are used to validate the model.

# Dataset loading and previewing
We are reusing an existing dataset in Tensorflow format. We load the data and extract them as numpy arrays to view some images, and later use the target class labels for evaluation.

In [None]:
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt

# Load the MNIST 'train' and 'test' splits as (image, label) pairs, together with
# the dataset metadata object that holds the sample count of each split.
(oTSData, oVSData), oDataSetInfo = tfds.load(
    'mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)

# Takes one minibatch out of the dataset. Here the size of the minibatch is the total count of samples,
# so the numpy arrays hold the whole validation split (images and target class labels).
for tImages, tLabels in oVSData.batch(oDataSetInfo.splits['test'].num_examples).take(1):
    nImages            = tImages.numpy()
    nTargetClassLabels = tLabels.numpy()

print("VS image features tensor shape:" , nImages.shape)
print("VS image targets vector shape :", nTargetClassLabels.shape)

if IS_PLOTING_DATA:
    # Preview the first 21 samples. Slicing bounds the loop instead of
    # enumerating every sample and filtering by index.
    for nIndex, nSample in enumerate(nImages[:21]):
        if nIndex == 0:
            print("Image sample shape            :", nSample.shape)
        nImage = nSample.astype(np.uint8)
        # The first (index 0) channel is shown as a 2D grayscale picture.
        plt.imshow(nImage[:, :, 0], cmap="gray")  # https://matplotlib.org/stable/tutorials/colors/colormaps.html
        # Alternative: show only a cropped region of the image
        #plt.imshow(nImage[4:22, 0:15, :], cmap="gray")
        plt.title("Digit %d" % nTargetClassLabels[nIndex])
        plt.show()

# Tensorflow/Keras data feeding pipelines
We are going to create a pipeline that will feed our training process with data. A function passed to `map()` is called for each sample to normalize its pixel values, flatten the image into a feature vector and create the one-hot encoding of its label. The pipeline uses
* `cache`: To cache the data in memory
* `shuffle`: To shuffle the samples (the order is reshuffled each time the dataset is iterated, i.e. at each epoch)
* `batch`: To create mini-batches of samples
* `prefetch`: To use multi-threading for loading the samples into the learning process.

In [None]:
# -----------------------------------------------------------------------------------
def NormalizeAndReshapeImage(p_tImage, p_tLabel):
    """Prepare one (image, label) sample for the fully connected network.

    The image is cast to `float32`, divided by 255 and flattened into a vector
    of CONFIG["DNN.InputFeatures"] elements (28*28 = 784 for the configuration
    in this file). The label is converted to a one-hot vector with
    CONFIG["DNN.Classes"] entries.

    Returns the tuple (flattened image, one-hot target).
    """
    # Cast and scale first, then flatten the image tensor into a feature vector
    tFlatImage = tf.reshape(
        tf.cast(p_tImage, tf.float32) / 255.,
        [CONFIG["DNN.InputFeatures"]],
    )
    # Target class label into one-hot encoding
    return tFlatImage, tf.one_hot(p_tLabel, CONFIG["DNN.Classes"])
# -----------------------------------------------------------------------------------

nBatchSize = CONFIG["Training.BatchSize"]

# Training data feed pipeline:
# preprocess each sample -> cache the preprocessed samples -> shuffle over the
# whole training split -> group into minibatches -> prefetch upcoming batches.
oTSData = oTSData.map(NormalizeAndReshapeImage, num_parallel_calls=tf.data.AUTOTUNE)
oTSData = oTSData.cache()
oTSData = oTSData.shuffle(oDataSetInfo.splits['train'].num_examples)
oTSData = oTSData.batch(nBatchSize)
oTSData = oTSData.prefetch(tf.data.AUTOTUNE)
print("Training data feed object:", oTSData)

# Validation data feed pipeline:
# no shuffling, and the whole validation split is served as a single batch.
oVSData = oVSData.map(NormalizeAndReshapeImage, num_parallel_calls=tf.data.AUTOTUNE)
oVSData = oVSData.cache()
oVSData = oVSData.batch(oDataSetInfo.splits['test'].num_examples)
oVSData = oVSData.prefetch(tf.data.AUTOTUNE)
# Report the validation pipeline object (not the training one)
print("Validation data feed object:", oVSData)


# Deep Neural Network Class
We declare the class for a **Fully Connected Deep Neural Network (FC-DNN)** that has a variable depth of hidden layers. 

In [None]:
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Activation, Softmax\

# =========================================================================================================================
class CDNNWithNormalization(keras.Model):
    """Fully connected DNN in which every hidden layer is followed by batch normalization.

    The architecture is read from the configuration dictionary:
      * "DNN.LayerNeurons": neuron count per layer; the last entry is the
        output layer (class count), the preceding entries are hidden layers.
      * "DNN.ActivationFunction": activation of the hidden layers; set to
        "relu" in the dictionary when the key is missing.

    The output layer is a linear Dense layer followed by a Softmax activation,
    so the model returns one probability per class.
    """
    # --------------------------------------------------------------------------------------
    def __init__(self, p_oConfig):
        # The configuration dictionary is not a Keras constructor argument, so it
        # is not forwarded to keras.Model.__init__.
        super(CDNNWithNormalization, self).__init__()
        # ..................... Object Attributes ...........................
        self.Config = p_oConfig

        self.ClassCount   = self.Config["DNN.LayerNeurons"][-1]
        self.LayerNeurons = self.Config["DNN.LayerNeurons"][:-1]
        self.HiddenLayers = [None]*len(self.LayerNeurons)
        self.NormalizationLayers = [None]*len(self.LayerNeurons)
        self.OutputLayer  = None
        self.SoftmaxActivation = None
        self.Input        = None
        # ...................................................................

        # Default activation function; stored in the config dictionary as before
        self.Config.setdefault("DNN.ActivationFunction", "relu")

        self.Create()

    # --------------------------------------------------------------------------------------
    def Create(self):
        """Create the layer objects: one Dense + BatchNormalization pair per hidden layer, then the output layer."""
        sActivation = self.Config["DNN.ActivationFunction"]
        for nIndex, nLayerNeuronCount in enumerate(self.LayerNeurons):
            self.HiddenLayers[nIndex] = Dense(nLayerNeuronCount, activation=sActivation, use_bias=True)
            self.NormalizationLayers[nIndex] = BatchNormalization()

        # The output layer has no activation of its own; Softmax is applied separately in call()
        self.OutputLayer = Dense(self.ClassCount, use_bias=True)
        self.SoftmaxActivation = Softmax()
    # --------------------------------------------------------------------------------------
    def call(self, p_tInput, training=None):
        """Feed the input forward through the network and return the class probabilities.

        p_tInput : batch of input feature vectors.
        training : optional training/inference flag, passed on to the
                   normalization layers. When left as None, Keras resolves the
                   mode from the calling context.
        """
        # Keeps a reference to the most recent input tensor
        self.Input = p_tInput

        # Feed forward to the next layer: Dense (with activation), then normalization
        tA = p_tInput
        for oHiddenLayer, oNormalizationLayer in zip(self.HiddenLayers, self.NormalizationLayers):
            tA = oHiddenLayer(tA)
            tA = oNormalizationLayer(tA, training=training)

        tA = self.OutputLayer(tA)
        # Using the Softmax activation function for the neurons of the output layer
        tA = self.SoftmaxActivation(tA)

        return tA
    # --------------------------------------------------------------------------------------
# =========================================================================================================================

# Create the Neural Network model and training algorithm objects


In [None]:
# __________ // Create the Machine Learning model and training algorithm objects \\ __________
from DNN import CDNNBasic, CDNNWithNormalization

# Instantiate the model for the selected hyperparameter set.
# NOTE(review): CDNNBasic comes from the external DNN module and its definition
# is not visible here; the same import also rebinds CDNNWithNormalization,
# replacing the class declared earlier in this notebook -- confirm this is intended.
oNN = CDNNBasic(CONFIG)

# Starting learning rate; the learning rate schedule callback may change it during training.
nInitialLearningRate    = CONFIG["Training.LearningRate"]

# Cross-entropy over one-hot targets. from_logits=False means the loss expects
# probabilities -- presumably the model output is already softmax-activated; TODO confirm for CDNNBasic.
oCostFunction   = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
# Stochastic gradient descent with the initial learning rate
oOptimizer      = tf.keras.optimizers.SGD(learning_rate=nInitialLearningRate)

# Learning Rate Scheduling


In [None]:
# -----------------------------------------------------------------------------------
def LRSchedule(epoch, lr):
    """Return the learning rate to use for the given epoch.

    The rate is halved once, when `epoch` equals 5 (the new value is also
    printed); for every other epoch the current rate `lr` is returned unchanged.
    """
    # Every epoch except the switching one keeps the current learning rate
    if epoch != 5:
        return lr

    nHalvedLR = lr * 0.5
    print("Setting LR to %.5f" % nHalvedLR)
    return nHalvedLR
# -----------------------------------------------------------------------------------    
    
# Wrap the schedule function in a Keras callback so that it can be passed to fit()
oLearningRateSchedule = tf.keras.callbacks.LearningRateScheduler(LRSchedule)

# Training Process

In [None]:
# The model is saved to / loaded from a folder named after the model
sModelFolderName = CONFIG["ModelName"]

# Train when no saved model folder exists yet, or when retraining is requested
if (not os.path.isdir(sModelFolderName)) or IS_RETRAINING:
    oNN.compile(loss=oCostFunction, optimizer=oOptimizer, metrics=["accuracy"])

    if IS_DEBUGABLE:
        # Eager execution so that the model code can be stepped through in a debugger
        oNN.run_eagerly = True

    # batch_size is not passed to fit(): oTSData is a tf.data.Dataset that already
    # yields minibatches of the configured size, and Keras documents that
    # batch_size must not be specified for dataset inputs.
    oProcessLog = oNN.fit(
        oTSData,
        epochs=CONFIG["Training.MaxEpoch"],
        validation_data=oVSData,
        callbacks=[oLearningRateSchedule],
    )
    oNN.summary()
    oNN.save(sModelFolderName)
else:
    # The model is trained and its state is saved (all the trainable parameters are saved). We load the model to recall the samples
    oNN = keras.models.load_model(sModelFolderName)
    # No training log is available for a loaded model; the plotting cell checks for None
    oProcessLog = None
    oNN.summary()

# Learning Process Overview

In [None]:
# Plots the accuracy and error curves of the training process.
# Skipped when the model was loaded from disk and no training log exists.
if oProcessLog is not None: # [PYTHON] Checks that object reference is not Null
    # list all data in history
    print("Keys of Keras training process log:", oProcessLog.history.keys())

    # Plot the accuracy during the training epochs
    plt.plot(oProcessLog.history['accuracy'])
    plt.plot(oProcessLog.history['val_accuracy'])
    plt.title('MLP Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # Build a readable cost function name for the plot title, e.g. "some_name" -> "Some Name".
    # The words are joined with exactly one space, so the title has no doubled or trailing spaces.
    sCostFunctionNameParts = oCostFunction.name.split("_")                     # [PYTHON]: Splitting string into an array of strings
    sCostFunctionNameParts = [x.capitalize() for x in sCostFunctionNameParts]  # [PYTHON]: List comprehension example
    sCostFunctionName = " ".join(sCostFunctionNameParts)                       # [PYTHON]: Joining strings in a list with a space between them

    # Plot the error during the training epochs
    plt.plot(oProcessLog.history['loss'])
    plt.plot(oProcessLog.history['val_loss'])
    plt.title('MLP ' + sCostFunctionName + " Error")
    plt.ylabel('Error')
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

# Inference
Recalling samples to predict their class (i.e. classify).


In [None]:
# Recall all validation samples: one row of class probabilities per sample
nPredictedProbabilities = oNN.predict(oVSData)
# The predicted class is the index of the largest probability in each row
nPredictedClassLabels  = np.argmax(nPredictedProbabilities, axis=1)

# Print the first five predictions next to the actual labels. Slicing bounds the
# loop instead of enumerating every prediction and filtering by index.
# NOTE(review): assumes oVSData yields samples in the same order as when
# nTargetClassLabels was extracted (shuffle_files=True was used at load time) -- TODO confirm.
for nIndex, nProbs in enumerate(nPredictedProbabilities[:5]):
    print("#%.2d Predicted:%d (Probabilities:%s) Actual:%d" % (nIndex+1, nPredictedClassLabels[nIndex], nProbs, nTargetClassLabels[nIndex])) # [PYTHON] Format string example
    if nIndex == 0:
        # Printed once, for the first sample, to show the sum of its output activations
        print("Sum of all output neuron activations:%.3f" % np.sum(nProbs))


# Evaluation

In [None]:
from mllib.evaluation import CEvaluator
from mllib.visualization import CPlotConfusionMatrix

# We create an evaluator object that will produce several metrics
# Compare the actual class labels with the predicted ones.
# NOTE(review): CEvaluator is a project class (mllib.evaluation); the argument
# order (actual, predicted) is taken from this call -- confirm against its definition.
oEvaluator = CEvaluator(nTargetClassLabels, nPredictedClassLabels)

oEvaluator.PrintConfusionMatrix()
# Per-class metrics
print("Per Class Recall (Accuracy)  :", oEvaluator.Recall)
print("Per Class Precision          :", oEvaluator.Precision)
# Averaged metrics; note that the value printed as "Average Accuracy" is the evaluator's AverageRecall
print("Average Accuracy: %.4f" % oEvaluator.AverageRecall)
print("Average F1 Score: %.4f" % oEvaluator.AverageF1Score)

# Visualize the confusion matrix computed by the evaluator
oConfusionMatrixPlot = CPlotConfusionMatrix(oEvaluator.ConfusionMatrix)
oConfusionMatrixPlot.Show()
