# 5.5 Training a Deep Neural Network for multiclass classification
This example illustrates a basic DNN in TensorFlow/Keras that is trained for a multiclass classification task. It can create a variable number of layers, given a Python list with the neuron count of each layer. The activation function for the hidden layers is the **Rectified Linear Unit (ReLU)** and for the output layer it is the **Softmax**. In the output, each neuron gives a proper class probability and the sum of all neuron outputs is 1.

In [None]:
# Mount GDrive, change directory and check contents of folder.

import os
from google.colab import drive
from google.colab import files

# Folder inside the mounted Google Drive that becomes the working directory of the notebook.
PROJECT_FOLDER = "/content/gdrive/My Drive/Colab Notebooks/CS345_SP22/5. DNN"

# The drive has to be mounted before changing into a folder that lives on it.
drive.mount('/content/gdrive/')
os.chdir(PROJECT_FOLDER)
# Print the working directory to confirm that the change took effect.
print("Current dir: ", os.getcwd())

# Settings and Basic Package Imports

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from mllib.utils import RandomSeed

# __________ | Settings | __________
IS_PLOTING_DATA         = True   # When True, the plotting cells draw the dataset/activation scatter plots
IS_DEBUGABLE            = False  # When True, the training cell sets run_eagerly on the model
IS_RETRAINING           = True   # When True, the model is trained again even if a saved model folder exists
RandomSeed(2022)  # NOTE(review): presumably seeds the random generators for reproducibility - confirm in mllib.utils

# One named color per class, passed to the plots.
sColorScheme = ["darkseagreen", "royalblue", "crimson", "goldenrod"] # https://matplotlib.org/3.1.0/gallery/color/named_colors.html

# Hyperparameters
For each training experiment, we define all the model/training hyperparameters inside a Python dictionary.

In [None]:
# Baseline experiment: 2 layers (288 hidden neurons + 4 outputs)
# -> 22180 parameters -> 0.95 accuracy
CONFIG_BASELINE = {
    "ModelName": "QPEDS2",
    "DNN.InputFeatures": 72,
    "DNN.LayerNeurons": [288, 4],
    "DNN.Classes": 4,
    "Training.MaxEpoch": 200,
    "Training.BatchSize": 160,
    "Training.LearningRate": 0.2,
}

# 3 layers (two hidden layers of 72 neurons + 4 outputs)
# -> 10804 parameters -> 0.97 accuracy
CONFIG_GOOD_3 = {
    "ModelName": "QPEDS3",
    "DNN.InputFeatures": 72,
    "DNN.LayerNeurons": [72, 72, 4],
    "DNN.Classes": 4,
    "Training.MaxEpoch": 400,
    "Training.BatchSize": 160,
    "Training.LearningRate": 0.1,
}



# 4 layers (three hidden layers of 16 neurons + 4 outputs)
# -> 1780 parameters -> 0.98 accuracy
CONFIG_BEST_4 = {
    "ModelName": "QPEDS4",
    "DNN.InputFeatures": 72,
    "DNN.LayerNeurons": [16, 16, 16, 4],
    "DNN.Classes": 4,
    "Training.MaxEpoch": 400,
    "Training.BatchSize": 160,
    "Training.LearningRate": 0.1,
}

# 5 layers (four hidden layers of 16 neurons + 4 outputs)
# -> 2052 parameters -> 0.98 accuracy
CONFIG_BEST_5 = {
    "ModelName": "QPEDS5",
    "DNN.InputFeatures": 72,
    "DNN.LayerNeurons": [16, 16, 16, 16, 4],
    "DNN.Classes": 4,
    "Training.MaxEpoch": 400,
    "Training.BatchSize": 120,
    "Training.LearningRate": 0.1,
}


We choose the hyperparameter set for the current model training experiment

In [None]:
# Select the experiment to run. CONFIG is a second name for the chosen dictionary (not a copy),
# so any later change made through CONFIG is also visible through CONFIG_BASELINE.
CONFIG = CONFIG_BASELINE

# Dataset loading, preprocessing and splitting
We create the dataset, normalize the feature values, split it into a training and a validation set, and visualize two of the features.

# Quadrupeds
This old dataset from 1989 generates features for 4 quadruped animals, following a normal distribution that is representative of each type of animal:

* dogs 
* cats 
* horses 
* giraffes

Instances have 8 components: neck, four legs, torso, head, and tail.  Each component is represented as a simplified/generalized cylinder. Each cylinder is itself described by 9 features: 
* 3D location (3 features)
* 3D axis (3 features)
* height, 
* radius,
* texture

Thus we have a **72-dimensional vector** (8 components × 9 features) for each sample, which we classify into one of **4 classes**.

In [None]:
from datasets.quadrapeds import CQuadrapedsDataSet
from sklearn import preprocessing
from mllib.visualization import CPlot

# Build the dataset object.
# NOTE(review): 1000 is presumably the number of samples to generate - confirm in datasets.quadrapeds.
oDataset = CQuadrapedsDataSet(1000)

# Rescale the features with a min-max scaler that is fitted on the complete sample matrix.
oMinMaxScaler = preprocessing.MinMaxScaler()
oMinMaxScaler.fit(oDataset.Samples)
oDataset.Samples = oMinMaxScaler.transform(oDataset.Samples)
print("Minmax normalized sample #1:", oDataset.Samples[0])

# NOTE(review): the scaler was fitted before this split, so it has seen the samples that end up
# in the validation set. Presumably 0.2 is the validation fraction - confirm in the dataset class.
oDataset.Split(0.2)

if IS_PLOTING_DATA:
    # Scatter plot of feature columns 6 and 7: first for all samples, then for the validation set only.
    for sPlotTitle, nPlotSamples, nPlotLabels in (
        ("Dataset", oDataset.Samples, oDataset.Labels),
        ("Validation Set", oDataset.VSSamples, oDataset.VSLabels),
    ):
        oPlot = CPlot(
            sPlotTitle, nPlotSamples[:, 6:8], nPlotLabels,
            p_sLabelDescriptions=oDataset.ClassNames, p_sColors=sColorScheme,
            p_sXLabel="Feature 6", p_sYLabel="Feature 7",
        )
        oPlot.Show(p_bIsMinMaxScaled=False)


# ... Create the Tensorflow/Keras objects for feeding the data into the training algorithm
nBatchSize = CONFIG["Training.BatchSize"]

### One hot encoding
For each sample we have an integer label (0 to 3) that indicates its class. Instead of a single output neuron, the neural network has one output neuron per class (four here), and each neuron should fire 1 when the sample belongs to its class. The targets for training are therefore converted to **one-hot encoding**.

In [None]:
# Convert the class labels of the training set (TS) and validation set (VS) into one-hot
# target vectors with CONFIG["DNN.Classes"] entries each.
tTSLabelsOnehot = tf.one_hot(oDataset.TSLabels, CONFIG["DNN.Classes"])
tVSLabelsOnehot = tf.one_hot(oDataset.VSLabels, CONFIG["DNN.Classes"])

# Compare the shape of the labels before and after the encoding.
print("Labels:",oDataset.TSLabels.shape)
print("One-hot Encoding Vectors:", tTSLabelsOnehot.shape)
# Sample #400 is at zero-based index 399; this assumes the training set holds at least 400 samples.
print("One-hot Target of Sample #400", tTSLabelsOnehot[399],  "Label:%d" % oDataset.TSLabels[399])

# Deep Neural Network Class
We declare the class for a **Fully Connected Deep Neural Network (FC-DNN)** that has a variable depth of hidden layers. 

In [None]:
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Activation, Softmax\

# =========================================================================================================================
class CDNNBasic(keras.Model):
    """Fully connected feed-forward classifier with a configurable stack of hidden layers.

    The architecture is read from the configuration dictionary:

    * ``"DNN.LayerNeurons"``: list of neuron counts. Every entry except the last
      creates one hidden ``Dense`` layer; the last entry is the neuron count of
      the output layer (stored as ``ClassCount``).
    * ``"DNN.ActivationFunction"`` (optional): activation of the hidden layers.
      When the key is missing it is added to the dictionary with the value ``"relu"``.

    The keys ``"DNN.InputFeatures"`` and ``"DNN.Classes"`` are not read by this class.
    """
    # --------------------------------------------------------------------------------------
    def __init__(self, p_oConfig):
        """Store the configuration and create the layers.

        Args:
            p_oConfig: configuration dictionary (see the class description). It is kept
                by reference in ``self.Config`` and may receive the default activation key.
        """
        # The configuration is not a constructor argument of keras.Model, so it is
        # not forwarded to the base class.
        super(CDNNBasic, self).__init__()
        # ..................... Object Attributes ...........................
        self.Config = p_oConfig

        self.ClassCount   = self.Config["DNN.LayerNeurons"][-1]   # neurons of the output layer
        self.LayerNeurons = self.Config["DNN.LayerNeurons"][:-1]  # neurons of each hidden layer
        self.HiddenLayers = []      # filled by Create()
        self.OutputLayer  = None    # filled by Create()
        self.SoftmaxActivation = None  # filled by Create()
        self.Input        = None    # last input passed to call()
        # ...................................................................

        if "DNN.ActivationFunction" not in self.Config:
            self.Config["DNN.ActivationFunction"] = "relu"

        self.Create()

    # --------------------------------------------------------------------------------------
    def Create(self):
        """Instantiate the hidden layers, the output layer and the softmax activation."""
        sActivation = self.Config["DNN.ActivationFunction"]
        # Build the whole list in one step instead of patching placeholder entries by index.
        self.HiddenLayers = [Dense(nLayerNeuronCount, activation=sActivation, use_bias=True)
                             for nLayerNeuronCount in self.LayerNeurons]
        # The output layer has no activation of its own; softmax is applied separately in call().
        self.OutputLayer = Dense(self.ClassCount, use_bias=True)
        self.SoftmaxActivation = Softmax()
    # --------------------------------------------------------------------------------------
    def call(self, p_tInput):
        """Feed the input forward through all layers.

        Args:
            p_tInput: input passed to the first hidden layer.

        Returns:
            The softmax activation of the output layer.
        """
        # Keep a reference to the most recent input on the model object.
        self.Input = p_tInput

        # Feed forward to the next layer
        tA = p_tInput
        for oHiddenLayer in self.HiddenLayers:
            tA = oHiddenLayer(tA)

        tA = self.OutputLayer(tA)
        # Using the Softmax activation function for the neurons of the output layer
        tA = self.SoftmaxActivation(tA)

        return tA
    # --------------------------------------------------------------------------------------
# =========================================================================================================================

# Create the Neural Network model and training algorithm objects


In [None]:
# __________ // Create the Machine Learning model and training algorithm objects \\ __________
# NOTE(review): this import rebinds the name CDNNBasic to the class found in the DNN module;
# presumably that file holds the same class that is listed in this notebook - confirm.
from DNN import CDNNBasic

# Build the network described by the selected hyperparameter set.
oNN = CDNNBasic(CONFIG)

nInitialLearningRate    = CONFIG["Training.LearningRate"]

# from_logits=False tells the loss that the model output is already a probability distribution.
oCostFunction   = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
# Stochastic gradient descent, starting from the configured learning rate.
oOptimizer      = tf.keras.optimizers.SGD(learning_rate=nInitialLearningRate)

# Learning Rate Scheduling
We can create a function that changes the learning rate at specific epochs. It receives the epoch index and the current learning rate, and returns the learning rate that will be used by the algorithm. Then we wrap it in a Keras callback object.

In [None]:
# -----------------------------------------------------------------------------------
def LRSchedule(epoch, lr, p_nMilestones=(100, 200, 300), p_nDecayFactor=0.5):
    """Return the learning rate to use for the given epoch.

    At each milestone epoch the current learning rate is multiplied by the decay
    factor and the new value is printed; at every other epoch the learning rate
    is returned unchanged.

    Args:
        epoch: epoch index supplied by the caller (Keras passes a zero-based index,
            so the default milestone 100 is the 101st epoch).
        lr: current learning rate.
        p_nMilestones: epoch indices at which the learning rate is decayed.
        p_nDecayFactor: multiplier applied to the learning rate at a milestone.

    Returns:
        The learning rate for this epoch.
    """
    if epoch in p_nMilestones:
        nNewLR = lr * p_nDecayFactor
        print("Setting LR to %.5f" % nNewLR)
        return nNewLR
    return lr
# -----------------------------------------------------------------------------------    
    
# Wrap the schedule function in a Keras callback so that it can be passed to fit().
oLearningRateSchedule = tf.keras.callbacks.LearningRateScheduler(LRSchedule)   

# Training Process
We call `compile()` for a Keras model object, in order for the library to prepare all the needed Tensorflow objects, a.k.a **tensor objects**. We call `fit()` on the model to execute the training process.
### Callbacks
We can provide callback objects to `fit()`, that invoke event handling methods during the training process.

### Summary
After the training is finished we call `summary()`, which displays the count of model parameters, i.e. the **weights for each synapse** (plus one bias per neuron) of our fully connected (dense) layers.

### Saving/Loading the state
When the training is finished we can save the **state** of our MLP model into a folder, that is saving all the values of its weights. Then we can load the state without having to retrain our model.

In [None]:
# The model name doubles as the name of the folder where the trained model is saved.
sModelFolderName = CONFIG["ModelName"]

if os.path.isdir(sModelFolderName) and not IS_RETRAINING:
    # A saved model exists and retraining was not requested: load it instead of training.
    oNN = keras.models.load_model(sModelFolderName)
    oProcessLog = None  # there is no training log when the model is loaded
    oNN.summary()
else:
    # No saved model folder, or retraining was requested: train from scratch.
    oNN.compile(loss=oCostFunction, optimizer=oOptimizer, metrics=["accuracy"])

    if IS_DEBUGABLE:
        oNN.run_eagerly = True

    oProcessLog = oNN.fit(
        oDataset.TSSamples,
        tTSLabelsOnehot,
        batch_size=nBatchSize,
        epochs=CONFIG["Training.MaxEpoch"],
        validation_data=(oDataset.VSSamples, tVSLabelsOnehot),
        callbacks=[oLearningRateSchedule],
    )
    oNN.summary()
    oNN.save(sModelFolderName)

# Understanding the learning process
### Process Log  
After the training is finished, the `fit()` method returns a history object; its `history` attribute is a Python dictionary that keeps the values of the error and the metrics for each training epoch.

In [None]:
if oProcessLog is not None: # [PYTHON] Checks that object reference is not Null
    # list all data in history
    print("Keys of Keras training process log:", oProcessLog.history.keys())

    # Turn the snake_case name of the cost function into a readable title part,
    # e.g. "some_loss_name" -> "Some Loss Name". The words are joined with exactly one
    # space, so the plot title has no doubled or trailing blanks.
    sCostFunctionNameParts = [x.capitalize() for x in oCostFunction.name.split("_")]  # [PYTHON]: List comprehension example
    sCostFunctionName = " ".join(sCostFunctionNameParts)                              # [PYTHON]: Joining string in a list with the space between them

    # One figure per logged quantity: (history key, plot title, y-axis label).
    # The accuracy is plotted first, then the error.
    for sHistoryKey, sPlotTitle, sYAxisLabel in (
        ("accuracy", "MLP Accuracy", "Accuracy"),
        ("loss", "MLP " + sCostFunctionName + " Error", "Error"),
    ):
        plt.plot(oProcessLog.history[sHistoryKey])
        plt.plot(oProcessLog.history["val_" + sHistoryKey])
        plt.title(sPlotTitle)
        plt.ylabel(sYAxisLabel)
        plt.xlabel('Epoch')
        # The second curve comes from the validation data passed to fit(), not from a test set.
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()

# Visualizing the Neural Activations
We will try to understand the transformation done by the hidden layer in an MLP by visualizing in 2D a combination of its neurons. This will shed some light on how the **Universal Approximation Theorem** works in practice.

In [None]:
if IS_PLOTING_DATA :
    # Zero-based index of the hidden layer whose activations are visualized.
    HIDDEN_LAYER_INDEX = 0

    # Reference plot: two input features of the training set
    oPlot = CPlot("Training Set Input Features", oDataset.TSSamples[:,6:8], oDataset.TSLabels
                  ,p_sLabelDescriptions=oDataset.ClassNames, p_sColors=sColorScheme
                  ,p_sXLabel="Feature 6", p_sYLabel="Feature 7"
                  )
    oPlot.Show(p_bIsMinMaxScaled=False)

    # Feed the training samples forward up to and including the selected hidden layer.
    # A hidden layer after the first one must receive the activations of the layers
    # before it, not the raw input features.
    tActivation = oDataset.TSSamples
    for oHiddenLayer in oNN.HiddenLayers[:HIDDEN_LAYER_INDEX + 1]:
        tActivation = oHiddenLayer(tActivation)
    nTSSamplesTransformed = tActivation.numpy()
    sTitle = "Training Set Layer %d Hidden Neuron Activations" % (HIDDEN_LAYER_INDEX+1)

    # Plot up to three pairs of consecutive neurons: (1,2), (2,3) and (3,4).
    # A pair is plotted only when the layer really has both neurons.
    nNeuronCount = nTSSamplesTransformed.shape[1]
    for nFirstNeuron in range(min(3, nNeuronCount - 1)):
        oPlot = CPlot(sTitle, nTSSamplesTransformed[:,nFirstNeuron:nFirstNeuron+2], oDataset.TSLabels
                      ,p_sLabelDescriptions=oDataset.ClassNames, p_sColors=sColorScheme
                      ,p_sXLabel="Neuron %d" % (nFirstNeuron+1), p_sYLabel="Neuron %d" % (nFirstNeuron+2) )
        oPlot.Show(p_bIsMinMaxScaled=False)


# Recalling Samples and Predicting their Class
We can use `predict()` on a trained model to generate its output. Having four output neurons (one for each class) with values between 0 and 1 that sum to 1, we can treat them as the **probabilities** of the sample belonging to each class.

In [None]:
# Recall the validation samples: one row of class probabilities per sample.
nPredictedProbabilities = oNN.predict(oDataset.VSSamples)
# The predicted class is the index of the output neuron with the highest probability.
nPredictedClassLabels  = np.argmax(nPredictedProbabilities, axis=1)

nTargetClassLabels     = oDataset.VSLabels   

# Print prediction, probabilities and actual label for every validation sample.
for nIndex, nProbs in enumerate(nPredictedProbabilities):
  print("#%.2d Predicted:%d (Probabilities:%s) Actual:%d" % (nIndex+1, nPredictedClassLabels[nIndex], nProbs, nTargetClassLabels[nIndex])) # [PYTHON] Format string example
  # For the first sample only, show the sum of the output neuron activations.
  if nIndex == 0:
    print("Sum of all output neuron activations:%.3f" % np.sum(nProbs))


# Evaluation
We evaluate a model using different metrics and also a **confusion matrix**. These help us understand if our model works properly

In [None]:
from mllib.evaluation import CEvaluator
from mllib.visualization import CPlotConfusionMatrix

# We create an evaluator object that will produce several metrics
# from the actual and the predicted class labels of the validation set.
oEvaluator = CEvaluator(nTargetClassLabels, nPredictedClassLabels)

oEvaluator.PrintConfusionMatrix()
print("Per Class Recall (Accuracy)  :", oEvaluator.Recall)
print("Per Class Precision          :", oEvaluator.Precision)
# Note: the value printed as "Average Accuracy" is the evaluator's AverageRecall.
print("Average Accuracy: %.4f" % oEvaluator.AverageRecall)
print("Average F1 Score: %.4f" % oEvaluator.AverageF1Score)
      
# Draw the confusion matrix that the evaluator computed.
oConfusionMatrixPlot = CPlotConfusionMatrix(oEvaluator.ConfusionMatrix)
oConfusionMatrixPlot.Show()      
