# 5.3 Training of an MLP Neural Network with Tensorflow / Keras
This example illustrates a basic MLP Neural Network in Tensorflow/Keras that is trained for a binary classification task. It shows different variants of Gradient Descent optimization, which achieves learning through error backpropagation:
* Gradient Descent: **Whole** dataset at each step
* Fully Stochastic Gradient Descent: **One** sample at each step
* Stochastic Minibatch Gradient Descent: A **minibatch** with some samples at each step.





In [None]:
# Mount GDrive, change directory and check contents of folder.

import os
from google.colab import drive
from google.colab import files

# Mount Google Drive so that its folders become visible to the Colab runtime.
drive.mount('/content/gdrive/')

# Switch the working directory to the lesson folder on the mounted drive
# and report where we ended up.
PROJECT_FOLDER = "/content/gdrive/My Drive/Colab Notebooks/CS345_SP22/5. DNN"
os.chdir(PROJECT_FOLDER)
print("Current dir: ", os.getcwd())

# Settings and Basic Package Imports

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from mllib.utils import RandomSeed

# __________ | Settings | __________
# RandomSeed comes from mllib.utils; presumably it seeds the random number
# generators so that runs are repeatable (confirm in mllib.utils).
RandomSeed(2022)

# Enables the dataset and hidden-activation plots.
IS_PLOTING_DATA = True
# When True, the model is switched to eager execution before fit().
IS_DEBUGABLE = False
# When True, the model is trained even if a saved model folder already exists.
IS_RETRAINING = True

# Hyperparameters
For each training experiment, we define all the model/training hyperparameters inside a Python dictionary.

In [None]:
# __________ | Hyperparameters | __________
def _BuildConfig(p_sModelName, p_nHiddenNeurons, p_nMaxEpoch, p_nBatchSize, p_nLearningRate):
    """Return a new hyperparameter dictionary for one training experiment.

    Every experiment uses 2 input features and 2 classes; only the model name,
    the hidden layer size and the training settings vary between experiments.
    Each call returns a separate dictionary object.
    """
    return {
        "ModelName": p_sModelName,
        "MLP.InputFeatures": 2,
        "MLP.HiddenNeurons": p_nHiddenNeurons,
        "MLP.Classes": 2,
        "Training.MaxEpoch": p_nMaxEpoch,
        "Training.BatchSize": p_nBatchSize,
        "Training.LearningRate": p_nLearningRate,
    }


# ... Large batch (160 samples per step), learning rate 0.1
CONFIG_GD_LOCAL_MINIMA = _BuildConfig(
    "MLP1_LOCAL_MINIMA", p_nHiddenNeurons=2, p_nMaxEpoch=40, p_nBatchSize=160, p_nLearningRate=0.1)

CONFIG_GD_GOOD = _BuildConfig(
    "MLP1_TRAIN2", p_nHiddenNeurons=2, p_nMaxEpoch=18, p_nBatchSize=160, p_nLearningRate=0.1)

# ... One sample per step
CONFIG_FULLY_STOCHASTIC_GD_GOOD = _BuildConfig(
    "MLP1_TRAIN3", p_nHiddenNeurons=2, p_nMaxEpoch=18, p_nBatchSize=1, p_nLearningRate=0.1)

# ... Minibatches, learning rate 0.15
CONFIG_STOCHASTIC_MINIBATCH_GD_GOOD = _BuildConfig(
    "MLP1_TRAIN4", p_nHiddenNeurons=2, p_nMaxEpoch=18, p_nBatchSize=10, p_nLearningRate=0.15)

CONFIG_STOCHASTIC_MINIBATCH_GD_EXCELLENT = _BuildConfig(
    "MLP1_BEST", p_nHiddenNeurons=2, p_nMaxEpoch=125, p_nBatchSize=20, p_nLearningRate=0.15)

CONFIG_STOCHASTIC_MINIBATCH_GD_OVERFITTING = _BuildConfig(
    "MLP1_OVERFIT", p_nHiddenNeurons=2, p_nMaxEpoch=200, p_nBatchSize=20, p_nLearningRate=0.15)

CONFIG_STOCHASTIC_MINIBATCH_GD_UNDERFITTING = _BuildConfig(
    "MLP1_UNDERFIT", p_nHiddenNeurons=2, p_nMaxEpoch=36, p_nBatchSize=20, p_nLearningRate=0.15)

# ... Wider hidden layers, same training settings as MLP1_BEST
CONFIG_MLP_4HIDDEN = _BuildConfig(
    "MLP1_4HIDDEN", p_nHiddenNeurons=4, p_nMaxEpoch=125, p_nBatchSize=20, p_nLearningRate=0.15)

CONFIG_MLP_32HIDDEN = _BuildConfig(
    "MLP1_32HIDDEN", p_nHiddenNeurons=32, p_nMaxEpoch=125, p_nBatchSize=20, p_nLearningRate=0.15)



We choose the hyperparameter set for the current model training experiment

In [None]:
# Active experiment: the cells below read their hyperparameters from CONFIG.
CONFIG = CONFIG_STOCHASTIC_MINIBATCH_GD_UNDERFITTING

# Dataset loading, preprocessing and splitting
We create the dataset, normalize the feature values, split it into a training and a validation set, and visualize two features.

In [None]:
# __________ // Create the data objects \\ __________
from datasets.randomdataset import CRandomDataset
from sklearn import preprocessing
from mllib.visualization import CPlot

# Build the synthetic dataset with the given sample count, clusters per class and separability.
oDataset = CRandomDataset(p_nSampleCount=200, p_nClustersPerClass=2, p_nClassSeperability=0.7)

# Fit the min-max scaler and rescale the features in a single step; the fitted
# scaler object stays available in oMinMaxScaler.
# NOTE(review): the scaler is fitted on all samples before Split() is called, so
# the validation samples also contribute to the scaling range.
oMinMaxScaler = preprocessing.MinMaxScaler()
oDataset.Samples = oMinMaxScaler.fit_transform(oDataset.Samples)
print("Minmax normalized sample #1:", oDataset.Samples[0])

# Split the samples (0.2 is presumably the validation fraction - confirm in CRandomDataset).
oDataset.Split(0.2)

if IS_PLOTING_DATA:
    # One plot for oDataset.Samples/Labels and one for the validation set.
    for sPlotTitle, nPlotSamples, nPlotLabels in (
            ("Dataset", oDataset.Samples, oDataset.Labels),
            ("Validation Set", oDataset.VSSamples, oDataset.VSLabels)):
        oPlot = CPlot(sPlotTitle, nPlotSamples, nPlotLabels)
        oPlot.Show(p_bIsMinMaxScaled=False)

# ... Batch size that is later passed to the training algorithm
nBatchSize = CONFIG["Training.BatchSize"]

### One hot encoding
For each sample we have a label, 0 or 1, that indicates its class. We will use two neurons for the output of the Neural Network instead of one, so each neuron should fire 1 if it detects that the sample belongs to its class. The targets for training will be changed to **one-hot encoding**:
* 0 -> [1,0]
* 1 -> [0,1]

If there were a third class with the label 2, then the one-hot encoding vectors would have 3 values:

* 0 -> [1,0,0]
* 1 -> [0,1,0]
* 2 -> [0,0,1]

In [None]:
# The number of classes is the length of every one-hot target vector.
nClassCount = CONFIG["MLP.Classes"]

# Encode the integer labels of the training and the validation set.
tTSLabelsOnehot = tf.one_hot(oDataset.TSLabels, nClassCount)
tVSLabelsOnehot = tf.one_hot(oDataset.VSLabels, nClassCount)

# Compare the shape of the labels with the shape of their one-hot encoding.
print("Labels:",oDataset.TSLabels.shape)
print("One-hot Encoding Vectors:", tTSLabelsOnehot.shape)
print("One-hot Target of Sample #1", tTSLabelsOnehot[0])

# MLP Neural Network Class
We declare the class for a Multilayer Perceptron Neural Network that has only two layers: the hidden layer and the output layer. Then we create the object for the MLP.

In [None]:
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Activation, Softmax 

# =========================================================================================================================
class CMLPNeuralNetwork(keras.Model):
    """Multilayer Perceptron with one hidden and one output fully connected layer.

    The layer sizes and the activation function are read from a configuration
    dictionary with the keys "MLP.HiddenNeurons", "MLP.Classes" and, optionally,
    "MLP.ActivationFunction". Both layers use the same activation function.
    """
    # --------------------------------------------------------------------------------------
    def __init__(self, p_oConfig):
        """Store the configuration and create the layers.

        Parameters:
            p_oConfig: dictionary with the model hyperparameters. When it has no
                "MLP.ActivationFunction" entry, "sigmoid" is stored under that key.
        """
        # The configuration dictionary is not a keras.Model constructor argument.
        # Older tf.keras versions ignore a single stray positional argument, but
        # Keras 3 rejects it with a TypeError, so nothing is forwarded to the base class.
        super(CMLPNeuralNetwork, self).__init__()
        # ..................... Object Attributes ...........................
        self.Config = p_oConfig

        self.HiddenLayer = None
        self.OutputLayer = None

        # Last tensor that was passed to call()
        self.Input       = None
        # ...................................................................

        # Default activation function when the experiment does not define one
        if "MLP.ActivationFunction" not in self.Config:
            self.Config["MLP.ActivationFunction"] = "sigmoid"

        self.Create()

    # --------------------------------------------------------------------------------------
    def Create(self):
        """Create the hidden and the output Dense layers from the configuration."""
        sActivation = self.Config["MLP.ActivationFunction"]
        self.HiddenLayer = Dense(self.Config["MLP.HiddenNeurons"], activation=sActivation, use_bias=True)
        self.OutputLayer = Dense(self.Config["MLP.Classes"]      , activation=sActivation, use_bias=True)
    # --------------------------------------------------------------------------------------
    def call(self, p_tInput):
        """Run the forward pass: input -> hidden layer -> output layer.

        Parameters:
            p_tInput: the input tensor; a reference to it is kept in self.Input.

        Returns:
            The activations of the output layer.
        """
        self.Input = p_tInput

        tA = self.HiddenLayer(p_tInput)
        tA = self.OutputLayer(tA)

        return tA
    # --------------------------------------------------------------------------------------
# =========================================================================================================================

# Create the Neural Network model and training algorithm objects
We place the Python class for our model into the `MLP.py` file and we simply create the model's object here

In [None]:
# __________ // Create the Machine Learning model and training algorithm objects \\ __________
# Import the model class from the MLP module; this binds the name CMLPNeuralNetwork
# to the imported class, replacing a class of the same name defined earlier in the notebook.
from MLP import CMLPNeuralNetwork
# Create the network with the hyperparameters of the selected experiment.
oNN = CMLPNeuralNetwork(CONFIG)

We create an object for the error (cost) function and an object for the training algorithm, a.k.a. **the optimizer**. We will experiment with different error functions using the Stochastic Gradient Descent optimizer.

In [None]:
# Training algorithm: Stochastic Gradient Descent with the configured learning rate.
nInitialLearningRate = CONFIG["Training.LearningRate"]
oOptimizer = tf.keras.optimizers.SGD(learning_rate=nInitialLearningRate)

# Error (cost) function. Alternatives to experiment with:
#oCostFunction = tf.keras.losses.MeanAbsoluteError()
#oCostFunction = tf.keras.losses.MeanSquaredError()
# from_logits=False: the loss receives the activated network outputs, not raw logits.
oCostFunction = tf.keras.losses.CategoricalCrossentropy(from_logits=False)

# Training Process
We call `compile()` for a Keras model object, in order for the library to prepare all the needed Tensorflow objects, a.k.a **tensor objects**. We call `fit()` on the model to execute the training process.

### Summary
After the training is finished we call `summary()`, which displays the number of trainable parameters of the model. These are the **weights for each synapse** of our fully connected (dense) layers, plus one bias per neuron.

### Saving/Loading the state
When the training is finished we can save the **state** of our MLP model into a folder, that is saving all the values of its weights. Then we can load the state without having to retrain our model.

In [None]:
sModelFolderName = CONFIG["ModelName"]

# A saved model is reused only when its folder exists and retraining is not requested.
bIsLoadingSavedModel = os.path.isdir(sModelFolderName) and (not IS_RETRAINING)

if bIsLoadingSavedModel:
    # The model is already trained and its state is saved, so we load it to recall
    # the samples. There is no training log in this case.
    oNN = keras.models.load_model(sModelFolderName)
    oProcessLog = None
    oNN.summary()
else:
    # Prepare the model for training with the chosen error function and optimizer.
    oNN.compile(loss=oCostFunction, optimizer=oOptimizer, metrics=["accuracy"])

    # Eager execution is switched on for debugging.
    if IS_DEBUGABLE:
        oNN.run_eagerly = True

    # Train on the training set and evaluate on the validation set after each epoch.
    oProcessLog = oNN.fit(
        oDataset.TSSamples,
        tTSLabelsOnehot,
        batch_size=nBatchSize,
        epochs=CONFIG["Training.MaxEpoch"],
        validation_data=(oDataset.VSSamples, tVSLabelsOnehot),
    )
    oNN.summary()
    # Save the trained model into a folder named after the experiment.
    oNN.save(sModelFolderName)

# Understanding the learning process
### Process Log  
After the training is finished the `fit()` method returns a History object. Its `history` attribute is a Python dictionary that keeps the values of the error and of the metrics for each training epoch.

In [None]:
if oProcessLog is not None: # [PYTHON] Checks that object reference is not Null
    # list all data in history
    print("Keys of Keras training process log:", oProcessLog.history.keys())

    # Build a readable name for the error function, e.g. "categorical_crossentropy" -> "Categorical Crossentropy".
    # The separator is added only by join(), so the title has single spaces and no trailing space.
    sCostFunctionNameParts = [x.capitalize() for x in oCostFunction.name.split("_")]  # [PYTHON]: Splitting a string and list comprehension example
    sCostFunctionName = " ".join(sCostFunctionNameParts)                              # [PYTHON]: Joining strings in a list with a space between them

    # One figure per logged quantity: the training curve and its "val_" counterpart over the epochs
    for sMetricKey, sPlotTitle, sYLabel in (
            ('accuracy', 'MLP Accuracy', 'Accuracy'),
            ('loss', 'MLP ' + sCostFunctionName + " Error", 'Error')):
        plt.plot(oProcessLog.history[sMetricKey])
        plt.plot(oProcessLog.history['val_' + sMetricKey])
        plt.title(sPlotTitle)
        plt.ylabel(sYLabel)
        plt.xlabel('Epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

# Visualizing the Neural Activations
We will try to understand the transformation done by the hidden layer in an MLP by visualizing in 2D a combination of its neurons. This will shed some light on how the **Universal Approximation Theorem** works in practice.

In [None]:
if IS_PLOTING_DATA :
    # Plot the training set in the space of its input features
    oPlot = CPlot("Training Set Input Features", oDataset.TSSamples, oDataset.TSLabels)
    oPlot.Show(p_bIsMinMaxScaled=False)

    # Pass the training samples through the hidden layer only and get the activations as a numpy array
    tActivation = oNN.HiddenLayer(oDataset.TSSamples)
    nTSSamplesTransformed = tActivation.numpy()

    # Plot the training set in the space of consecutive hidden neuron pairs: (1,2), (2,3), (3,4).
    # A pair is plotted only when both of its neurons exist, so a hidden layer with
    # 3 neurons gets two plots and one with fewer than 2 neurons gets none.
    nHiddenNeuronCount = nTSSamplesTransformed.shape[1]
    for nFirstNeuron in range(min(nHiddenNeuronCount - 1, 3)):
        oPlot = CPlot("Training Set Hidden Neuron Activations", nTSSamplesTransformed[:,nFirstNeuron:nFirstNeuron + 2], oDataset.TSLabels,
                       p_sXLabel="Neuron %d" % (nFirstNeuron + 1), p_sYLabel="Neuron %d" % (nFirstNeuron + 2) )
        oPlot.Show(p_bIsMinMaxScaled=False)

# Recalling Samples and Predicting their Class
We can use `predict()` on a trained model to generate its output. Having two neurons (one for each class) with their values between 0 and 1, we can consider that they output **probabilities** for the sample belonging to a class.

In [None]:
# Recall the validation samples; the predicted class of each sample is the index
# of the output neuron with the largest value.
nPredictedProbabilities = oNN.predict(oDataset.VSSamples)
nTargetClassLabels      = oDataset.VSLabels
nPredictedClassLabels   = np.argmax(nPredictedProbabilities, axis=1)

# Print one line per validation sample, numbered from 1.
oSampleResults = zip(nPredictedClassLabels, nPredictedProbabilities, nTargetClassLabels)
for nSampleNumber, (nPredicted, nProbs, nActual) in enumerate(oSampleResults, start=1):
    print("#%.2d Predicted:%d (Probabilities:%s) Actual:%d" % (nSampleNumber, nPredicted, nProbs, nActual)) # [PYTHON] Format string example

# Evaluation
We evaluate a model using different metrics and also a **confusion matrix**. These help us understand whether our model works properly.

In [None]:
from mllib.evaluation import CEvaluator


# The evaluator compares the predicted class labels with the target class labels
oEvaluator = CEvaluator(nPredictedClassLabels, nTargetClassLabels)

# Each section ends with an empty line (the extra newline in end)
print("------------- Confusion Matrix  -----------------")
print(oEvaluator.ConfusionMatrix, end="\n\n")

print("------------- Per Class Metrics ----------------")
print("Per Class Recall (Accuracy)  :", oEvaluator.Recall)
print("Per Class Precision          :", oEvaluator.Precision, end="\n\n")

print("------------- Average Metrics  -----------------")
print("AverageF1Score: %.2f" % oEvaluator.AverageF1Score, end="\n\n")