# 8.1 Creating a complex CNN architecture and using advanced Deep Learning training techniques
This example illustrates a more advanced CNN architecture in Tensorflow/Keras, with the use of **custom Keras layers** that implement complex convolutional modules. It is trained on the well-known CIFAR10 dataset (32x32 color images of 10 classes), using a data feed pipeline that augments the dataset with new random samples.


Besides the usual convolutional and max pooling layers, it incorporates the **Global Average Pooling** layer before the **Logits Dense Layer**, which is the current approach for the classifier part of a CNN.

In [None]:
# Mount GDrive, change directory and check contents of folder.

import os
from google.colab import drive
from google.colab import files

# Folder inside the mounted Google Drive that becomes the working directory of this notebook.
PROJECT_FOLDER = "/content/gdrive/My Drive/Colab Notebooks/CS345_SP22/6. CNN"

# Mount Google Drive first: the project folder lives under the mount point.
drive.mount('/content/gdrive/')
# NOTE(review): presumably needed so that the project-local imports and relative paths
# used by the following cells resolve - confirm.
os.chdir(PROJECT_FOLDER)
print("Current dir: ", os.getcwd())

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from mllib.utils import RandomSeed

# __________ | Settings | __________
# Flag for plotting the data; it is not read anywhere in the visible part of this notebook.
IS_PLOTING_DATA         = True
# When True, the training cell sets `run_eagerly` on the model before calling `fit()`.
IS_DEBUGABLE            = False
# When True, the model is compiled and trained again even if a saved model folder already exists.
IS_RETRAINING           = True
# NOTE(review): presumably seeds the random number generators for reproducibility - confirm in mllib.utils.
RandomSeed(2022)

# Hyperparameters
For each training experiment, we define all the model/training hyperparameters inside a Python dictionary.


**Deep Learning Techniques**:

* L2 Regularization with a given weight decay tries to keep the weights small, which helps to prevent overfitting. It can be applied to specific layers.
* Momentum can be used during training, so that each weight update also takes the previous updates into account.
* Learning Rate scheduling, a list that contains pairs of [epoch,lr]. We set the learning rate to a specific value at a given epoch.


In [None]:
# Hyperparameter set for the custom CNN experiment on CIFAR10.
CONFIG_CUSTOM_CNN = {
    # ... Model identity and architecture ...
    "ModelName": "CIFAR10_MyCustomCNN",
    "CNN.InputShape": [32, 32, 3],
    "CNN.Classes": 10,
    "CNN.ConvOutputFeatures": [32, 32, 32, 32, 32],
    # ... Training ...
    "Training.MaxEpoch": 28,
    "Training.BatchSize": 128,
    "Training.LearningRate": 0.1,
    # Pairs of [epoch, learning rate]
    "Training.LearningRateScheduling": [[10, 0.05]],
    "Training.Momentum": 0.9,
    "Training.RegularizeL2": False,
    "Training.WeightDecay": 1e-4,
}

# The hyperparameter set that the rest of the notebook reads.
CONFIG = CONFIG_CUSTOM_CNN
                     

We choose the hyperparameter set for the current model training experiment

In [None]:
# Select the hyperparameter set that the following cells read through `CONFIG`.
CONFIG = CONFIG_CUSTOM_CNN

# CIFAR10
The [CIFAR10 dataset](https://www.cs.toronto.edu/~kriz/cifar.html), which dates back to 2009, has become a standard toy dataset for understanding the image classification task. It contains 60000 tiny color images of 32x32 resolution for the classes
1. airplane, 2. automobile, 3. bird, 4. cat, 5. deer, 6. dog, 7. frog, 8. horse, 9. ship, 10. truck
                               
It is already split into a training set of 50000 images (5000 images per class), while the remaining 10000 images (1000 images per class) are used to validate the model.

# Dataset loading
We are using a custom implementation of the dataset that downloads and converts the images into Python pickle files. 

In [None]:
from datasets.cifar10.dataset import CCIFAR10DataSet

# ... // Create the data objects \\ ...
# Instantiate the CIFAR10 dataset object and report the shape of its two sample arrays.
oDataset = CCIFAR10DataSet()
for sCaption, nSamples in (
    ("Training samples set shape:", oDataset.TSSamples),
    ("Validation samples set shape:", oDataset.VSSamples),
):
    print(sCaption, nSamples.shape)

# Data Feeding for Training and Validation
We create two data feeding objects that speed up the loading of samples to the model's input. We use the `tf.data.Dataset` class and its method `from_tensor_slices()` to create the data feed object, supplying as parameters the numpy arrays for the features of all TS samples `oDataset.TSSamples` and the corresponding class labels `oDataset.TSLabels`. We do the same for the VS.


In [None]:
# -----------------------------------------------------------------------------
def __normalizeImage(p_tImage):
    """
    Casts the color component values of an image tensor to `float32` and divides them by 255.

    :param p_tImage: image tensor (the notebook feeds `uint8` color components)
    :return: the `float32` tensor with every value divided by 255
    """
    tFloatImage = tf.cast(p_tImage, tf.float32)
    return tFloatImage / 255.
# ------------------------------------------------------------------------------
def PreprocessImageAugmentDataset(p_tImageInTS, p_tLabelInTS):
    """
    Prepares one training sample: normalizes the image, applies a random left-right
    flip (the data augmentation step) and one-hot encodes the class label.

    :param p_tImageInTS: image tensor of a sample in the training set
    :param p_tLabelInTS: class label of the same sample
    :return: tuple (randomly flipped normalized image, one-hot encoded label)
    """
    nClassCount = CONFIG["CNN.Classes"]

    # Data augmentation: the normalized image is randomly mirrored, which yields a new random sample.
    tFlippedImage = tf.image.random_flip_left_right(__normalizeImage(p_tImageInTS))

    return tFlippedImage, tf.one_hot(p_tLabelInTS, nClassCount)
# -----------------------------------------------------------------------------------
nBatchSize = CONFIG["Training.BatchSize"]

# Training data feed pipeline
# The order of the stages matters: `cache()` stores whatever the stages before it produce
# during the first epoch and replays it in all following epochs. The mapping function applies
# a random flip, so it must run AFTER the cache; if it ran before, the flips drawn in the
# first epoch would be replayed unchanged and no new random samples would be created.
oTSData = tf.data.Dataset.from_tensor_slices((oDataset.TSSamples, oDataset.TSLabels))
oTSData = oTSData.cache()
# Shuffle with a buffer as large as the sample count, i.e. across the whole TS.
oTSData = oTSData.shuffle(oDataset.TSSampleCount)
# Normalization, random augmentation and one-hot encoding are recomputed in every epoch.
oTSData = oTSData.map(PreprocessImageAugmentDataset, num_parallel_calls=tf.data.AUTOTUNE)
oTSData = oTSData.batch(nBatchSize)
print("Will shuffle all the %d samples in the TS before splitting into batches with %d samples/batch" % (oDataset.TSSampleCount, nBatchSize))




# Validation data feed pipeline
# -----------------------------------------------------------------------------------
def PreprocessImage(p_tImageInVS, p_tLabelInVS):
    """
    Prepares one validation sample: normalizes the image and one-hot encodes the class
    label. No random augmentation is applied.

    :param p_tImageInVS: image tensor of a sample in the validation set
    :param p_tLabelInVS: class label of the same sample
    :return: tuple (normalized image, one-hot encoded label)
    """
    nClassCount = CONFIG["CNN.Classes"]
    return __normalizeImage(p_tImageInVS), tf.one_hot(p_tLabelInVS, nClassCount)
# -----------------------------------------------------------------------------------
# Validation feed: the same normalization as for training, but no augmentation and no
# shuffling. The batch size equals the sample count, so the whole VS is a single batch.
oVSData = (
    tf.data.Dataset.from_tensor_slices((oDataset.VSSamples, oDataset.VSLabels))
    .map(PreprocessImage, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(oDataset.VSSampleCount)
)
print("One batch for all the %d samples in the VS" % oDataset.VSSampleCount)

# Show both data feed objects below a dotted separator line.
print("." * 50)
print("Training data feed object:", oTSData)
print("Validation data feed object:", oVSData)

For training there is a pre-processing pipeline:
* We normalize each sample in the scale [0,1]
* We do a random left-right flip to create a new random sample, out of the original one.
* We cache the TS (for speed)
* We shuffle the TS.
* We split the TS into minibatches that will be fed into the model's input in each training step of the epoch.

For validation we must not do the random augmentation and no shuffling is needed, but we must do the same normalization.


**Deep Learning techniques**: The training pipeline implements 

* Input feature normalization.
* Dataset augmentation.
* Random minibatch sampling out of the available TS samples.


# Custom Complex CNN Architecture
We will declare a `keras.Model` descendant class to implement our complex CNN. We will create convolutional modules from `keras.Layer` descendants that can contain multiple keras layers with complex connections.

**Deep Learning Techniques**:

* We choose a proper weight initialization method, here `glorot_uniform`. You can try with `he_normal`.
* The convolutional kernels inside the modules are regularized.
* We are using the **DropOut** technique. We will randomly keep only a fraction of the input neurons (e.g. 60%) for the logits inside a training step. The remaining 40% of the neurons are not trained in that step. In the next step a different 60% of the neurons are trained. This mitigates overfitting because fewer neurons are trained in each step.

In [None]:
# __________ // Create the Machine Learning model and training algorithm objects \\ __________
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from models.ConvModules import CBasicConvModule
from mllib.helpers import CKerasModelStructure, CModelConfig

# =========================================================================================================================
class CMyCustomCNN(keras.Model):
    """
    Custom CNN classifier implemented as a `keras.Model` descendant.

    The network consists of:
      * a stem of two convolutions (the second one with stride 2), each followed by
        a ReLU activation and batch normalization,
      * a placeholder for the custom convolutional modules of the core,
      * a classifier: global average pooling, dropout, a dense layer with one neuron
        per class and a softmax activation.
    """
    # --------------------------------------------------------------------------------------
    def __init__(self, p_oConfig):
        """
        Stores the hyperparameters and creates all the Keras layers of the model.

        :param p_oConfig: the hyperparameter set; it is wrapped into a `CModelConfig` object
                          and read through `self.Config.Value` with the keys "CNN.Classes",
                          "CNN.ConvOutputFeatures", "Training.RegularizeL2" and
                          "Training.WeightDecay".
        """
        super().__init__()

        # ..................... Object Attributes ...........................
        self.Config = CModelConfig(self, p_oConfig)

        self.ClassCount = self.Config.Value["CNN.Classes"]
        self.ConvLayerFeatures = self.Config.Value["CNN.ConvOutputFeatures"]
        # Created lazily by the first run of call()
        self.Structure = None

        # ......... Keras layers (the actual objects are assigned in Create()) .........
        self.StemConv1 = self.StemActivation1 = self.StemBatchNorm1 = None
        self.StemConv2 = self.StemActivation2 = self.StemBatchNorm2 = None
        self.Module1 = self.Module2 = self.Module3 = None
        self.GlobalAveragePooling = self.DropOut = self.Logits = self.SoftmaxActivation = None
        # ...................................................................

        self.Create()
    # --------------------------------------------------------------------------------------
    def createWeightRegulizer(self):
        """
        Creates the kernel regularizer that is selected by the hyperparameters.

        :return: a new L2 regularizer with the "Training.WeightDecay" factor when
                 "Training.RegularizeL2" is set, otherwise None
        """
        if not self.Config.Value["Training.RegularizeL2"]:
            return None
        return regularizers.L2(self.Config.Value["Training.WeightDecay"])
    # --------------------------------------------------------------------------------------
    def Create(self):
        """
        Creates the Keras layers and assigns them to the attributes of the model.
        """
        # Settings shared by the two convolutions of the stem
        dStemConvSettings = dict(kernel_size=(3, 3), padding="same", use_bias=False,
                                 kernel_initializer="glorot_uniform", bias_initializer="zeros")

        # ....... Stem: first convolution keeps the resolution (stride 1) .......
        self.StemConv1 = layers.Conv2D(self.ConvLayerFeatures[0], strides=1,
                                       kernel_regularizer=self.createWeightRegulizer(),
                                       **dStemConvSettings)
        self.StemActivation1 = layers.Activation("relu")
        self.StemBatchNorm1 = layers.BatchNormalization()

        # ....... Stem: second convolution uses stride 2 .......
        self.StemConv2 = layers.Conv2D(self.ConvLayerFeatures[1], strides=2,
                                       kernel_regularizer=self.createWeightRegulizer(),
                                       **dStemConvSettings)
        self.StemActivation2 = layers.Activation("relu")
        self.StemBatchNorm2 = layers.BatchNormalization()

        # ..... PLACE YOUR CUSTOM ARCHITECTURE HERE .....
        # Common settings for the convolutional modules of the core. This dictionary is not
        # used yet; it is meant to be passed to the modules that will be created here.
        oCommonModuleConfig = {
            "Convolution.Features": None,
            "Convolution.PaddingSize": 1,
            "Convolution.WindowSize": 3,
            "Convolution.Stride": 1,
            "Convolution.KernelInitializer": "glorot_uniform",
            "Convolution.HasBias": False,
            "Convolution.BiasInitializer": None,
            "Convolution.RegularizeL2": self.Config.Value["Training.RegularizeL2"],
            "Convolution.WeightDecay": self.Config.Value["Training.WeightDecay"],
            "ActivationFunction": "relu",
            "Normalization": "BatchNormalization",
        }

        # ... = CBasicConvModule(self, oCommonModuleConfig, self.ConvLayerFeatures[3],p_bIsMaxPoolDownsampling=True)

        # ....... Classifier .......
        # Global Average Pooling flattens the activation tensor into an average vector
        self.GlobalAveragePooling = layers.GlobalAveragePooling2D()

        # Dropout randomly keeps 60% of the neurons in each step of the training process. This mitigates overfitting.
        self.DropOut = layers.Dropout(rate=0.4)

        # Output layer with one neuron per class; the SoftMax activation function is applied to its output
        self.Logits = layers.Dense(self.ClassCount, use_bias=True,
                                   kernel_initializer="glorot_uniform",
                                   bias_initializer="zeros",
                                   kernel_regularizer=self.createWeightRegulizer())
        self.SoftmaxActivation = layers.Softmax()
    # --------------------------------------------------------------------------------------
    def call(self, p_tInput):
        """
        Forward pass of the model.

        When `self.Structure` has not been created yet, a `CKerasModelStructure` object is
        created and the input tensor plus the output tensor of every layer are added to it.

        :param p_tInput: the input tensor of the model
        :return: the output tensor of the softmax activation
        """
        # Lazy initialization of the model structure. The logic of adding tensors to the structure runs just once.
        bIsRecording = self.Structure is None
        if bIsRecording:
            self.Structure = CKerasModelStructure()

        def fnRecord(p_tTensor):
            # Adds the tensor to the structure only during the recording run; returns it unchanged.
            if bIsRecording:
                self.Structure.Add(p_tTensor)
            return p_tTensor

        # ....... Stem  .......
        tA = fnRecord(p_tInput)
        oStemLayers = (
            # First learnable convolutional module
            self.StemConv1, self.StemActivation1, self.StemBatchNorm1,
            # Second learnable convolutional module
            self.StemConv2, self.StemActivation2, self.StemBatchNorm2,
        )
        for oLayer in oStemLayers:
            tA = fnRecord(oLayer(tA))

        # ....... Core  .......
        # ..... PLACE YOUR CUSTOM ARCHITECTURE HERE .....

        # ....... Classifier  .......
        oClassifierLayers = (self.GlobalAveragePooling, self.DropOut, self.Logits, self.SoftmaxActivation)
        for oLayer in oClassifierLayers:
            tA = fnRecord(oLayer(tA))

        return tA
    # --------------------------------------------------------------------------------------------------------
# =========================================================================================================================

# Create the Neural Network model and training algorithm objects
**Deep Learning techniques**

*   Learning rate scheduling, for more fine-grained weight updates after some epoch
*   Momentum, to improve convergence and avoid local error minima.



In [None]:
# Instantiate the custom CNN with the selected hyperparameter set; its constructor creates the Keras layers.
oNN = CMyCustomCNN(CONFIG)

# -----------------------------------------------------------------------------------
def LRSchedule(epoch, lr):
    """
    Learning rate schedule function for `tf.keras.callbacks.LearningRateScheduler`.

    Looks up `epoch` in the [epoch, learning rate] pairs of
    CONFIG["Training.LearningRateScheduling"]. The first pair with a matching epoch
    defines the new learning rate and is reported with a message.

    :param epoch: the epoch number that the callback passes to the schedule function
    :param lr: the current learning rate
    :return: the scheduled learning rate for `epoch`, or `lr` unchanged when no pair matches
    """
    # A missing or `None` schedule means "no scheduling": the current learning rate is kept
    # instead of failing while iterating over `None`.
    oSchedules = CONFIG.get("Training.LearningRateScheduling") or []
    for nIndex, oSchedule in enumerate(oSchedules, start=1):
        if epoch == oSchedule[0]:
            nNewLR = oSchedule[1]
            print("Schedule #%d: Setting LR to %.5f" % (nIndex, nNewLR))
            return nNewLR
    return lr
# -----------------------------------------------------------------------------------    

nInitialLearningRate = CONFIG["Training.LearningRate"]

# The model ends with a softmax activation, so the cost function receives probabilities (not logits).
oCostFunction = tf.keras.losses.CategoricalCrossentropy(from_logits=False)

# Stochastic gradient descent with momentum, starting from the initial learning rate.
oOptimizer = tf.keras.optimizers.SGD(
    learning_rate=nInitialLearningRate,
    momentum=CONFIG["Training.Momentum"],
)

# The learning rate scheduling callback is registered only when a schedule is defined.
oCallbacks = (
    [tf.keras.callbacks.LearningRateScheduler(LRSchedule)]
    if CONFIG["Training.LearningRateScheduling"] is not None
    else None
)

#### Inspect the model architecture

In [None]:
# Compile the model for training
sModelFolderName = CONFIG["ModelName"]

# The model is (re)trained when retraining is requested or when no saved model folder exists.
bMustTrain = IS_RETRAINING or not os.path.isdir(sModelFolderName)

# The flag becomes True only after every step below has completed.
bIsCompiledForTraining = False
if bMustTrain:
    oNN.compile(loss=oCostFunction, optimizer=oOptimizer, metrics=["accuracy"])
    # Running the model once on the VS executes call(), which records the model structure
    oNN.predict(oVSData)
    oNN.Structure.Print("Model-Structure-%s.csv" % CONFIG["ModelName"])
    bIsCompiledForTraining = True

### Train and evaluate the model

In [None]:
if bIsCompiledForTraining:
    # Train the model
    if IS_DEBUGABLE:
        # Run the model eagerly, so that the training logic can be debugged step by step
        oNN.run_eagerly = True

    # The training feed `oTSData` is a `tf.data.Dataset` that already yields batches, so the
    # `batch_size` argument must not be given to fit(): the Keras documentation states that
    # it should not be specified for dataset inputs.
    oProcessLog = oNN.fit(  oTSData
                            ,epochs=CONFIG["Training.MaxEpoch"]
                            ,validation_data=oVSData
                            ,callbacks=oCallbacks
                          )
    oNN.summary()
    # Save the trained model into the model folder, from where the `else` branch loads it
    oNN.save(sModelFolderName)
else:
    # The model is trained and its state is saved (all the trainable parameters are saved). We load the model to recall the samples
    oNN = keras.models.load_model(sModelFolderName)
    # No training took place in this run, so there is no training log to plot
    oProcessLog = None
    oNN.summary()


# Learning Process Overview

In [None]:
if oProcessLog is not None: # [PYTHON] Checks that object reference is not Null
    # list all data in history
    print("Keys of Keras training process log:", oProcessLog.history.keys())

    # Plot the accuracy during the training epochs
    plt.plot(oProcessLog.history['accuracy'])
    plt.plot(oProcessLog.history['val_accuracy'])
    plt.title('CNN Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # Build a readable name for the cost function out of its snake_case name.
    # The parts are capitalized WITHOUT an extra trailing space: joining them with " " already
    # separates the words, so the plot title does not contain doubled spaces.
    sCostFunctionNameParts = oCostFunction.name.split("_")                     # [PYTHON]: Splitting string into an array of strings
    sCostFunctionNameParts = [x.capitalize() for x in sCostFunctionNameParts]  # [PYTHON]: List comprehension example
    sCostFunctionName = " ".join(sCostFunctionNameParts)                       # [PYTHON]: Joining string in a list with the space between them

    # Plot the error during the training epochs
    plt.plot(oProcessLog.history['loss'])
    plt.plot(oProcessLog.history['val_loss'])
    plt.title('CNN ' + sCostFunctionName + " Error")
    plt.ylabel('Error')
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

# Inference / Evaluation

In [None]:
from mllib.evaluation import CEvaluator
from mllib.visualization import CPlotConfusionMatrix

# Takes one minibatch that is the whole VS.
# The VS feed has a single minibatch that holds the whole VS; fetch it as numpy arrays.
tImages, tLabels = next(iter(oVSData.take(1)))
nImages = tImages.numpy()
nTargetClassOneHot = tLabels.numpy()

# One-hot targets -> class indices
nTargetClassLabels = nTargetClassOneHot.argmax(axis=1)

# Predicted class = index of the largest output probability
nPredictedProbabilities = oNN.predict(nImages)
nPredictedClassLabels = np.argmax(nPredictedProbabilities, axis=1)

# Compare the target class labels to the predicted ones and report the metrics
oEvaluator = CEvaluator(nTargetClassLabels, nPredictedClassLabels)
oEvaluator.PrintConfusionMatrix()
print("Per Class Recall (Accuracy)  :", oEvaluator.Recall)
print("Per Class Precision          :", oEvaluator.Precision)
print("Average Accuracy: %.4f" % oEvaluator.AverageRecall)
print("Average F1 Score: %.4f" % oEvaluator.AverageF1Score)

# Plot the confusion matrix
oConfusionMatrixPlot = CPlotConfusionMatrix(oEvaluator.ConfusionMatrix)
oConfusionMatrixPlot.Show()
