<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Training-Mechanics" data-toc-modified-id="Training-Mechanics-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Training Mechanics</a></span><ul class="toc-item"><li><span><a href="#Custom-Training" data-toc-modified-id="Custom-Training-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Custom Training</a></span></li><li><span><a href="#TensorFlow-GPU" data-toc-modified-id="TensorFlow-GPU-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>TensorFlow GPU</a></span></li><li><span><a href="#TensorFlow-Model-Optimization" data-toc-modified-id="TensorFlow-Model-Optimization-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>TensorFlow Model Optimization</a></span></li><li><span><a href="#Debugging" data-toc-modified-id="Debugging-1.4"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>Debugging</a></span></li></ul></li></ul></div>

# Training Mechanics

## Custom Training


In [7]:
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import *


# Log the TensorFlow version for reproducibility (the saved output shows 2.1.0).
print(tf.__version__)

2.1.0


In [8]:
# Load the project helper file `cifar_functions.py` directly from disk and bind
# it to the name `utils`; later cells call utils.load_cifar, utils.VGG_Like_CNN,
# utils.train and utils.benchmark.
# NOTE(review): the location is an absolute, machine-specific Windows path, so
# this cell only runs on the original author's machine - consider making it
# configurable.
import importlib.util
spec = importlib.util.spec_from_file_location("cifar_functions", "C://Users/harri/Desktop/2020_Notes/DL/Implementations/CV/Image_Recognition/utils/cifar_functions.py")
utils = importlib.util.module_from_spec(spec)
# Execute the module's top-level code so its functions become attributes of `utils`.
spec.loader.exec_module(utils)

In [15]:
# ---------------------------------------------------------------------------
# Notebook configuration.
# NOTE: DATA_MEAN / DATA_STD_DEV need NumPy (`np`), which must be imported in
# the notebook's import cell for this cell to run on a fresh kernel.
# ---------------------------------------------------------------------------

# data
DATA_NUM_CLASSES        = 10   # size of the softmax output in mobilenet_v2_head
DATA_CHANNELS           = 3
DATA_ROWS               = 32
DATA_COLS               = 32
DATA_CROP_ROWS          = 28   # crop size; used as the model's input height
DATA_CROP_COLS          = 28   # crop size; used as the model's input width
# Per-channel statistics stored with shape (1, 1, 3).
# NOTE(review): presumably broadcast over (rows, cols, channels) images during
# normalization inside the helper module - confirm.
DATA_MEAN               = np.array([[[125.30691805, 122.95039414, 113.86538318]]]) # CIFAR10
DATA_STD_DEV            = np.array([[[ 62.99321928,  62.08870764,  66.70489964]]]) # CIFAR10

# model
MODEL_LEVEL_0_BLOCKS    = 4
MODEL_LEVEL_1_BLOCKS    = 4
MODEL_LEVEL_2_BLOCKS    = 5
# One entry per level, in level order; passed to utils.VGG_Like_CNN as block_repeats.
BLOCK_REPEATS = [MODEL_LEVEL_0_BLOCKS, MODEL_LEVEL_1_BLOCKS, MODEL_LEVEL_2_BLOCKS]

# training
TRAINING_BATCH_SIZE      = 64
TRAINING_SHUFFLE_BUFFER  = 5000
TRAINING_BN_MOMENTUM     = 0.9
TRAINING_BN_EPSILON      = 0.001

TRAINING_LR_MAX          = 0.001   # learning rate handed to the Adam optimizer
TRAINING_LR_INIT_SCALE   = 0.01
TRAINING_LR_INIT_EPOCHS  = 5
TRAINING_LR_FINAL_SCALE  = 0.01
TRAINING_LR_FINAL_EPOCHS = 25

# training (derived)
# NOTE(review): the INIT/FINAL values look like the endpoints of a learning-rate
# schedule; where they are consumed is not visible in this notebook - confirm.
TRAINING_NUM_EPOCHS = TRAINING_LR_INIT_EPOCHS + TRAINING_LR_FINAL_EPOCHS
TRAINING_LR_INIT    = TRAINING_LR_MAX*TRAINING_LR_INIT_SCALE
TRAINING_LR_FINAL   = TRAINING_LR_MAX*TRAINING_LR_FINAL_SCALE

# saving
SAVE_MODEL_PATH = 'F://Models/Model_Design/'

# Keyword arguments shared by the convolutions that are followed by batch norm:
# no bias and no built-in activation.
conv_params = {"padding":'same',
              "use_bias":False,
              "activation":None}

# Keyword arguments shared by every BatchNormalization layer; axis=-1 normalizes
# over the last (channel) axis.
bn_params = {"axis":-1,
             "momentum":TRAINING_BN_MOMENTUM,
             "epsilon":TRAINING_BN_EPSILON,
             "center":True,
             "scale":True}

##########################################################################

# Fetch the training and test datasets from the project helper module.
# NOTE(review): the helper is opaque from here; presumably it returns batched,
# preprocessed CIFAR-10 pipelines - confirm in cifar_functions.py.
dataset_train, dataset_test = utils.load_cifar()

##################################################################################

def mobilenet_v2_tail(inputs, dims):
    """
    First layer of the network: a single 3x3 convolution.

    inputs: Tensor - input to the layer
    dims: int - number of output filters

    The convolution takes the shared `conv_params` settings
    ('same' padding, no bias, no activation).
    """
    entry_conv = Conv2D(filters=dims, kernel_size=(3, 3), **conv_params)
    return entry_conv(inputs)

def inverted_residual(inputs, squeeze_dims, expand_ratio=4, downsample=False):
    """
    Inverted residual block a la MobileNet V2.

    inputs: Tensor - input to the first layer (channels on the last axis)
    squeeze_dims: int - depth of the channel dimension after the bottleneck
    expand_ratio: float - multiplier used to expand the channel dimension
                  before the depthwise convolution
    downsample: bool - whether to halve the spatial resolution

    The channel dimension is expanded by a pointwise conv, processed with a
    depthwise conv, then compressed by a linear bottleneck. The result is
    summed with a shortcut from `inputs`.

    When `downsample` is True the stride of 2 is applied in the 1x1 expansion
    conv and in the shortcut projection; the depthwise conv always uses
    stride 1.

    The shortcut is the raw input when shapes already agree. It is replaced
    by a 1x1 projection when downsampling or when the input channel count
    differs from `squeeze_dims`, so the final Add always receives
    shape-compatible tensors.

    Returns: Tensor - sum of the bottleneck output and the shortcut
    """
    expand_dims = int(squeeze_dims * expand_ratio)
    strides = (2, 2) if downsample else (1, 1)

    # RESIDUAL PATH (EXPAND -> DEPTHWISE -> COMPRESS)
    # expansion: linear combination across channels
    x = Conv2D(expand_dims, (1, 1), strides=strides, **conv_params)(inputs)
    x = BatchNormalization(**bn_params)(x)
    x = ReLU(max_value=6)(x)  # ReLU capped at 6 (ReLU6), as used in the paper

    # depthwise conv: spatial filtering, one filter per channel
    x = DepthwiseConv2D((3, 3), strides=(1, 1), **conv_params)(x)
    x = BatchNormalization(**bn_params)(x)
    x = ReLU(max_value=6)(x)

    # bottleneck: project back down to squeeze_dims channels
    x = Conv2D(squeeze_dims, (1, 1), strides=(1, 1), **conv_params)(x)
    x = BatchNormalization(**bn_params)(x)  # no activation here (linear bottleneck)

    # SHORTCUT PATH
    shortcut = inputs
    # Project whenever the raw input could not be added to x: either the
    # spatial size changed (downsample) or the channel count differs.
    if downsample or inputs.shape[-1] != squeeze_dims:
        shortcut = Conv2D(squeeze_dims, (1, 1), strides=strides, padding='same')(inputs)

    # COMBINE
    return Add()([x, shortcut])

def mobilenet_v2_head(inputs, dims=None):
    """
    Classification head: 1x1 conv -> global average pooling -> softmax.

    inputs: Tensor - feature map produced by the last block
    dims: int - number of filters of the 1x1 convolution

    Returns a Tensor of DATA_NUM_CLASSES softmax scores per example.

    NOTE(review): the default dims=None is handed straight to Conv2D;
    callers presumably always supply a value - confirm.
    """
    pointwise = Conv2D(filters=dims, kernel_size=(1, 1), strides=(1, 1), padding='same')
    features = pointwise(inputs)
    pooled = GlobalAveragePooling2D()(features)
    classifier = Dense(units=DATA_NUM_CLASSES, activation='softmax')
    return classifier(pooled)

In [16]:
# Assemble the network from the three builder functions (first layer, repeated
# block, classification head) using the project's generic model constructor.
mobilenet_v2 = utils.VGG_Like_CNN(
    mobilenet_v2_tail,
    inverted_residual,
    mobilenet_v2_head,
    input_shape=(DATA_CROP_ROWS, DATA_CROP_COLS, DATA_CHANNELS),
    num_levels=3,
    num_downsamples=2,
    block_repeats=BLOCK_REPEATS,
)

# Integer class labels + softmax output -> sparse categorical cross-entropy.
mobilenet_v2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=TRAINING_LR_MAX),
    metrics=['accuracy'],
)

In [None]:
# Train with the project helper; logging is on and model saving is off.
hist = utils.train(
    mobilenet_v2,
    dataset_train,
    dataset_test,
    'mobilenet_v2',
    logs=True,
    save=False,
)

In [None]:
# Evaluate the trained model on the test set using the training history.
utils.benchmark(
    mobilenet_v2,
    dataset_test,
    hist,
    'mobilenet_v2',
)

## TensorFlow GPU

https://www.tensorflow.org/guide/gpu

https://medium.com/@colinshaw_36798/fully-utilizing-your-deep-learning-gpus-61ee7acd3e57

https://data-flair.training/blogs/gpu-in-tensorflow/

https://www.youtube.com/watch?v=26t8MfP8Fo0

## TensorFlow Model Optimization

- [TensorFlow Model Optimization guide](https://www.tensorflow.org/model_optimization/guide)
- [TensorFlow graph optimization guide](https://www.tensorflow.org/guide/graph_optimization)


The goal of model optimization is to minimize the complexity of a model at inference time.

## Debugging
- [`tf.debugging` API reference](https://www.tensorflow.org/api_docs/python/tf/debugging)