# CNN Image Recognition Architecture Implementation


In [1]:
#GETTING THE CIFAR DATASET READY FOR EXPERIMENTATION
import tensorflow_datasets as tfds
import tensorflow as     tf
import math
import numpy             as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import *

## CNN Architecture Utilities

In [2]:
from utils.cifar_functions import *

#PREPARE THE CIFAR DATASET
# Fetch the CIFAR-10 train and test splits; each call also returns the dataset
# metadata, so `info` ends up holding the value from the second (test) call.
dataset_train, info = tfds.load("cifar10", split=tfds.Split.TRAIN, with_info=True)
dataset_test,  info = tfds.load("cifar10", split=tfds.Split.TEST,  with_info=True)

# Training pipeline: per-example preprocessing -> shuffle -> batch -> prefetch.
dataset_train = (
    dataset_train
    .map(pre_processing_train, num_parallel_calls=4)
    .shuffle(buffer_size=TRAINING_SHUFFLE_BUFFER)
    .batch(TRAINING_BATCH_SIZE)
    .prefetch(buffer_size=3)
)

# Testing pipeline: same stages without shuffling; it reuses the training batch size.
dataset_test = (
    dataset_test
    .map(pre_processing_test, num_parallel_calls=4)
    .batch(TRAINING_BATCH_SIZE)
    .prefetch(buffer_size=3)
)

## ResNeXT Architecture

### Sources
- [1](https://arxiv.org/pdf/1611.05431.pdf)

### Intro

The __VGGNet__-style architecture, which stacks identical blocks of convolutional operations on top of each other to form a deep network, is popular because it reduces the complexity of designing a network (there are fewer hyperparameters to choose in the design).



*The __Inception models__ have evolved
over time [38, 39], but an important common property is
a __split-transform-merge strategy__. In an Inception module,
the input is split into a few lower-dimensional embeddings
(by 1×1 convolutions), transformed by a set of specialized
filters (3×3, 5×5, etc.), and merged by concatenation.*

This is similar to what we learned from the __ResNet__ paper: training multiple pathways must take major compute, so the designers of Inception used a bottleneck to reduce the computational complexity. __More representation for less compute.__

### Method


#### Aggregated Transformations

The authors argue for an *aggregated transformation* $$F(x) = \sum^{C}_{i=1}\tau_i(x)$$ where $C$ is the __cardinality__, and each $\tau_i$ is a bottleneck-shaped architecture.


Combined with ResNet style connections between layers, the final model takes the form 
$$y = x+\sum^{C}_{i=1}\tau_i(x)$$


### Implementation Details
![](imgs/resnext.PNG)

*Our models are realized by the form of Fig. 3(c). We
perform batch normalization (BN) [17] right after the
convolutions in Fig. 3(c). ReLU is performed right after each
BN, except for the output of the block where ReLU is performed after the adding to the shortcut, following [14].
We note that the three forms in Fig. 3 are strictly equivalent, when BN and ReLU are appropriately addressed as
mentioned above. We have trained all three forms and
obtained the same results. We choose to implement by
Fig. 3(c) because it is more succinct and faster than the other
two forms.*




In [3]:
#RESNEXT BUILDING BLOCKS
def generic_tail(inputs, dims):
    """
    Apply a single 3x3, same-padded convolution with ReLU activation.

    Parameters:
    inputs: tensor - input tensor to convolve
    dims: int - number of filters (output channels) of the convolution

    Returns the output tensor of the convolution.
    """
    entry_conv = Conv2D(filters=dims, kernel_size=(3, 3), padding='same', activation='relu')
    return entry_conv(inputs)

def resnext_block(inputs, expand_dims, cardinality=8, squeeze_ratio=.25, downsample=False):
    """
    Implementation of the ResNeXt-style block introduced in "Aggregated Residual
    Transformations for Deep Neural Networks":

        y = relu(shortcut(x) + sum_i tau_i(x))

    Each of the `cardinality` paths tau_i is built explicitly as
    1x1 conv_block -> 3x3 conv_block -> 1x1 Conv2D -> BatchNormalization, and the
    path outputs are summed before being added to the shortcut.

    Parameters:
    inputs: tensor -  shape(batch, height, width, channels) input tensor for the block
    expand_dims: int - number of dimensions in the channel dimension that the block will output
    cardinality: int - number of parallel and identical bottlenecks within a block (must be >= 1)
    squeeze_ratio: float - the relative size of the channel dimension to compress to in bottleneck
    downsample: bool - when True, the 3x3 convolution of every path and the shortcut
        projection use stride 2

    Returns:
    tensor - ReLU of the sum of the shortcut and the aggregated paths.

    Raises:
    ValueError - if cardinality is smaller than 1.

    Notes:
    conv_block, conv_params and bn_params are not defined in this cell; they must already
    be in scope (presumably from utils.cifar_functions - TODO confirm).
    """
    if cardinality < 1:
        raise ValueError("cardinality must be >= 1, got {}".format(cardinality))

    # Bottleneck width. Blocks that output 64 or more channels use a bottleneck that is
    # a further 4x narrower. Clamp to 1 so a small expand_dims never requests a
    # convolution with zero filters.
    squeeze_dims = int(expand_dims//(1/squeeze_ratio))
    if expand_dims >= 64:
        squeeze_dims = squeeze_dims//4
    squeeze_dims = max(1, squeeze_dims)

    strides = (2, 2) if downsample else (1, 1)

    #RESIDUAL PATHS
    to_aggregate = []
    for _ in range(cardinality):
        x = conv_block(inputs, squeeze_dims, kernel_size=(1,1), strides=(1,1))
        x = conv_block(x, squeeze_dims, kernel_size=(3,3), strides=strides)
        # No activation after the last BN of a path: ReLU is applied after the shortcut add.
        x = Conv2D(expand_dims, (1,1), strides=(1,1), **conv_params)(x)
        x = BatchNormalization(**bn_params)(x)
        to_aggregate.append(x)
    # A merge layer may reject a single-element list, so skip Add when there is one path.
    path_outputs = to_aggregate[0] if len(to_aggregate) == 1 else Add()(to_aggregate) #ADD PATHS TOGETHER

    #IDENTITY PATH
    # Project the shortcut with a 1x1 convolution whenever its shape would not match the
    # paths: either the spatial size changes (downsample) or the channel count differs.
    in_channels = inputs.shape[-1]
    in_channels = getattr(in_channels, "value", in_channels)  # unwrap Dimension objects if present
    shortcut = inputs
    if downsample or in_channels != expand_dims:
        shortcut = Conv2D(expand_dims,(1,1), strides=strides, **conv_params)(inputs)

    #COMBINE
    # Use a Keras layer for the final activation so the whole block stays inside the
    # functional API instead of calling a raw TensorFlow op on a symbolic Keras tensor.
    combined = Add()([shortcut, path_outputs])
    return ReLU()(combined)

def generic_head(inputs, dims=None):
    """
    Classification head: global average pooling followed by a softmax Dense layer
    with DATA_NUM_CLASSES units.

    Parameters:
    inputs: tensor - feature map to pool and classify
    dims: unused - accepted but never read by this function

    Returns the softmax output tensor.
    """
    pooled = GlobalAveragePooling2D()(inputs)
    classifier = Dense(units=DATA_NUM_CLASSES, activation='softmax')
    return classifier(pooled)

In [4]:
# Assemble the network from the three builder functions defined above.
resnext = VGG_Like_CNN(
    generic_tail,
    resnext_block,
    generic_head,
    input_shape=(DATA_CROP_ROWS, DATA_CROP_COLS, DATA_CHANNELS),
    num_levels=3,
    num_downsamples=2,
    block_repeats=BLOCK_REPEATS,
)

# Sparse categorical cross-entropy loss, Adam optimizer, accuracy as the tracked metric.
resnext.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(TRAINING_LR_MAX),
)

In [5]:
# Print the value returned by get_num_params for the model built above.
# get_num_params is defined outside this notebook; presumably it counts trainable
# weights, as the message says - TODO confirm.
print("Number of trainable parameters: ", get_num_params(resnext))

Number of trainable parameters:  216971


In [6]:
# Run the train() helper (defined outside this notebook) on the model with the training
# and testing pipelines, the run name 'resnext' and logs=True; keep its return value
# as `hist` for the benchmark cell.
# NOTE(review): the saved output of this cell ends with KeyError: 'val_accuracy' right
# after "Epoch 2/30" is printed. The failing lookup is inside train() or something it
# calls, which is not visible here - TODO confirm that the metric key it reads matches
# the key reported by the installed Keras version and that validation data is passed.
hist = train(resnext, dataset_train, dataset_test, 'resnext', logs=True)

######################################################
resnext
######################################################




Training model resnext...
Epoch 1/30
Epoch 2/30

KeyError: 'val_accuracy'

In [None]:
# Call the benchmark() helper (defined outside this notebook) with the model, the test
# pipeline, the training result `hist` and the run name 'resnext'.
# NOTE(review): `hist` only exists if the training cell completed; its saved output
# shows a KeyError, so this cell may fail with a NameError on a fresh run.
benchmark(resnext, dataset_test, hist, 'resnext')

### More
https://paperswithcode.com/paper/shakedrop-regularization-for-deep-residual

## MobileNet V3 Architecture

### Sources
- [1](https://arxiv.org/abs/1905.02244v5)

In [None]:
#MobileNetV3 Implementation  