# CNN on CIFAR Assignment
<ol>
<li>Please visit this link to access the state-of-art DenseNet code for reference - DenseNet - cifar10 notebook link
<li>You need to create a copy of this and "retrain" this model to achieve 90+ test accuracy.
<li>You cannot use Dense Layers (also called fully connected layers), or DropOut.
<li>You MUST use Image Augmentation Techniques.
<li>You cannot use an already trained model as a beginning points, you have to initilize as your own
<li>You cannot run the program for more than 300 Epochs, and it should be clear from your log, that you have only used 300 Epochs
<li>You cannot use test images for training the model.
<li>You cannot change the general architecture of DenseNet (which means you must use Dense Block, Transition and Output blocks as mentioned in the code)
<li>You are free to change Convolution types (e.g. from 3x3 normal convolution to Depthwise Separable, etc)
<li>You cannot have more than 1 Million parameters in total
<li>You are free to move the code from Keras to Tensorflow, Pytorch, MXNET etc.
<li>You can use any optimization algorithm you need.
<li>You can checkpoint your model and retrain the model from that checkpoint so that no need of training the model from first if you lost at any epoch while training. You can directly load that model and Train from that epoch.
</ol>

In [1]:
# import keras
# from keras.datasets import cifar10
# from keras.models import Model, Sequential
# from keras.layers import Dense, Dropout, Flatten, Input, AveragePooling2D, merge, Activation
# from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
# from keras.layers import Concatenate
# from keras.optimizers import Adam
import numpy as np
np.random.seed(42)
from tensorflow.keras import models, layers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization, Activation, Flatten
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.regularizers import l1
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
# this part will prevent tensorflow to allocate all the avaliable GPU Memory
# backend
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
tf.config.list_physical_devices('GPU')

Using TensorFlow backend.


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 11382562692963312915
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 3135687884
locality {
  bus_id: 1
  links {
  }
}
incarnation: 10717626493672370588
physical_device_desc: "device: 0, name: GeForce GTX 1050 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1"
]


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [2]:
# Hyperparameters
num_classes = 10
l = 12
num_filter = 12
compression = 0.5

In [3]:
# Load CIFAR10 Data
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
img_height, img_width, channel = X_train.shape[1],X_train.shape[2],X_train.shape[3]

# convert to one hot encoing 
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes) 

In [4]:
X_train.shape

(50000, 32, 32, 3)

In [5]:
X_test.shape

(10000, 32, 32, 3)

In [6]:
# Refer: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
train_datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

test_datagen = ImageDataGenerator(
    rescale = 1./255)

train_datagen.fit(X_train)
test_datagen.fit(X_test)

In [7]:
# Dense Block
def denseblock(input, num_filter = 12):
    global compression
    temp = input
    for _ in range(l): 
        BatchNorm = layers.BatchNormalization()(temp)
        relu = layers.Activation('relu')(BatchNorm)
        Conv2D_3_3 = layers.Conv2D(int(num_filter*compression), (3,3), use_bias=False ,padding='same', kernel_initializer=\
                                  keras.initializers.glorot_normal(seed=42))(relu)
        concat = layers.Concatenate(axis=-1)([temp,Conv2D_3_3])
        temp = concat
        
    return temp

## transition Block
def transition(input, num_filter = 12):
    global compression
    BatchNorm = layers.BatchNormalization()(input)
    relu = layers.Activation('relu')(BatchNorm)
    Conv2D_BottleNeck = layers.Conv2D(int(num_filter*compression), (1,1), use_bias=False ,padding='same', \
                                      kernel_initializer= keras.initializers.glorot_normal(seed=42))(relu)
    avg = layers.AveragePooling2D(pool_size=(2,2))(Conv2D_BottleNeck)
    return avg

#output layer
def output_layer(input):
    global compression
    BatchNorm = layers.BatchNormalization()(input)
    relu = layers.Activation('relu')(BatchNorm)
    AvgPooling = layers.AveragePooling2D(pool_size=(2,2))(relu)
    output_1 = layers.Conv2D(num_classes, (2,2), kernel_initializer=keras.initializers.glorot_normal(seed=42))(AvgPooling)
    out_1 = Activation('softmax')(output_1)
    flat = layers.Flatten()(out_1)
    return flat

In [8]:
num_filter = 37

input = layers.Input(shape=(img_height, img_width, channel))
First_Conv2D = layers.Conv2D(num_filter, (3,3), use_bias=False ,padding='same')(input)

First_Block = denseblock(First_Conv2D, num_filter)
First_Transition = transition(First_Block, num_filter)

Second_Block = denseblock(First_Transition, num_filter)
Second_Transition = transition(Second_Block, num_filter)

Third_Block = denseblock(Second_Transition, num_filter)
Third_Transition = transition(Third_Block, num_filter)

Last_Block = denseblock(Third_Transition,  num_filter)
output = output_layer(Last_Block)

In [9]:
#https://arxiv.org/pdf/1608.06993.pdf
# from IPython.display import IFrame, YouTubeVideo
# YouTubeVideo(id='-W6y8xnd--U', width=600)

In [10]:
model = Model(inputs=[input], outputs=[output])
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 32, 37)   999         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 32, 37)   148         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 32, 32, 37)   0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [11]:
# determine Loss function and Optimizer
sgd = SGD(learning_rate=0.1, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

In [24]:
csv_logger = CSVLogger('training6.csv', append=True)

In [25]:
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', patience=3, verbose=0, mode='auto', restore_best_weights=False
)

In [26]:
model.fit_generator(train_datagen.flow(X_train, y_train, batch_size=32),
                    epochs=50,
                    steps_per_epoch = X_train.shape[0]/32,
                    verbose=1, 
                    validation_data=test_datagen.flow(X_test, y_test, batch_size=32),
                   callbacks=[csv_logger])

W0321 13:13:39.592768  9596 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']
W0321 13:13:39.609715  9596 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']


Train for 1562.5 steps, validate for 313 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x18181c99160>

**Epoch: 50 done**

In [27]:
model.fit_generator(train_datagen.flow(X_train, y_train, batch_size=32),
                    epochs=50,
                    steps_per_epoch = X_train.shape[0]/32,
                    verbose=1, 
                    validation_data=test_datagen.flow(X_test, y_test, batch_size=32),
                   callbacks=[csv_logger])

W0321 16:23:05.149328  9596 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']
W0321 16:23:05.166312  9596 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']


Train for 1562.5 steps, validate for 313 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x18181cd5160>

**Epoch: 100 done**

In [28]:
model.save_weights('Denset_Checkpt1.h5')

In [29]:
model.fit_generator(train_datagen.flow(X_train, y_train, batch_size=32),
                    epochs=50,
                    steps_per_epoch = X_train.shape[0]/32,
                    verbose=1, 
                    validation_data=test_datagen.flow(X_test, y_test, batch_size=32),
                   callbacks=[csv_logger])

W0321 19:28:33.631057  9596 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']
W0321 19:28:33.649005  9596 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']


Train for 1562.5 steps, validate for 313 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x18181d19390>

**Epoch: 150 done**

In [30]:
tf.keras.backend.set_value(model.optimizer.lr, 0.01)

In [31]:
model.fit_generator(train_datagen.flow(X_train, y_train, batch_size=32),
                    epochs=5,
                    steps_per_epoch = X_train.shape[0]/32,
                    verbose=1, 
                    validation_data=test_datagen.flow(X_test, y_test, batch_size=32),
                   callbacks=[csv_logger])

W0321 22:33:33.005362  9596 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']
W0321 22:33:33.024336  9596 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']


Train for 1562.5 steps, validate for 313 steps
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x18185e980f0>

**Epoch: 155 done**

__Stopped trained epoch here to avoid overfitting__ <br>
__Achieved above 90% test accuracy as per assignment task__

In [32]:
model.save_weights('Denset_Checkpt2.h5')

In [33]:
# Test the model
score = model.evaluate(X_test/255, y_test, verbose=1)



In [34]:
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.29551431020498276
Test accuracy: 0.9124
