## IDLS Project

# Effects of Regularization Techniques on Image Classification Tasks

Dataset: Cifar10

Model: ResNet50

Batchnorm Layers: 2, 3

Batch Size: 256

Notebook to collect data using batchsize = 256. Used to compare the performance of Adaptive Gradient Clipping Techniques

References:

https://towardsdatascience.com/understand-and-implement-resnet-50-with-tensorflow-2-0-1190b9b52691

https://github.com/suvoooo/Learn-TensorFlow/blob/master/resnet/Implement_Resnet_TensorFlow.ipynb

In [1]:
import matplotlib.pyplot as plt
from matplotlib.patches import  Rectangle
import tensorflow as tf

import os
import tensorflow.keras as keras
from keras.models import Sequential
from keras.layers import Input, Dense, Conv2D, MaxPool2D,MaxPooling2D, Flatten,BatchNormalization, Dropout,ZeroPadding2D, AveragePooling2D, Add, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras import activations
from keras.preprocessing.image import ImageDataGenerator
import numpy as np

from tensorflow.keras.datasets import cifar10
from keras.utils import to_categorical

import time
import pickle

Using TensorFlow backend.


In [2]:
#Define the Model
#Removing all regularizers

def res_identity(x, filters, num_batchnorm = 0, num_dropout=0, dropout_prob = 0):
  #renet block where dimension doesnot change.
  #The skip connection is just simple identity conncection
  #we will have 3 blocks and then input will be added

    x_skip = x # this will be used for addition with the residual block 
    f1, f2 = filters
    bn = num_batchnorm
    drp = num_dropout

    #first block 
    x = Conv2D(f1, kernel_size=(1, 1), strides=(1, 1), padding='valid')(x)
    if bn>0:
        x = BatchNormalization()(x)
        bn-=1
    x = Activation(activations.relu)(x)
    if drp>0:
        x = Dropout(dropout_prob)(x)
        drp-=1

    #second block # bottleneck (but size kept same with padding)
    x = Conv2D(f1, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    if bn>0:
        x = BatchNormalization()(x)
        bn-=1
    x = Activation(activations.relu)(x)
    if drp>0:
        x = Dropout(dropout_prob)(x)
        drp-=1

    # third block activation used after adding the input
    x = Conv2D(f2, kernel_size=(1, 1), strides=(1, 1), padding='valid')(x)
    if bn>0:
        x = BatchNormalization()(x)
        bn-=1
    x = Activation(activations.relu)(x)
    if drp>0:
        x = Dropout(dropout_prob)(x)
        drp-=1

    # add the input 
    x = Add()([x, x_skip])
    x = Activation(activations.relu)(x)


    return x

def res_conv(x, s, filters, num_batchnorm = 0, num_dropout=0, dropout_prob = 0):
    x_skip = x
    f1, f2 = filters
    bn = num_batchnorm
    drp = num_dropout

    # first block
    x = Conv2D(f1, kernel_size=(1, 1), strides=(s, s), padding='valid')(x)
    # when s = 2 then it is like downsizing the feature map
    if bn>0:
        x = BatchNormalization()(x)
        bn-=1
    x = Activation(activations.relu)(x)
    if drp>0:
        x = Dropout(dropout_prob)(x)
        drp-=1

    # second block
    x = Conv2D(f1, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    if bn>0:
        x = BatchNormalization()(x)
        bn-=1
    x = Activation(activations.relu)(x)
    if drp>0:
        x = Dropout(dropout_prob)(x)
        drp-=1

    #third block
    x = Conv2D(f2, kernel_size=(1, 1), strides=(1, 1), padding='valid')(x)
    if bn>0:
        x = BatchNormalization()(x)
        bn-=1

    # shortcut 
    x_skip = Conv2D(f2, kernel_size=(1, 1), strides=(s, s), padding='valid')(x_skip)
    x_skip = BatchNormalization()(x_skip)

    # add 
    x = Add()([x, x_skip])
    x = Activation(activations.relu)(x)

    return x

def resnet50(num_batchnorm = 0, bn_pooling = False, dropout_prob=0, num_dropout_conv = 0, num_dropout_id = 0, num_dropout=0):

    input_im = Input(shape=(32, 32, 3)) # cifar 10 images size
    x = ZeroPadding2D(padding=(3, 3))(input_im)
    bn = num_batchnorm
    drp = num_dropout

    # 1st stage
    # here we perform maxpooling, see the figure above

    x = Conv2D(64, kernel_size=(7, 7), strides=(2, 2))(x)
    if bn_pooling:
        x = BatchNormalization()(x)
    x = Activation(activations.relu)(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)
    if drp>0:
        x = Dropout(dropout_prob)(x)
        drp-=1

    #2nd stage 
    # frm here on only conv block and identity block, no pooling

    x = res_conv(x, s=1, filters=(64, 256),num_batchnorm=bn, num_dropout = num_dropout_conv, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(64, 256),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(64, 256),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)

    # 3rd stage

    x = res_conv(x, s=2, filters=(128, 512),num_batchnorm=bn, num_dropout = num_dropout_conv, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(128, 512),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(128, 512),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(128, 512),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)

    # 4th stage

    x = res_conv(x, s=2, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_conv, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)

    # 5th stage

    x = res_conv(x, s=2, filters=(512, 2048),num_batchnorm=bn, num_dropout = num_dropout_conv, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(512, 2048),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
    x = res_identity(x, filters=(512, 2048),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)

    # ends with average pooling and dense connection

    x = AveragePooling2D((2, 2), padding='same')(x)

    x = Flatten()(x)
    if drp>0:
        x = Dropout(dropout_prob)(x)
        drp-=1
    x = Dense(10, activation='softmax')(x) #multi-class

    # define the model 

    model = Model(inputs=input_im, outputs=x, name='Resnet50')

    return model

In [3]:
#to measure Time to Accuracy
#https://keras.io/guides/writing_your_own_callbacks/

class timeToAccuracy(keras.callbacks.Callback):
    def __init__(self, startTime, epoch_ckpt, model_name):
        super(timeToAccuracy, self).__init__()
        self.targetAcc = 0.87 #CHANGE TO 0.87 WHEN RUNNING MODEL
        self.foundTarget = False
        self.startTime = startTime
        self.epoch = 0
        self.epoch_ckpt = epoch_ckpt
        self.model_name = model_name
    def on_epoch_end(self, epoch, logs=None):
        if self.epoch % self.epoch_ckpt == 0:
            name = self.model_name + '.h5'
            self.model.save_weights(name)
        self.epoch += 1
        
        if not self.foundTarget:
            if logs['accuracy'] >= self.targetAcc:
                current = time.time()
                print("Time to reach {} accuracy: {} s".format(self.targetAcc, current-self.startTime))
                with open('{}_tta.pkl'.format(model_name), 'wb') as file:
                    pickle.dump(current-self.startTime, file)
                self.foundTarget = True

In [4]:
#Fit, evaluate and checkpoint
def fit_resnet(model, xtrain, ytrain, xtest, ytest, model_name):
  
    EPOCHS = 100
    BATCH_SIZE=256
    VERBOSITY = 0   #Change to 0 when actually running model
    EPOCH_CKPT = 10 # save model every N epochs

    opt = keras.optimizers.Adam(learning_rate=1e-3)

    model.compile(loss = keras.losses.categorical_crossentropy, optimizer = opt, metrics=['accuracy'])
    #fit the model
    start = time.time()
    model.fit(
        xtrain,
        ytrain,
        batch_size=BATCH_SIZE,
        verbose=VERBOSITY,
        epochs = EPOCHS,
        callbacks=[timeToAccuracy(startTime=start, epoch_ckpt=EPOCH_CKPT, model_name=model_name)]
    )
    train_time = time.time() - start

    #evaluate
    score = model.evaluate(xtest,ytest)
    loss = score[0]
    acc = score[1]

    return train_time,  acc

In [5]:
#prepare data

(x_train, y_train), (x_test, y_test) = cifar10.load_data() 

# Data normalization
x_train = x_train/255
x_test = x_test/255
y_train = to_categorical(y_train) 
y_test = to_categorical(y_test)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [6]:
#BN=2, dropout_prob=0.2
num_dropout = [0,1,2,3]
for dp in num_dropout:
    if dp:
        prob=0.2
        print('Training BN2, DPID{}, DPCONV{}, DPPROB{}'.format(dp, dp, prob))
        model_name = 'model_256_bn_2_dpid_{}_dpconv_{}_dpprob_{}.pkl'.format(dp, dp, prob)
        resnet_model = resnet50(num_batchnorm=2, bn_pooling=True, num_dropout_conv=dp, num_dropout_id=dp, dropout_prob=prob)
        tt, acc = fit_resnet(resnet_model,x_train, y_train, x_test, y_test, model_name)
            
        print('Final accuracy {} reached in {}'.format(acc, tt))
    else:
        prob = 0
        print('Training BN2, DPID{}, DPCONV{}, DPPROB{}'.format(dp, dp, prob))
        model_name = 'model_256_bn_2_dpid_{}_dpconv_{}_dpprob_{}.pkl'.format(dp, dp, prob)
        resnet_model = resnet50(num_batchnorm=2, bn_pooling=True, num_dropout_conv=dp, num_dropout_id=dp, dropout_prob=prob)
        tt, acc = fit_resnet(resnet_model,x_train, y_train, x_test, y_test, model_name)
            
        print('Final accuracy {} reached in {}'.format(acc, tt))

Training BN2, DPID0, DPCONV0, DPPROB0
Final accuracy 0.6614000201225281 reached in 739.5686771869659
Training BN2, DPID1, DPCONV1, DPPROB0.2
Time to reach 0.87 accuracy: 84.46076798439026 s
Final accuracy 0.7508999705314636 reached in 728.4942226409912
Training BN2, DPID2, DPCONV2, DPPROB0.2
Time to reach 0.87 accuracy: 130.88760042190552 s
Final accuracy 0.7585999965667725 reached in 746.7530069351196
Training BN2, DPID3, DPCONV3, DPPROB0.2
Time to reach 0.87 accuracy: 135.1029691696167 s
Final accuracy 0.7451000213623047 reached in 816.8732273578644


In [7]:
#BN=3, dropout_prob=0.2
num_dropout = [0,1,2,3]
for dp in num_dropout:
    if dp:
        prob=0.2
        print('Training BN3, DPID{}, DPCONV{}, DPPROB{}'.format(dp, dp, prob))
        model_name = 'model_256_bn_3_dpid_{}_dpconv_{}_dpprob_{}.pkl'.format(dp, dp, prob)
        resnet_model = resnet50(num_batchnorm=3, bn_pooling=True, num_dropout_conv=dp, num_dropout_id=dp, dropout_prob=prob)
        tt, acc = fit_resnet(resnet_model,x_train, y_train, x_test, y_test, model_name)
            
        print('Final accuracy {} reached in {}'.format(acc, tt))
    else:
        prob = 0
        print('Training BN3, DPID{}, DPCONV{}, DPPROB{}'.format(dp, dp, prob))
        model_name = 'model_256_bn_3_dpid_{}_dpconv_{}_dpprob_{}.pkl'.format(dp, dp, prob)
        resnet_model = resnet50(num_batchnorm=3, bn_pooling=True, num_dropout_conv=dp, num_dropout_id=dp, dropout_prob=prob)
        tt, acc = fit_resnet(resnet_model,x_train, y_train, x_test, y_test, model_name)
            
        print('Final accuracy {} reached in {}'.format(acc, tt))

Training BN3, DPID0, DPCONV0, DPPROB0
Time to reach 0.87 accuracy: 294.1397042274475 s
Final accuracy 0.7091000080108643 reached in 827.2737009525299
Training BN3, DPID1, DPCONV1, DPPROB0.2
Time to reach 0.87 accuracy: 364.3352873325348 s
Final accuracy 0.7146999835968018 reached in 840.2007429599762
Training BN3, DPID2, DPCONV2, DPPROB0.2
Time to reach 0.87 accuracy: 515.2273375988007 s
Final accuracy 0.7390000224113464 reached in 854.6738781929016
Training BN3, DPID3, DPCONV3, DPPROB0.2
Time to reach 0.87 accuracy: 538.9585971832275 s
Final accuracy 0.7583000063896179 reached in 877.1752090454102
