### ResNet50 Model

References:

https://towardsdatascience.com/understand-and-implement-resnet-50-with-tensorflow-2-0-1190b9b52691

https://github.com/suvoooo/Learn-TensorFlow/blob/master/resnet/Implement_Resnet_TensorFlow.ipynb

In [1]:
import matplotlib.pyplot as plt
from matplotlib.patches import  Rectangle
import tensorflow as tf

import os
import tensorflow.keras as keras
from keras.models import Sequential
from keras.layers import Input, Dense, Conv2D, MaxPool2D,MaxPooling2D, Flatten,BatchNormalization, Dropout,ZeroPadding2D, AveragePooling2D, Add, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras import activations
from keras.preprocessing.image import ImageDataGenerator
import numpy as np

from tensorflow.keras.datasets import cifar10
from keras.utils import to_categorical

import time

Using TensorFlow backend.


In [2]:
 #Removing all regularizers

def res_identity(x, filters, num_batchnorm = 0, num_dropout=0, dropout_prob = 0):
  """Residual bottleneck block whose skip connection is the unmodified input.

  Three stride-1 convolutions are applied in sequence (1x1 'valid', 3x3 'same',
  1x1 'valid'). Each one is followed by an optional BatchNormalization, a ReLU
  and an optional Dropout. The block input is then added to the result and a
  final ReLU is applied.

  Args:
    x: input tensor. Presumably it must already have `filters[1]` channels so
      that the final Add is shape-compatible -- TODO confirm.
    filters: pair (f1, f2); f1 is used by the first two convolutions, f2 by
      the third.
    num_batchnorm: number of convolutions, counted from the first, that are
      followed by BatchNormalization (values above 3 behave like 3).
    num_dropout: number of convolutions, counted from the first, whose ReLU is
      followed by Dropout (values above 3 behave like 3).
    dropout_prob: rate handed to every Dropout layer created here.

  Returns:
    The output tensor of the block.
  """
  # NOTE(review): the third convolution is passed through ReLU *before* the
  # addition, whereas res_conv in this file adds the un-activated output of its
  # third convolution. Confirm that this difference is intended.
  shortcut = x  # kept untouched for the residual addition
  inner_filters, outer_filters = filters
  bn_remaining = num_batchnorm
  dropout_remaining = num_dropout

  # (filters, kernel size, padding) of the three convolutions, in order
  conv_plan = (
      (inner_filters, (1, 1), 'valid'),
      (inner_filters, (3, 3), 'same'),   # bottleneck, size kept with padding
      (outer_filters, (1, 1), 'valid'),
  )

  for n_filters, kernel, pad in conv_plan:
    x = Conv2D(n_filters, kernel_size=kernel, strides=(1, 1), padding=pad)(x)
    if bn_remaining > 0:
      x = BatchNormalization()(x)
      bn_remaining -= 1
    x = Activation(activations.relu)(x)
    if dropout_remaining > 0:
      x = Dropout(dropout_prob)(x)
      dropout_remaining -= 1

  # residual addition followed by the block's output activation
  x = Add()([x, shortcut])
  return Activation(activations.relu)(x)

def res_conv(x, s, filters, num_batchnorm = 0, num_dropout=0, dropout_prob = 0):
  """Residual bottleneck block whose skip connection is a strided 1x1 convolution.

  Main branch: 1x1 conv with stride `s`, 3x3 'same' conv, 1x1 conv. The first
  two convolutions are each followed by optional BatchNormalization, ReLU and
  optional Dropout; the third is followed only by optional BatchNormalization.
  Shortcut branch: 1x1 conv with stride `s` followed by BatchNormalization.
  Both branches are added and passed through ReLU.

  Args:
    x: input tensor.
    s: stride of the first main-branch convolution and of the shortcut
      convolution (s = 2 downsizes the feature map).
    filters: pair (f1, f2); f1 is used by the first two convolutions, f2 by
      the third convolution and by the shortcut convolution.
    num_batchnorm: number of main-branch convolutions, counted from the first,
      that are followed by BatchNormalization (values above 3 behave like 3).
    num_dropout: number of main-branch convolutions, counted from the first,
      whose ReLU is followed by Dropout (values above 2 behave like 2).
    dropout_prob: rate handed to every Dropout layer created here.

  Returns:
    The output tensor of the block.
  """
  # NOTE(review): the shortcut BatchNormalization is created even when
  # num_batchnorm == 0, and there is no dropout slot after the third
  # convolution. Confirm both are intended.
  shortcut = x
  inner_filters, outer_filters = filters
  bn_remaining = num_batchnorm
  dropout_remaining = num_dropout

  # (kernel size, strides, padding) of the first two convolutions
  leading_convs = (
      ((1, 1), (s, s), 'valid'),
      ((3, 3), (1, 1), 'same'),
  )
  for kernel, stride, pad in leading_convs:
    x = Conv2D(inner_filters, kernel_size=kernel, strides=stride, padding=pad)(x)
    if bn_remaining > 0:
      x = BatchNormalization()(x)
      bn_remaining -= 1
    x = Activation(activations.relu)(x)
    if dropout_remaining > 0:
      x = Dropout(dropout_prob)(x)
      dropout_remaining -= 1

  # third convolution: no activation or dropout before the addition
  x = Conv2D(outer_filters, kernel_size=(1, 1), strides=(1, 1), padding='valid')(x)
  if bn_remaining > 0:
    x = BatchNormalization()(x)

  # projection shortcut so both branches agree in size
  shortcut = Conv2D(outer_filters, kernel_size=(1, 1), strides=(s, s), padding='valid')(shortcut)
  shortcut = BatchNormalization()(shortcut)

  x = Add()([x, shortcut])
  return Activation(activations.relu)(x)

def resnet50(num_batchnorm = 0, bn_pooling = False, dropout_prob=0, num_dropout_conv = 0, num_dropout_id = 0, num_dropout=0):
  """Assemble the ResNet50-style classifier used in this notebook.

  The network takes 32x32x3 inputs (CIFAR-10 image size) and ends in a
  10-way softmax.

  Args:
    num_batchnorm: forwarded as `num_batchnorm` to every res_conv and
      res_identity block.
    bn_pooling: when true, a BatchNormalization layer follows the 7x7 stem
      convolution.
    dropout_prob: rate used by every Dropout layer, here and in the blocks.
    num_dropout_conv: forwarded as `num_dropout` to every res_conv block.
    num_dropout_id: forwarded as `num_dropout` to every res_identity block.
    num_dropout: budget of Dropout layers outside the residual blocks; the
      first is placed after the stem max-pooling, the second after Flatten.

  Returns:
    A Model named 'Resnet50' mapping the input image to the softmax output.
  """
  input_im = Input(shape=(32, 32, 3)) # cifar 10 images size
  x = ZeroPadding2D(padding=(3, 3))(input_im)
  outer_dropout_left = num_dropout

  # stage 1: stem convolution, optional batch norm, ReLU and max-pooling
  x = Conv2D(64, kernel_size=(7, 7), strides=(2, 2))(x)
  if bn_pooling:
    x = BatchNormalization()(x)
  x = Activation(activations.relu)(x)
  x = MaxPooling2D((3, 3), strides=(2, 2))(x)
  if outer_dropout_left > 0:
    x = Dropout(dropout_prob)(x)
    outer_dropout_left -= 1

  # stages 2-5: one res_conv block followed by N res_identity blocks, no pooling
  # each entry: (stride of the conv block, filters, number of identity blocks)
  stage_plan = (
      (1, (64, 256), 2),
      (2, (128, 512), 3),
      (2, (256, 1024), 5),
      (2, (512, 2048), 2),
  )
  for stride, stage_filters, identity_count in stage_plan:
    x = res_conv(x, s=stride, filters=stage_filters, num_batchnorm=num_batchnorm,
                 num_dropout=num_dropout_conv, dropout_prob=dropout_prob)
    for _ in range(identity_count):
      x = res_identity(x, filters=stage_filters, num_batchnorm=num_batchnorm,
                       num_dropout=num_dropout_id, dropout_prob=dropout_prob)

  # head: average pooling, flatten, optional dropout, dense softmax
  x = AveragePooling2D((2, 2), padding='same')(x)
  x = Flatten()(x)
  if outer_dropout_left > 0:
    x = Dropout(dropout_prob)(x)
  x = Dense(10, activation='softmax')(x) #multi-class

  return Model(inputs=input_im, outputs=x, name='Resnet50')

In [3]:
import pickle

In [4]:
#to measure Time to Accuracy
#https://keras.io/guides/writing_your_own_callbacks/

class timeToAccuracy(keras.callbacks.Callback):
    """Keras callback that checkpoints weights and records time-to-accuracy.

    At the end of every `epoch_ckpt`-th epoch (starting with the first one) the
    model weights are written to '<model_name>.h5'. The first time the
    'accuracy' entry of the epoch logs reaches the target, the elapsed
    wall-clock time since `startTime` is printed and pickled to
    '<model_name>_tta.pkl'.

    Note that 'accuracy' is whatever Keras reports under that key in `logs`;
    fit_resnet in this notebook passes no validation data, so it is presumably
    the training-set accuracy -- verify before comparing it with test accuracy.

    Args:
        startTime: reference timestamp (as returned by time.time()) from which
            the elapsed time is measured.
        epoch_ckpt: weights are saved every `epoch_ckpt` epochs.
        model_name: prefix of the checkpoint and pickle file names.
        target_acc: accuracy threshold to time; defaults to 0.87.
    """

    def __init__(self, startTime, epoch_ckpt, model_name, target_acc=0.87):
        super(timeToAccuracy, self).__init__()
        self.targetAcc = target_acc
        self.foundTarget = False
        self.startTime = startTime
        self.epoch = 0  # number of epochs seen by this callback instance
        self.epoch_ckpt = epoch_ckpt
        self.model_name = model_name

    def on_epoch_end(self, epoch, logs=None):
        """Save a periodic checkpoint and record the first epoch hitting the target."""
        if self.epoch % self.epoch_ckpt == 0:
            self.model.save_weights(self.model_name + '.h5')
        self.epoch += 1

        if self.foundTarget:
            return

        # `logs` may be None or lack the metric; treat both as "not reached yet"
        # instead of raising in the middle of training.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is None or accuracy < self.targetAcc:
            return

        elapsed = time.time() - self.startTime
        print("Time to reach {} accuracy: {} s".format(self.targetAcc, elapsed))
        # Use the name given to this callback, not a notebook-level global
        # called `model_name`, so the file always matches the trained model.
        with open('{}_tta.pkl'.format(self.model_name), 'wb') as file:
            pickle.dump(elapsed, file)
        self.foundTarget = True

In [5]:
def fit_resnet(model, xtrain, ytrain, xtest, ytest, model_name,
               epochs=100, batch_size=256, verbosity=0, epoch_ckpt=10):
    """Compile and train `model`, then evaluate it on the test data.

    The model is compiled with Adam (learning rate 1e-3), categorical
    cross-entropy and the 'accuracy' metric. A timeToAccuracy callback saves
    weights periodically and records the time needed to reach its target
    accuracy.

    Args:
        model: Keras model to train (compiled inside this function).
        xtrain, ytrain: training inputs and targets passed to model.fit.
        xtest, ytest: inputs and targets passed to model.evaluate.
        model_name: file-name prefix handed to the timeToAccuracy callback.
        epochs: number of training epochs (default 100).
        batch_size: mini-batch size used for training (default 256).
        verbosity: `verbose` argument of model.fit (default 0).
        epoch_ckpt: weights are saved every `epoch_ckpt` epochs (default 10).

    Returns:
        (train_time, acc): seconds spent from just before model.fit until it
        returned, and the second value returned by model.evaluate (the
        'accuracy' metric).
    """
    opt = keras.optimizers.Adam(learning_rate=1e-3)
    model.compile(loss = keras.losses.categorical_crossentropy, optimizer = opt, metrics=['accuracy'])

    # fit the model; the same start time is shared with the callback so both
    # timings use one reference point
    start = time.time()
    print('Fitting with BS ', batch_size)
    model.fit(
        xtrain,
        ytrain,
        batch_size=batch_size,
        verbose=verbosity,
        epochs=epochs,
        callbacks=[timeToAccuracy(startTime=start, epoch_ckpt=epoch_ckpt, model_name=model_name)]
    )
    train_time = time.time() - start

    # evaluate: score is [loss, accuracy]; only the accuracy is reported
    score = model.evaluate(xtest, ytest)
    acc = score[1]

    return train_time, acc

In [6]:
# Prepare data: load the CIFAR-10 train/test splits
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalise the images by dividing by 255 and convert the labels
# with to_categorical (categorical targets for the softmax output)
x_train, x_test = x_train/255, x_test/255
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

In [7]:
# Candidate values for the experiments below
num_batchnorm = [1, 2, 3]
num_dropout = [1, 2, 3]                  # dropout layers per residual block
dropout_probabilities = [0.2, 0.5, 0.8]  # dropout rates

In [None]:
#here is output with bs 64
# Full grid over (dropouts per conv block, dropouts per identity block, dropout rate),
# visited in the same order as three nested loops would.
for dp_conv, dp_id, prob in (
    (n_conv, n_id, rate)
    for n_conv in num_dropout
    for n_id in num_dropout
    for rate in dropout_probabilities
):
    print('Training BN1, DPID{}, DPCONV{}, DPPROB{}'.format(dp_id, dp_conv, prob))
    model_name = 'model_bn1_dpid_{}_dpconv_{}_dpprob_{}'.format(dp_id, dp_conv, prob)
    resnet_model = resnet50(
        num_batchnorm=1,
        bn_pooling=True,
        num_dropout_conv=dp_conv,
        num_dropout_id=dp_id,
        dropout_prob=prob,
    )
    tt, acc = fit_resnet(resnet_model, x_train, y_train, x_test, y_test, model_name)

    print('Final accuracy {} reached in {}'.format(acc, tt))

Training BN1, DPID1, DPCONV1, DPPROB0.2
Time to reach 0.87 accuracy: 372.54839181900024 s
Final accuracy 0.774399995803833 reached in 3829.346446990967
Training BN1, DPID1, DPCONV1, DPPROB0.5
Time to reach 0.87 accuracy: 589.5758216381073 s
Final accuracy 0.7669000029563904 reached in 3769.910964488983
Training BN1, DPID1, DPCONV1, DPPROB0.8
Time to reach 0.87 accuracy: 1491.6611511707306 s
Final accuracy 0.5841000080108643 reached in 3829.7431721687317
Training BN1, DPID2, DPCONV1, DPPROB0.2
Time to reach 0.87 accuracy: 425.7824034690857 s
Final accuracy 0.7657999992370605 reached in 3866.098507165909
Training BN1, DPID2, DPCONV1, DPPROB0.5
Time to reach 0.87 accuracy: 881.6204488277435 s
Final accuracy 0.7696999907493591 reached in 3914.1877200603485
Training BN1, DPID2, DPCONV1, DPPROB0.8
Time to reach 0.87 accuracy: 2061.481954097748 s
Final accuracy 0.6880000233650208 reached in 4010.3561074733734
Training BN1, DPID3, DPCONV1, DPPROB0.2
Time to reach 0.87 accuracy: 440.93960690498

In [None]:
# Scratch expression: assigns nothing and is not used by any other cell shown here.
3+6

In [14]:
# Grid points with three dropout layers per conv block and 2 or 3 per identity block
for dp_id in (2, 3):
    for prob in dropout_probabilities:
        print('Training BN1, DPID{}, DPCONV{}, DPPROB{}'.format(dp_id, 3, prob))
        model_name = 'model_bn1_dpid_{}_dpconv_{}_dpprob_{}'.format(dp_id, 3, prob)
        resnet_model = resnet50(
            num_batchnorm=1,
            bn_pooling=True,
            num_dropout_conv=3,
            num_dropout_id=dp_id,
            dropout_prob=prob,
        )
        tt, acc = fit_resnet(resnet_model, x_train, y_train, x_test, y_test, model_name)

        print('Final accuracy {} reached in {}'.format(acc, tt))

Training BN1, DPID2, DPCONV3, DPPROB0.2
Time to reach 0.87 accuracy: 561.4235665798187 s
Final accuracy 0.7799999713897705 reached in 3987.347130537033
Training BN1, DPID2, DPCONV3, DPPROB0.5
Time to reach 0.87 accuracy: 1469.642249584198 s
Final accuracy 0.7343000173568726 reached in 3935.1762204170227
Training BN1, DPID2, DPCONV3, DPPROB0.8
Time to reach 0.87 accuracy: 3511.4251670837402 s
Final accuracy 0.5343999862670898 reached in 3899.4076869487762
Training BN1, DPID3, DPCONV3, DPPROB0.2
Time to reach 0.87 accuracy: 542.2596106529236 s
Final accuracy 0.7867000102996826 reached in 3988.3534348011017
Training BN1, DPID3, DPCONV3, DPPROB0.5
Time to reach 0.87 accuracy: 1618.7941226959229 s
Final accuracy 0.7096999883651733 reached in 3997.3226013183594
Training BN1, DPID3, DPCONV3, DPPROB0.8
Time to reach 0.87 accuracy: 3916.1720113754272 s
Final accuracy 0.5781000256538391 reached in 4058.1947960853577


In [15]:
# Baseline: batch normalisation enabled, no dropout inside the residual blocks
print('Training BN1')
model_name = 'model_bn1'
resnet_model = resnet50(
    num_batchnorm=1,
    bn_pooling=True,
    num_dropout_conv=0,
    num_dropout_id=0,
)
tt, acc = fit_resnet(resnet_model, x_train, y_train, x_test, y_test, model_name)

print('Final accuracy {} reached in {}'.format(acc, tt))

Training BN1
Time to reach 0.87 accuracy: 240.94208121299744 s
Final accuracy 0.7505999803543091 reached in 3654.2860102653503


In [11]:
# No batch-norm arguments enabled; the same dropout count is used for conv and identity blocks
num_dropout = [0, 1, 2, 3]
prob = 0.2
for dp in num_dropout:
    print('Training BN0, DPID{}, DPCONV{}, DPPROB{}'.format(dp, dp, prob))
    model_name = 'model_bn0_dpid_{}_dpconv_{}_dpprob_{}'.format(dp, dp, prob)
    resnet_model = resnet50(
        num_batchnorm=0,
        bn_pooling=False,
        num_dropout_conv=dp,
        num_dropout_id=dp,
        dropout_prob=prob,
    )
    tt, acc = fit_resnet(resnet_model, x_train, y_train, x_test, y_test, model_name)

    print('Final accuracy {} reached in {}'.format(acc, tt))

Training BN0, DPID0, DPCONV0, DPPROB0.2
Fitting with BS  256
Time to reach 0.87 accuracy: 82.08244347572327 s
Final accuracy 0.7084000110626221 reached in 708.535605430603
Training BN0, DPID1, DPCONV1, DPPROB0.2
Fitting with BS  256
Time to reach 0.87 accuracy: 87.687912940979 s
Final accuracy 0.7103000283241272 reached in 705.064444065094
Training BN0, DPID2, DPCONV2, DPPROB0.2
Fitting with BS  256
Time to reach 0.87 accuracy: 118.21027135848999 s
Final accuracy 0.6967999935150146 reached in 716.3103256225586
Training BN0, DPID3, DPCONV3, DPPROB0.2
Fitting with BS  256
Time to reach 0.87 accuracy: 136.94057059288025 s
Final accuracy 0.6814000010490417 reached in 744.1642060279846


In [12]:
# Same sweep with one batch-norm layer per block; relies on `num_dropout`
# as defined by an earlier cell.
for dp in num_dropout:
    prob, bn = 0.2, 1
    print('Training BN{}, DPID{}, DPCONV{}, DPPROB{}'.format(bn, dp, dp, prob))
    model_name = 'model_bn{}_dpid_{}_dpconv_{}_dpprob_{}'.format(bn, dp, dp, prob)
    resnet_model = resnet50(
        num_batchnorm=bn,
        bn_pooling=True,
        num_dropout_conv=dp,
        num_dropout_id=dp,
        dropout_prob=prob,
    )
    tt, acc = fit_resnet(resnet_model, x_train, y_train, x_test, y_test, model_name)

    print('Final accuracy {} reached in {}'.format(acc, tt))

Training BN1, DPID0, DPCONV0, DPPROB0.2
Fitting with BS  256
Time to reach 0.87 accuracy: 60.920347929000854 s
Final accuracy 0.7210999727249146 reached in 716.0166599750519
Training BN1, DPID1, DPCONV1, DPPROB0.2
Fitting with BS  256
Time to reach 0.87 accuracy: 76.42478275299072 s
Final accuracy 0.7469000220298767 reached in 734.286524772644
Training BN1, DPID2, DPCONV2, DPPROB0.2
Fitting with BS  256
Time to reach 0.87 accuracy: 115.41627550125122 s
Final accuracy 0.7577000260353088 reached in 743.4480056762695
Training BN1, DPID3, DPCONV3, DPPROB0.2
Fitting with BS  256
Time to reach 0.87 accuracy: 119.08597731590271 s
Final accuracy 0.7508000135421753 reached in 767.3323245048523
