### ResNet50 Model

References:

https://towardsdatascience.com/understand-and-implement-resnet-50-with-tensorflow-2-0-1190b9b52691

https://github.com/suvoooo/Learn-TensorFlow/blob/master/resnet/Implement_Resnet_TensorFlow.ipynb

In [1]:
import matplotlib.pyplot as plt
from matplotlib.patches import  Rectangle
import tensorflow as tf

import os
import tensorflow.keras as keras
from keras.models import Sequential
from keras.layers import Input, Dense, Conv2D, MaxPool2D,MaxPooling2D, Flatten,BatchNormalization, Dropout,ZeroPadding2D, AveragePooling2D, Add, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras import activations
from keras.preprocessing.image import ImageDataGenerator
import numpy as np

from tensorflow.keras.datasets import cifar10
from keras.utils import to_categorical

import time

Using TensorFlow backend.


In [2]:
 #Removing all regularizers

def res_identity(x, filters, num_batchnorm = 0, num_dropout=0, dropout_prob = 0):
  #renet block where dimension doesnot change.
  #The skip connection is just simple identity conncection
  #we will have 3 blocks and then input will be added

  x_skip = x # this will be used for addition with the residual block 
  f1, f2 = filters
  bn = num_batchnorm
  drp = num_dropout

  #first block 
  x = Conv2D(f1, kernel_size=(1, 1), strides=(1, 1), padding='valid')(x)
  if bn>0:
    x = BatchNormalization()(x)
    bn-=1
  x = Activation(activations.relu)(x)
  if drp>0:
    x = Dropout(dropout_prob)(x)
    drp-=1

  #second block # bottleneck (but size kept same with padding)
  x = Conv2D(f1, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
  if bn>0:
    x = BatchNormalization()(x)
    bn-=1
  x = Activation(activations.relu)(x)
  if drp>0:
    x = Dropout(dropout_prob)(x)
    drp-=1

  # third block activation used after adding the input
  x = Conv2D(f2, kernel_size=(1, 1), strides=(1, 1), padding='valid')(x)
  if bn>0:
    x = BatchNormalization()(x)
    bn-=1
  x = Activation(activations.relu)(x)
  if drp>0:
    x = Dropout(dropout_prob)(x)
    drp-=1

  # add the input 
  x = Add()([x, x_skip])
  x = Activation(activations.relu)(x)


  return x

def res_conv(x, s, filters, num_batchnorm = 0, num_dropout=0, dropout_prob = 0):
  '''
  here the input size changes''' 
  x_skip = x
  f1, f2 = filters
  bn = num_batchnorm
  drp = num_dropout

  # first block
  x = Conv2D(f1, kernel_size=(1, 1), strides=(s, s), padding='valid')(x)
  # when s = 2 then it is like downsizing the feature map
  if bn>0:
    x = BatchNormalization()(x)
    bn-=1
  x = Activation(activations.relu)(x)
  if drp>0:
    x = Dropout(dropout_prob)(x)
    drp-=1

  # second block
  x = Conv2D(f1, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
  if bn>0:
    x = BatchNormalization()(x)
    bn-=1
  x = Activation(activations.relu)(x)
  if drp>0:
    x = Dropout(dropout_prob)(x)
    drp-=1

  #third block
  x = Conv2D(f2, kernel_size=(1, 1), strides=(1, 1), padding='valid')(x)
  if bn>0:
    x = BatchNormalization()(x)
    bn-=1

  # shortcut 
  x_skip = Conv2D(f2, kernel_size=(1, 1), strides=(s, s), padding='valid')(x_skip)
  x_skip = BatchNormalization()(x_skip)

  # add 
  x = Add()([x, x_skip])
  x = Activation(activations.relu)(x)

  return x

def resnet50(num_batchnorm = 0, bn_pooling = False, dropout_prob=0, num_dropout_conv = 0, num_dropout_id = 0, num_dropout=0):

  input_im = Input(shape=(32, 32, 3)) # cifar 10 images size
  x = ZeroPadding2D(padding=(3, 3))(input_im)
  bn = num_batchnorm
  drp = num_dropout

  # 1st stage
  # here we perform maxpooling, see the figure above

  x = Conv2D(64, kernel_size=(7, 7), strides=(2, 2))(x)
  if bn_pooling:
    x = BatchNormalization()(x)
  x = Activation(activations.relu)(x)
  x = MaxPooling2D((3, 3), strides=(2, 2))(x)
  if drp>0:
    x = Dropout(dropout_prob)(x)
    drp-=1

  #2nd stage 
  # frm here on only conv block and identity block, no pooling

  x = res_conv(x, s=1, filters=(64, 256),num_batchnorm=bn, num_dropout = num_dropout_conv, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(64, 256),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(64, 256),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)

  # 3rd stage

  x = res_conv(x, s=2, filters=(128, 512),num_batchnorm=bn, num_dropout = num_dropout_conv, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(128, 512),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(128, 512),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(128, 512),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)

  # 4th stage

  x = res_conv(x, s=2, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_conv, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(256, 1024),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)

  # 5th stage

  x = res_conv(x, s=2, filters=(512, 2048),num_batchnorm=bn, num_dropout = num_dropout_conv, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(512, 2048),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)
  x = res_identity(x, filters=(512, 2048),num_batchnorm=bn, num_dropout = num_dropout_id, dropout_prob = dropout_prob)

  # ends with average pooling and dense connection

  x = AveragePooling2D((2, 2), padding='same')(x)

  x = Flatten()(x)
  if drp>0:
    x = Dropout(dropout_prob)(x)
    drp-=1
  x = Dense(10, activation='softmax')(x) #multi-class

  # define the model 

  model = Model(inputs=input_im, outputs=x, name='Resnet50')

  return model

In [3]:
import pickle

In [40]:
#to measure Time to Accuracy
#https://keras.io/guides/writing_your_own_callbacks/

class timeToAccuracy(keras.callbacks.Callback):
    def __init__(self, startTime, epoch_ckpt, model_name):
        super(timeToAccuracy, self).__init__()
        self.targetAcc = 0.87 #CHANGE TO 0.87 WHEN RUNNING MODEL
        self.foundTarget = False
        self.startTime = startTime
        self.epoch = 0
        self.epoch_ckpt = epoch_ckpt
        self.model_name = model_name
        self.prev_loss = None
    def on_epoch_end(self, epoch, logs=None):
        if self.epoch % self.epoch_ckpt == 0:
            print(self.epoch, epoch)
            name = self.model_name + '.h5'
            self.model.save_weights(name)
            print('end', logs)
            
        self.epoch += 1
#         if (self.prev_loss == None):
#             self.prev_loss = logs['loss']
#         else:
#             delta = np.abs(logs['loss'] - self.prev_loss)
        if not self.foundTarget:
            if logs['accuracy'] >= self.targetAcc:
                current = time.time()
                print("Time to reach {} accuracy: {} s".format(self.targetAcc, current-self.startTime))
                with open('{}_tta.pkl'.format(model_name), 'wb') as file:
                    pickle.dump(current-self.startTime, file)
                self.foundTarget = True

In [47]:
def fit_resnet(model, xtrain, ytrain, xtest, ytest, model_name, convergence=False):
  
    EPOCHS = 500 if convergence else 100     #Change to 200 when running the model
    BATCH_SIZE= 256
    VERBOSITY = 0   #Change to 0 when actually running model
    EPOCH_CKPT = 15 # save model every N epochs

    opt = keras.optimizers.Adam(learning_rate=1e-3)

    model.compile(loss = keras.losses.categorical_crossentropy, optimizer = opt, metrics=['accuracy'])
    #fit the model
    start = time.time()
    print('Fitting with BS ', BATCH_SIZE)
    es = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=20) if convergence else None
    model.fit(
        xtrain,
        ytrain,
        batch_size=BATCH_SIZE,
        verbose=VERBOSITY,
        epochs = EPOCHS,
        callbacks=[timeToAccuracy(startTime=start, epoch_ckpt=EPOCH_CKPT, model_name=model_name), es]
    )
    train_time = time.time() - start

    #evaluate
    score = model.evaluate(xtest,ytest)
    loss = score[0]
    acc = score[1]

    return train_time,  acc

In [48]:
#prepare data

(x_train, y_train), (x_test, y_test) = cifar10.load_data() 

# Data normalization
x_train = x_train/255
x_test = x_test/255
y_train = to_categorical(y_train) 
y_test = to_categorical(y_test)

In [49]:
dp = 2
prob = 0.2
bn = 2
print('Training BN{}, DPID{}, DPCONV{}, DPPROB{} till convergence'.format(bn, dp, dp, prob))
model_name = 'model_bn{}_dpid_{}_dpconv_{}_dpprob_{}_convergence'.format(bn, dp, dp, prob)
resnet_model = resnet50(num_batchnorm=bn, bn_pooling=True, num_dropout_conv=dp, num_dropout_id=dp, dropout_prob=prob)
tt, acc = fit_resnet(resnet_model,x_train, y_train, x_test, y_test, model_name, convergence=True)

print('Final accuracy {} reached in {}'.format(acc, tt))

Training BN2, DPID2, DPCONV2, DPPROB0.2 till convergence
Fitting with BS  256
0 0
end {'loss': 1.9282594919204712, 'accuracy': 0.3712800145149231}
Time to reach 0.87 accuracy: 116.73651146888733 s
15 15
end {'loss': 0.3318769037723541, 'accuracy': 0.8838800191879272}
30 30
end {'loss': 0.1208881065249443, 'accuracy': 0.9584400057792664}
45 45
end {'loss': 0.1318434327840805, 'accuracy': 0.9588199853897095}
60 60
end {'loss': 0.04956609010696411, 'accuracy': 0.9829199910163879}
75 75
end {'loss': 0.040059611201286316, 'accuracy': 0.9863799810409546}
90 90
end {'loss': 0.03312574326992035, 'accuracy': 0.9883800148963928}
105 105
end {'loss': 0.029459983110427856, 'accuracy': 0.9901599884033203}
120 120
end {'loss': 0.027327213436365128, 'accuracy': 0.9910799860954285}
135 135
end {'loss': 0.02266576699912548, 'accuracy': 0.9922800064086914}
150 150
end {'loss': 0.02108732983469963, 'accuracy': 0.9930400252342224}
165 165
end {'loss': 0.01892142742872238, 'accuracy': 0.9935799837112427}
1

In [50]:
tt, acc = fit_resnet(resnet_model,x_train, y_train, x_test, y_test, model_name, convergence=True)

print('Final accuracy {} reached in {}'.format(acc, tt))

Fitting with BS  256
0 0
end {'loss': 0.015352043323218822, 'accuracy': 0.9948199987411499}
Time to reach 0.87 accuracy: 13.4077730178833 s
15 15
end {'loss': 0.014902709051966667, 'accuracy': 0.9948999881744385}
30 30
end {'loss': 0.010909068398177624, 'accuracy': 0.9963399767875671}
45 45
end {'loss': 0.012066292576491833, 'accuracy': 0.9961199760437012}
Final accuracy 0.7768999934196472 reached in 417.9462547302246


In [51]:
tt, acc = fit_resnet(resnet_model,x_train, y_train, x_test, y_test, model_name, convergence=True)

print('Final accuracy {} reached in {}'.format(acc, tt))

Fitting with BS  256
0 0
end {'loss': 0.011465387418866158, 'accuracy': 0.9961199760437012}
Time to reach 0.87 accuracy: 13.045371770858765 s
15 15
end {'loss': 0.01073383167386055, 'accuracy': 0.9966199994087219}
30 30
end {'loss': 0.011743739247322083, 'accuracy': 0.9957200288772583}
45 45
end {'loss': 0.010371353477239609, 'accuracy': 0.9963399767875671}
60 60
end {'loss': 0.012688098475337029, 'accuracy': 0.9958199858665466}
75 75
end {'loss': 0.008674683049321175, 'accuracy': 0.9970200061798096}
Final accuracy 0.7764999866485596 reached in 619.9140453338623
