<a href="https://colab.research.google.com/github/cliche-niche/model-zoo-submissions/blob/main/ResNet/resNew.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
from keras.datasets import cifar10
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import activations
from tensorflow.keras import regularizers

In [10]:
(trainIm, trainLab), (testIm, testLab) = cifar10.load_data()
trainIm = trainIm / 255.0
testIm = testIm / 255.0
trainLab = tf.keras.utils.to_categorical(trainLab, 10)
testLab = tf.keras.utils.to_categorical(testLab, 10)

In [11]:
#For callbacks
class mcb(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
       if(logs.get('accuracy')>0.78):
            print("\nReached 78% accuracy so cancelling training!")
            self.model.stop_training = True
cb = mcb()

https://www.tensorflow.org/guide/keras/custom_layers_and_models

Two classes, `resId` and `resSh`, with reference from the link above were written.

`resId` is used to make a resNet block in which Identity shortcuts are used, padding is used to keep dimensions of residual the same as input (inp). They are then added together and a LeakyReLU layer is applied on them

`resSh` is used to make a resNet block in which Shortcuts are used, the size of input is made the same as residual by using a 1x1 convolution with `strides= (2, 2)`. The residual is obtained by first using a `3x3 convolution` (with padding) and `strides= (2, 2)`, followed by another `3x3 convolution` (with padding) but `strides=1` this time.

A layer for `BatchNormalization`, followed by a `ReLU` is used after every convolution, unless they're being added, in which case `ReLU` is applied after their addition

In [12]:
class resId(layers.Layer):
    def __init__(self, filters=64, kerSize=3):
        super(resId, self).__init__()
        self.co = layers.Conv2D(filters, kernel_size=kerSize, padding='same',
                                kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(1e-4))
        self.bn = layers.BatchNormalization()
        self.re = layers.Activation(activations.relu)
    def call(self, inp):
        x = inp
        id = inp
        x = self.co(x)
        x = self.bn(x)
        x = self.re(x)
        x = self.co(x)
        x = self.bn(x)
        return self.re(x+id)
 
class resSh(layers.Layer):
    def __init__(self, filters, kerSize=3, stride=(2, 2)):
        super(resSh, self).__init__()
        self.co1 = layers.Conv2D(filters, kernel_size=kerSize, padding='same', strides=stride,
                                kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(1e-4))
        self.bn = layers.BatchNormalization()
        self.re = layers.Activation(activations.relu)
        self.co2 = layers.Conv2D(filters, kernel_size=kerSize, padding='same',
                                kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(1e-4))
        self.co = layers.Conv2D(filters, (1, 1), strides=stride, padding='same')
    def call(self, inp):
        x = inp
        sh = inp
        x = self.co1(x)
        x = self.bn(x)
        x = self.re(x)
        x = self.co2(x)
        x = self.bn(x)
        sh = self.co(sh)
        return self.re(x+sh)

This is a resNet model based on the paper:
https://arxiv.org/abs/1512.03385

It has some additional `Dropout` layers to obtain a better accuracy on test data, rather than without the layers.

In [13]:
class resNet(tf.keras.Model):
    def __init__(self):
        super(resNet, self).__init__()
        self.co1 = layers.Conv2D(64, (7, 7), strides=(2, 2), padding='same')
        self.mp1 = layers.MaxPooling2D((2, 2), padding='same')

        self.ri1 = resId(filters=64, kerSize=3)
        self.rs1 = resSh(filters=128, kerSize=3, stride=(2, 2))
        self.ri2 = resId(filters=128, kerSize=3)
        self.rs2 = resSh(filters=256, kerSize=3, stride=(2, 2))
        self.ri3 = resId(filters=256, kerSize=3)
        self.rs3 = resSh(filters=512, kerSize=3, stride=(2, 2))
        self.ri4 = resId(filters=512, kerSize=3)

        self.gap = layers.GlobalAveragePooling2D()
        self.fla = layers.Flatten()
        self.end = layers.Dense(10, activation="softmax", kernel_initializer='he_normal')

    def call(self, inp):
        x = inp
        x = self.co1(x)
        x = self.mp1(x)
        for i in range(3):
            x = self.ri1(x)
        x = self.rs1(x)
  #      x = layers.Dropout(0.2)(x)
        for i in range(3):
            x = self.ri2(x)
        x = self.rs2(x)
  #      x = layers.Dropout(0.2)(x)
        for i in range(5):
            x = self.ri3(x)
        x = self.rs3(x)
 #       x = layers.Dropout(0.2)(x)
        x = self.ri4(x)
        x = self.ri4(x)
   #     x = layers.Dropout(0.2)(x)

        x = self.gap(x)
        x = self.fla(x)
        x = self.end(x)
        return(x)

    def model(self):
        x = layers.Input(shape=(32, 32, 3))
        return tf.keras.Model(inputs=[x], outputs=self.call(x))

In [14]:
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        zca_epsilon=1e-06,
        width_shift_range=0.1,
        height_shift_range=0.1,
        fill_mode='nearest',
        horizontal_flip=True)
datagen.fit(trainIm)

In [15]:
model = resNet()
print(model.model().summary())
model.compile(  optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3) ,
                loss = 'categorical_crossentropy',
                metrics = ['accuracy'])

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d_14 (Conv2D)              (None, 16, 16, 64)   9472        input_2[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 8, 8, 64)     0           conv2d_14[0][0]                  
__________________________________________________________________________________________________
res_id_4 (resId)                (None, 8, 8, 64)     37184       max_pooling2d_1[0][0]            
                                                                 res_id_4[0][0]             

In [16]:
model.fit(datagen.flow(trainIm, trainLab, batch_size=32), epochs=50, callbacks=[cb], workers=4, validation_data= (testIm, testLab))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50

Reached 78% accuracy so cancelling training!


<tensorflow.python.keras.callbacks.History at 0x7fcbaa39c8d0>

In [17]:
model.evaluate(testIm, testLab)



[1.5784642696380615, 0.5594000220298767]

In [18]:
train_loss, train_acc = model.evaluate(trainIm, trainLab)
test_loss, test_acc = model.evaluate(testIm, testLab)
print("Training loss:", train_loss)
print("Training accuracy:", train_acc)
print("Test loss:", test_loss)
print("Test accuracy:", test_acc)

Training loss: 1.5505293607711792
Training accuracy: 0.5803400278091431
Test loss: 1.5784642696380615
Test accuracy: 0.5594000220298767
