In [1]:
import keras
from keras.layers import Dense, Conv2D, BatchNormalization, Activation
from keras.layers import AveragePooling2D, Input, Flatten
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from keras import backend as K
from keras.models import Model
from keras.datasets import cifar10
import numpy as np
import os

In [2]:
from colabcode import ColabCode

Now, We set different training hyper parameters that are required for ResNet architecture. We also perform some preprocessing on our dataset to prepare it for training phase.

In [4]:
batch_size = 32
epochs = 200
data_augmentation = True
num_classes = 10
subtract_pixel_mean = True
n = 3
version = 1

if version == 1:
    depth = n * 6 + 2
elif version == 2:
    depth = n * 9 + 2

model_type = 'ResNet % dv % d' % (depth, version)

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

input_shape = x_train.shape[1:]

x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)

y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 0us/step
x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
y_train shape: (50000, 1)


 we set the learning rate according to the number of epochs. As the number of epochs the learning rate must be decreased to ensure better learning.

In [5]:
def lr_schedule(epoch):
    lr = 1e-3
    if epoch &gt; 180:
        lr *= 0.5e-3
    elif epoch &gt; 160:
        lr *= 1e-3
    elif epoch &gt; 120:
        lr *= 1e-2
    elif epoch &gt; 80:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr

SyntaxError: invalid syntax (843913916.py, line 3)

This function, lr_schedule, adjusts the learning rate based on the current training epoch. It starts with a base learning rate of 0.001 (1e-3). As the number of epochs increases, the learning rate decreases step by step: after 80 epochs, it becomes 0.0001 (10 times smaller), after 120 epochs, it reduces further to 0.00001, and so on.

This gradual decrease helps the model make smaller updates to fine-tune its learning as training progresses. The function also prints the current learning rate so you can track how it’s changing.

Defining basic ResNet building block that can be used for defining the ResNet V1 and V2 architecture.

In [3]:
def resnet_layer(inputs,
                 num_filters=16,
                 kernel_size=3,
                 strides=1,
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True):
    conv = Conv2D(num_filters,
                  kernel_size=kernel_size,
                  strides=strides,
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4))

    x = inputs
    if conv_first:
        x = conv(x)
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
    else:
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
        x = conv(x)

    return x


The resnet_layer function creates a ResNet layer with a convolution (Conv2D), optional batch normalization, and activation (e.g., ReLU). The order of these operations depends on the conv_first flag, making it flexible for building ResNet architectures.

In [None]:
def resnet_v1(input_shape, depth, num_classes=10): 
    if (depth - 2) % 6 != 0: 
        raise ValueError('depth should be 6n + 2 (eg 20, 32, 44 in [a])') 

    num_filters = 16
    num_res_blocks = int((depth - 2) / 6) 

    inputs = Input(shape=input_shape) 
    x = resnet_layer(inputs=inputs) 

    for stack in range(3): 
        for res_block in range(num_res_blocks): 
            strides = 1
            if stack > 0 and res_block == 0: 
                strides = 2 
            y = resnet_layer(inputs=x, 
                             num_filters=num_filters, 
                             strides=strides) 
            y = resnet_layer(inputs=y, 
                             num_filters=num_filters, 
                             activation=None) 
            if stack > 0 and res_block == 0: 
                x = resnet_layer(inputs=x, 
                                 num_filters=num_filters, 
                                 kernel_size=1, 
                                 strides=strides, 
                                 activation=None, 
                                 batch_normalization=False) 
            x = keras.layers.add([x, y]) 
            x = Activation('relu')(x) 
        num_filters *= 2

    x = AveragePooling2D(pool_size=8)(x) 
    y = Flatten()(x) 
    outputs = Dense(num_classes, 
                    activation='softmax', 
                    kernel_initializer='he_normal')(y) 

    model = Model(inputs=inputs, outputs=outputs) 
    return model


Define ResNet V2 architecture that is based on the ResNet building block we defined above: 

In [None]:
def resnet_v2(input_shape, depth, num_classes=10):
    if (depth - 2) % 9 != 0:
        raise ValueError('depth should be 9n + 2 (eg 56 or 110 in [b])')

    num_filters_in = 16
    num_res_blocks = int((depth - 2) / 9)

    inputs = Input(shape=input_shape)
    x = resnet_layer(inputs=inputs,
                     num_filters=num_filters_in,
                     conv_first=True)

    for stage in range(3):
        for res_block in range(num_res_blocks):
            activation = 'relu'
            batch_normalization = True
            strides = 1
            if stage == 0:
                num_filters_out = num_filters_in * 4
                if res_block == 0:
                    activation = None
                    batch_normalization = False
            else:
                num_filters_out = num_filters_in * 2
                if res_block == 0:
                    strides = 2

            y = resnet_layer(inputs=x,
                             num_filters=num_filters_in,
                             kernel_size=1,
                             strides=strides,
                             activation=activation,
                             batch_normalization=batch_normalization,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_in,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_out,
                             kernel_size=1,
                             conv_first=False)
            if res_block == 0:
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters_out,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])

        num_filters_in = num_filters_out

    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    model = Model(inputs=inputs, outputs=outputs)
    return model


This code implements ResNet V2, a deep residual network with bottleneck blocks, batch normalization, and ReLU before convolutions. It efficiently downsamples inputs, ending with global average pooling and a softmax classifier for robust training of deep models.

 The code below is used to train and test the ResNet v1 and v2 architecture we defined above:

In [None]:

if version == 2: 
    model = resnet_v2(input_shape=input_shape, depth=depth) 
else: 
    model = resnet_v1(input_shape=input_shape, depth=depth) 

model.compile(loss='categorical_crossentropy', 
              optimizer=Adam(learning_rate=lr_schedule(0)), 
              metrics=['accuracy']) 
model.summary() 
print(model_type)

save_dir = os.path.join(os.getcwd(), 'saved_models') 
model_name = 'cifar10_%s_model.{epoch:03d}.keras' % model_type 
if not os.path.isdir(save_dir): 
    os.makedirs(save_dir) 
filepath = os.path.join(save_dir, model_name)

checkpoint = ModelCheckpoint(filepath=filepath, 
                              monitor='val_acc', 
                              verbose=1, 
                              save_best_only=True) 

lr_scheduler = LearningRateScheduler(lr_schedule) 

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), 
                               cooldown=0, 
                               patience=5, 
                               min_lr=0.5e-6) 

callbacks = [checkpoint, lr_reducer, lr_scheduler] 

if not data_augmentation: 
    print('Not using data augmentation.') 
    model.fit(x_train, y_train, 
              batch_size=batch_size, 
              epochs=epochs, 
              validation_data=(x_test, y_test), 
              shuffle=True, 
              callbacks=callbacks) 
else: 
    print('Using real-time data augmentation.') 
    datagen = ImageDataGenerator(
        feat
      
