In [2]:
%%bash
# create virtual environment, activate it and install packages. If you already have a venv (from previous assignment for example) or
# you're not even using one, you can skip this step.
python3 -m venv .venv
source .venv/bin/activate
pip install numpy matplotlib keras tensorflow[and-cuda]




[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [26]:
import matplotlib.pyplot as plt
import numpy as np
from keras.datasets import cifar10
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Flatten, BatchNormalization, Add
from keras.activations import relu
from keras.models import Sequential
from keras.utils import to_categorical
from keras.regularizers import l1, l2
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import Model
from keras.optimizers import Adam, SGD

In [5]:
#CIFAR-10 dataset
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
print(train_images.shape)
train_images = train_images.reshape(train_images.shape[0], 32, 32, 3) #ensure shape 32 W x 32 H x 3 channels for each image
test_images = test_images.reshape(test_images.shape[0], 32, 32, 3)

#range 0-1
train_images = train_images.astype('float32') / 255.0
test_images = test_images.astype('float32') / 255.0

#One-hot encoding labels
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)

(50000, 32, 32, 3)


In [29]:

#1st try LeNet-5 with batch normalization and l2 regularization - about 0.66 test validation with 0.0001 lambda and 0.001 le, I believe
'''
checkpoint_callback = ModelCheckpoint(
    filepath='best_weights.h5',
    monitor='val_accuracy',              
    save_best_only=True,                 
    mode='max',  # Mode for monitoring, 'max' for accuracy
    verbose=1                        
)
'''
"""
model = Sequential()
l2_lambda = 0.0001
model.add(Conv2D(6, kernel_size=(5, 5), activation='relu', input_shape=(32, 32, 3), kernel_regularizer=l2(l2_lambda))) #C1
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2))) #S1
model.add(Conv2D(16, kernel_size=(5, 5), activation='relu', kernel_regularizer=l2(l2_lambda))) #C2
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2))) #S2
model.add(Flatten())
model.add(Dense(120, activation='relu', kernel_regularizer=l2(l2_lambda)))  # FC1
model.add(Dense(84, activation='relu', kernel_regularizer=l2(l2_lambda)))  # FC2
model.add(Dense(10, activation='softmax', kernel_regularizer=l2(l2_lambda)))  # FC3
#model.summary()
 """

#currently trying ResNet18, according to paper: https://arxiv.org/pdf/1512.03385.pdf
class ResNetBlock(Model): #inherits from Model class
    def __init__(self, n_filters, kernel_size = (3, 3), kernel_init = 'HeNormal', downsample=False, **kwargs):
        super().__init__(**kwargs)
        self.downsample = downsample
        self.kernel_size = kernel_size
        self.n_filters = n_filters
        self.strides = [2, 1] if downsample else [1, 1] 
        self.kernel_init = kernel_init
    
        self.conv1 = Conv2D(self.n_filters, self.kernel_size, strides=self.strides[0], padding='same', activation='relu',
                             kernel_initializer=self.kernel_init)
        self.bn1 = BatchNormalization() # batch normalization after every convolutional layer
        self.conv2 = Conv2D(self.n_filters, self.kernel_size, strides=self.strides[1], padding='same', activation='relu',
                             kernel_initializer=self.kernel_init)
        self.bn2 = BatchNormalization()
        
        if self.downsample: # the shortcut connection should also match the dimensions (convolution with a (1,1) kernel and stride of 2)
            self.residual_conv = Conv2D(filters=self.n_filters, strides=2, kernel_size=(1, 1), kernel_initializer=self.kernel_init, padding="same")
            self.residual_bn = BatchNormalization()
        
        self.add = Add()

    
    def call(self, inputs): #forward pass (overriding parent class)
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        
        if self.downsample: 
            res = self.residual_conv(inputs)
            res = self.residual_bn(res)
        else:
            res = inputs #no need to change dimensions
        
        x = self.add([x, res]) #merge block output with shortcut connection (residual path)
        # pretty much same as x + residual, simply adding the two tensors
        out = relu(x)
        return out 
    
class ResNet18(Model):
    def __init__(self, n_classes=10, **kwargs): #default 10 classes for CIFAR-10
        super().__init__(**kwargs)
        self.n_classes = n_classes

        #initial part
        self.conv1 = Conv2D(kernel_size=(3, 3), strides=1, filters=64, padding='same', activation='relu',
                            kernel_initializer='HeNormal')
        self.bn1 = BatchNormalization()
        
        #blocks -> 2 x 2 blocks x 4 stages conv layers
        # "Downsampling is performed by conv3 1, conv4 1, and conv5 1 with a stride of 2."
        self.pool1 = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')
        self.conv2_1 = ResNetBlock(n_filters=64) #conv2_x blocks have no downsampling
        self.conv2_2 = ResNetBlock(n_filters=64)

        self.conv3_1 = ResNetBlock(n_filters=128, downsample=True) #<-
        self.conv3_2 = ResNetBlock(n_filters=128)

        self.conv4_1 = ResNetBlock(n_filters=256, downsample=True) #<-
        self.conv4_2 = ResNetBlock(n_filters=256)
        
        self.conv5_1 = ResNetBlock(n_filters=512, downsample=True) #<-
        self.conv5_2 = ResNetBlock(n_filters=512)

        #final part
        self.avg_pool = GlobalAveragePooling2D()
        self.fc = Dense(self.n_classes, activation='softmax')

    def call(self, inputs): #forward pass
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.pool1(x)
        for block in [self.conv2_1, self.conv2_2, self.conv3_1, self.conv3_2, self.conv4_1, self.conv4_2, self.conv5_1, self.conv5_2]:
            x = block(x)
        x = self.avg_pool(x)
        out = self.fc(x)
        return out   

In [33]:
model = ResNet18(10)
model.build(input_shape=(None, 32, 32, 3)) #Cifar-10
#optimizer = Adam(learning_rate=1e-2)
opt = SGD(learning_rate=0.1, momentum=0.9, decay=5e-4) #stochastic grad descent with l2 regularization (decay)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])


es = EarlyStopping(patience=5, restore_best_weights=True, monitor="val_accuracy")
history = output = model.fit(train_images,
           train_labels,
           batch_size=128,
           epochs=100,
           verbose=1,
           validation_data=(test_images, test_labels),
           callbacks=[es])

print("Best inference accuracy, after early stopping:")
model.evaluate(test_images, test_labels)
model.save_weights("model.weights.h5")
model.summary()

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Best inference accuracy, after early stopping:
Model: "res_net18_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_263 (Conv2D)         multiple                  1792      
                                                                 
 batch_normalization_263 (Ba  multiple                 256       
 tchNormalization)                                              