In [1]:
%%bash
pip install --upgrade pip
pip install numpy matplotlib keras tensorflow[and-cuda]

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable






[0m

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.datasets import cifar10
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Flatten, BatchNormalization, Add, Input
from keras.activations import relu
from keras.models import Sequential
from keras.utils import to_categorical
from keras.regularizers import l2
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
from keras.models import Model
from keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator

2024-12-19 18:09:53.694138: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-19 18:09:53.707785: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-19 18:09:53.724923: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-19 18:09:53.730298: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-19 18:09:53.743811: I tensorflow/core/platform/cpu_feature_guar

In [10]:
# if you want to use gpus
tf.debugging.set_log_device_placement(False)
gpus = tf.config.list_physical_devices('GPU')
index_to_use = [0, 1] # add more depending on the server
device_names = [f'/GPU:{i}' for i in index_to_use]
strategy = tf.distribute.MirroredStrategy(devices=device_names)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


In [4]:
#using only CPU
tf.debugging.set_log_device_placement(False)
# Disable all GPUs
tf.config.set_visible_devices([], 'GPU')

In [5]:
#CIFAR-10 dataset
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
print(train_images.shape)
train_images = train_images.reshape(train_images.shape[0], 32, 32, 3) #ensure shape 32 W x 32 H x 3 channels for each image
test_images = test_images.reshape(test_images.shape[0], 32, 32, 3)

#range 0-1
train_images = train_images.astype('float32') / 255.0
test_images = test_images.astype('float32') / 255.0

#One-hot encoding labels
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)

(50000, 32, 32, 3)


In [7]:
#currently trying ResNet18, according to paper: https://arxiv.org/pdf/1512.03385.pdf
class ResNetBlock(Model): #inherits from Model class
    def __init__(self, n_filters, kernel_size = (3, 3), kernel_init = 'HeNormal', downsample=False, **kwargs):
        super().__init__(**kwargs)
        self.downsample = downsample
        self.kernel_size = kernel_size
        self.n_filters = n_filters
        self.strides = [2, 1] if downsample else [1, 1] 
        self.kernel_init = kernel_init
    
        self.conv1 = Conv2D(self.n_filters, self.kernel_size, strides=self.strides[0], padding='same', activation='relu',
                             kernel_initializer=self.kernel_init)
        self.bn1 = BatchNormalization() # batch normalization after every convolutional layer
        self.conv2 = Conv2D(self.n_filters, self.kernel_size, strides=self.strides[1], padding='same', activation='relu',
                             kernel_initializer=self.kernel_init)
        self.bn2 = BatchNormalization()
        
        if self.downsample: # the shortcut connection should also match the dimensions (convolution with a (1,1) kernel and stride of 2)
            self.residual_conv = Conv2D(filters=self.n_filters, strides=2, kernel_size=(1, 1), kernel_initializer=self.kernel_init, padding="same")
            self.residual_bn = BatchNormalization()
        
        self.add = Add()

    
    def call(self, inputs): #forward pass (overriding parent class)
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = relu(x) #had forgotten this
        x = self.conv2(x)
        x = self.bn2(x)
        
        if self.downsample: 
            res = self.residual_conv(inputs)
            res = self.residual_bn(res)
        else:
            res = inputs #no need to change dimensions
        
        x = self.add([x, res]) #merge block output with shortcut connection (residual path)
        # pretty much same as x + residual, simply adding the two tensors
        out = relu(x)
        return out 
    
class ResNet18(Model):
    def __init__(self, n_classes=10, **kwargs): #default 10 classes for CIFAR-10
        super().__init__(**kwargs)
        self.n_classes = n_classes

        #initial part
        self.conv1 = Conv2D(kernel_size=(3, 3), strides=1, filters=64, padding='same', activation='relu',
                            kernel_initializer='HeNormal')
        self.bn1 = BatchNormalization()
        
        #blocks -> 2 x 2 blocks x 4 stages conv layers
        # "Downsampling is performed by conv3 1, conv4 1, and conv5 1 with a stride of 2."
        #self.pool1 = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same') -> removing this for cifar-10 
        self.conv2_1 = ResNetBlock(n_filters=64) #conv2_x blocks have no downsampling
        self.conv2_2 = ResNetBlock(n_filters=64)

        self.conv3_1 = ResNetBlock(n_filters=128, downsample=True) #<-
        self.conv3_2 = ResNetBlock(n_filters=128)

        self.conv4_1 = ResNetBlock(n_filters=256, downsample=True) #<-
        self.conv4_2 = ResNetBlock(n_filters=256)
        
        self.conv5_1 = ResNetBlock(n_filters=512, downsample=True) #<-
        self.conv5_2 = ResNetBlock(n_filters=512)

        #final part
        self.avg_pool = GlobalAveragePooling2D()
        self.fc = Dense(self.n_classes, activation='softmax')

    def call(self, inputs): #forward pass
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = relu(x) # had forgotten this
        #x = self.pool1(x)
        for block in [self.conv2_1, self.conv2_2, self.conv3_1, self.conv3_2, self.conv4_1, self.conv4_2, self.conv5_1, self.conv5_2]:
            x = block(x)
        x = self.avg_pool(x)
        out = self.fc(x)
        return out  

In [6]:
# functional API instead:
# Define ResNetBlock as a function
def ResNetBlock(x, n_filters, kernel_size=(3, 3), kernel_init='HeNormal', downsample=False):
    strides = [2, 1] if downsample else [1, 1]
    
    # Residual connection - if downsampling, apply to the original input
    if downsample:
        res = Conv2D(n_filters, kernel_size=(1, 1), strides=2, padding='same', kernel_initializer=kernel_init)(x)  # Apply downsampling to original input
        res = BatchNormalization()(res)
    else:
        res = x  # When not downsampling, residual is just the output of the block
    
    # First convolution
    x = Conv2D(n_filters, kernel_size, strides=strides[0], padding='same', kernel_initializer=kernel_init)(x)
    x = BatchNormalization()(x)
    x = relu(x)
        
    # Second convolution
    x = Conv2D(n_filters, kernel_size, strides=strides[1], padding='same', kernel_initializer=kernel_init)(x)
    x = BatchNormalization()(x)
    
    
    
    # Add the residual connection (skip connection)
    x = Add()([x, res])
    x = relu(x)
    
    return x

# Create the ResNet18 model using the functional API
def ResNet18(input_shape=(32, 32, 3), n_classes=10):
    input_tensor = Input(shape=input_shape)
    
    # Initial part
    x = Conv2D(64, (3, 3), strides=1, padding='same', activation='relu', kernel_initializer='HeNormal')(input_tensor)
    x = BatchNormalization()(x)
    x = relu(x)
    
    # Blocks - 2 x 2 blocks x 4 stages of convolution layers
    x = ResNetBlock(x, 64, downsample=False)  # 64 filters, no downsampling
    x = ResNetBlock(x, 64, downsample=False)
    
    x = ResNetBlock(x, 128, downsample=True)  # 128 filters, with downsampling
    x = ResNetBlock(x, 128, downsample=False)
    
    x = ResNetBlock(x, 256, downsample=True)  # 256 filters, with downsampling
    x = ResNetBlock(x, 256, downsample=False)
    
    x = ResNetBlock(x, 512, downsample=True)  # 512 filters, with downsampling
    x = ResNetBlock(x, 512, downsample=False)

    # Final part
    x = GlobalAveragePooling2D()(x)
    output = Dense(n_classes, activation='softmax')(x)
    
    # Create the complete model
    model = Model(inputs=input_tensor, outputs=output)
    
    return model

In [7]:
#with strategy.scope():
batch_size = 128
datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            # rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=4,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=4,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,  # randomly flip images
            vertical_flip=False,  # randomly flip images
        )
print('Data Augmentation...')
train_gen = datagen.flow(train_images, train_labels, batch_size=batch_size)

#Build model, set optimizations
model = ResNet18()
model.build(input_shape=(None, 32, 32, 3)) #Cifar-10
model.summary()    
#optimizer = Adam(learning_rate=1e-2)
opt = SGD(learning_rate=0.1, momentum=0.9, weight_decay=1e-4)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])


es = EarlyStopping(patience=20, restore_best_weights=True, monitor="val_accuracy")
def lr_schedule(epoch, lr):
    if(epoch % 50 == 0):
        new_lr = 0.1 * lr
        print("Learning rate is", new_lr)
        return new_lr
    return lr

lr_scheduler = LearningRateScheduler(lr_schedule)

#fit and evaluate
history = model.fit(train_gen,
           batch_size=batch_size,
           epochs=200,
           verbose=1,
           validation_data=(test_images, test_labels),
           callbacks=[lr_scheduler])

print("Best inference accuracy, after early stopping:")
model.evaluate(test_images, test_labels)

model.save_weights("model_func_no_es.weights.h5")

Data Augmentation...


Learning rate is 0.010000000149011612
Epoch 1/200


  self._warn_if_super_not_called()
I0000 00:00:1734628240.224090    8838 service.cc:146] XLA service 0x7f52c02654d0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734628240.224150    8838 service.cc:154]   StreamExecutor device (0): Host, Default Version
2024-12-19 18:10:40.397252: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1734628248.079030    8838 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m 46/391[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m19:36[0m 3s/step - accuracy: 0.2032 - loss: 2.3048

KeyboardInterrupt: 