In [1]:
%%bash
pip install --upgrade pip
pip install numpy matplotlib keras tensorflow[and-cuda]

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable






[0m

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.datasets import cifar10
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Flatten, BatchNormalization, Add, Input, ReLU
from keras.models import Sequential
from keras.utils import to_categorical
from keras.regularizers import l2
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
from keras.models import Model
from keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator

2024-12-26 12:19:27.765828: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-12-26 12:19:27.938241: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-26 12:19:29.130945: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
2024-12-26 12:19:29.131085

In [2]:
# if you want to use gpus
tf.debugging.set_log_device_placement(False)
gpus = tf.config.list_physical_devices('GPU')
index_to_use = [0, 1] # add more depending on the server
device_names = [f'/GPU:{i}' for i in index_to_use]
strategy = tf.distribute.MirroredStrategy(devices=device_names)

2024-12-26 12:19:35.720070: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 20682 MB memory:  -> device: 0, name: NVIDIA A10, pci bus id: 0000:17:00.0, compute capability: 8.6
2024-12-26 12:19:35.722265: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 20682 MB memory:  -> device: 1, name: NVIDIA A10, pci bus id: 0000:ca:00.0, compute capability: 8.6


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


In [4]:
#using only CPU
tf.debugging.set_log_device_placement(False)
# Disable all GPUs
tf.config.set_visible_devices([], 'GPU')

In [3]:
#CIFAR-10 dataset
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
print(train_images.shape)
train_images = train_images.reshape(train_images.shape[0], 32, 32, 3) #ensure shape 32 W x 32 H x 3 channels for each image
test_images = test_images.reshape(test_images.shape[0], 32, 32, 3)

#range 0-1
train_images = train_images.astype('float32') / 255.0
test_images = test_images.astype('float32') / 255.0

#One-hot encoding labels
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)

(50000, 32, 32, 3)


In [4]:
# functional API instead:
# Define ResNetBlock as a function
def ResNetBlock(x, n_filters, kernel_size=(3, 3), kernel_init='HeNormal', downsample=False):
    strides = [2, 1] if downsample else [1, 1]
    
    # Residual connection - if downsampling, apply to the original input
    if downsample:
        res = Conv2D(n_filters, kernel_size=(1, 1), strides=2, padding='same', kernel_initializer=kernel_init)(x)  # Apply downsampling to original input
        res = BatchNormalization()(res)
    else:
        res = x  # When not downsampling, residual is just the output of the block
    
    # First convolution
    x = Conv2D(n_filters, kernel_size, strides=strides[0], padding='same', kernel_initializer=kernel_init)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
        
    # Second convolution
    x = Conv2D(n_filters, kernel_size, strides=strides[1], padding='same', kernel_initializer=kernel_init)(x)
    x = BatchNormalization()(x)
    
    
    
    # Add the residual connection (skip connection)
    x = Add()([x, res])
    x = ReLU()(x)
    
    return x

# Create the ResNet18 model using the functional API
def ResNet18(input_shape=(32, 32, 3), n_classes=10):
    input_tensor = Input(shape=input_shape)
    
    # Initial part
    x = Conv2D(64, (3, 3), strides=1, padding='same', activation='relu', kernel_initializer='HeNormal')(input_tensor)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    
    # Blocks - 2 x 2 blocks x 4 stages of convolution layers
    x = ResNetBlock(x, 64, downsample=False)  # 64 filters, no downsampling
    x = ResNetBlock(x, 64, downsample=False)
    
    x = ResNetBlock(x, 128, downsample=True)  # 128 filters, with downsampling
    x = ResNetBlock(x, 128, downsample=False)
    
    x = ResNetBlock(x, 256, downsample=True)  # 256 filters, with downsampling
    x = ResNetBlock(x, 256, downsample=False)
    
    x = ResNetBlock(x, 512, downsample=True)  # 512 filters, with downsampling
    x = ResNetBlock(x, 512, downsample=False)

    # Final part
    x = GlobalAveragePooling2D()(x)
    output = Dense(n_classes, activation='softmax')(x)
    
    # Create the complete model
    model = Model(inputs=input_tensor, outputs=output)
    
    return model

In [9]:
with strategy.scope():
    batch_size = 128
    #following the paper
    datagen = ImageDataGenerator(
                featurewise_center=False,  # set input mean to 0 over the dataset
                samplewise_center=False,  # set each sample mean to 0
                featurewise_std_normalization=False,  # divide inputs by std of the dataset
                samplewise_std_normalization=False,  # divide each input by its std
                zca_whitening=False,  # apply ZCA whitening
                # rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
                width_shift_range=4,  # randomly shift images horizontally (fraction of total width)
                height_shift_range=4,  # randomly shift images vertically (fraction of total height)
                horizontal_flip=True,  # randomly flip images
                vertical_flip=False,  # randomly flip images
            )
    print('Data Augmentation...')
    train_gen = datagen.flow(train_images, train_labels, batch_size=batch_size)

    #Build model, set optimizations
    model = ResNet18()
    model.build(input_shape=(None, 32, 32, 3)) #Cifar-10
    model.summary()    
    #opt = Adam(learning_rate=1e-2)
    opt = SGD(learning_rate=0.1, momentum=0.9, decay=1e-4)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])


    #es = EarlyStopping(patience=20, restore_best_weights=True, monitor="val_accuracy")
    checkpoint = ModelCheckpoint(
        filepath='best_model_schedule.h5',    
        monitor='val_accuracy',      
        save_best_only=True,         
        mode='max',                  
        verbose=True
    )
    
    def lr_schedule(epoch, lr):
        if(epoch % 100 == 0):
            new_lr = 0.1 * lr
            print("Learning rate is", new_lr)
            return new_lr
        return lr

    lr_scheduler = LearningRateScheduler(lr_schedule)

    #fit and evaluate
    history = model.fit(train_gen,
               batch_size=batch_size,
               epochs=200,
               verbose=1,
               validation_data=(test_images, test_labels),
               callbacks=[checkpoint, lr_scheduler])

Data Augmentation...
Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 conv2d_80 (Conv2D)             (None, 32, 32, 64)   1792        ['input_5[0][0]']                
                                                                                                  
 batch_normalization_80 (BatchN  (None, 32, 32, 64)  256         ['conv2d_80[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 re_lu_68 (ReLU)                (None, 32, 32, 64)   0           ['batc

2024-12-26 14:15:48.974703: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:784] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorDataset/_1"
op: "TensorDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_INT32
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 1
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\022TensorDataset:5646"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_INT32
        }
      }
    }
  }
}



Learning rate is 0.010000000149011612
Epoch 1/200
INFO:tensorflow:batch_all_reduce: 82 all-reduces with algorithm = nccl, num_packs = 1
INFO:tensorflow:batch_all_reduce: 82 all-reduces with algorithm = nccl, num_packs = 1
Epoch 1: val_accuracy improved from -inf to 0.51430, saving model to best_model_schedule.h5
Epoch 2/200
Epoch 2: val_accuracy improved from 0.51430 to 0.64500, saving model to best_model_schedule.h5
Epoch 3/200
Epoch 3: val_accuracy improved from 0.64500 to 0.65680, saving model to best_model_schedule.h5
Epoch 4/200
Epoch 4: val_accuracy improved from 0.65680 to 0.65880, saving model to best_model_schedule.h5
Epoch 5/200
Epoch 5: val_accuracy improved from 0.65880 to 0.71990, saving model to best_model_schedule.h5
Epoch 6/200
Epoch 6: val_accuracy did not improve from 0.71990
Epoch 7/200
Epoch 7: val_accuracy improved from 0.71990 to 0.72780, saving model to best_model_schedule.h5
Epoch 8/200
Epoch 8: val_accuracy improved from 0.72780 to 0.76610, saving model to best