[Imagenet (mini) 1000 dataset](https://www.kaggle.com/datasets/ifigotin/imagenetmini-1000)

[main paper](https://arxiv.org/pdf/2105.01601.pdf)

In [None]:
# download all required dependencies
! pip install datasets
! pip install vit-keras
! pip install tensorflow-addons
! pip install tensorflow tensorflow-hub
! pip install opencv-python

In [None]:
# import libraries
import os
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, LayerNormalization, Input, Add, Conv2D, Reshape, GlobalAveragePooling1D, Dropout, Flatten
from tensorflow.keras.activations import gelu
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.datasets import mnist, cifar100
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K
from tensorflow.keras.applications import VGG16
from sklearn.model_selection import train_test_split
from vit_keras import vit, utils
import matplotlib.pyplot as plt
import numpy as np
import shutil
from PIL import Image
from sklearn.metrics import accuracy_score
from datasets import load_dataset

In [None]:
# Working-directory setup
import os

# When running on Google Colab, mount Drive first:
# from google.colab import drive
# drive.mount('/content/drive')

# Project root; the training cells below also pass it to save_data().
file_path = "/home/ecbm4040/e4040-2023fall-project-mlpm-hb2776-dg3370-amp2365"

# Run the rest of the notebook from the project root.
os.chdir(file_path)

In [None]:
from helper_functions import *
from mlp import *

In [None]:
# check availability of GPU
print(tf.__version__)

# tf.test.is_gpu_available() is deprecated; list the physical GPU devices once
# and reuse the result for both the check and the report.
available_gpus = tf.config.list_physical_devices('GPU')

if available_gpus:
    print("GPU is available.")
    print("Available GPUs:")
    for gpu in available_gpus:
        print(gpu)
else:
    print("CPU is available.")

# Fine-tuning utilities: gradient clipping and learning-rate schedules

In [None]:
# Function for gradient clipping
def clip_norm(gradients, clip_value):
    """Clip every gradient element to the range [-clip_value, clip_value].

    Despite the name, this performs element-wise value clipping through
    ``K.clip``; it does not rescale by the gradient norm.

    Args:
        gradients: Tensor (or tensor-like value) to clip.
        clip_value: Bound applied symmetrically around zero.

    Returns:
        The result of ``K.clip`` on ``gradients``.
    """
    lower_bound, upper_bound = -clip_value, clip_value
    return K.clip(gradients, lower_bound, upper_bound)


# Learning Rate Scheduler
class CosineAnnealingScheduler(Callback):
    """Keras callback that sets the learning rate from a cosine curve each epoch.

    At the start of epoch ``e`` (0-based) the optimizer's ``lr`` is set to

        eta_min + 0.5 * (eta_max - eta_min) * (1 + cos(pi * e / T_max))

    Args:
        T_max: Number of epochs over which the cosine goes from its maximum
            to its minimum.
        eta_max: Learning rate at epoch 0.
        eta_min: Learning rate reached at epoch ``T_max``.
        verbose: If greater than 0, print the learning rate chosen each epoch.
    """

    def __init__(self, T_max, eta_max, eta_min=0, verbose=0):
        super().__init__()
        self.T_max = T_max
        self.eta_max = eta_max
        self.eta_min = eta_min
        self.verbose = verbose

    def on_epoch_begin(self, epoch, logs=None):
        """Compute the cosine-annealed rate for ``epoch`` and write it to the optimizer.

        Raises:
            ValueError: If the model's optimizer has no ``lr`` attribute.
        """
        optimizer = self.model.optimizer
        if not hasattr(optimizer, 'lr'):
            raise ValueError('Optimizer must have a "lr" attribute.')

        # Ranges from 2 (epoch 0) down to 0 (epoch == T_max).
        cosine_factor = 1 + np.cos(np.pi * epoch / self.T_max)
        lr = self.eta_min + 0.5 * (self.eta_max - self.eta_min) * cosine_factor
        K.set_value(optimizer.lr, lr)

        if self.verbose > 0:
            print('\nEpoch %05d: CosineAnnealingScheduler setting learning rate to %s.' % (epoch + 1, lr))

class WarmUpLearningRateScheduler(Callback):
    """Keras callback that ramps the learning rate linearly during warm-up.

    For the first ``warmup_batches`` training batches of a ``fit()`` run the
    optimizer's ``lr`` is set to ``current_batch * init_lr / warmup_batches``,
    i.e. it grows from 0 to ``init_lr``. Afterwards the callback leaves the
    learning rate untouched.

    The batch counter is reset in ``on_train_begin`` so that a single
    instance passed to several ``fit()`` calls warms up every run instead of
    only the first one.

    Args:
        warmup_batches: Number of batches over which to ramp up. A value
            of 0 or less disables warm-up.
        init_lr: Learning rate reached at the end of warm-up.
        verbose: If greater than 0, print the learning rate set on each
            warm-up batch.
    """

    def __init__(self, warmup_batches, init_lr, verbose=0):
        super().__init__()
        self.warmup_batches = warmup_batches
        self.init_lr = init_lr
        self.verbose = verbose
        self.current_batch = 0

    def on_train_begin(self, logs=None):
        """Restart the warm-up counter for a new training run."""
        self.current_batch = 0

    def on_batch_begin(self, batch, logs=None):
        """Set the warm-up learning rate while still inside the warm-up window."""
        # The `> 0` guard avoids a ZeroDivisionError when warm-up is disabled.
        if self.warmup_batches > 0 and self.current_batch <= self.warmup_batches:
            lr = self.current_batch * self.init_lr / self.warmup_batches
            K.set_value(self.model.optimizer.lr, lr)
            if self.verbose > 0:
                print('\nBatch %05d: WarmUpLearningRateScheduler setting learning rate to %s.' % (self.current_batch + 1, lr))
        self.current_batch += 1

# Warm-up hyperparameters: number of ramp-up batches and the rate reached at the end
warmup_batches = 500
init_lr = 0.001

# Callback instances shared by every training cell below
cosine_annealing = CosineAnnealingScheduler(
    T_max=100,
    eta_max=0.001,
    eta_min=0.0001,
    verbose=1,
)
warmup_lr = WarmUpLearningRateScheduler(
    warmup_batches=warmup_batches,
    init_lr=init_lr,
    verbose=1,
)

# Load the tiny-ImageNet Dataset

In [None]:
def plot_tiny_imnet_data(data, rows=10, cols=10):
    """Show a ``rows`` x ``cols`` grid of randomly chosen images with their labels.

    Args:
        data: Indexable dataset supporting ``len()``; each item is a mapping
            with an ``"image"`` entry exposing ``convert('RGB')`` and a
            ``"label"`` entry.
        rows: Number of grid rows.
        cols: Number of grid columns.
    """
    num_images = rows * cols

    # Sample indices (with replacement) and convert them to plain Python ints
    # so the dataset is never indexed with a tensor.
    random_indices = tf.random.uniform(
        shape=(num_images,), maxval=len(data), dtype=tf.int32
    ).numpy().tolist()

    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, so
    # `axes.flat` below always works.
    fig, axes = plt.subplots(rows, cols, figsize=(20, 20), squeeze=False)
    # The title reports the number of images actually drawn.
    fig.suptitle("{} random Images from the dataset".format(num_images), fontsize=16)

    for ax, index in zip(axes.flat, random_indices):
        # Fetch the sample once and reuse it for both the image and the label.
        sample = data[index]
        ax.imshow(np.array(sample["image"].convert('RGB')))
        ax.set_title("{}".format(sample["label"]))
        ax.axis('off')

    # Adjust subplot spacing
    plt.subplots_adjust(wspace=0.7, hspace=0.1)
    plt.show()


def tiny_imagenet_data_generator(data, batch_size=32, shuffle=True, target_size=(224, 224)):
    """Yield ``(images, labels)`` batches from an image dataset, forever.

    Args:
        data: Indexable dataset supporting ``len()``; each item is a mapping
            with an ``"image"`` entry exposing ``convert('RGB')`` and
            ``resize(size)`` and an integer ``"label"`` entry.
        batch_size: Maximum number of samples per batch; the last batch of a
            pass may be smaller.
        shuffle: If true, reshuffle the sample order before every pass.
        target_size: Size handed to ``image.resize``; only the first two
            entries are used.

    Yields:
        Tuple ``(batch_images, batch_labels)``: a float32 array of images
        scaled by 1/255 and an int32 array of labels.

    Raises:
        ValueError: If ``data`` is empty or ``batch_size`` is not positive
            (either would otherwise make the generator spin forever).
    """
    if len(data) == 0:
        raise ValueError("data must contain at least one sample")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    resize_to = tuple(target_size[:2])

    # Keep NumPy's default integer dtype: casting indices to int16 wraps
    # around above 32767 and silently selects the wrong samples.
    indices = np.arange(len(data))

    while True:
        # Shuffle the indices if required
        if shuffle:
            np.random.shuffle(indices)

        for start in range(0, len(indices), batch_size):
            batch_indices = indices[start:start + batch_size]
            batch_data = [data[int(idx)] for idx in batch_indices]

            # Resize, stack and scale the images to [0, 1]
            batch_images = np.stack([
                np.array(sample['image'].convert('RGB').resize(resize_to))
                for sample in batch_data
            ]).astype('float32') / 255.0

            # Collect the labels as an integer array
            batch_labels = np.array([sample['label'] for sample in batch_data]).astype('int32')

            yield batch_images, batch_labels

# Number of Tiny ImageNet classes (sizes the final softmax layer of the models
# built below) and the nominal (height, width, channels) shape of its images.
# NOTE(review): tinyimagenet_shape is not referenced in the visible cells; the
# data generator resizes every image to 224x224 -- confirm it is still needed.
tinyimagenet_num_classes = 200
tinyimagenet_shape = (64, 64, 3)

In [None]:
# Time how long it takes to obtain both Tiny ImageNet splits
start_time = time.time()

# Load the training split first, then the validation split
tiny_imagenet_train, tiny_imagenet_valid = (
    load_dataset('Maysee/tiny-imagenet', split=split_name)
    for split_name in ('train', 'valid')
)

# Report the elapsed wall-clock time, rounded to 4 decimals
elapsed_seconds = round(time.time() - start_time, 4)
print("Downloading and splitting the tiny Imagenet data takes {} seconds".format(elapsed_seconds))

# Load the CIFAR-100 dataset

In [None]:
# Load CIFAR-100 dataset as (train, test) pairs of (images, labels)
cifar_train_split, cifar_test_split = cifar100.load_data()
cifar_x_train, cifar_y_train = cifar_train_split
cifar_x_test, cifar_y_test = cifar_test_split

# Per-image input shape: append a channel axis for grayscale (rank-3) data,
# keep the existing one for color (rank-4) data
if cifar_x_train.ndim == 3:
    cifar_input_shape = cifar_x_train.shape[1:] + (1,)
elif cifar_x_train.ndim == 4:
    cifar_input_shape = cifar_x_train.shape[1:]

# Class information derived from the training labels
cifar_output_classes = np.unique(cifar_y_train)
cifar_num_classes = len(cifar_output_classes)

# Array shapes of the inputs and of the training labels
cifar_train_input_shape = cifar_x_train.shape
cifar_test_input_shape = cifar_x_test.shape
cifar_output_shape = cifar_y_train.shape

# Print dataset information
print("Shape of training data:", cifar_train_input_shape)
print("Shape of testing data:", cifar_test_input_shape)
print("Number of Output classes:", cifar_num_classes)
print("Output classes:", cifar_output_classes)

# Define a CIFAR data generator function
def cifar_data_generator(data, labels, batch_size=32, shuffle=True):
    """Yield ``(images, labels)`` batches of resized CIFAR images, forever.

    Args:
        data: NumPy array of images with the sample axis first.
        labels: NumPy array of labels aligned with ``data``.
        batch_size: Maximum number of samples per batch; the last batch of a
            pass may be smaller.
        shuffle: If true, reshuffle the sample order before every pass.

    Yields:
        Tuple ``(batch_images, batch_labels)``: images scaled by 1/255 and
        resized to 224x224 with ``tf.image.resize``, and the matching labels.

    Raises:
        ValueError: If ``data`` is empty or ``batch_size`` is not positive
            (either would otherwise make the generator spin forever).
    """
    if len(data) == 0:
        raise ValueError("data must contain at least one sample")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Define the target image size for resizing
    target_size = (224, 224)

    # Create an array of indices for the dataset
    indices = np.arange(len(data))

    while True:
        if shuffle:
            # Shuffle the indices if required
            np.random.shuffle(indices)

        for start in range(0, len(indices), batch_size):
            # Select samples through the index array. Slicing `data` directly,
            # as before, ignored the shuffled order, so shuffle=True had no
            # effect.
            batch_indices = indices[start:start + batch_size]
            batch_images = tf.image.resize(data[batch_indices] / 255.0, target_size).numpy()
            batch_labels = labels[batch_indices]

            yield batch_images, batch_labels

# VGG model

In [None]:
def create_vgg(output_class):
    """Build a frozen ImageNet-pretrained VGG16 with a small trainable head.

    Args:
        output_class: Number of units in the final softmax layer.

    Returns:
        A Keras ``Model`` from the VGG16 input to the new softmax output.
    """
    # Pre-trained VGG16, including its original 1000-way classifier
    backbone = VGG16(
        include_top=True,
        weights="imagenet",
        input_tensor=None,
        input_shape=None,
        pooling=None,
        classes=1000,
        classifier_activation="softmax",
    )

    # Freeze every layer of the pre-trained network
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # NOTE(review): with include_top=True the head below consumes VGG16's
    # softmax output rather than convolutional features -- confirm this is
    # intended.
    features = Flatten()(backbone.output)
    for units in (512, 256):
        features = Dense(units, activation='relu')(features)

    # Output layer with the specified number of classes and softmax activation
    predictions = Dense(output_class, activation='softmax')(features)

    return Model(inputs=backbone.input, outputs=predictions)

# VGG16 on tiny-ImageNet

In [None]:
# Set the clipping value for gradient clipping and the batch size shared by
# the data generators and the step counts
clip_value = 1.0
batch_size = 32

# Batches per full pass (ceiling division). `len // 32 + 1` requested one
# batch too many whenever the dataset size is a multiple of the batch size.
vgg_tinyimnet_train_steps = (len(tiny_imagenet_train) + batch_size - 1) // batch_size
vgg_tinyimnet_val_steps = (len(tiny_imagenet_valid) + batch_size - 1) // batch_size

# Create a VGG16-based model for Tiny Imagenet with the specified number of classes
vgg16_tiny_imnet_model = create_vgg(tinyimagenet_num_classes)

# Compile the model with Adam optimizer and sparse categorical crossentropy loss
vgg16_tiny_imnet_model.compile(optimizer=Adam(learning_rate=0.001, clipnorm=clip_value),
                               loss='sparse_categorical_crossentropy',
                               metrics=['accuracy'])

# Display the model summary
vgg16_tiny_imnet_model.summary()

# Create data generators for training and validation from Tiny Imagenet datasets
vgg_tinyimnet_train_data = tiny_imagenet_data_generator(tiny_imagenet_train, batch_size=batch_size)
vgg_tinyimnet_val_data = tiny_imagenet_data_generator(tiny_imagenet_valid, batch_size=batch_size)

# Train the model with the learning-rate callbacks and time the run
start_time = time.time()
vgg_tiny_imnet_history = vgg16_tiny_imnet_model.fit(vgg_tinyimnet_train_data,
                                                    epochs=10,
                                                    steps_per_epoch=vgg_tinyimnet_train_steps,
                                                    validation_data=vgg_tinyimnet_val_data,
                                                    validation_steps=vgg_tinyimnet_val_steps,
                                                    callbacks=[cosine_annealing, warmup_lr])
vgg_tiny_imnet_time = time.time() - start_time
print("Time to fine-tune VGG16 on Tiny Imagenet dataset: {}".format(round(vgg_tiny_imnet_time, 4)))

# Evaluate on fresh, unshuffled generators: the ones handed to fit() may have
# been left part-way through a pass, which would count some samples twice and
# skip others.
vgg_tinyimnet_train_result = vgg16_tiny_imnet_model.evaluate(
    tiny_imagenet_data_generator(tiny_imagenet_train, batch_size=batch_size, shuffle=False),
    steps=vgg_tinyimnet_train_steps)
vgg_tinyimnet_val_result = vgg16_tiny_imnet_model.evaluate(
    tiny_imagenet_data_generator(tiny_imagenet_valid, batch_size=batch_size, shuffle=False),
    steps=vgg_tinyimnet_val_steps)

# Display accuracies (as percentages) and losses (raw values; a loss is not a
# percentage, so it is no longer multiplied by 100)
print('')
print('Training Accuracy: {}% and Training Loss: {}'.format(round(vgg_tinyimnet_train_result[1] * 100, 2), round(vgg_tinyimnet_train_result[0], 4)))
print('Testing Accuracy: {}% and Testing Loss: {}'.format(round(vgg_tinyimnet_val_result[1] * 100, 2), round(vgg_tinyimnet_val_result[0], 4)))
print('')

# Save the model and training history
save_data(file_path, vgg16_tiny_imnet_model, vgg_tiny_imnet_history, "vgg16_tiny_imnet")

# Delete variables to free up memory
del vgg16_tiny_imnet_model, vgg_tiny_imnet_history, vgg_tinyimnet_train_result, vgg_tinyimnet_val_result

# VGG16 on CIFAR-100

In [None]:
# Set the clipping value and batch size
clip_value = 1.0
batch_size = 32

# Batches per full pass (ceiling division), derived from batch_size so the
# step counts stay correct if the batch size is changed. The previous
# `len // 32 + 1` hard-coded the batch size and overshot by one batch for
# sizes that are a multiple of it.
vgg_cifar_train_steps = (len(cifar_x_train) + batch_size - 1) // batch_size
vgg_cifar_val_steps = (len(cifar_x_test) + batch_size - 1) // batch_size

# Create VGG16 model for CIFAR dataset
vgg16_cifar_model = create_vgg(cifar_num_classes)
vgg16_cifar_model.compile(optimizer=Adam(learning_rate=0.001, clipnorm=clip_value),
                          loss='sparse_categorical_crossentropy',
                          metrics=['accuracy'])
vgg16_cifar_model.summary()

# Create data generators for training and validation sets
vgg_cifar_train_data = cifar_data_generator(cifar_x_train, cifar_y_train, batch_size=batch_size, shuffle=True)
vgg_cifar_val_data = cifar_data_generator(cifar_x_test, cifar_y_test, batch_size=batch_size, shuffle=True)

# Train the VGG16 model on CIFAR dataset
start_time = time.time()
vgg_cifar_history = vgg16_cifar_model.fit(vgg_cifar_train_data,
                                          epochs=10,
                                          steps_per_epoch=vgg_cifar_train_steps,
                                          validation_data=vgg_cifar_val_data,
                                          validation_steps=vgg_cifar_val_steps,
                                          callbacks=[cosine_annealing, warmup_lr])

# Calculate and print the time taken for training
vgg_cifar_time = time.time() - start_time
print("Time to fine-tune VGG16 on CIFAR dataset: {}".format(round(vgg_cifar_time, 4)))


In [None]:
# Evaluate the VGG16 model on training and validation data.
# Step counts come from the dataset sizes: the generators have no len(), so
# `len(vgg_cifar_train_data)` raised a TypeError. Fresh, unshuffled generators
# are used so each evaluation starts at the beginning of a pass.
vgg_cifar_eval_train_steps = (len(cifar_x_train) + batch_size - 1) // batch_size
vgg_cifar_eval_val_steps = (len(cifar_x_test) + batch_size - 1) // batch_size

vgg_cifar_train_result = vgg16_cifar_model.evaluate(
    cifar_data_generator(cifar_x_train, cifar_y_train, batch_size=batch_size, shuffle=False),
    steps=vgg_cifar_eval_train_steps)
vgg_cifar_val_result = vgg16_cifar_model.evaluate(
    cifar_data_generator(cifar_x_test, cifar_y_test, batch_size=batch_size, shuffle=False),
    steps=vgg_cifar_eval_val_steps)

# Print accuracies (as percentages) and losses (raw values; a loss is not a
# percentage, so it is no longer multiplied by 100)
print('')
print('Training Accuracy: {}% and Training Loss: {}'.format(round(vgg_cifar_train_result[1] * 100, 2), round(vgg_cifar_train_result[0], 4)))
print('Testing Accuracy: {}% and Testing Loss: {}'.format(round(vgg_cifar_val_result[1] * 100, 2), round(vgg_cifar_val_result[0], 4)))
print('')

# Save the model data
save_data(file_path, vgg16_cifar_model, vgg_cifar_history, "vgg16_cifar_imnet")

# Delete variables to free up memory
del vgg16_cifar_model, vgg_cifar_history, vgg_cifar_train_result, vgg_cifar_val_result


# ViT model

In [None]:
# Function to create a Vision Transformer (ViT) model with a specified output class
def create_vit(output_class):
    """Build a frozen pre-trained ViT-B/16 backbone with a small trainable head.

    Args:
        output_class: Number of units in the final softmax layer.

    Returns:
        A Keras ``Model`` from the backbone's input to the new softmax output.
    """
    # Pre-trained ViT-B16 without its classification top, for 224x224 inputs
    backbone = vit.vit_b16(
        image_size=224,
        activation='sigmoid',
        pretrained=True,
        include_top=False,
        pretrained_top=False,
    )

    # Freeze every layer of the pre-trained backbone
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Classification head: flatten -> dense -> dropout -> regularized softmax
    features = Flatten()(backbone.output)
    hidden = Dense(256, activation='relu')(features)
    hidden = Dropout(0.5)(hidden)
    predictions = Dense(output_class, activation='softmax', kernel_regularizer=l2(0.002))(hidden)

    return Model(inputs=backbone.input, outputs=predictions)


# ViT on tiny-ImageNet

In [None]:
# Set the value for gradient clipping and the batch size shared by the data
# generators and the step counts
clip_value = 1.0
batch_size = 32

# Batches per full pass (ceiling division). `len // 32 + 1` requested one
# batch too many whenever the dataset size is a multiple of the batch size.
vit_tinyimnet_train_steps = (len(tiny_imagenet_train) + batch_size - 1) // batch_size
vit_tinyimnet_val_steps = (len(tiny_imagenet_valid) + batch_size - 1) // batch_size

# Create a Vision Transformer (ViT) model for Tiny Imagenet classification
vit_tiny_imnet_model = create_vit(tinyimagenet_num_classes)

# Compile the ViT model with Adam optimizer and specified gradient clipping
vit_tiny_imnet_model.compile(optimizer=Adam(learning_rate=0.001, clipnorm=clip_value),
                             loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Display the summary of the ViT model
vit_tiny_imnet_model.summary()

# Generate data using the Tiny Imagenet data generator
vit_tinyimnet_train_data = tiny_imagenet_data_generator(tiny_imagenet_train, batch_size=batch_size)
vit_tinyimnet_val_data = tiny_imagenet_data_generator(tiny_imagenet_valid, batch_size=batch_size)

# Train the ViT model on the Tiny Imagenet dataset
start_time = time.time()
vit_tiny_imnet_history = vit_tiny_imnet_model.fit(vit_tinyimnet_train_data,
                                                  epochs=10,
                                                  steps_per_epoch=vit_tinyimnet_train_steps,
                                                  validation_data=vit_tinyimnet_val_data,
                                                  validation_steps=vit_tinyimnet_val_steps,
                                                  callbacks=[cosine_annealing, warmup_lr])

# Calculate the time taken for training
vit_tinyimnet_time = time.time() - start_time
print("Time to fine-tune ViT on Tiny Imagenet dataset: {}".format(round(vit_tinyimnet_time, 4)))

# Evaluate on fresh, unshuffled generators: the ones handed to fit() may have
# been left part-way through a pass, which would count some samples twice and
# skip others.
vit_tinyimnet_train_result = vit_tiny_imnet_model.evaluate(
    tiny_imagenet_data_generator(tiny_imagenet_train, batch_size=batch_size, shuffle=False),
    steps=vit_tinyimnet_train_steps)
vit_tinyimnet_val_result = vit_tiny_imnet_model.evaluate(
    tiny_imagenet_data_generator(tiny_imagenet_valid, batch_size=batch_size, shuffle=False),
    steps=vit_tinyimnet_val_steps)

# Display accuracies (as percentages) and losses (raw values; a loss is not a
# percentage, so it is no longer multiplied by 100)
print('')
print('Training Accuracy: {}% and Training Loss: {}'.format(round(vit_tinyimnet_train_result[1] * 100, 2), round(vit_tinyimnet_train_result[0], 4)))
print('Testing Accuracy: {}% and Testing Loss: {}'.format(round(vit_tinyimnet_val_result[1] * 100, 2), round(vit_tinyimnet_val_result[0], 4)))
print('')

# Save the model data
save_data(file_path, vit_tiny_imnet_model, vit_tiny_imnet_history, "vit_tiny_imnet")

# Clear variables from memory
del vit_tiny_imnet_model, vit_tiny_imnet_history, vit_tinyimnet_train_result, vit_tinyimnet_val_result

# ViT on CIFAR-100


In [None]:
# Set the value for gradient clipping and the batch size. batch_size is
# defined here so this cell does not depend on the VGG16/CIFAR cell having
# been run first.
clip_value = 1.0
batch_size = 32

# Batches per full pass (ceiling division), derived from batch_size instead of
# a hard-coded 32.
vit_cifar_train_steps = (len(cifar_x_train) + batch_size - 1) // batch_size
vit_cifar_val_steps = (len(cifar_x_test) + batch_size - 1) // batch_size

# Create a Vision Transformer (ViT) model for CIFAR classification
vit_cifar_model = create_vit(cifar_num_classes)

# Compile the ViT model with Adam optimizer and specified gradient clipping
vit_cifar_model.compile(optimizer=Adam(learning_rate=0.001, clipnorm=clip_value),
                        loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Display the summary of the ViT model
vit_cifar_model.summary()

# Generate data using the CIFAR data generator
vit_cifar_train_data = cifar_data_generator(cifar_x_train, cifar_y_train, batch_size=batch_size, shuffle=True)
vit_cifar_val_data = cifar_data_generator(cifar_x_test, cifar_y_test, batch_size=batch_size, shuffle=True)

# Train the ViT model on the CIFAR dataset
start_time = time.time()
vit_cifar_history = vit_cifar_model.fit(vit_cifar_train_data,
                                        epochs=10,
                                        steps_per_epoch=vit_cifar_train_steps,
                                        validation_data=vit_cifar_val_data,
                                        validation_steps=vit_cifar_val_steps,
                                        callbacks=[cosine_annealing, warmup_lr])

# Calculate the time taken for training
vit_cifar_time = time.time() - start_time
print("Time to fine-tune ViT on CIFAR dataset: {}".format(round(vit_cifar_time, 4)))

# Evaluate through the data generator. evaluate() previously received the raw
# CIFAR arrays, which are neither resized to the 224x224 input the model is
# built for nor scaled by 1/255 like the training batches.
vit_cifar_train_result = vit_cifar_model.evaluate(
    cifar_data_generator(cifar_x_train, cifar_y_train, batch_size=batch_size, shuffle=False),
    steps=vit_cifar_train_steps)
vit_cifar_val_result = vit_cifar_model.evaluate(
    cifar_data_generator(cifar_x_test, cifar_y_test, batch_size=batch_size, shuffle=False),
    steps=vit_cifar_val_steps)

# Display accuracies (as percentages) and losses (raw values; a loss is not a
# percentage, so it is no longer multiplied by 100)
print('')
print('Training Accuracy: {}% and Training Loss: {}'.format(round(vit_cifar_train_result[1] * 100, 2), round(vit_cifar_train_result[0], 4)))
print('Testing Accuracy: {}% and Testing Loss: {}'.format(round(vit_cifar_val_result[1] * 100, 2), round(vit_cifar_val_result[0], 4)))
print('')

# Save the model data
save_data(file_path, vit_cifar_model, vit_cifar_history, "vit_cifar")

# Clear variables from memory
del vit_cifar_model, vit_cifar_history, vit_cifar_train_result, vit_cifar_val_result