# Improving lightweight vision models using knowledge distillation

Below is the training script interface we created to train multiple models for our experiments. All experiment metrics are saved to the wandb dashboard for reference. This script allows us to train the models one at a time, at intervals, and so avoid the GPU usage limits imposed by Google Colab.

In [None]:
from google.colab import drive
import os

# Make the Google Drive contents available to this Colab runtime.
drive.mount('/content/drive')

# IMPORTANT:
# Point the path below at the location where you saved the project folder.
# The working directory must end up *inside* the folder containing all the scripts,
# otherwise the `pip install -r requirements.txt` in the next code block cannot run.
# To find the path: click the folder icon in this notebook's sidebar, navigate to the
# folder containing all the scripts in the Drive folder, hover your cursor over it,
# click the small three-dots icon that appears, and copy the path.
project_dir = '/content/drive/MyDrive/source/image-classification-KD-main'
os.chdir(project_dir)

In [None]:
!pip install -r requirements.txt #run once
#After installing all dependencies you may be asked to restart the runtime. Please do so, and do not rerun this code block afterwards.
#You still need to rerun the first code block above to mount your Google Drive again.

In [None]:
from run import cli_main
from watermark import watermark
import torch
import gc

# Print the watermark report for Python and the packages this project relies on.
tracked_packages = "torch,lightning,wandb,torchvision,torchmetrics,jsonargparse"
print(watermark(packages=tracked_packages, python=True))

# Model Training 

In [None]:
# Interface to run the fine-tuning process.
# Change the arguments (i.e. hyperparameters) below to configure your desired training run.
train_args = {
    "trainer": {
        "accelerator": "gpu",  # make sure the GPU is turned on, otherwise change this to "cpu"
        "fast_dev_run": False,  # for debugging purposes; ignore this
        "log_every_n_steps": 5,  # step interval at which metrics are logged
        "logger": {
            "name": "resnet18-test",  # change this according to model_name
            "project": "image-classification-KD",
        },
        "precision": "16-mixed",  # if training on "cpu", change this to "32-true"
        "max_epochs": 6,
    },
    "model": {
        "class_path": "src.training.LightningTraining",
        "init_args": {
            # Model to train. Other options include "resnet50", "resnet101", "resnet152",
            # "vit_l_32", "regnet_y_16gf", "swin_s".
            # To train a model other than those listed above, please visit
            # https://pytorch.org/vision/stable/models.html or run
            # torchvision.models.list_models() to retrieve the list of available models.
            "model_name": "resnet18",
            "dropout_rates": 0.4,
            "learning_rate": 0.01,
            "momentum": 0.9,
            "nesterov": True,
            "weight_decay": 1e-2,
            "T_max": 10,  # parameter controlling the cosine annealing learning-rate scheduler
        },
    },
    "data": {
        "batch_size": 128,
        # os.cpu_count() returns None when the CPU count cannot be determined;
        # fall back to 0 so an integer is always passed on.
        # NOTE(review): assumes the data module forwards this to a PyTorch DataLoader,
        # where 0 means loading in the main process - confirm.
        "num_workers": os.cpu_count() or 0,
    },
}

In [None]:
# Keep your wandb API key on hand, as you will be asked to enter it as soon as you run this code block.
# Main training
try:
    cli_main(train_args)
finally:
    # Free GPU memory for the next model, even if training fails or is interrupted.
    # Run the garbage collector first so that objects kept alive only by reference
    # cycles are destroyed; empty_cache() can only release cached memory that is no
    # longer occupied by a live tensor.
    gc.collect()
    torch.cuda.empty_cache()

# Distillation Training

In [None]:
# Change the arguments (i.e. hyperparameters) below to configure your desired distillation run.
distil_args = {
    "trainer": {
        # Same arguments as in train_args above.
        "accelerator": "gpu",
        "fast_dev_run": False,
        "log_every_n_steps": 5,
        "logger": {
            "name": "resnet50x18",  # change this according to model_name
            "project": "image-classification-KD",
        },
        # Use the explicit "16-mixed" spelling, matching train_args; the bare "16" is the
        # legacy name for the same mixed-precision mode.
        "precision": "16-mixed",
        "max_epochs": 6,
    },
    # This is where the arguments differ from those above.
    "model": {
        "class_path": "src.training.DistilledTraining",
        "init_args": {
            "model_name": "resnet18",  # student model
            # Teacher model. Other options include "resnet101", "resnet152", "vit_l_32",
            # "regnet_y_16gf", "swin_s".
            "teacher_model_name": "resnet50",
            # After training a teacher model on the dataset using the training code block
            # above, the model is saved as a wandb artifact. Look in the artifact section
            # of your wandb dashboard for the artifact path of your trained teacher model
            # and copy it here. The artifact path should look similar to the one below.
            "artifact_path": "st311-project/image-classification-KD/model-19ra9jz6:v0",
            "dropout_rates": 0.4,
            "alpha": 0.5,
            "temperature": 2.0,
            "learning_rate": 0.01,
            "momentum": 0.9,
            "nesterov": True,
            "weight_decay": 1e-3,
            "T_max": 10,
        },
    },
    "data": {
        "batch_size": 128,
        # os.cpu_count() returns None when the CPU count cannot be determined;
        # fall back to 0 so an integer is always passed on.
        # NOTE(review): assumes the data module forwards this to a PyTorch DataLoader,
        # where 0 means loading in the main process - confirm.
        "num_workers": os.cpu_count() or 0,
    },
}

In [None]:
# Distillation training
try:
    cli_main(distil_args)
finally:
    # Free GPU memory for the next model, even if training fails or is interrupted.
    # Run the garbage collector first so that objects kept alive only by reference
    # cycles are destroyed; empty_cache() can only release cached memory that is no
    # longer occupied by a live tensor.
    gc.collect()
    torch.cuda.empty_cache()