<div style="display:block" direction=rtl align=right><br><br>
    <div  style="width:100%;margin:100;display:block"  display=block align=center>
        <img width=130 align=right src="https://i.ibb.co/yXKQmtZ/logo1.png" style="margin:0;" />
        <img width=170 align=left  src="https://i.ibb.co/wLjqFkw/logo2.png" style="margin:0;" />
        <span><br><font size=5>University of Tehran, School of ECE</font></span>
        <span><br><font size=3>Deep Learning</font></span>
        <span><br><font size=3>Spring 2023</font></span>
    </div><br><br><br>
    <div style="display:block" align=left display=block> 
        <font size=3>PyTorch tutorial - Learning (Part 2)</font><br>
        <hr />
        <font size=3>TA: <a href="mailto:farshads7778@gmail.com">Farshad Sangari</a></font><br>
    </div>
</div>

# Import libraries

In [11]:
import os
import glob
import cv2
from PIL import Image


import torch
import torchvision
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import  Dataset
from torch.optim.lr_scheduler import StepLR

import pandas as pd
import random
from tqdm import tqdm
import matplotlib.pyplot as plt


# Defined data on torchvision

## Load data

In [12]:
# Root folders of the train / validation splits.
# The trailing "/" matters: the next cell builds paths as DIR + class_name + '/*.jpg'.
DIR_TRAIN = "./data/CIFAR10/train/"
DIR_VAL = "./data/CIFAR10/val/"

In [13]:
# Class names are the sub-folder names of the training directory.
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# class -> index mapping identical across runs and machines, and skip stray
# non-directory entries (e.g. hidden files) that are not classes.
classes = sorted(
    entry for entry in os.listdir(DIR_TRAIN)
    if os.path.isdir(os.path.join(DIR_TRAIN, entry))
)
print("Total Classes: ", len(classes))

# Collect the image paths of every class for both splits.
# Paths are deliberately built with "/" so the class name stays the
# second-to-last "/"-separated component of every path.
train_imgs = []
val_imgs  = []
for _class in classes:
    # sorted() makes the file order independent of the file system
    train_imgs += sorted(glob.glob(DIR_TRAIN + _class + '/*.jpg'))
    val_imgs += sorted(glob.glob(DIR_VAL + _class + '/*.jpg'))

print("\nTotal train images: ", len(train_imgs))
print("Total test images: ", len(val_imgs))

Total Classes:  10

Total train images:  50000
Total test images:  10000


#### Transforms, dataset and data loaders

In [14]:
# Per-channel normalization statistics, shared by both pipelines so that the
# train and validation inputs can never end up scaled differently.
CIFAR_NORM_MEAN = (0.49139968, 0.48215827, 0.44653124)
CIFAR_NORM_STD = (0.24703233, 0.24348505, 0.26158768)

# Training pipeline: random augmentations first, then tensor conversion and normalization.
cifar_transforms_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR_NORM_MEAN, CIFAR_NORM_STD)])

# Validation pipeline: no augmentation, only tensor conversion and the same normalization.
cifar_transforms_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR_NORM_MEAN, CIFAR_NORM_STD)])

In [15]:
class CIFAR10Dataset(Dataset):
    """
    Dataset over a list of image files whose parent folder name is the class label.
    ------------------------------------------------
    Args:
        - imgs_list (list[str]): paths of the image files
        - classes (list[str]): class names; the position of a name in this list is its integer label
        - transforms (callable, optional): applied to the loaded PIL image;
          when None the image is only converted to a tensor
    """
    def __init__(self, imgs_list, classes, transforms=None):
        super(CIFAR10Dataset, self).__init__()
        self.imgs_list = imgs_list
        self.class_to_int = {name: idx for idx, name in enumerate(classes)}
        self.transforms = transforms

    def _label_for(self, image_path):
        """Return the integer label of an image, taken from the name of its parent folder."""
        # os.path understands the platform's separators, unlike a hard-coded split("/")
        class_name = os.path.basename(os.path.dirname(image_path))
        return self.class_to_int[class_name]

    def __getitem__(self, index):
        """Return the (image, label) pair stored at position `index`."""
        image_path = self.imgs_list[index]

        # Reading image: convert() returns a loaded copy, so the file can be closed
        # right away. Forcing RGB keeps grayscale / palette files compatible with
        # 3-channel transforms.
        with Image.open(image_path) as image_file:
            image = image_file.convert("RGB")

        # Retrieving class label
        label = self._label_for(image_path)

        # Applying transforms on image (plain tensor conversion when none were given)
        if self.transforms is not None:
            image = self.transforms(image)
        else:
            image = transforms.ToTensor()(image)
        return image, label

    def __len__(self):
        """Number of image files in the dataset."""
        return len(self.imgs_list)

In [16]:
# Wrap the collected file lists into datasets; each split gets its own transform pipeline.
train_dataset = CIFAR10Dataset(
    imgs_list=train_imgs,
    classes=classes,
    transforms=cifar_transforms_train,
)
val_dataset = CIFAR10Dataset(
    imgs_list=val_imgs,
    classes=classes,
    transforms=cifar_transforms_val,
)

In [17]:
# Initial loaders (the training cells further down re-create them with their own batch size).
# Only the training split is shuffled.
cifar_train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=1024, shuffle=True
)

cifar_val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=1024, shuffle=False
)

## Model

### Custom model

In [18]:
class ModelCNN(nn.Module):
    """
    Convolutional classifier with 10 output logits.

    Three stages of (two 3x3 convolutions + LeakyReLU, then 2x2 max-pooling)
    followed by a three-layer fully connected classifier. The classifier's
    first layer expects 256*4*4 features, i.e. 3 x 32 x 32 inputs.
    """

    # Negative slope shared by every LeakyReLU and by the matching init gain
    LEAKY_SLOPE = 0.2

    def __init__(self):
        super().__init__()
        slope = self.LEAKY_SLOPE
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(slope, inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(slope, inplace=True),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(slope, inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(slope, inplace=True),
            nn.MaxPool2d(2, 2), # output: 128 x 8 x 8

            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(slope, inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(slope, inplace=True),
            nn.MaxPool2d(2, 2) # output: 256 x 4 x 4
            )

        # Custom initialization of the first convolution only (the remaining
        # layers keep PyTorch's defaults).
        first_conv = self.feature_extractor[0]
        # Weights --> Xavier normal; the gain is computed for the slope the
        # network actually uses (calculate_gain defaults to 0.01 otherwise).
        nn.init.xavier_normal_(
            first_conv.weight,
            gain=nn.init.calculate_gain("leaky_relu", slope),
        )
        ## Bias --> Standard normal distribution
        nn.init.normal_(first_conv.bias, mean=0.0, std=1.0)

        self.flat = nn.Flatten()

        self.classifier = nn.Sequential(
            nn.Linear(256*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 10))

    def forward(self, x):
        """Map a batch of images to a (batch, 10) tensor of class logits."""
        x = self.feature_extractor(x)
        # Flatten keeps the batch dimension intact, so an input of the wrong
        # spatial size fails loudly in the classifier instead of being silently
        # folded into extra batch rows (which view(-1, 256*4*4) would do).
        x = self.flat(x)
        x = self.classifier(x)
        return x

# Training

## Training process

In [9]:
def accuracy(output, target, topk=(1,)):
    """
    Top-k accuracy in percent, one value per entry of `topk`.

    A sample counts as correct for a given k when its target index is among the
    k highest-scoring entries of its row in `output` (scores are ranked along dim 1).
    Returns a list of Python floats in the same order as `topk`.
    """
    with torch.no_grad():
        largest_k = max(topk)
        n_samples = target.size(0)
        # Transposed indices: row r holds, for every sample, its (r+1)-th best class
        ranked = output.topk(largest_k, dim=1, largest=True, sorted=True)[1].t()
        hits = ranked.eq(target.view(1, -1).expand_as(ranked))
        # The first k rows cover the k best guesses of every sample
        return [
            hits[:k].float().sum().mul_(100.0 / n_samples).item()
            for k in topk
        ]

### Simple training

In [19]:
# Check whether PyTorch can see a CUDA device (the training cells fall back to the CPU otherwise)
torch.cuda.is_available()

True

In [None]:
# Hyper-parameters of the simple training loop
batch_size = 256
epochs = 10
learning_rate = 1e-3
# NOTE: gamma, step_size and ckpt_save_freq are not used by this simple loop
# (it has no LR scheduler and no checkpointing); kept so the names stay defined.
gamma=0.5
step_size=10
ckpt_save_freq = 10

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


cifar_train_loader = torch.utils.data.DataLoader(train_dataset,
                                                batch_size=batch_size,
                                                shuffle=True)

cifar_val_loader = torch.utils.data.DataLoader(val_dataset,
                                                batch_size=batch_size,
                                                shuffle=False)


model = ModelCNN().to(device)



# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history of the average top-1 accuracy and loss
top1_acc_train = []
top1_acc_val = []
loss_avg_train = []
loss_avg_val = []

# Print progress roughly six times per epoch; max(1, ...) avoids a modulo by
# zero when the loader has fewer than three batches.
log_every = max(1, int(round(len(cifar_train_loader) / 6)))

for epoch in range(1, epochs + 1):
    # The sums are weighted by batch size so that a smaller last batch does not
    # skew the epoch average (a plain mean of per-batch means would).
    sum_train_acc_epoch = 0
    sum_val_acc_epoch = 0
    sum_train_loss_epoch = 0
    sum_val_loss_epoch = 0
    num_train_samples = 0
    num_val_samples = 0

    model.train()
    mode = "train"
    for batch_idx, (images, labels) in enumerate(cifar_train_loader,1):
        images = images.to(device)
        labels = labels.to(device)
        labels_pred = model(images)
        loss = criterion(labels_pred, labels)


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        samples_in_batch = labels.size(0)
        acc1 = accuracy(labels_pred, labels)
        sum_train_acc_epoch += acc1[0] * samples_in_batch
        # criterion returns the batch mean, so mean * size restores the batch sum
        sum_train_loss_epoch += loss.item() * samples_in_batch
        num_train_samples += samples_in_batch
        if batch_idx % log_every == 0:
            print(f"At epoch {epoch}, average accuracy till batch_index --> {batch_idx}: {sum_train_acc_epoch/num_train_samples}")
    # max(..., 1) keeps an empty loader from raising a division by zero
    top1_acc_train.append(sum_train_acc_epoch / max(num_train_samples, 1))
    loss_avg_train.append(sum_train_loss_epoch / max(num_train_samples, 1))


    model.eval()
    mode = "val"
    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(cifar_val_loader,1):
            images = images.to(device)
            labels = labels.to(device)
            labels_pred = model(images)
            loss = criterion(labels_pred, labels)
            samples_in_batch = labels.size(0)
            acc1 = accuracy(labels_pred, labels)
            sum_val_acc_epoch += acc1[0] * samples_in_batch
            sum_val_loss_epoch += loss.item() * samples_in_batch
            num_val_samples += samples_in_batch

        top1_acc_val.append(sum_val_acc_epoch / max(num_val_samples, 1))
        loss_avg_val.append(sum_val_loss_epoch / max(num_val_samples, 1))

### Training utilities

In [13]:
class AverageMeter(object):
    """
    Keeps the most recent value plus a running weighted average of all values seen.
    """

    def __init__(self, start_val=0, start_count=0, start_avg=0, start_sum=0):
        # Zero everything first, then apply the requested starting state
        self.reset()
        self.val, self.avg = start_val, start_avg
        self.sum, self.count = start_sum, start_count

    def reset(self):
        """
        Set 'val', 'avg', 'sum' and 'count' back to 0.
        """
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, num=1):
        """
        Record `val` as the latest value, counted `num` times, and refresh the average.
        """
        self.val = val
        self.count += num
        self.sum += val * num
        self.avg = self.sum / self.count


def save_model(file_path, file_name, model, optimizer=None):
    """
    Save a checkpoint of the model (and optionally the optimizer) to disk.
    Usually called after training an epoch.
    ------------------------------------------------
    Args:
        - file_path (str): Path (folder) for saving the model; created if it does not exist
        - file_name (str): name of the model checkpoint to save
        - model (torch.nn.Module)
        - optimizer (torch.optim, optional): its state is stored under the
          "optimizer" key when given

    The checkpoint is a dict with the key "model" and, when an optimizer was
    passed, the key "optimizer".
    """
    state_dict = {"model": model.state_dict()}

    if optimizer is not None:
        state_dict["optimizer"] = optimizer.state_dict()

    # torch.save does not create missing folders; an empty file_path means
    # "current directory" and needs no creation.
    if file_path:
        os.makedirs(file_path, exist_ok=True)
    torch.save(state_dict, os.path.join(file_path, file_name))


def load_model(ckpt_path, model, optimizer=None):
    """
    Load a saved model and, when possible, optimizer state from a checkpoint.
    ------------------------------------------------
    Args:
        - ckpt_path (str): path of the checkpoint file
        - model (torch.nn.Module): receives the weights stored under "model"
        - optimizer (torch.optim, optional): receives the state stored under
          "optimizer", if the checkpoint has one

    Returns:
        (model, optimizer): the same objects that were passed in, updated in place.
    """
    # SECURITY: torch.load unpickles the file; only load checkpoints from trusted sources.
    # Tensors are first mapped to the CPU; load_state_dict then copies them into the model.
    checkpoint = torch.load(ckpt_path, map_location=torch.device("cpu"))
    model.load_state_dict(checkpoint["model"])
    # Identity test + short-circuit `and` (instead of `!= None` combined with bitwise `&`)
    if optimizer is not None and "optimizer" in checkpoint:
        optimizer.load_state_dict(checkpoint["optimizer"])
    return model, optimizer


# NOTE(review): this re-defines the `accuracy` function that already appears
# earlier in the notebook; whichever definition ran last is the one in effect.
def accuracy(output, target, topk=(1,)):
    """
    Top-k accuracy in percent, one value per entry of `topk`.

    A sample counts as correct for a given k when its target index is among the
    k highest-scoring entries of its row in `output` (scores are ranked along dim 1).
    Returns a list of Python floats in the same order as `topk`.
    """
    with torch.no_grad():
        largest_k = max(topk)
        n_samples = target.size(0)
        # Transposed indices: row r holds, for every sample, its (r+1)-th best class
        ranked = output.topk(largest_k, dim=1, largest=True, sorted=True)[1].t()
        hits = ranked.eq(target.view(1, -1).expand_as(ranked))
        # The first k rows cover the k best guesses of every sample
        return [
            hits[:k].float().sum().mul_(100.0 / n_samples).item()
            for k in topk
        ]


In [14]:
def _report_row(
    model_name,
    mode,
    epoch,
    learning_rate,
    batch_size,
    batch_index,
    loss_batch,
    avg_loss,
    avg_top1_acc,
):
    """Build one per-batch report record; the running-average columns of the other mode stay None."""
    row = {
        "model_name": model_name,
        "mode": mode,
        "image_type": "original",
        "epoch": epoch,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "batch_index": batch_index,
        "loss_batch": loss_batch,
        "avg_train_loss_till_current_batch": None,
        "avg_train_top1_acc_till_current_batch": None,
        "avg_val_loss_till_current_batch": None,
        "avg_val_top1_acc_till_current_batch": None,
    }
    # `mode` is "train" or "val" and selects which pair of columns is filled
    row[f"avg_{mode}_loss_till_current_batch"] = avg_loss
    row[f"avg_{mode}_top1_acc_till_current_batch"] = avg_top1_acc
    return row


def train(
    train_loader,
    val_loader,
    model,
    model_name,
    epochs,
    learning_rate,
    gamma,
    step_size,
    device,
    load_saved_model,
    ckpt_save_freq,
    ckpt_save_path,
    ckpt_path,
    report_path,
):
    """
    Train `model` with Adam + cross-entropy, validate after every epoch and
    log one report row per batch.
    ------------------------------------------------
    Args:
        - train_loader, val_loader: iterables yielding (images, labels) batches
        - model (torch.nn.Module): moved to `device` before training
        - model_name (str): used in the checkpoint and report file names
        - epochs (int): number of epochs to run
        - learning_rate (float): initial learning rate of Adam
        - gamma, step_size: StepLR parameters; the scheduler steps once per epoch
        - device (torch.device)
        - load_saved_model (bool): when True, model and optimizer are first
          restored from `ckpt_path`
        - ckpt_save_freq (int): save a checkpoint every this many epochs;
          0 / None disables checkpointing
        - ckpt_save_path (str): folder for the checkpoints
        - ckpt_path (str): checkpoint file to resume from
        - report_path (str): folder where "<model_name>_report.csv" is written

    Returns:
        (model, optimizer, report): `report` is a pandas DataFrame with one row
        per processed batch (train and val).
    """

    model = model.to(device)

    # loss function
    criterion = nn.CrossEntropyLoss()

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_saved_model:
        model, optimizer = load_model(
            ckpt_path=ckpt_path, model=model, optimizer=optimizer
        )

    # NOTE(review): the checkpoint holds only model and optimizer state, so the
    # scheduler's step count starts over when resuming.
    lr_scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)

    report_columns = [
        "model_name",
        "mode",
        "image_type",
        "epoch",
        "learning_rate",
        "batch_size",
        "batch_index",
        "loss_batch",
        "avg_train_loss_till_current_batch",
        "avg_train_top1_acc_till_current_batch",
        "avg_val_loss_till_current_batch",
        "avg_val_top1_acc_till_current_batch"]
    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end; appending to a DataFrame per batch re-allocates it every time.
    report_rows = []

    for epoch in tqdm(range(1, epochs + 1)):
        # Running, sample-weighted averages for this epoch
        top1_acc_train = AverageMeter()
        loss_avg_train = AverageMeter()
        top1_acc_val = AverageMeter()
        loss_avg_val = AverageMeter()

        model.train()
        mode = "train"

        loop_train = tqdm(
            enumerate(train_loader, 1),
            total=len(train_loader),
            desc="train",
            position=0,
            leave=True)
        for batch_idx, (images, labels) in loop_train:
            images = images.to(device)
            labels = labels.to(device)
            labels_pred = model(images)
            loss = criterion(labels_pred, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            acc1 = accuracy(labels_pred, labels)
            top1_acc_train.update(acc1[0], images.size(0))
            loss_avg_train.update(batch_loss, images.size(0))

            report_rows.append(
                _report_row(
                    model_name=model_name,
                    mode=mode,
                    epoch=epoch,
                    learning_rate=optimizer.param_groups[0]["lr"],
                    batch_size=images.size(0),
                    batch_index=batch_idx,
                    loss_batch=batch_loss,
                    avg_loss=loss_avg_train.avg,
                    avg_top1_acc=top1_acc_train.avg,
                )
            )

            loop_train.set_description(f"Train - iteration : {epoch}")
            # Every extra keyword of set_postfix is displayed as a metric, so
            # only real metrics are passed here.
            loop_train.set_postfix(
                loss_batch="{:.4f}".format(batch_loss),
                avg_train_loss_till_current_batch="{:.4f}".format(loss_avg_train.avg),
                top1_accuracy_train="{:.4f}".format(top1_acc_train.avg),
                refresh=True,
            )
        # A falsy frequency (0 / None) turns checkpointing off instead of
        # failing on the modulo.
        if ckpt_save_freq and epoch % ckpt_save_freq == 0:
            save_model(
                file_path=ckpt_save_path,
                file_name=f"ckpt_{model_name}_epoch{epoch}.ckpt",
                model=model,
                optimizer=optimizer,
            )

        model.eval()
        mode = "val"
        with torch.no_grad():
            loop_val = tqdm(
                enumerate(val_loader, 1),
                total=len(val_loader),
                desc="val",
                position=0,
                leave=True,
            )
            for batch_idx, (images, labels) in loop_val:
                # No gradients are produced under no_grad, so nothing needs zeroing here
                images = images.to(device).float()
                labels = labels.to(device)
                labels_pred = model(images)
                loss = criterion(labels_pred, labels)

                batch_loss = loss.item()
                acc1 = accuracy(labels_pred, labels)
                top1_acc_val.update(acc1[0], images.size(0))
                loss_avg_val.update(batch_loss, images.size(0))

                report_rows.append(
                    _report_row(
                        model_name=model_name,
                        mode=mode,
                        epoch=epoch,
                        learning_rate=optimizer.param_groups[0]["lr"],
                        batch_size=images.size(0),
                        batch_index=batch_idx,
                        loss_batch=batch_loss,
                        avg_loss=loss_avg_val.avg,
                        avg_top1_acc=top1_acc_val.avg,
                    )
                )

                loop_val.set_description(f"val - iteration : {epoch}")
                loop_val.set_postfix(
                    loss_batch="{:.4f}".format(batch_loss),
                    avg_val_loss_till_current_batch="{:.4f}".format(loss_avg_val.avg),
                    top1_accuracy_val="{:.4f}".format(top1_acc_val.avg),
                    refresh=True,
                )
        lr_scheduler.step()

    report = pd.DataFrame(report_rows, columns=report_columns)
    report.to_csv(os.path.join(report_path, f"{model_name}_report.csv"))
    return model, optimizer, report


In [15]:
# Hyper-parameters for the run that uses the `train` utility
batch_size = 64
epochs = 10
learning_rate = 1e-3
gamma = 0.5
step_size = 10
ckpt_save_freq = 10
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
custom_model = ModelCNN()

# Re-create the loaders with this run's batch size; only the training split is shuffled
cifar_train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)

cifar_val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False
)

# `train` returns a (model, optimizer, report) tuple, which is what `trainer` holds.
# Checkpoints and the CSV report are written to the current directory.
trainer = train(
    model=custom_model,
    model_name="Custom model",
    train_loader=cifar_train_loader,
    val_loader=cifar_val_loader,
    device=device,
    epochs=epochs,
    learning_rate=learning_rate,
    step_size=step_size,
    gamma=gamma,
    load_saved_model=False,
    ckpt_path="./",
    ckpt_save_freq=ckpt_save_freq,
    ckpt_save_path="./",
    report_path="./",
)

Train - iteration : 1:   4%|▎         | 29/782 [00:11<05:05,  2.46it/s, avg_train_loss_till_current_batch=2.3594, loss_batch=2.2965, max_len=2, top1_accuracy_train=10.3448]
  0%|          | 0/10 [00:11<?, ?it/s]


KeyboardInterrupt: 