In [None]:
import os

import matplotlib.pyplot as plt

import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

import numpy as np
import tensorboard

from torchvision import transforms
from torchsummary import summary
from tqdm.notebook import tqdm

# for model evaluation
import numpy as np

from models.vgg import VGG
        
from utils import load_cifar10, visualize_cifar10, plot_confusion_matrix


# Fixed seed so that repeated runs of the notebook start from the same RNG state.
seed = 420

# Seed NumPy, the PyTorch CPU generator and the PyTorch CUDA generator in turn.
for _seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(seed)

# Ask cuDNN to pick deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [None]:
# Hyperparameters for training
batch_size = 32        # samples per mini-batch
num_epochs = 30        # number of passes over the training set
lr = 1e-3              # initial learning rate passed to Adam
weight_decay = 15e-5   # weight decay passed to Adam
grad_clip = 0.001      # NOTE(review): not referenced by any cell visible in this notebook

# Per-channel mean / standard deviation of the data.
# NOTE(review): not passed to any call visible in this notebook — presumably used inside utils; confirm.
data_mean, data_std = [0.4914, 0.4822, 0.4465], [0.247, 0.243, 0.261]

device = "cuda" if torch.cuda.is_available() else "cpu"
# os.cpu_count() may return None when the count cannot be determined;
# fall back to 0 (load data in the main process) so the value is always an int.
num_workers = os.cpu_count() or 0

# CIFAR-10 Classes
class_names = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Loading and visualization of the dataset

In [None]:
# Build the training and validation data loaders (data augmentation disabled).
train_loader, val_loader = load_cifar10(
    root_dir='data',
    batch_size=batch_size,
    num_workers=num_workers,
    augment_data=False,
)

In [None]:
# Preview the training data with the project helper from utils
# (presumably plots sample images labelled with class_names — confirm in utils).
visualize_cifar10(train_loader, class_names)

# 1.1 ResNet Image Classifier


## a) Basic Block Implementation



![BasicBlock.png](BasicBlock.png "Basic Block")

<mark>*TODO*: Initialize the layers of the basic block.</mark> 
 
The basic block consists of two parts:
    
1. **Residual layer:**                             
    * The residual layer comprises **2 convolutions** using **3x3 kernel** and **padding of 1**. 
    * The first conv layer uses **'stride'** as stride and the second one a **stride of 1**.   
    * After each convolution we need a **batch norm layer**.
    * After the first convolution we have **ReLU activation**.
    * After the second convolution we have **no activation**.         
2. **Residual connection:**                         
    * The residual connection is a **skip connection** that combines the **input of the block** with the **output of the last conv layer**.           
    * If the **input and output channels are not the same** or the **stride is not one**, we need to use a **1x1 conv layer** with a **stride of 'stride'** and a **batch norm layer** to downsample the input (this is already implemented for you below).   
    
* (You can set **bias=False** for all conv layers.) 

<mark>*TODO*: Implement the **forward model** that passes the input through the complete network.</mark> 
Don't forget to **activate the output of the residual connection**.


In [None]:
import torch
import torch.nn as nn

class BasicBlock(nn.Module):
    """Basic ResNet block: two 3x3 conv-batchnorm stages plus a skip connection.

    Data flow: ``relu(bn2(conv2(relu(bn1(conv1(x))))) + residual(x))``.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the block.
        stride: Stride of the first convolution (and of the 1x1 projection on
            the skip path); the second convolution always uses stride 1.
    """

    # Ratio between the block's output channels and `out_channels`.
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()

        # Residual layer: conv(3x3, stride) -> bn -> relu -> conv(3x3, 1) -> bn.
        # Bias is disabled because each convolution is followed by batch norm.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Residual connection: identity when shapes match, otherwise a
        # 1x1 conv + batch norm that projects the input to the output shape.
        self.residual = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.residual = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        """Apply the block to ``x`` and return the activated sum with the skip path."""
        # First stage in 'cbr' order: conv -> batchnorm -> relu.
        out = torch.relu(self.bn1(self.conv1(x)))
        # Second stage has no activation before the addition.
        out = self.bn2(self.conv2(out))
        # Add the (possibly projected) input, then activate the sum.
        out = out + self.residual(x)
        return torch.relu(out)

## b) ResNet Implementation



![ResNet.png](ResNet.png "ResNet")

<mark>*TODO*: Initialize the layers of the ResNet model.</mark> 
1. **Input layer:**                                  
    * Consists of a **conv-batchnorm-relu** block.  
    * The conv layer uses **3x3 kernel**, **padding of 1** and a **stride of 1**.        
    * Not all implementations use a 3x3 kernel here; e.g., the original implementation uses a 7x7 kernel followed by max pooling.  
        If you want to, you can also try out different kernel sizes and combinations.          
2. **ResNet layers:**                               
    * Make **4 layers** with the number of blocks as specified by **'num_blocks'**.  
      For this, you can use the function `_make_layer`, which makes a block of the class specified in the parameter `block`. For this, we later pass the parameter `block=BasicBlock` to use the Basic block you already implemented.  
    * After the **4 layers**, use **adaptive average pooling** with an **output size of 1x1**.                
    * Initialize a **linear layer** to map from channels to **num_classes**.  

<mark>*TODO*: Implement the **forward model** that passes the input through the complete network.</mark> 
 
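 Be aware that the linear layer expects one flat feature vector per sample (shape `[batch, channels]`), while the pooling layer returns a 4D tensor (shape `[batch, channels, 1, 1]`), so you have to flatten it first.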


In [None]:
class ResNet(nn.Module):
    """ResNet image classifier built from four stages of residual blocks.

    Args:
        block: Block class used to build the stages. It must accept
            ``(in_channels, out_channels, stride=...)`` and expose an
            ``expansion`` class attribute.
        num_blocks: Sequence of four ints, the number of blocks per stage.
        num_classes: Number of output classes of the final linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        # Running channel count; _make_layer updates it as stages are built.
        self.in_channels = 64

        # Input layer: 3x3 conv (stride 1, padding 1) -> batchnorm -> relu on 3-channel images.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Four stages using the standard ResNet widths; every stage after the
        # first halves the spatial resolution with a stride of 2.
        # NOTE(review): widths/strides follow the common ResNet layout — confirm against ResNet.png.
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        # Global pooling to 1x1 followed by the classification layer.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for s in strides:
            layers.append(block(self.in_channels, out_channels, stride=s))
            # The next block consumes what this block produced.
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class scores (logits) of shape ``[batch, num_classes]``."""
        # Input layer in 'cbr' order.
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        # [batch, C, 1, 1] -> [batch, C] so the linear layer can consume it.
        out = torch.flatten(out, 1)
        return self.fc(out)

    def save_checkpoint(self, epoch, accuracy, ckptpath="checkpoint"):
        """Save the model weights to ``<ckptpath>/checkpoint_<epoch>_<accuracy>.pth``.

        Args:
            epoch: Epoch index stored in the checkpoint and used in the file name.
            accuracy: Accuracy stored in the checkpoint and used in the file
                name (formatted with four decimals).
            ckptpath: Directory to write into; created if it does not exist.
        """
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': self.state_dict(),
            'accuracy': accuracy,
        }
        # torch.save does not create missing parent directories.
        os.makedirs(ckptpath, exist_ok=True)
        ckptpath = os.path.join(ckptpath, "checkpoint_{}_{:.4f}.pth".format(epoch, accuracy))
        # Save the dictionary to file
        torch.save(checkpoint, ckptpath)

    def load_checkpoint(self, ckptpath, map_location=None):
        """Load model weights from a checkpoint file written by ``save_checkpoint``.

        Args:
            ckptpath: Path to the ``.pth`` checkpoint file.
            map_location: Forwarded to ``torch.load`` to remap tensor devices.
        """
        # Load the saved file.
        # NOTE: torch.load unpickles data; only load checkpoints from trusted sources.
        checkpoint = torch.load(ckptpath, map_location=map_location)

        # Restore the model state (the checkpoint holds no optimizer state).
        self.load_state_dict(checkpoint['model_state_dict'])

## c) Training and Validation

Now that you have implemented the model we can train it for image classification on the provided data set.
Implement the training and validation loop at the specified positions. You can use the provided comments as guidance.

<mark>*TODO*: Implement the training and validation loops.</mark>

In [None]:
def train(model, dataloader, optimizer, scheduler, criterion, device):
    """Train ``model`` for one epoch and return ``(avg_loss, accuracy)``.

    Args:
        model: Network to optimise; called as ``model(inputs)``.
        dataloader: Sized iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer that updates the model parameters.
        scheduler: Optional scheduler; if truthy, it is stepped once per epoch
            with the average training loss (ReduceLROnPlateau-style API).
        criterion: Loss function called as ``criterion(outputs, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple of the mean per-batch loss and the accuracy in percent.
    """
    model.train()  # Set the model to training mode
    total_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in (pbar := tqdm(enumerate(dataloader), total=len(dataloader), ncols=800, unit='batches', leave=False)):

        inputs, targets = inputs.to(device), targets.to(device)

        # 1) Forward pass: predictions and loss.
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # 2) Backward pass: clear stale gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 3) Track the loss and the number of correctly classified samples.
        total_loss += loss.item()
        predicted = outputs.argmax(dim=1)  # class with the highest score per sample
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

        pbar.set_description(f"Iteration: {batch_idx}")
        pbar.set_postfix_str(f"Train Loss: {loss.item():.4f} | Train Acc: {100* correct / total:.2f} | LR: {optimizer.param_groups[0]['lr']}")

    avg_loss = total_loss / len(dataloader)
    accuracy = 100. * correct / total

    # The scheduler is driven by the epoch's average training loss.
    if scheduler:
        scheduler.step(avg_loss)

    return avg_loss, accuracy


def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` and return ``(avg_loss, accuracy)``.

    Args:
        model: Network to evaluate; called as ``model(inputs)``.
        dataloader: Sized iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple of the mean per-batch loss and the accuracy in percent.
    """
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():  # Disable gradient calculation
        for batch_idx, (inputs, targets) in (pbar := tqdm(enumerate(dataloader), total=len(dataloader), ncols=800, unit='batches', leave=False)):

            inputs, targets = inputs.to(device), targets.to(device)

            # 1) Forward pass: predictions and loss.
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # 2) Track the loss and the number of correctly classified samples.
            total_loss += loss.item()
            predicted = outputs.argmax(dim=1)  # class with the highest score per sample
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

            pbar.set_description(f"Iteration: {batch_idx}")
            pbar.set_postfix_str(f"Valid Loss: {loss.item():.4f} | Valid Acc: {100* correct / total:.2f}")

    avg_loss = total_loss / len(dataloader)
    accuracy = 100. * correct / total
    return avg_loss, accuracy

In [None]:
# Load the TensorBoard notebook extension and open it on the "runs" log directory
# to visualize the training process.
%load_ext tensorboard
%tensorboard --logdir runs

<mark>*TODO*: Train the pre-implemented VGG model.</mark>

For this you do not need to implement anything. Just run the code below.

In [None]:
# Setup model architecture.
# The model must live on `device`: both `summary` below and the training loop
# feed it tensors that were created on / moved to `device`.
vgg = VGG(3, 10).to(device)
# Loss function, optimizer, and scheduler
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg.parameters(), lr=lr, weight_decay=weight_decay)
# `verbose` is omitted: False was its default and the argument is deprecated in recent PyTorch.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.3, patience=3, threshold=0.09)

# Print a per-layer overview for 3x32x32 inputs.
summary(vgg, (3, 32, 32), batch_size=batch_size, device=device)

In [None]:
# Writer will output to ./runs/ directory by default
writer = SummaryWriter()
ckptpath = os.path.join(writer.log_dir, "checkpoints")
# Make sure the checkpoint directory exists before the first save
# (harmless if vgg.save_checkpoint, defined outside this notebook, creates it as well).
os.makedirs(ckptpath, exist_ok=True)

# Metric to watch
best_valid_acc = 0.0

# Training loop
try:
    for epoch in (pbar := tqdm(range(num_epochs), unit='epoch')):
        pbar.set_description(f"Epoch {epoch + 1}")

        train_loss, train_acc = train(vgg, train_loader, optimizer, scheduler, loss_function, device)
        valid_loss, valid_acc = validate(vgg, val_loader, loss_function, device)

        writer.add_scalars('losses', {'train': train_loss, 'valid': valid_loss}, global_step=epoch)
        writer.add_scalars('accuracies', {'train': train_acc, 'valid': valid_acc}, global_step=epoch)
        pbar.set_postfix_str(f"Valid Loss: {valid_loss:.4f} | Valid Acc: {valid_acc:.2f}")

        # Keep a checkpoint whenever the validation accuracy improves.
        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            vgg.save_checkpoint(epoch, accuracy=valid_acc, ckptpath=ckptpath)
finally:
    # Flush pending events and release the log file even if training is interrupted.
    writer.close()

<mark>*TODO*: Train your ResNet model.</mark>

For this you do not need to implement anything. Just run the code below. However, you are encouraged to look through the code.

In [None]:
# Setup model architecture.
# The model must live on `device`: both `summary` below and the training loop
# feed it tensors that were created on / moved to `device`.
resnet = ResNet(BasicBlock, num_blocks=[1, 2, 2, 1], num_classes=10).to(device)
# Loss function, optimizer, and scheduler
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet.parameters(), lr=lr, weight_decay=weight_decay)
# `verbose` is omitted because it is deprecated in recent PyTorch; the current
# learning rate is already shown in the training progress bar.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.3, patience=3, threshold=0.09)

# Print a per-layer overview for 3x32x32 inputs.
summary(resnet, (3, 32, 32), batch_size=batch_size, device=device)

In [None]:
# Writer will output to ./runs/ directory by default
writer = SummaryWriter()
ckptpath = os.path.join(writer.log_dir, "checkpoints")
# Make sure the checkpoint directory exists before the first save;
# torch.save does not create missing parent directories.
os.makedirs(ckptpath, exist_ok=True)

# Metric to watch
best_valid_acc = 0.0

# Training loop
try:
    for epoch in (pbar := tqdm(range(num_epochs), unit='epoch')):
        pbar.set_description(f"Epoch {epoch + 1}")

        train_loss, train_acc = train(resnet, train_loader, optimizer, scheduler, loss_function, device)
        valid_loss, valid_acc = validate(resnet, val_loader, loss_function, device)

        writer.add_scalars('losses', {'train': train_loss, 'valid': valid_loss}, global_step=epoch)
        writer.add_scalars('accuracies', {'train': train_acc, 'valid': valid_acc}, global_step=epoch)
        pbar.set_postfix_str(f"Valid Loss: {valid_loss:.4f} | Valid Acc: {valid_acc:.2f}")

        # Keep a checkpoint whenever the validation accuracy improves.
        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            resnet.save_checkpoint(epoch, accuracy=valid_acc, ckptpath=ckptpath)
finally:
    # Flush pending events and release the log file even if training is interrupted.
    writer.close()


# Evaluation 
To evaluate both models, we re-create each model and load its trained parameters from a checkpoint.
To compare the performance of the two models, confusion matrices are used.
## Confusion Matrix
A confusion matrix is a powerful tool in classification analysis that helps visualize the performance of a classifier. It is a tabular way to illustrate the discrepancies between predicted and actual classes. This tool is particularly useful in supervised learning, where the outcomes are already known.

### Structure of a Confusion Matrix
For a binary classification problem, the confusion matrix is a $2\times2$ table consisting of four different components:
- True Positives (TP): The number of instances that were predicted as positive and are actually positive.
- False Positives (FP): The number of instances that were predicted as positive but are actually negative.
- True Negatives (TN): The number of instances that were predicted as negative and are actually negative.
- False Negatives (FN): The number of instances that were predicted as negative but are actually positive.

|                  | Predicted Positive   | Predicted Negative   |
|        ---       |        ---           |         ---          |
| Actual Positive  | True Positives (TP)  | False Negatives (FN) |
| Actual Negative  | False Positives (FP) | True Negatives (TN)  |

### Structure of a Multi-Class Confusion Matrix
For a classification problem involving multiple classes (e.g., $10$ classes), the confusion matrix expands from a $2\times2$ structure to an $n \times n$ structure, where $n$ is the number of classes. This matrix provides a detailed picture of the classifier's performance across all these classes.
Each row of the matrix corresponds to the actual class, while each column corresponds to the predicted class. The diagonal elements of the matrix (from top left to bottom right) represent the number of correct predictions for each class (i.e., the true positives for each class).


|   | class $1$   | class $2$  | $\dots$  | class $10$   |
|---|---|---|---|---|
| class $1$   | TP $1$  | FP $1, 2$  | $\dots$   | FP $1, 10$   |
| class $2$   | FP $2, 1$  | TP $2$  | $\dots$   | FP $2, 10$  |
| $\dots$     | $\dots$  |$\dots$   | $\dots$   |$\dots$   |
|  class $10$ | FP $10, 1$  | FP $10, 2$  | $\dots$   | TP $10$  |

<mark>*TODO*: change the model paths to your model paths</mark>

In [None]:
# Fresh model instance, placed on `device` so that it matches the tensors the
# checkpoint is mapped to and the device handed to plot_confusion_matrix.
vgg = VGG(3, 10).to(device)

#################################################
# TODO: Change the path to the path of your     #
# trained VGG model.                            #
#################################################
cktp = './runs/Apr24_13-59-53_IFLPC205/checkpoints/checkpoint_47_76.89.pth'
vgg.load_checkpoint(cktp, map_location=device)

# Confusion matrix of the VGG model on the validation set.
plot_confusion_matrix(vgg, val_loader, class_names, device)

In [None]:
# Fresh model instance, placed on `device` so that it matches the tensors the
# checkpoint is mapped to and the device handed to plot_confusion_matrix.
resnet = ResNet(BasicBlock, num_blocks=[1, 2, 2, 1], num_classes=10).to(device)

#################################################
# TODO: Change the path to the path of your     #
# trained ResNet model.                         #
#################################################
# Files written by ResNet.save_checkpoint are named
# "checkpoint_<epoch>_<accuracy with 4 decimals>.pth" inside "<run dir>/checkpoints".
cktp = "./runs/Apr24_18-06-53_IFLPC205/checkpoints/checkpoint_51_86.77.pth"
resnet.load_checkpoint(cktp, map_location=device)

# Confusion matrix of the ResNet model on the validation set.
plot_confusion_matrix(resnet, val_loader, class_names, device)