# Preparation of Milestone 2

Today we will start to prepare the second milestone. The goals of this milestone are

- Implement an image classification pipeline
    - Implement a Pytorch classification dataset
    - Implement at least two custom augmentations (do not just import one from Torchvision or albumentations)
    - Implement a training and validation loop
    - Optimize your model
    - Evaluate the performance of your model with the F1 score on the test set

# Pytorch Classification Dataset 

To train neural networks in pytorch with custom datasets, custom dataset classes are needed. Today we will work on a dataset for the term project. A general introduction to custom datasets can be found [here](https://pytorch.org/tutorials/beginner/basics/data_tutorial.html).



Your dataset must have several properties that are specific to the task:

1. Since you only have a small number of images, you will need to use image augmentation. Image augmentation is always applied on the fly. **Never apply image augmentation to images and save the augmented versions to disk for sampling!** Normally, you can use the image augmentation implemented in albumentations or torchvision. **For this project, you will need to implement two augmentations yourself**. I would suggest implementing an augmentation that applies a Gaussian blur to the image and a function that applies a color jitter to the image. Implement the augmentations in a way that they can be **composed** with other TorchVision transformations.

2. Apply minority class oversampling and majority class undersampling for better generalization. You can either implement an initialization function in your dataset where you first sample the coordinates for this epoch, or sample the coordinates on the fly in the __getitem__ method.

3. Your dataset class will inherit from torch.utils.data.Dataset. But you must override the **__len__** and **__getitem__** functions.

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torchmetrics
from PIL import Image, ImageFilter, ImageEnhance
from sklearn.metrics import f1_score
from torch.utils.data import DataLoader, SubsetRandomSampler, Dataset
from torchmetrics import Accuracy
from torchmetrics import F1Score
from torchvision.io import read_image
from torchvision.models import efficientnet_b0
from torchvision.transforms import ToTensor, transforms, Resize
from tqdm import tqdm



In [None]:

class CustomDataset(torch.utils.data.Dataset):
    """Classification dataset that serves annotated crops of images.

    Every row of the annotation CSV is unpacked as
    ``(filename, max_x, max_y, min_x, min_y, label)``. A sample is the region
    of ``img_dir/filename`` bounded by those coordinates, paired with
    ``label``.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Read the annotation table and remember where the images live.

        Args:
            annotations_file: Path of the CSV file read with ``pandas.read_csv``.
            img_dir: Directory that the file names in the CSV are relative to.
            transform: Optional callable applied to the cropped PIL image.
            target_transform: Optional callable applied to the label.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of annotation rows (one sample per row)."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for annotation row ``idx``.

        The image is loaded as RGB, cropped to the annotated box and then
        passed through ``transform``; the label is passed through
        ``target_transform``. Either transform is skipped when it is falsy.
        """
        filename, max_x, max_y, min_x, min_y, label = self.img_labels.iloc[idx]
        img_path = os.path.join(self.img_dir, filename)

        # convert() returns an independent in-memory image, so the file handle
        # can be released as soon as the conversion is done.
        with Image.open(img_path) as source:
            img = source.convert("RGB")
        # PIL expects the box as (left, upper, right, lower).
        img = img.crop((min_x, min_y, max_x, max_y))

        if self.transform:
            img = self.transform(img)

        if self.target_transform:
            label = self.target_transform(label)

        return img, label


# Training/Validation/Test Split

After you have successfully created your dataset, you need to define a training, validation, and test split of the data. Split the images at the image level! Splits must not overlap!

1. Split the data.
2. Initialize a dataset for each split and pass it to a Pytorch dataloader.

In [None]:
# The ImageNet weights used later were trained on inputs normalized with the
# ImageNet channel statistics, so the same normalization is applied after the
# image has been resized and converted to a tensor.
transform = transforms.Compose([
    Resize((224, 224)),
    ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# The transform is handed to the constructor, so no later re-assignment of
# dataset.transform is needed.
dataset = CustomDataset(
    annotations_file='annotation_frame.csv',
    img_dir='Dataset',
    transform=transform,
    target_transform=None,
)
'''labels_map = [0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]
dataset.transform = False

figure = plt.figure(figsize=(10, 10))
cols, rows = 5, 5
for i in range(1, cols * rows + 1):
    sample_idx = torch.randint(dataset.__len__(), size=(1,)).item()
    img, label = dataset.__getitem__(sample_idx)
    figure.add_subplot(rows, cols, i)
    plt.title(labels_map[label])
    plt.axis('off')
    plt.imshow(img)
    
plt.show()'''

In [None]:
# Split at the image level: every annotation row (crop) of one image file must
# end up in the same split, otherwise crops of a single image leak between
# training, validation and test data.
image_names = dataset.img_labels.iloc[:, 0]

# A fixed seed keeps the three splits identical across notebook restarts, so
# the test images are never ones the saved model was trained on.
split_rng = np.random.default_rng(42)
shuffled_images = split_rng.permutation(image_names.unique())

# Sizes are counted in images (70 % / 15 % / remainder), not in annotation rows.
train_size = int(0.7 * len(shuffled_images))
eval_size = int(0.15 * len(shuffled_images))
test_size = len(shuffled_images) - train_size - eval_size

train_images = shuffled_images[:train_size]
eval_images = shuffled_images[train_size:train_size + eval_size]
test_images = shuffled_images[train_size + eval_size:]


def _rows_of(images):
    """Return the positional indices of all annotation rows belonging to ``images``."""
    return np.flatnonzero(image_names.isin(images).to_numpy()).tolist()


train_dataset = torch.utils.data.Subset(dataset, _rows_of(train_images))
eval_dataset = torch.utils.data.Subset(dataset, _rows_of(eval_images))
test_dataset = torch.utils.data.Subset(dataset, _rows_of(test_images))

# Only the training data is shuffled; evaluation order does not matter.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
eval_loader = DataLoader(dataset=eval_dataset, batch_size=32)
test_loader = DataLoader(dataset=test_dataset, batch_size=32)



# Initialize a classification model from pytorch.

For classification we will use a pre-trained model from pytorch. I would suggest an efficientnet_b0. Since we do not have too much training data and to mitigate overfitting, we will use ImageNet weights and only train the last layer. We will also freeze most of the network since we do not have much data and to save training time.

1. Load the model from torchvision (see the [torchvision models documentation](https://pytorch.org/vision/stable/models.html)).
2. Load the respective weights.
3. Freeze all but the last layer.
4. If you have a gpu available, bring your model to the gpu.

In [None]:
# Number of target classes; matches the value the metrics in this notebook use.
num_classes = 12

# Step 1 + 2: Load the model from torchvision together with its ImageNet weights
model = efficientnet_b0(weights='DEFAULT')

# Step 3: Freeze the whole pre-trained network ...
for param in model.parameters():
    param.requires_grad = False

# ... and replace the final linear layer. The pre-trained head emits 1000
# ImageNet logits, so without this step argmax could return class ids outside
# the 12 classes of this task. A freshly created layer is trainable by
# default, which makes it the only part of the network that is optimized.
in_features = model.classifier[1].in_features
model.classifier[1] = torch.nn.Linear(in_features, num_classes)

# Step 4: Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Print the model architecture
print(model)



# Optimization Loop

a) The next step is the actual model optimization. But before we implement the training and validation loops, we need to initialize a loss function and an optimizer. We can use Adam as the optimizer and Cross Entropy Loss as the loss function from pytorch. We also need to set a learning rate for the model optimization.

1. Initialize a Loss Function
2. Initializing an Optimizer and Setting a Learning Rate


In [None]:
# Loss function: cross entropy between the model's logits and the class labels.
loss_fn = torch.nn.CrossEntropyLoss()

# Optimizer: Adam over the model's parameters with a learning rate of 0.001.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)


b) Now, we will implement an optimization loop. First we implement the training phase. Implement a function (**train_one_epoch**) which performs the following steps:

1. Define a variable running_loss in which you can store the change in loss during training. Since we will later chain the function with a validation function, bring the model to training mode if needed.
2. Iterate over your train dataloader; for each batch you must:
    1. Transfer the tensors with the images and labels to the GPU or CPU, depending on the device on which your model is located.
    2. Delete the old gradients in the optimizer.
    3. Perform the actual forward pass, i.e. pass the images to the model and calculate the predictions (model(imgs))
    4. Calculate the loss
    5. Backpropagate the loss (loss.backward())
    6. Perform the optimization step (optimizer.step())
    7. Add the loss of the current batch to your running_loss
    8. Print the loss every x batches, also return the average loss of the epoch

**Tip:** use **[tqdm.notebook.tqdm](https://tqdm.github.io/docs/notebook/)** to visualize the progress during the epoch.

In [None]:
def train_one_epoch(model, train_loader, loss_fn, device, optimizer=None, log_every=10):
    """Train ``model`` for one full pass over ``train_loader``.

    Args:
        model: Network to optimize; it is moved to ``device`` and put into
            training mode.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        loss_fn: Callable computing the loss from ``(outputs, labels)``.
        device: Device the model and every batch are moved to.
        optimizer: Optimizer used for the update step. When omitted, the
            module-level ``optimizer`` is used so that existing
            four-argument calls keep working.
        log_every: Print the mean batch loss every ``log_every`` batches;
            a falsy value disables the intermediate printing.

    Returns:
        The mean per-sample loss over the epoch, or ``nan`` if the loader
        yielded no samples.

    Raises:
        ValueError: If no optimizer is passed and none exists at module level.
    """
    if optimizer is None:
        optimizer = globals().get("optimizer")
        if optimizer is None:
            raise ValueError("train_one_epoch needs an optimizer")

    model = model.to(device)
    model.train()

    epoch_loss = 0.0    # sum of per-sample losses over the whole epoch
    running_loss = 0.0  # sum of batch losses since the last progress print
    n_samples = 0

    for batch_idx, (img, labels) in enumerate(tqdm(train_loader, desc="Training iteration")):
        # Batches must live on the same device as the model.
        img, labels = img.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(img)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        batch_size = img.size(0)
        # Weight by batch size so a smaller last batch does not skew the mean.
        epoch_loss += batch_loss * batch_size
        n_samples += batch_size
        running_loss += batch_loss

        if log_every and (batch_idx + 1) % log_every == 0:
            avg_loss = running_loss / log_every
            print(f'Batch {batch_idx + 1}/{len(train_loader)}, Loss: {avg_loss:.4f}')
            running_loss = 0.0

    # The epoch average is computed once, after every batch has been processed.
    return epoch_loss / n_samples if n_samples else float("nan")
      
    
    

c) We also need a validation loop, which is called after a training loop to determine the training progress on the validation data. Implement this in a function (**validate_one_epoch**). This loop looks very similar to the training loop, but we can skip the backpropagation here. Also, use a metric from Torchmetrics to measure the performance of your models. The validation function should also return the loss or the metric so you can use it to select the best model during training.

1. Turn off gradient storage. Put your model in evaluation mode. Since there is no backpropagation in the validation, we do not need it and can save a lot of memory.
2. As with the training loop, iterate over the validation data loader and perform a forward pass for each batch.
3. Compute the loss for each batch, but do not backpropagate it.
4. Store and output the loss and return the average loss of the epoch
5. Update your metric with the predictions and labels of each batch
6. Calculate your metric after the last batch.


In [None]:
'''def validate_one_epoch(model, eval_loader, loss_fn, metric, device):
    model.eval()
    metric.reset()
     
    
    
    val_loss= 0.0
    with torch.no_grad():
        for batch_idx, (img, labels) in enumerate(tqdm(eval_loader, desc="Training iteration")):
            img, labels = img.to(device), labels.to(device)
    

            outputs = model(img)
    
            loss = loss_fn(outputs, labels)
    
            val_loss += loss.item() * img.size(0)
            pred_labels = torch.argmax(outputs, dim=1)
            metric.update(pred_labels, labels)
    

    
    
    val_loss /= len(eval_loader.dataset) 
    metric_result = metric.compute()
    
    return val_loss, metric_result

num_classes = 12
metric = Accuracy(task= 'MULTICLASS' , num_classes=num_classes)
val_loss, val_metric = validate_one_epoch(model, eval_loader, loss_fn, device, metric)'''

import torch
import torch.nn.functional as F
from torchmetrics import Accuracy

def validate_one_epoch(model, eval_loader, loss_fn, device, metric):
    """Evaluate ``model`` on ``eval_loader`` without updating any weights.

    Args:
        model: Network to evaluate; it is put into evaluation mode.
        eval_loader: Iterable yielding ``(images, labels)`` batches.
        loss_fn: Callable computing the loss from ``(outputs, labels)``.
        device: Device the batches and the metric are moved to.
        metric: Object offering ``to``, ``reset``, ``update`` and ``compute``
            (for example a torchmetrics metric). It is moved to ``device``
            and reset in place before the first batch.

    Returns:
        ``(avg_loss, metric_value)``: the mean per-sample loss (``nan`` if
        the loader yielded no samples) and the result of ``metric.compute()``.
    """
    model.eval()
    # The metric accumulates on its own device; it has to match the device of
    # the predictions it is updated with.
    metric.to(device)
    metric.reset()

    total_loss = 0.0
    n_samples = 0

    # No gradients are needed for validation.
    with torch.no_grad():
        for img, labels in eval_loader:
            img, labels = img.to(device), labels.to(device)

            outputs = model(img)
            batch_size = img.size(0)

            # Weight by batch size so a smaller last batch does not skew the mean.
            total_loss += loss_fn(outputs, labels).item() * batch_size
            n_samples += batch_size

            pred_labels = torch.argmax(outputs, dim=1)
            metric.update(pred_labels, labels)

    # Average over the samples actually seen instead of assuming the loader
    # visits every element of its dataset.
    avg_loss = total_loss / n_samples if n_samples else float("nan")
    metric_value = metric.compute()

    print(f"Validation Loss: {avg_loss}, Metric Value: {metric_value}")
    return avg_loss, metric_value

num_classes = 12

# The metric is created on the CPU; moving it to `device` keeps its state on
# the same device as the predictions it is updated with when a GPU is used.
metric = Accuracy(task='multiclass', num_classes=num_classes).to(device)

# Baseline validation run before any training has happened.
val_loss, val_metric = validate_one_epoch(model, eval_loader, loss_fn, device, metric)



d) Combine training and validation in an optimization loop. This loop is run **n_epochs** times. In each iteration, first call your training function and then your validation function. In each epoch, check to see if your model has improved, and if so, save the model. Do not save a new checkpoint in each epoch, as this will consume too much memory. Save the losses of the training and validation functions over the epochs to plot them at the end of your training. If you do not have a GPU available, you do not need to train your model to convergence. Just train a few steps.

In [None]:
def optimization_loop(model, train_loader, eval_loader, loss_fn, optimizer, device, n_epochs, model_save_path=None):
    """Alternate training and validation for ``n_epochs`` and plot both loss curves.

    After each epoch the validation loss is compared with the best value seen
    so far; on improvement the model's ``state_dict`` is written to
    ``model_save_path`` (skipped when that path is falsy).

    Notes:
        ``optimizer`` is accepted but not forwarded: ``train_one_epoch`` is
        called with four arguments. ``metric`` is read from module scope.
    """
    best_val_loss = float('inf')
    train_losses, val_losses = [], []

    for epoch in range(n_epochs):
        print(f"Epoch {epoch + 1}/{n_epochs}")

        # Training phase
        train_loss = train_one_epoch(model, train_loader, loss_fn, device)
        print(f"Train Loss: {train_loss:.4f}")

        # Validation phase
        val_loss, val_metric = validate_one_epoch(model, eval_loader, loss_fn, device, metric)
        print(f"Validation Loss: {val_loss:.4f}, Metric Value: {val_metric}")

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # Only keep a checkpoint of the best model seen so far.
        improved = val_loss < best_val_loss
        if improved:
            best_val_loss = val_loss
        if improved and model_save_path:
            torch.save(model.state_dict(), model_save_path)
            print("Model saved.")

    # One point per epoch, numbered from 1.
    epochs = range(1, n_epochs + 1)
    for losses, curve_label in ((train_losses, 'Training Loss'), (val_losses, 'Validation Loss')):
        plt.plot(epochs, losses, label=curve_label)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Losses')
    plt.legend()
    plt.show()

# Run configuration: use the GPU when one is available, train for ten epochs
# and keep the best checkpoint under a fixed file name.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs = 10
model_save_path = 'best_model.pth'  # Path to save the best model

optimization_loop(
    model=model,
    train_loader=train_loader,
    eval_loader=eval_loader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    device=device,
    n_epochs=n_epochs,
    model_save_path=model_save_path,
)

# Model testing

Once the optimization is complete, you can test your model on the test data. This means that you apply your model to the images in your dataset. To do this, iterate over all instances in your test data set.

1. Initialize the best performing model.
2. Initialize the F1 score metric from Torchmetrics.
3. Iterate over all instances in the test dataset and update your metric after each batch.
4. Compute the final metric.

In [1]:
model_path = 'best_model.pth'

# Restore the best checkpoint. map_location lets a checkpoint that was saved
# on a GPU be loaded on a machine that only has a CPU.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

progress_bar = tqdm(test_loader, desc='Testing')

# The metric is moved to the same device as the model outputs it is fed with.
metric = F1Score(task='multiclass', num_classes=12).to(device)

# No gradients are needed for testing.
with torch.no_grad():
    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)

        metric.update(outputs, labels)

# The score is computed once, over all accumulated batches.
metric_value = metric.compute()
print(f"F1 score: {metric_value}")

NameError: name 'model' is not defined