# Install all necessary modules here




In [1]:
# The install commands are wrapped in a string literal, so nothing is
# installed here; the cell merely evaluates to this string.
'''!pip install torchvision
!pip install torchmetrics
!pip install torch
!pip install numpy'''

'!pip install torchvision\n!pip install torchmetrics\n!pip install torch\n!pip install numpy'

In [2]:
!pip install torchmetrics
from tqdm.notebook import tqdm
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import torch
import os
import numpy as np
import platform
import pickle
from PIL import Image
import torchvision
from torchvision import transforms
from numpy import random
from torch.utils.data import DataLoader
import albumentations as A
from matplotlib import pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from albumentations.pytorch import ToTensorV2
import torchmetrics

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com




# Milestone Four Preparation

Today, we will begin preparing for the fourth milestone. The objective of this milestone is to train an object classification model capable of classifying the discovered cells. To successfully complete this milestone, you should follow the steps outlined below:

1. Implement a Dataset Class:
   - Create a Dataset Class specifically designed to load images from the "crops" directory along with their respective labels.
   - Considering the significant class imbalance within the dataset, you will have to make use of appropriate sampling and augmentations.
   - Resize all crops to the same size.

2. Develop a Custom Classification Model:
   - Instead of utilizing an existing model from PyTorch, design your own custom model for this task.

3. Set up Training and Validation/Test Loops:
   - Write a training and validation/test loop for training your model.
   - Select an appropriate optimizer and loss function to facilitate effective training.
   - Continue training your model until convergence is observed, as indicated by the validation loss.
   - Once training is complete, plot the training and test loss to visualize the learning progress.

4. Evaluate Model Performance:
   - Calculate the f1-score and accuracy of your model.

Finally, please upload your Jupyter notebook to Moodle.


# If you run the notebook in Colab, you have to mount the Google Drive folder containing the images. Proceed as follows:

- **First**: Open the following **[link](https://drive.google.com/drive/folders/1eCU34ZatAXQwzkzHMpV4i2gwlR6qvqpC?usp=share_link)** in a new tab.
- **Second**: Add a link (shortcut) to this shared folder in your own Google Drive.
Example: [Link](https://drive.google.com/file/d/1IcFGGIoktPkDj9-4j5IQ3evInn0c2aq-/view?usp=sharing)
- **Third**: Run the line of code below
- **Fourth**: Grant Google access to your Drive

In [9]:
# Colab-only cell: `google.colab` is importable only inside a Colab runtime.
from google.colab import drive

# Folder inside the mounted Drive that holds the slide images
# (the link/shortcut created in the steps above).
path_to_slides = '/content/gdrive/MyDrive/AgNORs/'
# Mount the user's Google Drive at /content/gdrive.
drive.mount('/content/gdrive')

ModuleNotFoundError: No module named 'google.colab'

In [10]:
# Relative folder holding the slide images; this reassigns the Drive path
# set in the Colab cell (presumably for local, non-Colab runs).
path_to_slides = 'images'

#1. Implement a Dataset Class

In [11]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that serves resized crops drawn by weighted sampling.

    All slide images referenced by the annotation frame are loaded into
    memory once. A fixed number of annotation rows is then drawn with
    weights inversely proportional to the label frequency, and each item is
    the crop described by one drawn row, resized to ``output_size``.

    Args:
        annotations_frame: pandas DataFrame with at least the columns
            ``filename`` and ``label``. Positions 1-4 of each row are read
            as crop coordinates and position 5 as the label.
        path_to_slides: Directory that contains the image files.
        num_samples: Number of rows drawn per sampling round; also the
            reported dataset length.
        crop_size: Stored on the instance but not used by this class.
        transformations: Stored on the instance but not applied by this
            class.
        output_size: ``(height, width)`` every crop is resized to.
    """

    def __init__(self, annotations_frame, path_to_slides,  num_samples=1000,crop_size=(128,128), transformations= None, output_size=(30, 30)):
        super().__init__()
        self.anno_frame = annotations_frame
        self.path_to_slides = path_to_slides
        # NOTE(review): crop_size and transformations are kept for interface
        # compatibility only; nothing below reads them.
        self.crop_size = crop_size
        self.num_samples = num_samples
        self.transformations = transformations
        self.output_size = output_size
        self.images = {}
        self._initialize()
        # Built once instead of on every __getitem__ call. No normalisation
        # is applied: pixels are only cast to float32, resized and moved to
        # channel-first tensor layout.
        self._pipeline = A.Compose([
            A.Resize(*self.output_size),
            ToTensorV2(),
        ])
        self.sample_cord_list = self._sample_cord_list()

    def _initialize(self):
        """Load every distinct image named in the frame as an RGB PIL image."""
        for filename in self.anno_frame['filename'].unique():
            img_path = f'{self.path_to_slides}/{filename}'
            # The context manager closes the file handle; convert() returns
            # an independent in-memory image that outlives it.
            with Image.open(img_path) as img:
                self.images[filename] = img.convert('RGB')

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for the idx-th sampled row."""
        row = self.sampled[idx]
        slide = row[0]
        # NOTE(review): the unpacking order mirrors the column order of the
        # annotation frame (assumed: max_x, max_y, min_x, min_y) - confirm
        # against the frame that is actually loaded.
        max_x, max_y, min_x, min_y = row[1:5]
        label = row[5]
        crop = self.images[slide].crop((min_x, min_y, max_x, max_y))
        transformed = self._pipeline(image=np.asarray(crop, dtype=np.float32))
        return transformed['image'], label

    def _sample_cord_list(self):
        """Draw ``self.num_samples`` rows with inverse-frequency weights.

        Only rows with ``label > 0`` are eligible. The weights are computed
        from the label distribution of the whole frame. The caller's frame
        is not modified.

        Returns:
            Object array with one row per drawn annotation; the same array
            is stored as ``self.sampled``.

        Raises:
            ValueError: If the frame has no row with ``label > 0``.
        """
        labels = self.anno_frame['label']
        inverse_freq = 1 / labels.value_counts(normalize=True)
        selected = self.anno_frame.loc[labels > 0]
        if selected.empty:
            raise ValueError('annotation frame contains no rows with label > 0')
        weights = selected['label'].map(inverse_freq)
        # Sampling without replacement cannot return more rows than exist,
        # so fall back to replacement only when it is unavoidable.
        with_replacement = len(selected) < self.num_samples
        sampled = selected.sample(n=self.num_samples,
                                  weights=weights,
                                  replace=with_replacement).to_numpy()
        self.sampled = sampled
        return sampled

    def __len__(self):
        """Return the number of rows drawn per sampling round."""
        return self.num_samples

    def trigger_sampling(self):
        """Redraw the sampled rows, e.g. between training epochs."""
        self.sample_cord_list = self._sample_cord_list()
    


#2. Develop a Custom Classification Model

In [12]:
class Net(nn.Module):
    """Small CNN classifier: three 3x3 convolutions and two linear layers.

    The first linear layer expects 1568 input features, i.e. 32 channels of
    7x7 after the two 2x2 poolings, which corresponds to 30x30 input images.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=12):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=(3, 3), stride=1, padding=1)
        self.act1 = nn.ReLU()
        self.drop1 = nn.Dropout(0.3)

        self.conv2 = nn.Conv2d(32, 32, kernel_size=(3, 3), stride=1, padding=1)
        self.act2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2))

        self.conv3 = nn.Conv2d(32, 32, kernel_size=(3, 3), stride=1, padding=1)
        # Own activation for the third conv stage (it used to be a second
        # assignment to act2 while forward() borrowed the fc activation).
        self.act3 = nn.ReLU()
        self.pool3 = nn.AvgPool2d(kernel_size=(2, 2))

        self.flat = nn.Flatten()

        self.fc3 = nn.Linear(1568, 512)
        self.act4 = nn.ReLU()
        self.drop3 = nn.Dropout(0.3)

        # Raw logits are returned; no softmax layer is applied here.
        self.fc4 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Map a batch of images to a ``(batch, num_classes)`` logit tensor."""
        x = self.act1(self.conv1(x))
        x = self.drop1(x)
        x = self.act2(self.conv2(x))
        x = self.pool2(x)
        x = self.act3(self.conv3(x))
        x = self.pool3(x)
        x = self.flat(x)
        x = self.act4(self.fc3(x))
        x = self.drop3(x)
        x = self.fc4(x)
        return x


#3. Set up Training and Validation/Test Loops

In [13]:
def train(dataloader, model, optimizer, criterion, epochs=5):
    """Train ``model`` on ``dataloader`` for ``epochs`` passes.

    The model is moved to the first CUDA device when one is available,
    otherwise it stays on the CPU, and is put into training mode.

    Args:
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        model: Module to optimise.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        epochs: Number of passes over ``dataloader``.

    Returns:
        List with the mean batch loss of every epoch (``nan`` for an epoch
        in which the dataloader produced no batch).
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()

    epoch_losses = []
    for _ in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in dataloader:
            # The batch must live on the same device as the model.
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Gradients accumulate by default, so clear them per batch.
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1
        epoch_losses.append(
            running_loss / num_batches if num_batches else float('nan')
        )
    return epoch_losses

def validate(dataloader, model, num_classes=12):
    """Compute and print the multiclass F1 score of ``model``.

    The model is moved to the first CUDA device when one is available,
    otherwise it stays on the CPU, and is switched to evaluation mode; it is
    not switched back afterwards.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        model: Module to evaluate.
        num_classes: Number of classes passed to the F1 metric.

    Returns:
        The value returned by the metric's ``compute()``, which is also
        printed.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    # The metric keeps internal state tensors, so it has to sit on the same
    # device as the predictions it is updated with.
    metric = torchmetrics.classification.MulticlassF1Score(
        num_classes=num_classes
    ).to(device)

    with torch.no_grad():
        for inputs, targets in dataloader:
            predictions = model(inputs.to(device))
            metric.update(predictions, targets.to(device))

    metrics_values = metric.compute()

    print(f"F1 SCORE = {metrics_values}")
    return metrics_values


In [15]:
# On Colab, load '/content/gdrive/MyDrive/AgNORs/annotation_frame.p' instead.
# NOTE(security): pickle.load can execute arbitrary code; only open
# annotation files that come from a trusted source.
with open('annotation_frame.p', 'rb') as annotation_file:
    anno_frame = pickle.load(annotation_file)
num_samples=1000
batch_size=25
num_workers=2
ds = Dataset(annotations_frame = anno_frame, path_to_slides =path_to_slides,num_samples=num_samples)
# Optional sanity check of a single item:
#img,label = ds[4]
#plt.imshow(img.permute(1, 2, 0))

# 80/20 split over the positions of the currently sampled rows.
# NOTE(review): both subsets index into the same `ds`; calling
# ds.trigger_sampling() redraws the rows behind *all* positions, so the
# validation subset then no longer refers to the rows it started with.
train_size = int(0.8 * len(ds))
test_size = len(ds) - train_size
train_ds, val_ds = torch.utils.data.random_split(ds, [train_size, test_size])

# Wrap both subsets in DataLoaders.
train_dl = DataLoader(train_ds,
                      batch_size = batch_size,
                      num_workers = num_workers)
val_dl = DataLoader(val_ds,
                    batch_size = batch_size,
                    num_workers = num_workers)

# The loaders keep their own references; only the notebook names are dropped.
del train_ds
del val_ds

#4. Evaluate Model Performance

In [None]:
# Driver cell: build the model, then alternate training and validation.
from torch.optim import Adam
model =Net()
optimizer = Adam(model.parameters(), lr=0.0001)
# `loss` holds the criterion object, not a loss value.
loss=nn.CrossEntropyLoss()
# Number of outer rounds; every train() call itself makes several passes
# over the training loader.
epoch = 20
for i in range(epoch):
    # train_dl.dataset is the random_split Subset; its .dataset attribute is
    # the underlying Dataset, whose sampled rows are redrawn every round.
    train_dl.dataset.dataset.trigger_sampling()
    train(train_dl,model,optimizer,loss)
    validate(val_dl,model)
    