# Install all necessary modules here




In [4]:
!pip install torchvision
!pip install torchmetrics
!pip install torch
!pip install numpy 

Collecting torchmetrics
  Downloading torchmetrics-1.3.2-py3-none-any.whl.metadata (19 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.2-py3-none-any.whl.metadata (4.7 kB)
Downloading torchmetrics-1.3.2-py3-none-any.whl (841 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m841.5/841.5 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25hDownloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.11.2 torchmetrics-1.3.2


# Preparation of milestone three

Today we will start preparing the third milestone. The third milestone is to train an object detector to recognize cells. To successfully complete the milestone, you will have to complete the following sub-tasks:
- Initialize a PyTorch object detector. For this you can use the code provided. If you do not have a GPU available, please **freeze** the backbone of your network, as otherwise one forward pass will take too much time. With frozen weights, you won't get as much performance as you would otherwise.
- You will have to write a [training and validation/test](https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html) loop to train your detector. Make sure you measure the convergence of the training by monitoring a detection metric like the [mAP](https://torchmetrics.readthedocs.io/en/stable/detection/mean_average_precision.html). Also, you will have to find a way to select the best model during training based on some metric.
- You will have to train your model until convergence using the dataset class you created for the last milestone. You will also have to pass your dataset to a dataloader to be able to use multithreading as well as automatic batching.
- At the end, you will have to save the **state_dict** of your trained object detector, to be able to reuse it later.

Please use a Jupyter notebook for coding your training/testing pipeline. In the end, you will have to submit that Jupyter notebook on Moodle.

# If you run the notebook in colab, you have to mount the google drive with the images. Proceed as follows:

- **First**: Open the following **[link](https://drive.google.com/drive/folders/18P74V8kli6qDZtGBLN-tPrJFu3O2NPEK?usp=sharing)** in a new tab.
- **Second**: Add a link to your google Drive.
Example: [Link](https://drive.google.com/file/d/1IcFGGIoktPkDj9-4j5IQ3evInn0c2aq-/view?usp=sharing)
- **Third**: Run the line of code below
- **Fourth**: Grant Google access to your Drive

In [4]:
from google.colab import drive

# Make the Google Drive contents visible inside the Colab file system.
drive.mount('/content/gdrive')

# Location (inside the mounted drive) of the link created in the steps above;
# this is the folder the slide images are read from.
path_to_slides = '/content/gdrive/MyDrive/AgNORs/'

Mounted at /content/gdrive


# 1. Initializing the model

In this project, we will use a RetinaNet object detector with a pre-trained MobileNetV2 backbone for our object detection task. The backbone and its weights can be easily loaded from the torchvision library. It is important to note that the anchor boxes used by the model may need to be adjusted to suit the specific task.

The behavior of the RetinaNet model changes depending on whether it is in training or evaluation mode. During training, the model expects both the images and a list of target dictionaries (one per image, with the keys `boxes` and `labels`) as input, and it returns a dictionary containing the losses. During validation, the model only expects images as input and returns the predictions (boxes, scores and labels) without calculating any losses.

In [1]:
import torch
import torchvision
from torchvision.models.detection import RetinaNet
from torchvision.models.detection.anchor_utils import AnchorGenerator
from torchvision.models import MobileNet_V2_Weights

# Take only the convolutional feature extractor (`.features`) of a MobileNetV2
# classifier initialised with its default pre-trained weights.
backbone = torchvision.models.mobilenet_v2(
    weights=MobileNet_V2_Weights.DEFAULT
).features

# RetinaNet reads the number of channels produced by the backbone from the
# `out_channels` attribute; for mobilenet_v2 this is 1280.
backbone.out_channels = 1280

# The backbone yields a single feature map, hence one inner tuple each:
# 5 anchor sizes x 3 aspect ratios = 15 anchors per spatial location.
# (Tuple[Tuple[int]] because every feature map could get its own settings.)
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)

# Assemble the detector: backbone + anchors, 2 classes.
model = RetinaNet(
    backbone,
    num_classes=2,
    anchor_generator=anchor_generator,
)

# The backbone is frozen here (recommended when no GPU is available).
# Remove or comment out this loop to fine-tune the backbone as well.
for p in model.backbone.parameters():
    p.requires_grad_(False)



Downloading: "https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth" to /Users/alexandervaptsarov/.cache/torch/hub/checkpoints/mobilenet_v2-7ebf99e0.pth
100%|███████████████████████████████████████████| 13.6M/13.6M [00:18<00:00, 772kB/s]


In [2]:
import torch
import numpy as np
import platform
import torch
from torch.utils.data import DataLoader
import pickle
from PIL import Image
from torchvision import transforms
from numpy import random
class Dataset(torch.utils.data.Dataset):
    """Detection dataset that serves random fixed-size crops of annotated slides.

    All slide images listed in the annotation frame are loaded into memory
    once. Every "pseudo epoch" consists of ``pseudo_epoch_length`` randomly
    placed crops; call :meth:`trigger_sampling` to draw a fresh set of crops.

    Each item is ``(image, target)`` where ``image`` is the tensor produced by
    ``torchvision.transforms.ToTensor`` and ``target`` is a dict with

    * ``'boxes'``  - float32 tensor of shape ``(N, 4)`` in ``(x_min, y_min,
      x_max, y_max)`` crop coordinates,
    * ``'labels'`` - int64 tensor of shape ``(N,)``; every box gets class 1.

    Only boxes lying strictly inside the crop are kept. A crop without any box
    yields ``boxes`` of shape ``(0, 4)`` and ``labels`` of shape ``(0,)``.

    Args:
        annotations_frame: Either the annotation table itself (an object with
            the columns ``filename``, ``label``, ``min_x``, ``min_y``,
            ``max_x``, ``max_y``) or the path to a pickle file containing it.
        path_to_slides: Directory that contains the image files named in the
            ``filename`` column.
        crop_size: ``(width, height)`` of the crops in pixels.
        pseudo_epoch_length: Number of crops per pseudo epoch (``len(self)``).
        transformations: Optional callable invoked as
            ``transformations(image=<ndarray>, bboxes=<list>)`` that returns a
            mapping with the keys ``'image'`` and ``'bboxes'``
            (presumably an albumentations pipeline - TODO confirm). Each box
            is passed as ``[x_min, y_min, x_max, y_max, label]``.
    """

    def __init__(self, annotations_frame,
                 path_to_slides,
                 crop_size = (128,128),
                 pseudo_epoch_length:int = 1000,
                 transformations = None):
        import os  # local import: keeps this class self-contained

        super().__init__()

        # Kept for backward compatibility; paths are built with os.path.join.
        self.separator = os.sep

        # Honour the argument instead of a hard-coded location: a path is
        # unpickled, anything else is used directly as the annotation table.
        if isinstance(annotations_frame, (str, bytes, os.PathLike)):
            # NOTE(security): pickle.load can execute arbitrary code - only
            # load annotation files that come from a trusted source.
            with open(annotations_frame, 'rb') as f:
                annotations_frame = pickle.load(f)
        self.anno_frame = annotations_frame

        self.path_to_slides = path_to_slides
        self.crop_size = crop_size
        self.pseudo_epoch_length = pseudo_epoch_length

        # slide_dict: filename -> RGB image
        # annotations_list: rows of
        #   [slide_name, label, min_x, min_y, max_x, max_y]
        self.slide_dict, self.annotations_list = self._initialize()
        self.sample_cord_list = self._sample_cord_list()

        # set up transformations
        self.transformations = transformations
        self.transform_to_tensor = transforms.Compose([transforms.ToTensor()])

    def _initialize(self):
        """Load every annotated slide and flatten its boxes into a list.

        Returns:
            tuple: ``(slide_dict, annotations_list)`` - a dict mapping each
            filename to its RGB image, and a list of
            ``[slide, label, min_x, min_y, max_x, max_y]`` entries.
        """
        import os  # local import: keeps this class self-contained

        slide_dict = {}
        annotations_list = []
        box_columns = ['label', 'min_x', 'min_y', 'max_x', 'max_y']

        for slide in self.anno_frame.filename.unique():
            # Open each slide exactly once; convert() reads the pixel data, so
            # the underlying file can be closed right away.
            with Image.open(os.path.join(self.path_to_slides, slide)) as slide_file:
                slide_dict[slide] = slide_file.convert('RGB')

            # collect all bounding boxes that belong to this slide
            slide_rows = self.anno_frame.loc[self.anno_frame.filename == slide, box_columns]
            for label, min_x, min_y, max_x, max_y in slide_rows.itertuples(index=False, name=None):
                annotations_list.append([slide, label, min_x, min_y, max_x, max_y])

        return slide_dict, annotations_list

    def __getitem__(self,index):
        """Return the ``index``-th sampled crop and its detection target."""
        slide, x_cord, y_cord = self.sample_cord_list[index]
        # the sampled coordinates are stored as strings next to the slide name
        x_cord = int(x_cord)
        y_cord = int(y_cord)

        # cut the crop out of the slide
        img = self.slide_dict[slide].crop(
            (x_cord, y_cord, x_cord + self.crop_size[0], y_cord + self.crop_size[1])
        )

        # boxes inside the crop, shifted from slide to crop coordinates and
        # reordered to [x_min, y_min, x_max, y_max, label]
        labels_boxes = [
            [min_x - x_cord, min_y - y_cord, max_x - x_cord, max_y - y_cord, label]
            for label, min_x, min_y, max_x, max_y in self._get_boxes_and_label(slide, x_cord, y_cord)
        ]

        # Apply the transformations to every crop, including crops without
        # boxes, so that empty samples are augmented like all others.
        if self.transformations is not None:
            transformed = self.transformations(image = np.array(img), bboxes = labels_boxes)
            img = transformed['image']
            labels_boxes = transformed['bboxes']

        if len(labels_boxes) > 0:
            boxes = torch.tensor([list(line[:-1]) for line in labels_boxes], dtype = torch.float32)
        else:
            # no labelled instance on the crop (or all boxes were dropped by
            # the transformations): keep the (0, 4) shape detectors expect
            boxes = torch.zeros((0, 4), dtype = torch.float32)

        # exactly one label per box - zero labels for an empty crop
        labels = torch.ones(boxes.shape[0], dtype = torch.int64)
        img = self.transform_to_tensor(img)

        target = {
            'boxes': boxes,
            'labels': labels
        }

        return img, target

    def _sample_cord_list(self):
        """Draw ``pseudo_epoch_length`` random crops.

        Returns:
            numpy.ndarray: string array of shape ``(pseudo_epoch_length, 3)``
            whose rows are ``[slide_name, x, y]`` with ``(x, y)`` the top-left
            corner of a crop that lies fully inside its slide.

        Raises:
            ValueError: if a sampled slide is smaller than ``crop_size``.
        """
        # select slides from which to sample an image
        slide_names = np.array(list(self.slide_dict.keys()))
        slide_indice = random.choice(np.arange(len(slide_names)), size = self.pseudo_epoch_length, replace = True)
        slides = slide_names[slide_indice]

        # Exclusive upper bound of the top-left corner, computed per sampled
        # slide so that slides of different sizes are supported. The "+ 1"
        # makes the last valid offset reachable (randint's high is exclusive).
        slide_sizes = np.array(
            [self.slide_dict[name].size for name in slides], dtype = np.int64
        ).reshape(-1, 2)
        upper_bounds = slide_sizes - np.asarray(self.crop_size, dtype = np.int64) + 1
        if (upper_bounds <= 0).any():
            raise ValueError('crop_size is larger than at least one slide')

        cordinates = random.randint(low = 0, high = upper_bounds, size = upper_bounds.shape)
        # coordinates are stored as strings so they fit into one array with the names
        return np.concatenate((slides.reshape(-1,1), cordinates.astype(str)), axis = -1)

    def __len__(self):
        """Number of crops in one pseudo epoch."""
        return self.pseudo_epoch_length

    def _get_boxes_and_label(self,slide,x_cord,y_cord):
        """Return ``[label, min_x, min_y, max_x, max_y]`` (slide coordinates)
        for every box of ``slide`` that lies strictly inside the crop whose
        top-left corner is ``(x_cord, y_cord)``."""
        x_limit = x_cord + self.crop_size[0]
        y_limit = y_cord + self.crop_size[1]
        return [
            line[1:]
            for line in self.annotations_list
            if line[0] == slide
            and line[2] > x_cord and line[3] > y_cord
            and line[4] < x_limit and line[5] < y_limit
        ]

    def collate_fn(self, batch):
        """
        Since each image may have a different number of objects, we need a collate function (to be passed to the DataLoader).
        The images are stacked into one tensor; the targets are kept as a list.
        Note: this need not be defined in this Class, can be standalone.
        :param batch: an iterable of N (image, target) pairs from __getitem__()
        :return: a tensor of N stacked images and a list of the N target dicts
        """
        images = [img for img, _ in batch]
        targets = [target for _, target in batch]

        images = torch.stack(images, dim=0)

        return images, targets

    def trigger_sampling(self):
        """Replace the current crops with a freshly sampled set."""
        self.sample_cord_list = self._sample_cord_list()





# 2. Setting up an optimizer, a detection metric and the train and validation dataloaders

To train the object detector, it is necessary to select an appropriate optimizer. Additionally, the torchmetrics class needs to be instantiated before it can be used for evaluation or tracking metrics during training.
Additionally, initialize a training and a validation dataloader for your dataset. For more information on how to set up your dataloaders, have a look [here](https://pytorch.org/tutorials/beginner/basics/data_tutorial.html).


In [5]:
# add your code
import torch
import torch.optim as optim
import torchmetrics

from torchmetrics.detection import MeanAveragePrecision

# Random-crop dataset over the annotated slides (1000 crops of 128x128 px).
dataset = Dataset("/content/gdrive/MyDrive/annotation_frame.p", '/content/gdrive/MyDrive/AgNORs/', crop_size=(128, 128), pseudo_epoch_length=1000, transformations=None)

# 80 % of the crops for training, the remainder for validation. The seeded
# generator makes the split identical on every run of the notebook.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

batch_size = 16

# collate_fn keeps the per-image targets as a list, because every image can
# contain a different number of boxes.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=dataset.collate_fn)
val_loader = DataLoader(val_dataset, batch_size=batch_size, collate_fn=dataset.collate_fn)

# Hand only the trainable parameters to the optimizer; frozen (backbone)
# parameters never receive a gradient.
optimizer = optim.SGD(
    [p for p in model.parameters() if p.requires_grad],
    lr=0.001,
    momentum=0.9,
)

# Detection metric: mean average precision over predicted vs. annotated
# boxes. Classification metrics such as Accuracy/Precision/Recall cannot
# consume the list-of-dicts output of an object detector.
metrics = MeanAveragePrecision()


FileNotFoundError: [Errno 2] No such file or directory: '/content/gdrive/MyDrive/annotation_frame.p'

# 3. Train and validation loop

Please write two functions, one for training and one for evaluating your object detector. Use these functions to train the detector for a few epochs. During training, track both the training losses and validation metrics to monitor the model's performance. Save the best detector as observed by the validation metric.

In [None]:
import torch
import torch.optim as optim
import torchmetrics
import torchvision

def train(model, train_loader, optimizer, metrics, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Detector that, in training mode, maps ``(images, targets)`` to
            a dict of loss tensors.
        train_loader: Iterable of ``(images, targets)`` batches; ``targets``
            is a list of dicts of tensors.
        optimizer: Optimizer that updates the model parameters.
        metrics: Unused here; accepted so that the signature mirrors the
            evaluation function.
        device: Device the batch is moved to before the forward pass.

    Returns:
        float: Sum of the per-batch total losses divided by
        ``len(train_loader)``.
    """
    model.train()
    batch_losses = []

    for batch_images, batch_targets in train_loader:
        inputs = tuple(img.to(device) for img in batch_images)
        annotations = [
            {key: value.to(device) for key, value in target.items()}
            for target in batch_targets
        ]

        optimizer.zero_grad()

        # forward pass: the individual loss terms are summed into one scalar
        loss_dict = model(inputs, annotations)
        batch_loss = sum(loss_dict.values())
        batch_losses.append(batch_loss.item())

        # backward pass and parameter update
        batch_loss.backward()
        optimizer.step()

    return sum(batch_losses, 0.0) / len(train_loader)


def evaluate(model, data_loader, metrics, device):
    """Evaluate a detector on ``data_loader`` with a torchmetrics metric.

    Args:
        model: Detector that, in eval mode, maps a list of images to a list of
            prediction dicts (one dict of tensors per image).
        data_loader: Iterable of ``(images, targets)`` batches; ``targets`` is
            a list of dicts with the keys ``'boxes'`` and ``'labels'``.
        metrics: Metric object offering ``update(preds, targets)``,
            ``compute()`` and ``reset()`` (e.g. ``MeanAveragePrecision``).
            It is expected to live on the CPU.
        device: Device the images are moved to for the forward pass.

    Returns:
        dict: The computed metric values. If the metric reports a ``'map'``
        entry, it is additionally exposed under the key ``'mAP'``.
    """
    model.eval()

    with torch.no_grad():
        for images, targets in data_loader:
            images = [image.to(device) for image in images]

            # Forward pass (eval mode: predictions only, no losses)
            outputs = model(images)

            # The metric accumulates its state on the CPU, so predictions are
            # moved there and the targets never need to leave it.
            predictions = [
                {k: v.detach().cpu() for k, v in output.items()}
                for output in outputs
            ]
            ground_truth = []
            for target in targets:
                target = {k: v.detach().cpu() for k, v in target.items()}
                if 'boxes' in target and 'labels' in target:
                    # The metric requires exactly one label per box; drop
                    # surplus labels (e.g. a placeholder label attached to a
                    # crop without boxes).
                    target['labels'] = target['labels'][: target['boxes'].shape[0]]
                ground_truth.append(target)

            metrics.update(predictions, ground_truth)

    computed = metrics.compute()
    # start the next evaluation from a clean state
    metrics.reset()

    if isinstance(computed, dict):
        metric_results = dict(computed)
    else:
        metric_results = {type(metrics).__name__: computed}

    # stable key for model selection
    if 'map' in metric_results and 'mAP' not in metric_results:
        metric_results['mAP'] = metric_results['map']

    return metric_results


# Training loop
import copy

from torchmetrics.detection import MeanAveragePrecision

# -inf so that the first epoch always becomes the initial "best" model
best_metric = float('-inf')
best_model = None
num_epochs = 3
# file the best state_dict is written to, so the detector can be reused later
BEST_MODEL_PATH = 'best_detector_state_dict.pth'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# only trainable parameters are optimised (frozen ones get no gradient)
optimizer = optim.SGD(
    [p for p in model.parameters() if p.requires_grad],
    lr=0.001,
    momentum=0.9,
)

# Detection metric; classification metrics (accuracy, precision, recall)
# cannot consume the list-of-dicts predictions of an object detector.
metrics = MeanAveragePrecision()

for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, metrics, device)
    val_metrics = evaluate(model, val_loader, metrics, device)

    print(f"Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.4f}")
    print("Validation Metrics:")
    for metric_name, metric_value in val_metrics.items():
        # only scalar entries can be printed with a float format
        if torch.is_tensor(metric_value) and metric_value.numel() != 1:
            continue
        print(f"{metric_name}: {float(metric_value):.4f}")

    # Save the best model based on validation metric
    current_metric = float(val_metrics['mAP'])
    if current_metric > best_metric:
        best_metric = current_metric
        # deepcopy: state_dict().copy() is shallow, its tensors would keep
        # changing while training continues
        best_model = copy.deepcopy(model.state_dict())
        torch.save(best_model, BEST_MODEL_PATH)

# Load the best model
if best_model is not None:
    model.load_state_dict(best_model)

# 4. Show some results and save your detector

In [None]:
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F

def visualize_results(model, dataloader, num_images=5, score_threshold=0.5):
    """Plot predictions (red) and annotations (green) for one batch.

    Args:
        model: Detector that, in eval mode, returns one dict per image with
            the keys ``'boxes'``, ``'labels'`` and ``'scores'``.
        dataloader: Iterable of ``(images, targets)`` batches, where
            ``images`` is a stacked image tensor and ``targets`` a list of
            dicts with ``'boxes'`` and ``'labels'``. Only the first batch is
            used.
        num_images: Maximum number of images to show; capped at the batch
            size.
        score_threshold: Predictions with a lower score are not drawn.
    """
    from matplotlib.patches import Rectangle

    def _draw_boxes(ax, boxes, labels, color):
        # boxes are (x_min, y_min, x_max, y_max); Rectangle needs the corner
        # plus width and height
        for (x_min, y_min, x_max, y_max), label in zip(boxes.tolist(), labels.tolist()):
            ax.add_patch(Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                                   fill=False, edgecolor=color, linewidth=1.5))
            ax.text(x_min, y_min, f'Label: {label}', color=color)

    # run on whatever device the model currently lives on
    model_device = next(model.parameters()).device

    model.eval()
    with torch.no_grad():
        for images, targets in dataloader:
            outputs = model([image.to(model_device) for image in images])

            # never index past the end of the batch
            for i in range(min(num_images, len(outputs))):
                prediction = {k: v.cpu() for k, v in outputs[i].items()}
                keep = prediction['scores'] >= score_threshold

                fig, ax = plt.subplots()
                ax.imshow(F.to_pil_image(images[i].cpu()))
                ax.set_title('Predictions (red) vs. annotations (green)')

                # each image has its own predictions and its own targets
                _draw_boxes(ax, prediction['boxes'][keep], prediction['labels'][keep], 'r')
                _draw_boxes(ax, targets[i]['boxes'].cpu(), targets[i]['labels'].cpu(), 'g')

                ax.axis('off')
                plt.show()

            # one batch is enough for a visual check
            break


visualize_results(model, val_loader)


In [13]:
from torchvision.models import efficientnet_b0
from torchviz import make_dot

# NOTE: this rebinds the name `model`; the detector trained above is no
# longer reachable through it after this cell has been run.
model = efficientnet_b0(weights='IMAGENET1K_V1')

# Freeze every parameter of the pre-trained network ...
for name, param in model.named_parameters():
    param.requires_grad = False

# ... and make only the classification head trainable again.
# `requires_grad_` switches the flag on all parameters of the sub-module;
# assigning `model.classifier.requires_grad = True` would merely create a
# plain attribute on the module and leave its parameters frozen.
# Head shapes: classifier.1.weight --> torch.Size([1000, 1280]),
#              classifier.1.bias   --> torch.Size([1000])
model.classifier.requires_grad_(True)