In [1]:
import os
import cv2
import torch
import torchvision
import numpy as np
import pandas as pd
import torch.nn as nn
import albumentations as A
import torch.optim as optim
import matplotlib.pyplot as plt

from glob import glob
from tqdm import tqdm
from termcolor import colored
from matplotlib import patches
from collections import Counter
from torchvision import transforms
from torch.utils.data import DataLoader
from albumentations.pytorch import ToTensorV2

In [None]:
def convert_to_yolo_format(target, img_width, img_height, class_mapping):
    """Convert a VOC-style annotation dict into normalized YOLO box rows.

    Args:
        target: Nested dict with ``target["annotation"]["object"]`` (a single
            object dict or a list of them, each holding ``"name"`` and a
            ``"bndbox"`` with ``xmin``/``ymin``/``xmax``/``ymax``) and
            ``target["annotation"]["size"]`` holding ``width`` and ``height``.
        img_width: Unused; kept for interface compatibility. Normalization
            uses the size stored in the annotation itself.
        img_height: Unused; kept for interface compatibility.
        class_mapping: Mapping from class name to class id. Names missing
            from the mapping fall back to id ``0``.

    Returns:
        ``np.ndarray`` of shape ``(num_objects, 5)`` whose rows are
        ``[class_id, x_center, y_center, width, height]``, with the four box
        values expressed as fractions of the annotated image size. The shape
        is ``(0, 5)`` when there are no objects.
    """
    annotation = target["annotation"]
    annotations = annotation["object"]

    real_width = int(annotation["size"]["width"])
    real_height = int(annotation["size"]["height"])

    # A single object is stored as a bare dict rather than a one-element list.
    if not isinstance(annotations, list):
        annotations = [annotations]

    boxes = []

    for anno in annotations:
        bndbox = anno["bndbox"]

        # x coordinates are normalized by the width, y coordinates by the height.
        xmin = int(bndbox["xmin"]) / real_width
        xmax = int(bndbox["xmax"]) / real_width
        ymin = int(bndbox["ymin"]) / real_height
        ymax = int(bndbox["ymax"]) / real_height

        x_center = (xmin + xmax) / 2
        y_center = (ymin + ymax) / 2

        width = xmax - xmin
        height = ymax - ymin

        class_name = anno["name"]
        class_id = class_mapping[class_name] if class_name in class_mapping else 0

        boxes.append([class_id, x_center, y_center, width, height])

    # reshape keeps the result 2-D even when there are no objects, so callers
    # can always slice columns.
    return np.array(boxes).reshape(-1, 5)

In [None]:
class CustomVOCDataset(torchvision.datasets.VOCDetection):
    """VOCDetection dataset that yields ``(image, label_matrix)`` pairs.

    ``init_config_yolo`` must be called after construction and before
    indexing, because ``__getitem__`` reads the attributes it sets.

    Each grid cell of the label matrix is laid out as:
        ``[0:C]``      one-hot class vector
        ``[C]``        objectness flag (1 if a box is assigned to the cell)
        ``[C+1:C+5]``  ``x_cell, y_cell, width_cell, height_cell``
    The remaining ``5 * (B - 1)`` slots are left at zero.
    """

    def init_config_yolo(self, class_mapping, S=7, B=2, C=20, custom_transforms=None):
        """Store the grid configuration used to build label matrices.

        Args:
            class_mapping: Mapping from class name to class id.
            S: Number of grid cells along each image side.
            B: Number of box slots reserved per cell.
            C: Number of classes.
            custom_transforms: Optional callable invoked as
                ``custom_transforms(image=..., bboxes=..., labels=...)`` that
                returns a dict with the same three keys.
        """
        self.S = S
        self.B = B
        self.C = C
        self.class_mapping = class_mapping
        self.custom_transforms = custom_transforms

    def __getitem__(self, index):
        """Return the image tensor and its ``(S, S, C + 5 * B)`` label matrix."""
        image, target = super().__getitem__(index)
        img_width, img_height = image.size

        annotations = convert_to_yolo_format(target, img_width, img_height, self.class_mapping)
        # Split the class id column from the four box columns so the loop below
        # sees 4-value boxes whether or not transforms are applied.
        boxes = annotations[:, 1:]
        labels = annotations[:, 0]

        # Work on an array from here on; it is what the transforms receive and
        # what torch.as_tensor converts when no transforms are configured.
        image = np.array(image)

        if self.custom_transforms:
            sample = {
                'image': image,
                'bboxes': boxes,
                'labels': labels
            }

            sample = self.custom_transforms(**sample)
            image = sample['image']
            boxes = sample['bboxes']
            labels = sample['labels']

        label_matrix = torch.zeros((self.S, self.S, self.C + 5 * self.B))
        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.float32)
        image = torch.as_tensor(image, dtype=torch.float32)

        # Slot positions follow the class vector, so they depend on C.
        objectness_idx = self.C
        box_start, box_end = self.C + 1, self.C + 5

        for box, class_label in zip(boxes, labels):
            x, y, width, height = box.tolist()
            class_label = int(class_label)

            # Row i / column j of the cell holding the box centre. A centre at
            # exactly 1.0 would index one past the grid, so clamp to the last cell.
            i = min(int(self.S * y), self.S - 1)
            j = min(int(self.S * x), self.S - 1)
            x_cell, y_cell = self.S * x - j, self.S * y - i

            # Box size expressed in units of grid cells.
            width_cell, height_cell = (
                width * self.S,
                height * self.S,
            )

            # Only the first box that lands in a cell is recorded.
            if label_matrix[i, j, objectness_idx] == 0:
                label_matrix[i, j, objectness_idx] = 1

                box_coordinates = torch.tensor([x_cell, y_cell, width_cell, height_cell])

                label_matrix[i, j, box_start:box_end] = box_coordinates

                label_matrix[i, j, class_label] = 1

        return image, label_matrix

In [None]:
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """Compute the intersection-over-union of paired boxes.

    Args:
        boxes_preds: Tensor whose last dimension holds 4 box values.
        boxes_labels: Tensor with the same layout as ``boxes_preds``.
        box_format: ``"midpoint"`` for ``(x_center, y_center, width, height)``
            or ``"corners"`` for ``(x1, y1, x2, y2)``.

    Returns:
        Tensor of IoU values with a trailing dimension of size 1.

    Raises:
        ValueError: If ``box_format`` is neither ``"midpoint"`` nor ``"corners"``.
    """
    if box_format == "midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2

        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]

        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]

    else:
        raise ValueError(
            f"box_format must be 'midpoint' or 'corners', got {box_format!r}"
        )

    # The intersection rectangle starts at the larger of the two top-left
    # corners and ends at the smaller of the two bottom-right corners.
    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # clamp(0) turns negative extents (disjoint boxes) into zero area.
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    # The epsilon avoids division by zero when both boxes have zero area.
    return intersection / (box1_area + box2_area - intersection + 1e-6)

In [None]:
def non_max_suppression(bboxes, iou_threshold, threshold, boxformat="corners"):
    assert type(bboxes) == list
    
    bboxes = [box for box in bboxes if box[1] > threshold]
    
    bboxes = sorted(bboxes, key=lambda x : x[1], reverse=True)
    
    bboxes_after_nms = []
    
    while bboxes:
        chosen_box = bboxes.pop(0)