In [1]:
import torch
import torch.nn as nn
import pandas as pd
import math

In [7]:
# Runtime configuration.
# Pick the compute backend in order of preference: Apple MPS first, then CUDA,
# and finally the CPU when no accelerator is reported as available.
device = (
    'mps' if torch.backends.mps.is_available()
    else 'cuda' if torch.cuda.is_available()
    else 'cpu'
)

# File used by the checkpoint helpers to persist and restore training state.
CHECKPOINT_PTH = 'checkpoint.pth'

In [5]:
def iou(box1, box2):
    """
    Compute the intersection-over-union (IoU) of two axis-aligned boxes.

    box1: iterable (x, y, w, h) or (x, y, w, h, p). (x, y) is the corner with
          the smallest coordinates (the opposite corner is taken to be
          (x + w, y + h)); any entries after the first four, such as a
          confidence p, are ignored.
    box2: iterable in the same layout as box1.

    return: float in [0, 1] representing the IoU; 0.0 when the union has
            zero area (e.g. two degenerate boxes).
    """
    # Only the geometry matters here; `*_` swallows an optional confidence so
    # both the 4-value and the documented 5-value layouts are accepted.
    x1_1, y1_1, w1, h1, *_ = box1
    x1_2, y1_2, w2, h2, *_ = box2

    # Calculate the bottom-right coordinates
    x2_1, y2_1 = x1_1 + w1, y1_1 + h1
    x2_2, y2_2 = x1_2 + w2, y1_2 + h2

    # Calculate the coordinates of the intersection rectangle
    x1_inter = max(x1_1, x1_2)
    y1_inter = max(y1_1, y1_2)
    x2_inter = min(x2_1, x2_2)
    y2_inter = min(y2_1, y2_2)

    # Disjoint boxes give a negative extent; clamp it so the area becomes 0.
    inter_width = max(0, x2_inter - x1_inter)
    inter_height = max(0, y2_inter - y1_inter)
    inter_area = inter_width * inter_height

    # Union = sum of both areas minus the part counted twice.
    union_area = w1 * h1 + w2 * h2 - inter_area

    # Guard against division by zero for degenerate (zero-area) boxes.
    if union_area == 0:
        return 0.0

    return inter_area / union_area

def non_max_suppression(boxes, scores, iou_threshold):
    """
    Perform greedy Non-Maximum Suppression (NMS) on bounding boxes.

    Boxes are visited from highest to lowest score; each visited box is kept
    and every remaining box whose IoU with it exceeds `iou_threshold` is
    discarded.

    Parameters:
    boxes: array-like or tensor of shape (N, 4) or (N, 5)
           Each row contains [x, y, w, h] and optionally a trailing score,
           where (x, y) is the corner with the smallest coordinates.
    scores: array-like or tensor of shape (N,), or None
            Confidence of each box. When None, column 4 of `boxes` is used.
    iou_threshold: float
                   Boxes with IoU strictly greater than this value relative
                   to an already kept box are suppressed.

    Returns:
    indices: list of int
             Indices (into `boxes`) of the boxes to keep, ordered by
             descending score.

    Raises:
    ValueError: if `boxes` is not 2-D with at least 4 columns, if no scores
                are available, or if `scores` does not have one entry per box.
    """
    if len(boxes) == 0:
        return []

    # as_tensor avoids a copy (and a warning) when a tensor is passed in.
    boxes = torch.as_tensor(boxes)
    if not boxes.is_floating_point():
        boxes = boxes.float()  # integer boxes would otherwise break the division below
    if boxes.dim() != 2 or boxes.shape[1] < 4:
        raise ValueError("boxes must have shape (N, 4) or (N, 5)")

    if scores is None:
        if boxes.shape[1] < 5:
            raise ValueError("scores is None and boxes has no score column")
        scores = boxes[:, 4]
    else:
        scores = torch.as_tensor(scores).reshape(-1)
        if scores.numel() != boxes.shape[0]:
            raise ValueError("scores must contain exactly one value per box")

    # Compute the bottom-right coordinates and area of each box
    x1, y1, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    x2 = x1 + w
    y2 = y1 + h
    areas = w * h

    # Indices sorted by score, best first; a stable sort keeps ties in input
    # order so the result is deterministic.
    order = torch.sort(scores, descending=True, stable=True).indices

    keep = []
    while order.numel() > 0:
        # The index of the current box with the highest score
        best = int(order[0])
        keep.append(best)

        rest = order[1:]
        if rest.numel() == 0:
            break

        # Intersection of the kept box with every remaining candidate
        xx1 = torch.maximum(x1[best], x1[rest])
        yy1 = torch.maximum(y1[best], y1[rest])
        xx2 = torch.minimum(x2[best], x2[rest])
        yy2 = torch.minimum(y2[best], y2[rest])

        inter_area = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
        union_area = areas[best] + areas[rest] - inter_area

        # A zero-area union is treated as IoU 0 instead of producing NaN/inf.
        overlap = torch.where(
            union_area > 0, inter_area / union_area, torch.zeros_like(inter_area)
        )

        # Keep boxes with IoU less than or equal to the threshold
        order = rest[overlap <= iou_threshold]

    return keep

        

In [6]:
def save_checkpoint(epoch, model, optimizer, loss, path=None):
    """
    Serialize the training state to disk with `torch.save`.

    Parameters:
    epoch: value stored under 'epoch' (typically the last finished epoch).
    model: object exposing `state_dict()`; stored under 'model_state_dict'.
    optimizer: object exposing `state_dict()`; stored under
               'optimizer_state_dict'.
    loss: value stored under 'loss'.
    path: destination file. Defaults to the module-level CHECKPOINT_PTH when
          omitted, which is the original behavior.

    Returns:
    None. An existing file at the destination is overwritten.
    """
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }

    # Resolve the default lazily so the constant is only needed when no
    # explicit path is supplied.
    torch.save(checkpoint, CHECKPOINT_PTH if path is None else path)

def load_checkpoint(model=None, optimizer=None, path=None, map_location=None):
    """
    Restore training state from a checkpoint file.

    The file is expected to hold a dict with the keys 'epoch',
    'model_state_dict', 'optimizer_state_dict' and 'loss'.

    Parameters:
    model: module whose weights are restored in place. When omitted, the
           notebook-level global `model` is used (the original behavior).
    optimizer: optimizer whose state is restored in place. When omitted, the
               notebook-level global `optimizer` is used if one exists;
               otherwise the optimizer state is skipped (weights-only load).
    path: checkpoint file. Defaults to the module-level CHECKPOINT_PTH.
    map_location: forwarded to `torch.load`; pass e.g. 'cpu' to open a
                  checkpoint written on a device that is not available here.

    Returns:
    (epoch, loss): the values stored in the checkpoint, so the caller can
                   resume training where it stopped.

    Raises:
    NameError: if no model is passed and no global `model` is defined.
    """
    # Fall back to the notebook globals so existing `load_checkpoint()` calls
    # keep working without arguments.
    if model is None:
        model = globals().get('model')
        if model is None:
            raise NameError(
                "load_checkpoint: pass `model` or define a global `model` first"
            )
    if optimizer is None:
        optimizer = globals().get('optimizer')

    # NOTE: torch.load unpickles data; only open checkpoints you trust.
    checkpoint = torch.load(
        CHECKPOINT_PTH if path is None else path, map_location=map_location
    )

    model.load_state_dict(checkpoint['model_state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    return checkpoint['epoch'], checkpoint['loss']

In [12]:
class Yolo(nn.Module):
    """
    YOLOv1-style detector: a convolutional backbone followed by two fully
    connected layers.

    The backbone downsamples by a factor of 64 overall (one stride-2 conv,
    four 2x2 max-pools, one more stride-2 conv), so a 448x448 input yields a
    1024-channel 7x7 map for the default `split_size` of 7. The first linear
    layer requires the final map to be exactly `split_size` x `split_size`.

    Parameters:
    in_channels: number of channels of the input images.
    split_size: side length S of the prediction grid.
    n_bboxes: number of boxes B predicted per grid cell.
    n_classes: number of class scores C predicted per grid cell.

    `forward` returns a flat tensor of shape (batch, S * S * (B * 5 + C)).
    """

    def __init__(self, in_channels = 3, split_size = 7, n_bboxes = 2, n_classes = 20):
        super().__init__()
        self.split_size = split_size
        self.n_bboxes = n_bboxes
        self.n_classes = n_classes

        self.conv_network = nn.Sequential(
            self._conv_layer(in_channels, 64, 7, stride = 2),
            self._get_max_pool(),
            self._conv_layer(64, 192, 3),
            self._get_max_pool(),
            self._conv_layer(192, 128, 1),
            self._conv_layer(128, 256, 3),
            self._conv_layer(256, 256, 1),
            self._conv_layer(256, 512, 3),
            self._get_max_pool(),
            self._get_four_conv_block_with_512_out(),
            self._conv_layer(512, 512, 1),
            self._conv_layer(512, 1024, 3),
            self._get_max_pool(),
            self._get_four_conv_block_with_1024_out(),
            self._conv_layer(1024, 1024, 3),
            self._conv_layer(1024, 1024, 3, stride = 2),
            self._conv_layer(1024, 1024, 3),
            self._conv_layer(1024, 1024, 3),
        )
        self.fc_network = nn.Sequential(
            nn.Flatten(),
            # The hidden width must match the next layer's input (4096).
            nn.Linear(1024 * split_size * split_size, 4096),
            nn.LeakyReLU(0.1),
            nn.Dropout(p = 0.5),
            # 5 values per box, presumably (x, y, w, h, confidence) as in
            # YOLOv1 -- confirm against the loss/decoding code.
            nn.Linear(4096, split_size * split_size * (n_bboxes * 5 + n_classes))
        )

    def _conv_layer(self, in_channels, out_channels, kernel_size, stride = 1):
        """
        Build a Conv2d + LeakyReLU(0.1) pair.

        `kernel_size` must be an odd int; padding of `kernel_size // 2` keeps
        the spatial size unchanged at stride 1 and halves it at stride 2, which
        is what the layer arithmetic of the network relies on.
        """
        return nn.Sequential(
            nn.Conv2d(
                in_channels = in_channels,
                out_channels = out_channels,
                kernel_size = kernel_size,
                stride = stride,
                padding = kernel_size // 2,
            ),
            nn.LeakyReLU(negative_slope=0.1)
        )

    def _get_max_pool(self):
        """Return a 2x2 max-pool (stride defaults to the kernel size, i.e. 2)."""
        return nn.MaxPool2d(kernel_size = 2)

    def _get_four_conv_block_with_512_out(self):
        """Stack four (1x1 -> 256, 3x3 -> 512) pairs; input and output have 512 channels."""
        return nn.Sequential(
            self._get_one_and_three_conv_with_512_out(),
            self._get_one_and_three_conv_with_512_out(),
            self._get_one_and_three_conv_with_512_out(),
            self._get_one_and_three_conv_with_512_out()
        )

    def _get_one_and_three_conv_with_512_out(self):
        """1x1 channel reduction to 256 followed by a 3x3 conv back to 512, each with LeakyReLU."""
        return nn.Sequential(
            self._conv_layer(512, 256, 1),
            self._conv_layer(256, 512, 3)
        )

    def _get_four_conv_block_with_1024_out(self):
        """
        Stack two (1x1 -> 512, 3x3 -> 1024) pairs; input and output have 1024
        channels. Despite the name this block holds four conv layers in two
        pairs, not four pairs.
        """
        return nn.Sequential(
            self._get_one_and_three_with_1024_out(),
            self._get_one_and_three_with_1024_out()
        )

    def _get_one_and_three_with_1024_out(self):
        """1x1 channel reduction to 512 followed by a 3x3 conv back to 1024, each with LeakyReLU."""
        return nn.Sequential(
            self._conv_layer(1024, 512, 1),
            self._conv_layer(512, 1024, 3)
        )

    def forward(self, inputs):
        """
        Run the backbone and the fully connected head.

        inputs: tensor of shape (batch, in_channels, H, W) whose spatial size
                reduces to split_size x split_size (e.g. 448x448 for S = 7).
        return: tensor of shape (batch, S * S * (B * 5 + C)).
        """
        conv_output = self.conv_network(inputs)
        return self.fc_network(conv_output)


In [15]:
def lr_scheduler(epoch):
    """
    Return the learning rate to use for the given epoch.

    Schedule:
      * epoch < 10        : linear ramp starting at 1e-3 and heading to 1e-2
      * 10 <= epoch < 85  : constant 1e-2
      * 85 <= epoch < 115 : constant 1e-3
      * epoch >= 115      : constant 1e-4
    """
    # Warm-up phase: interpolate between the start and peak rates.
    if epoch < 10:
        return 1e-3 + (1e-2 - 1e-3) * (epoch / 10)

    # Plateau phases: the first boundary the epoch falls under wins.
    for phase_end, phase_lr in ((85, 1e-2), (115, 1e-3)):
        if epoch < phase_end:
            return phase_lr

    # Final phase.
    return 1e-4


class CustomLRScheduler:
    """
    Minimal scheduler that drives an optimizer's learning rate from a function.

    optimizer: object exposing `param_groups`, an iterable of dict-like groups.
    lr_func: callable mapping an epoch to the learning rate for that epoch.
    """

    def __init__(self, optimizer, lr_func):
        self.optimizer, self.lr_func = optimizer, lr_func

    def step(self, epoch):
        """Evaluate `lr_func(epoch)` once and write it to every parameter group."""
        new_lr = self.lr_func(epoch)
        for group in self.optimizer.param_groups:
            group['lr'] = new_lr

In [None]:
def yolo_sum_square_loss():
    