In [11]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.optim as optim
from tqdm import tqdm
from torch.utils.data import DataLoader
import time
import sys
import os
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter



In [12]:
# Setup: create two folders named "images" and "labels" under /home, upload the images
# and the label files into them, and upload the CSV file "100examples.csv" to /home.

In [13]:
# Work from /home/ so that the relative 'images' and 'labels' directories
# configured further down resolve inside it.
os.chdir('/home/')

In [14]:
#model.py
""" 
YOLOv1 architecture config: conv layers
Tuple structure: (kernel_size, filters, stride, padding). The padding is calculated by hand
"M" for maxpooling: stride 2x2, kernel 2x2
List structure: tuples and int, the number of repeats
"""

# Convolutional backbone, listed in forward order and consumed by
# Yolov1._create_conv_layers. Entry kinds:
#   tuple -> one conv block described as (kernel_size, filters, stride, padding)
#   "M"   -> one 2x2 max-pool with stride 2
#   list  -> [tuple, tuple, n]: the two conv blocks, repeated n times
architecture_config = [
    (7, 64, 2, 3),
    "M",
    (3, 192, 1, 1),
    "M",
    (1, 128, 1, 0),
    (3, 256, 1, 1),
    (1, 256, 1, 0),
    (3, 512, 1, 1),
    "M",
    [(1, 256, 1, 0), (3, 512, 1, 1), 4],  # 1x1 then 3x3 pair, repeated 4 times
    (1, 512, 1, 0),
    (3, 1024, 1, 1),
    "M",
    [(1, 512, 1, 0), (3, 1024, 1, 1), 2],  # 1x1 then 3x3 pair, repeated 2 times
    (3, 1024, 1, 1),
    (3, 1024, 2, 1),  # the only stride-2 convolution besides the first layer
    (3, 1024, 1, 1),
    (3, 1024, 1, 1),
]

class CNNBlock(nn.Module):
    """Reusable building block: Conv2d -> BatchNorm2d -> LeakyReLU(0.1).

    The convolution is created without a bias term; the batch-norm layer that
    follows supplies its own learnable shift.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of filters produced by the convolution.
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (``kernel_size``,
            ``stride``, ``padding``, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        # Attribute names are part of the state_dict keys, so they are kept as-is.
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        # See "Batch Normalization: Accelerating Deep Network Training by
        # Reducing Internal Covariate Shift".
        self.batchnorm = nn.BatchNorm2d(out_channels)
        self.leakyrelu = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Apply convolution, normalisation and activation, in that order."""
        features = self.conv(x)
        normalised = self.batchnorm(features)
        return self.leakyrelu(normalised)
    

class Yolov1(nn.Module):
    """YOLOv1 detector: a convolutional backbone followed by two linear layers.

    Args:
        in_channels: channels of the input image (3 for RGB).
        **kwargs: forwarded to ``_create_fcs``; must provide ``split_size``,
            ``num_boxes`` and ``num_classes``.

    ``forward`` returns a tensor of shape ``(N, S * S * (C + 5 * B))``.
    """

    def __init__(self, in_channels=3, **kwargs):
        super().__init__()
        self.architecture = architecture_config
        self.in_channels = in_channels
        self.darknet = self._create_conv_layers(self.architecture)  # backbone built from the config
        self.fcs = self._create_fcs(**kwargs)  # fully connected head

    def forward(self, x):
        x = self.darknet(x)
        return self.fcs(torch.flatten(x, start_dim=1))

    def _create_conv_layers(self, architecture):
        """Translate the architecture description into an ``nn.Sequential``.

        Entries are either a ``(kernel_size, filters, stride, padding)`` tuple
        (one conv block), a string (one 2x2/stride-2 max-pool) or a list
        ``[tuple, tuple, repeats]`` (two conv blocks repeated ``repeats`` times).

        Raises:
            TypeError: if an entry is none of the above, instead of silently
                building a network with that layer missing.
        """
        layers = []
        in_channels = self.in_channels

        for layer in architecture:
            if isinstance(layer, tuple):
                kernel_size, filters, stride, padding = layer
                layers.append(
                    CNNBlock(in_channels=in_channels, out_channels=filters,
                             kernel_size=kernel_size, stride=stride, padding=padding)
                )
                in_channels = filters

            elif isinstance(layer, str):
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

            elif isinstance(layer, list):
                conv1, conv2, num_repeats = layer

                for _ in range(num_repeats):
                    layers.append(
                        CNNBlock(in_channels=in_channels, out_channels=conv1[1],
                                 kernel_size=conv1[0], stride=conv1[2], padding=conv1[3])
                    )
                    layers.append(
                        CNNBlock(in_channels=conv1[1], out_channels=conv2[1],
                                 kernel_size=conv2[0], stride=conv2[2], padding=conv2[3])
                    )
                    # Update inside the loop: the next repetition consumes the
                    # output of this pair, not the width that entered the group.
                    in_channels = conv2[1]

            else:
                raise TypeError(
                    f"Unsupported architecture entry {layer!r} of type {type(layer).__name__}"
                )

        return nn.Sequential(*layers)  # unpack the list into a Sequential container

    def _create_fcs(self, split_size, num_boxes, num_classes):
        """Build the fully connected head mapping backbone features to the grid output.

        The first linear layer expects ``1024 * S * S`` inputs. The hidden width
        is 496 (4096 in the original paper), reduced to fit small amounts of VRAM.
        """
        S, B, C = split_size, num_boxes, num_classes

        # The layer order (including the redundant Flatten and the p=0 Dropout)
        # is kept unchanged so that the Sequential indices, and therefore the
        # checkpoint keys, stay the same.
        return nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * S * S, 496),
            nn.Dropout(0.0),
            nn.LeakyReLU(0.1),
            nn.Linear(496, S * S * (C + B * 5)),  # later reshaped to (S, S, C + B*5)
        )

# quick test of the Yolov1 model
"""   
def test(S=7, B=2, C=20):
    model = Yolov1(split_size = S, num_boxes = B, num_classes = C)
    x = torch.randn([2, 3, 448, 448])
    print(model(x).shape)

test()
"""

             




'   \ndef test(S=7, B=2, C=20):\n    model = Yolov1(split_size = S, num_boxes = B, num_classes = C)\n    x = torch.randn([2, 3, 448, 448])\n    print(model(x).shape)\n\ntest()\n'

In [15]:
#loss.py
class YoloLoss(nn.Module):
    """Sum-of-squares YOLOv1 loss.

    Expected channel layout of the last dimension, for ``C`` classes:
        [0, C)        class scores
        C             confidence of box 1
        [C+1, C+5)    box 1 as (x, y, w, h)
        C+5           confidence of box 2
        [C+6, C+10)   box 2 as (x, y, w, h)
    The target tensor only uses the first confidence/box slot.

    Args:
        S: grid size (cells per side).
        B: boxes predicted per cell. ``forward`` compares exactly two boxes,
            so only ``B == 2`` is fully supported.
        C: number of classes.
    """

    def __init__(self, S=7, B=2, C=20):
        super().__init__()
        self.mse = nn.MSELoss(reduction="sum")
        self.S = S
        self.B = B
        self.C = C
        self.lambda_noobj = 0.5
        self.lambda_coord = 5

    def forward(self, predictions, target):
        """Return the scalar loss for a batch.

        Args:
            predictions: network output, reshapeable to ``(N, S, S, C + 5*B)``.
            target: label tensor indexed as ``(N, S, S, ...)`` with the layout
                described on the class.
        """
        C = self.C
        predictions = predictions.reshape(-1, self.S, self.S, C + self.B * 5)

        # Channel offsets derived from C rather than hard-coded for C == 20.
        conf1, box1 = slice(C, C + 1), slice(C + 1, C + 5)
        conf2, box2 = slice(C + 5, C + 6), slice(C + 6, C + 10)

        iou_b1 = intersection_over_union(predictions[..., box1], target[..., box1])
        iou_b2 = intersection_over_union(predictions[..., box2], target[..., box1])
        ious = torch.cat([iou_b1.unsqueeze(0), iou_b2.unsqueeze(0)], dim=0)
        # bestbox is the argmax over the two boxes: 0 -> first box, 1 -> second box.
        _, bestbox = torch.max(ious, dim=0)
        exists_box = target[..., conf1]  # 1 where the cell contains an object, else 0

        # --------------------- box coordinate loss -------------------------
        box_predictions = exists_box * (
            bestbox * predictions[..., box2] + (1 - bestbox) * predictions[..., box1]
        )
        box_targets = exists_box * target[..., box1]

        # Square roots of width/height, as in the paper. The sign is preserved
        # because raw predictions may be negative, and epsilon sits outside the
        # abs() so the argument of sqrt can never reach zero.
        pred_wh = box_predictions[..., 2:4]
        pred_wh_sqrt = torch.sign(pred_wh) * torch.sqrt(torch.abs(pred_wh) + 1e-6)
        box_predictions = torch.cat([box_predictions[..., :2], pred_wh_sqrt], dim=-1)
        box_targets = torch.cat(
            [box_targets[..., :2], torch.sqrt(box_targets[..., 2:4])], dim=-1
        )

        # (N, S, S, 4) -> (N*S*S, 4)
        box_loss = self.mse(
            torch.flatten(box_predictions, end_dim=-2),
            torch.flatten(box_targets, end_dim=-2),
        )

        # --------------------- object loss -------------------------------
        pred_conf = bestbox * predictions[..., conf2] + (1 - bestbox) * predictions[..., conf1]
        object_loss = self.mse(
            torch.flatten(exists_box * pred_conf),
            torch.flatten(exists_box * target[..., conf1]),
        )

        # --------------------- no-object loss ----------------------------
        # Both predicted confidences are penalised in cells without an object.
        no_object_loss = self.mse(
            torch.flatten((1 - exists_box) * predictions[..., conf1], start_dim=1),
            torch.flatten((1 - exists_box) * target[..., conf1], start_dim=1),
        )
        no_object_loss += self.mse(
            torch.flatten((1 - exists_box) * predictions[..., conf2], start_dim=1),
            torch.flatten((1 - exists_box) * target[..., conf1], start_dim=1),
        )

        # --------------------- class loss --------------------------------
        # (N, S, S, C) -> (N*S*S, C)
        class_loss = self.mse(
            torch.flatten(exists_box * predictions[..., :C], end_dim=-2),
            torch.flatten(exists_box * target[..., :C], end_dim=-2),
        )

        return (
            self.lambda_coord * box_loss  # first two rows in the paper
            + object_loss  # third row
            + self.lambda_noobj * no_object_loss  # fourth row
            + class_loss  # fifth row
        )
    
# quick test to make sure that everything works
"""
def test():
    predictions = torch.randn([10, 7, 7, 30])
    target = torch.randn([10, 7, 7, 30])
    Loss = YoloLoss()
    print(Loss(predictions, target))

test()
"""

'\ndef test():\n    predictions = torch.randn([10, 7, 7, 30])\n    target = torch.randn([10, 7, 7, 30])\n    Loss = YoloLoss()\n    print(Loss(predictions, target))\n\ntest()\n'

In [16]:
# dataset.py
class VOCDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(image, label_matrix)`` pairs for YOLOv1 training.

    Args:
        csv_file: CSV whose first column is the image file name and whose
            second column is the label file name.
        img_dir: directory containing the images.
        label_dir: directory containing the label text files; each non-empty
            line is ``class x y width height``. Coordinates are presumably
            normalised to [0, 1] relative to the image - confirm against the
            label files.
        S, B, C: grid size, boxes per cell and number of classes.
        transform: optional callable ``(image, boxes) -> (image, boxes)``.
    """

    def __init__(self, csv_file, img_dir, label_dir, S=7, B=2, C=20, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transform = transform
        self.S = S
        self.B = B
        self.C = C

    def __len__(self):
        return len(self.annotations)

    @staticmethod
    def _read_boxes(label_path):
        """Parse a label file into a list of ``[class, x, y, width, height]`` rows."""
        boxes = []
        with open(label_path) as f:
            for line in f:
                fields = line.split()
                if not fields:  # tolerate blank lines, e.g. a trailing newline
                    continue
                if len(fields) != 5:
                    raise ValueError(
                        f"{label_path}: expected 5 values per line, got {len(fields)}: {line!r}"
                    )
                # Parse everything as float. Calling int() on a string such as
                # "1.0" raises ValueError, and the class id is converted to int
                # when the label matrix is built anyway.
                boxes.append([float(value) for value in fields])
        return boxes

    def _encode_boxes(self, boxes):
        """Build the ``(S, S, C + 5*B)`` label matrix from the box rows.

        At most one object is stored per cell; later boxes that fall into an
        occupied cell are ignored.
        """
        S, C = self.S, self.C
        label_matrix = torch.zeros((S, S, C + 5 * self.B))
        for box in boxes:
            class_label, x, y, width, height = box.tolist()
            class_label = int(class_label)
            # Row i / column j of the cell holding the box centre. Clamped so a
            # centre lying exactly on the right/bottom edge (x or y == 1.0)
            # maps to the last cell instead of indexing out of range.
            i = min(max(int(S * y), 0), S - 1)
            j = min(max(int(S * x), 0), S - 1)
            x_cell, y_cell = S * x - j, S * y - i  # centre relative to the cell
            width_cell, height_cell = width * S, height * S  # size in cell units

            if label_matrix[i, j, C] == 0:  # the cell is still free
                label_matrix[i, j, C] = 1
                label_matrix[i, j, C + 1:C + 5] = torch.tensor(
                    [x_cell, y_cell, width_cell, height_cell]
                )
                label_matrix[i, j, class_label] = 1  # one-hot class
        return label_matrix

    def __getitem__(self, index):
        # ------------ labels (file name is in the second CSV column) ------------
        label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
        # reshape keeps an empty label file as a well-formed (0, 5) tensor
        boxes = torch.tensor(self._read_boxes(label_path), dtype=torch.float32).reshape(-1, 5)

        # ------------ image (file name is in the first CSV column) --------------
        img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
        image = Image.open(img_path)

        if self.transform:
            image, boxes = self.transform(image, boxes)

        return image, self._encode_boxes(boxes)

"""
csv_file= 'data/8examples.csv'
image_dir= 'data/images'
label_dir = 'data/labels'
index= 2

def test():
    dataloader = VOCDataset(csv_file, image_dir, label_dir)
    l = dataloader.__len__()
    image, label_matrix = dataloader.__getitem__(index)

    print('len csv_file :', l, 'label_matrix dim :', label_matrix.shape)
    print('sum label_matrix :', sum(label_matrix.flatten()))
    print(np.array(image).shape)

test()
"""




"\ncsv_file= 'data/8examples.csv'\nimage_dir= 'data/images'\nlabel_dir = 'data/labels'\nindex= 2\n\ndef test():\n    dataloader = VOCDataset(csv_file, image_dir, label_dir)\n    l = dataloader.__len__()\n    image, label_matrix = dataloader.__getitem__(index)\n\n    print('len csv_file :', l, 'label_matrix dim :', label_matrix.shape)\n    print('sum label_matrix :', sum(label_matrix.flatten()))\n    print(np.array(image).shape)\n\ntest()\n"

In [17]:
#utils.py
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Calculates intersection over union
    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes, shape (..., 4)
        boxes_labels (tensor): Correct labels of Bounding Boxes, shape (..., 4)
        box_format (str): "midpoint" if boxes are (x, y, w, h),
            "corners" if boxes are (x1, y1, x2, y2)
    Returns:
        tensor: Intersection over union for all examples, shape (..., 1)
    Raises:
        ValueError: if box_format is neither "midpoint" nor "corners"
    """

    if box_format == "midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]

    else:
        # Without this branch an unknown format only failed further down with
        # an UnboundLocalError on box1_x1.
        raise ValueError(
            f"box_format must be 'midpoint' or 'corners', got {box_format!r}"
        )

    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # .clamp(0) is for the case when they do not intersect
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    # abs() guards against negative widths/heights coming from raw predictions
    box1_area = torch.abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = torch.abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    # epsilon avoids a division by zero when both boxes are degenerate
    return intersection / (box1_area + box2_area - intersection + 1e-6)

def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
    """
    Greedy, per-class Non Max Suppression.
    Parameters:
        bboxes (list): list of boxes, each given as
            [class_pred, prob_score, c1, c2, c3, c4] where the four
            coordinates are interpreted according to box_format
        iou_threshold (float): a box is dropped when its IoU with an already
            kept box of the same class is not below this value
        threshold (float): boxes whose prob_score is not above this value are
            discarded up front (independent of IoU)
        box_format (str): "midpoint" or "corners", forwarded to
            intersection_over_union
    Returns:
        list: the surviving boxes, ordered by decreasing prob_score
    """

    assert type(bboxes) == list

    # Keep confident boxes only, most confident first.
    candidates = sorted(
        (candidate for candidate in bboxes if candidate[1] > threshold),
        key=lambda candidate: candidate[1],
        reverse=True,
    )
    kept = []

    while candidates:
        top = candidates.pop(0)
        survivors = []

        for other in candidates:
            # Boxes of another class never compete with the chosen one.
            if other[0] != top[0]:
                survivors.append(other)
                continue

            overlap = intersection_over_union(
                torch.tensor(top[2:]),
                torch.tensor(other[2:]),
                box_format=box_format,
            )
            if overlap < iou_threshold:
                survivors.append(other)

        candidates = survivors
        kept.append(top)

    return kept


def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
):
    """
    Calculates mean average precision
    Parameters:
        pred_boxes (list): list of boxes, each given as
            [train_idx, class_prediction, prob_score, c1, c2, c3, c4] where the
            four coordinates are interpreted according to box_format
        true_boxes (list): same layout as pred_boxes, for the ground truth
        iou_threshold (float): IoU above which a prediction counts as correct
        box_format (str): "midpoint" or "corners", forwarded to
            intersection_over_union
        num_classes (int): number of classes
    Returns:
        tensor: mAP averaged over the classes that have at least one ground
        truth box; a zero tensor when no class has any ground truth
    """

    # list storing the AP of every class that has ground truth
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(num_classes):
        # Only keep the predictions and targets of the current class c
        detections = [detection for detection in pred_boxes if detection[1] == c]
        ground_truths = [true_box for true_box in true_boxes if true_box[1] == c]
        total_true_bboxes = len(ground_truths)

        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        # Group the ground truths per image once, instead of rescanning the
        # whole list for every detection.
        gts_by_image = {}
        for gt in ground_truths:
            gts_by_image.setdefault(gt[0], []).append(gt)

        # One "already matched" flag per ground truth box, e.g.
        # {0: tensor([0, 0, 0]), 1: tensor([0, 0, 0, 0, 0])}
        matched = {img: torch.zeros(len(gts)) for img, gts in gts_by_image.items()}

        # sort by box probabilities which is index 2
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))

        for detection_idx, detection in enumerate(detections):
            # Ground truths of the same image as the detection
            ground_truth_img = gts_by_image.get(detection[0], [])

            best_iou = 0
            best_gt_idx = None  # reset per detection so a stale index is never reused

            for idx, gt in enumerate(ground_truth_img):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_gt_idx is not None and best_iou > iou_threshold:
                # each ground truth box may only be detected once
                if matched[detection[0]][best_gt_idx] == 0:
                    TP[detection_idx] = 1
                    matched[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1

            # no overlap above the threshold: false positive
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = torch.divide(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))
        # prepend the (recall=0, precision=1) starting point of the curve
        precisions = torch.cat((torch.tensor([1.0]), precisions))
        recalls = torch.cat((torch.tensor([0.0]), recalls))
        # torch.trapz for numerical integration
        average_precisions.append(torch.trapz(precisions, recalls))

    # Nothing to average when no class has ground truth; avoid dividing by zero.
    if not average_precisions:
        return torch.tensor(0.0)

    return sum(average_precisions) / len(average_precisions)


def get_bboxes(
    loader,
    model,
    iou_threshold,
    threshold,
    pred_format="cells",
    box_format="midpoint",
    device="cuda",
):
    """Collect predicted and ground-truth boxes for every sample of ``loader``.

    Each batch is run through ``model`` under ``torch.no_grad()``; predictions
    and labels are converted with ``cellboxes_to_boxes`` and the predictions
    are filtered with ``non_max_suppression``.

    Parameters:
        loader: iterable of ``(images, labels)`` batches
        model: network called on the images
        iou_threshold (float): forwarded to non_max_suppression
        threshold (float): confidence cut-off, forwarded to non_max_suppression
            and also used to decide which label cells hold a real box
        pred_format (str): accepted but not used by this function
        box_format (str): forwarded to non_max_suppression
        device: device the batches are moved to
    Returns:
        tuple: ``(all_pred_boxes, all_true_boxes)``; every entry is a box list
        prefixed with the running sample index.

    NOTE(review): the model's train/eval mode is left untouched here, so the
    caller decides which mode the boxes are computed in.
    """
    all_pred_boxes, all_true_boxes = [], []
    train_idx = 0  # running sample index across batches

    for x, labels in loader:
        x = x.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            predictions = model(x)

        batch_size = x.shape[0]
        true_bboxes = cellboxes_to_boxes(labels)
        bboxes = cellboxes_to_boxes(predictions)

        for idx in range(batch_size):
            kept = non_max_suppression(
                bboxes[idx],
                iou_threshold=iou_threshold,
                threshold=threshold,
                box_format=box_format,
            )
            all_pred_boxes += [[train_idx] + pred_box for pred_box in kept]

            # Label cells without an object have confidence 0 and are dropped.
            all_true_boxes += [
                [train_idx] + true_box
                for true_box in true_bboxes[idx]
                if true_box[1] > threshold
            ]

            train_idx += 1

    return all_pred_boxes, all_true_boxes



def convert_cellboxes(predictions, S=7, C=20):
    """
    Converts YOLO output expressed relative to grid cells into boxes expressed
    as ratios of the entire image.

    Parameters:
        predictions (tensor): network output or label tensor with batch as the
            first dimension, reshapeable to (N, S, S, C + 10)
        S (int): grid size (cells per side)
        C (int): number of classes; the two boxes are read from channels
            [C, C+5) and [C+5, C+10) as (confidence, x, y, w, h)
    Returns:
        tensor: shape (N, S, S, 6) holding, per cell,
        [predicted_class, best_confidence, x, y, w, h], where the box is the
        one of the two with the higher confidence.
    """

    predictions = predictions.to("cpu")
    batch_size = predictions.shape[0]
    predictions = predictions.reshape(batch_size, S, S, C + 10)
    bboxes1 = predictions[..., C + 1:C + 5]
    bboxes2 = predictions[..., C + 6:C + 10]
    scores = torch.cat(
        (predictions[..., C].unsqueeze(0), predictions[..., C + 5].unsqueeze(0)), dim=0
    )
    # 0 where the first box is more confident, 1 where the second one is
    best_box = scores.argmax(0).unsqueeze(-1)
    best_boxes = bboxes1 * (1 - best_box) + best_box * bboxes2

    # cell_indices[n, i, j, 0] == j (column index); permuting the two grid
    # axes yields the row index i for the y coordinate.
    cell_indices = torch.arange(S).repeat(batch_size, S, 1).unsqueeze(-1)
    x = 1 / S * (best_boxes[..., :1] + cell_indices)
    y = 1 / S * (best_boxes[..., 1:2] + cell_indices.permute(0, 2, 1, 3))
    w_y = 1 / S * best_boxes[..., 2:4]  # width and height, from cell units to image ratio
    converted_bboxes = torch.cat((x, y, w_y), dim=-1)
    predicted_class = predictions[..., :C].argmax(-1).unsqueeze(-1)
    best_confidence = torch.max(predictions[..., C], predictions[..., C + 5]).unsqueeze(
        -1
    )
    converted_preds = torch.cat(
        (predicted_class, best_confidence, converted_bboxes), dim=-1
    )

    return converted_preds


def cellboxes_to_boxes(out, S=7):
    """Convert grid output into plain Python lists of boxes.

    Parameters:
        out (tensor): network output or label tensor, batch first
        S (int): grid size; forwarded to convert_cellboxes so that both
            functions agree on the grid
    Returns:
        list: one list per sample, each containing S*S boxes given as
        [class, confidence, x, y, w, h] (all Python floats)
    """
    converted_pred = convert_cellboxes(out, S=S).reshape(out.shape[0], S * S, -1)
    # Truncate the class column to an integral value (it stays stored as float).
    converted_pred[..., 0] = converted_pred[..., 0].long()

    # tolist() produces the same nested lists as calling .item() on every
    # element, without the Python-level double loop.
    return converted_pred.tolist()

def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    """Serialise ``state`` to ``filename`` with ``torch.save``.

    Progress messages are printed before and after the write.
    """
    print("=> Saving checkpoint...")
    torch.save(obj=state, f=filename)
    print("Successfully saved !")


def load_checkpoint(checkpoint, model, optimizer):
    """Restore ``model`` and ``optimizer`` from an already loaded checkpoint.

    Parameters:
        checkpoint (dict): must contain the keys "state_dict" (model weights)
            and "optimizer" (optimizer state)
        model: module whose ``load_state_dict`` is called
        optimizer: optimizer whose ``load_state_dict`` is called
    """
    print("=> Loading checkpoint...")
    model_state = checkpoint["state_dict"]
    model.load_state_dict(model_state)
    optimizer_state = checkpoint["optimizer"]
    optimizer.load_state_dict(optimizer_state)
    print("Successfully loaded !")

In [18]:
# Seed PyTorch's random number generator so runs are repeatable.
seed = 4567
torch.manual_seed(seed)

# Hyperparameters
learning_rate = 2e-5  # passed to the Adam optimizer in main()
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # use the GPU when one is available
batch_size = 16
weight_decay = 0  # passed to the Adam optimizer in main()
epochs = 50  # upper bound; main() stops early once the train mAP exceeds 0.9
num_workers = 2  # forwarded to the DataLoader
pin_memory = True  # forwarded to the DataLoader
load_model = False  # when True, main() restores model and optimizer from load_model_file
# NOTE(review): this name differs from the file name main() writes its checkpoint to;
# confirm which file is meant before enabling load_model.
load_model_file = "Yolov1_100images_50epochs.path.tar"
img_dir = 'images'  # relative to the working directory set earlier in the notebook
label_dir = 'labels'  # relative to the working directory set earlier in the notebook


In [19]:
class Compose:
    """Chain image transforms while carrying the bounding boxes along.

    Every transform is called with the image only; the boxes are returned
    exactly as they were passed in.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, bboxes):
        for step in self.transforms:
            img = step(img)
        return img, bboxes


# Image-only preprocessing: resize to 448x448, then convert to a tensor.
# Compose passes the bounding boxes through unchanged.
transform = Compose([transforms.Resize((448, 448)), transforms.ToTensor()])

def train_fn(train_loader, model, optimizer, loss_fn):
    """Train ``model`` for one pass over ``train_loader``.

    Batches are moved to the module-level ``device``; the current batch loss
    is shown on the tqdm progress bar.

    Parameters:
        train_loader: iterable of ``(x, y)`` batches
        model: network to optimise (switched to training mode)
        optimizer: optimizer stepping the model's parameters
        loss_fn: callable ``(predictions, targets) -> scalar loss tensor``
    Returns:
        float: mean of the per-batch losses, or NaN if the loader yielded no
        batch. (Previously the losses were collected but never used.)
    """
    loop = tqdm(train_loader, leave=True)
    batch_losses = []
    model.train()

    for x, y in loop:
        x, y = x.to(device), y.to(device)
        out = model(x)
        loss = loss_fn(out, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() forces a device synchronisation, so read the value only once
        batch_loss = loss.item()
        batch_losses.append(batch_loss)

        # update the progress bar
        loop.set_postfix(loss=batch_loss)

    if not batch_losses:
        return float("nan")
    return sum(batch_losses) / len(batch_losses)


def main():
    """Train YOLOv1 on the 100-example subset and save a checkpoint.

    Each epoch first measures the mAP on the training set and then runs one
    training pass, so the printed mAP describes the model *before* that
    epoch's updates. Training stops, and the checkpoint is written, after the
    last epoch or as soon as the measured mAP exceeds 0.9.
    """
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    loss_fn = YoloLoss()

    if load_model:
        # map_location lets a checkpoint saved on a GPU be restored on a
        # CPU-only machine (and vice versa).
        load_checkpoint(torch.load(load_model_file, map_location=device), model, optimizer)

    train_dataset = VOCDataset('/home/100examples.csv', transform=transform, img_dir=img_dir, label_dir=label_dir)

    #test_dataset = VOCDataset('data/test.csv', transform=transform, img_dir=img_dir, label_dir=label_dir)

    # drop_last=True: a final batch smaller than batch_size is ignored
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory,
                              shuffle=False, drop_last=True)

    #test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory,
    #                         shuffle=False, drop_last=False)

    for epoch in range(epochs):
        print('epoch n°', epoch+1)
        pred_boxes, target_boxes = get_bboxes(train_loader, model, iou_threshold=0.5, threshold=0.4, device=device)
        mean_avg_prec = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5, box_format='midpoint')
        print(f'Train mAP: {mean_avg_prec}')

        train_fn(train_loader, model, optimizer, loss_fn)

        # save the model on the last epoch or once the mAP target is reached
        if epoch==epochs-1 or mean_avg_prec>0.9:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            }
            save_checkpoint(checkpoint, filename="Yolov1_100images_optim_mAP_0_90.path.tar")
            time.sleep(10)
            break



In [20]:
# Run the training loop defined above; progress and the train mAP are printed per epoch.
main()

epoch n° 1
Train mAP: 0.0


100%|██████████| 6/6 [04:18<00:00, 43.12s/it, loss=786]

epoch n° 2





Train mAP: 0.0


100%|██████████| 6/6 [04:12<00:00, 42.00s/it, loss=451]

epoch n° 3





Train mAP: 0.0


100%|██████████| 6/6 [04:12<00:00, 42.05s/it, loss=322]

epoch n° 4





Train mAP: 0.011484049260616302


100%|██████████| 6/6 [04:14<00:00, 42.34s/it, loss=259]

epoch n° 5





Train mAP: 0.006903525907546282


100%|██████████| 6/6 [04:13<00:00, 42.31s/it, loss=203]

epoch n° 6





Train mAP: 0.05285713076591492


100%|██████████| 6/6 [04:14<00:00, 42.41s/it, loss=161]


epoch n° 7
Train mAP: 0.13054294884204865


100%|██████████| 6/6 [04:12<00:00, 42.16s/it, loss=129]

epoch n° 8





Train mAP: 0.24769273400306702


100%|██████████| 6/6 [04:16<00:00, 42.73s/it, loss=105]

epoch n° 9





Train mAP: 0.4340856075286865


100%|██████████| 6/6 [04:13<00:00, 42.30s/it, loss=87.2]

epoch n° 10





Train mAP: 0.5911478400230408


100%|██████████| 6/6 [04:10<00:00, 41.80s/it, loss=73.6]

epoch n° 11





Train mAP: 0.7088414430618286


100%|██████████| 6/6 [04:13<00:00, 42.23s/it, loss=62.8]


epoch n° 12
Train mAP: 0.7558184862136841


100%|██████████| 6/6 [04:15<00:00, 42.51s/it, loss=54.1]

epoch n° 13





Train mAP: 0.8355050683021545


100%|██████████| 6/6 [04:15<00:00, 42.58s/it, loss=46.6]

epoch n° 14





Train mAP: 0.9047496914863586


100%|██████████| 6/6 [04:12<00:00, 42.10s/it, loss=40.5]


=> Saving checkpoint...
Successfully saved !
