In [None]:
import torch
from collections import Counter
import numpy as np

In [None]:
def open_yolo_files(image_name, label_path, pred_path):
    """
    Load the ground truth and predicted bounding boxes that belong to an image.

    Both files are expected to be space-delimited text files named after the
    image (same base name, ".txt" extension), one bounding box per line.

    Parameters:
        image_name (str): filename of the image with path included.
        label_path (str): path to the directory with the manual annotation files.
        pred_path (str): path to the directory with the prediction files.

    Returns:
        list: all bounding boxes from the ground truth .txt, 5 values per box
            (class, x or x1, y or x2, w or y1, h or y2).
        list: all bounding boxes from the predictions .txt, 6 values per box
            (class, confidence, x or x1, y or x2, w or y1, h or y2).
        (None, None) is returned when either file cannot be read or does not
        contain a whole number of boxes.
    """
    import os

    # Strip only the final extension so names such as "img.v2.jpg" map to "img.v2.txt"
    name = os.path.splitext(os.path.basename(image_name))[0]
    # os.path.join works whether or not the directory ends with a separator
    label_file = os.path.join(label_path, name + ".txt")
    pred_file = os.path.join(pred_path, name + ".txt")

    try:
        label_boxes = np.genfromtxt(label_file, delimiter=' ')
        pred_boxes = np.genfromtxt(pred_file, delimiter=' ')
        # reshaping is necessary because a file with a single box is read as a 1-D array
        label_boxes = label_boxes.reshape(-1, 5)
        pred_boxes = pred_boxes.reshape(-1, 6)
    except (OSError, ValueError):
        # Missing/unreadable file (OSError) or malformed content (ValueError):
        # callers get (None, None) for images that cannot be evaluated.
        return None, None

    return label_boxes.tolist(), pred_boxes.tolist()

In [None]:
def intersection_over_union(boxes_preds, boxes_labels, preds_format="midpoint", labels_format="midpoint"):
    """
    Video explanation of this function:
    https://youtu.be/XXYG5ZWtjj0

    This function calculates intersection over union (iou) for ONE predicted
    box and ONE ground truth box.

    Parameters:
        boxes_preds (sequence of numbers): a single prediction, indexed as
            [class, confidence, x, y, w, h] when preds_format is "midpoint",
            otherwise as [class, confidence, x1, x2, y1, y2].
        boxes_labels (sequence of numbers): a single ground truth, indexed as
            [class, x, y, w, h] when labels_format is "midpoint",
            otherwise as [class, x1, y1, x2, y2].
        preds_format (str): "midpoint" for (x,y,w,h); any other value is treated as corners
        labels_format (str): "midpoint" for (x,y,w,h); any other value is treated as corners

    Returns:
        number: intersection over union of the two boxes, 0 when they do not
        overlap (same numeric type as the arithmetic on the inputs produces).
    """
    if preds_format == "midpoint":
        # 0 class, 1 conf, 2 x, 3 y, 4 w, 5 h
        boxes_preds_x1 = boxes_preds[2] - boxes_preds[4]/2
        boxes_preds_y1 = boxes_preds[3] - boxes_preds[5]/2
        boxes_preds_x2 = boxes_preds[2] + boxes_preds[4]/2
        boxes_preds_y2 = boxes_preds[3] + boxes_preds[5]/2
    else:
        # 0 class, 1 conf, 2 x1, 3 x2, 4 y1, 5 y2
        boxes_preds_x1 = boxes_preds[2]
        boxes_preds_y1 = boxes_preds[4]
        boxes_preds_x2 = boxes_preds[3]
        boxes_preds_y2 = boxes_preds[5]

    if labels_format == "midpoint":
        # 0 class, 1 x, 2 y, 3 w, 4 h
        boxes_labels_x1 = boxes_labels[1] - boxes_labels[3]/2
        boxes_labels_y1 = boxes_labels[2] - boxes_labels[4]/2
        boxes_labels_x2 = boxes_labels[1] + boxes_labels[3]/2
        boxes_labels_y2 = boxes_labels[2] + boxes_labels[4]/2
    else:
        # 0 class, 1 x1, 2 y1, 3 x2, 4 y2
        # NOTE(review): predictions in corner format are read as (x1, x2, y1, y2)
        # while labels are read as (x1, y1, x2, y2) -- confirm this asymmetry is intended.
        boxes_labels_x1 = boxes_labels[1]
        boxes_labels_y1 = boxes_labels[2]
        boxes_labels_x2 = boxes_labels[3]
        boxes_labels_y2 = boxes_labels[4]

    # Corners of the overlapping rectangle (if there is one)
    x1 = max(boxes_preds_x1, boxes_labels_x1)
    y1 = max(boxes_preds_y1, boxes_labels_y1)
    x2 = min(boxes_preds_x2, boxes_labels_x2)
    y2 = min(boxes_preds_y2, boxes_labels_y2)

    # Clamp each side at 0: for disjoint boxes (x2 - x1) and/or (y2 - y1) is negative,
    # and without clamping two negative sides would multiply into a positive "intersection".
    intersection = max(0, x2 - x1) * max(0, y2 - y1)
    boxes_preds_area = abs((boxes_preds_x2 - boxes_preds_x1) * (boxes_preds_y2 - boxes_preds_y1))
    boxes_labels_area = abs((boxes_labels_x2 - boxes_labels_x1) * (boxes_labels_y2 - boxes_labels_y1))

    # 1e-6 keeps the division defined when both boxes have zero area
    return intersection / (boxes_preds_area + boxes_labels_area - intersection + 1e-6)

def mean_average_precision(pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=4):
    """
    Video explanation of this function:
    https://youtu.be/FppOzcDvaDI

    This function calculates mean average precision (mAP)

    Parameters:
        (idx is the image index)
        pred_boxes (list): list of lists containing all bboxes with each bboxes
        specified as [idx, class_prediction, prob_score, ...]. ALL bboxes of ALL images.
        true_boxes (list): Similar as pred_boxes except all the correct ones with
        each bboxes specified as [idx, true_class, ...]. ALL bboxes of ALL images.
        iou_threshold (float): threshold where predicted bboxes is correct
        box_format (str): "midpoint" or "corners" used to specify bboxes
        (applied to both the predictions and the ground truths)
        num_classes (int): number of classes

    Returns:
        tensor: mAP value across all classes that have ground truths, given a
        specific IoU threshold (0 when no class has any ground truth)
        list: class_true_bboxes[0...num_classes - 1] with the number of true bboxes for that class
        list: class_precision[0...num_classes - 1] with the cumulative precision tensor for each class
        list: class_recall[0...num_classes - 1] with the cumulative recall tensor for each class
        list: class_TP[0...num_classes - 1] with the cumulative TP tensor for each class
        list: class_FP[0...num_classes - 1] with the cumulative FP tensor for each class
        The cumulative tensors have one entry per detection of the class, ordered by
        decreasing confidence. Entries of classes without ground truths stay 0.
    """

    # list storing all AP for respective classes
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    # this process is calculated for EACH respective class
    class_TP = [0]*num_classes
    class_FP = [0]*num_classes
    class_precision = [0]*num_classes
    class_recall = [0]*num_classes
    class_true_bboxes = [0]*num_classes

    for c in range(num_classes):
        # Keep only the predictions and targets that belong to the current class c
        detections = [detection for detection in pred_boxes if detection[1] == c]
        ground_truths = [true_box for true_box in true_boxes if true_box[1] == c]

        total_true_bboxes = len(ground_truths)
        class_true_bboxes[c] = total_true_bboxes

        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        # Group the ground truths by image index once, so each detection only
        # looks at the boxes of its own image instead of rescanning all of them.
        gts_by_image = {}
        for gt in ground_truths:
            gts_by_image.setdefault(gt[0], []).append(gt)

        # One "already matched" flag per ground truth, e.g. img 0 with 3 boxes
        # and img 1 with 5 boxes gives {0: [False]*3, 1: [False]*5}
        matched = {img: [False] * len(gts) for img, gts in gts_by_image.items()}

        # sort by box probabilities which is index 2 of detections
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))

        for detection_idx, detection in enumerate(detections):
            # Only the ground truths that have the same image idx as the detection
            ground_truth_img = gts_by_image.get(detection[0], [])

            best_iou = 0
            best_gt_idx = -1

            for idx, gt in enumerate(ground_truth_img):
                # Drop only the image index: intersection_over_union reads the
                # prediction as [class, conf, 4 coords] and the label as [class, 4 coords].
                iou = intersection_over_union(
                    detection[1:],
                    gt[1:],
                    preds_format=box_format,
                    labels_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            # A detection is a true positive only when it overlaps enough with a
            # ground truth that no higher-confidence detection has claimed yet;
            # everything else (low IoU or duplicate match) is a false positive.
            if (
                best_gt_idx >= 0
                and best_iou > iou_threshold
                and not matched[detection[0]][best_gt_idx]
            ):
                TP[detection_idx] = 1
                matched[detection[0]][best_gt_idx] = True
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        class_TP[c] = TP_cumsum
        FP_cumsum = torch.cumsum(FP, dim=0)
        class_FP[c] = FP_cumsum
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        class_recall[c] = recalls
        precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
        class_precision[c] = precisions
        # Prepend the (recall=0, precision=1) point so the curve starts on the y axis;
        # float literals keep the dtype aligned with the float tensors being extended.
        precisions = torch.cat((torch.tensor([1.0]), precisions))
        recalls = torch.cat((torch.tensor([0.0]), recalls))
        # torch.trapz for numerical integration
        average_precisions.append(torch.trapz(precisions, recalls))

    # No class had any ground truth: there is nothing to average over
    if not average_precisions:
        mean_ap = torch.tensor(0.0)
    else:
        mean_ap = sum(average_precisions) / len(average_precisions)

    return mean_ap, class_true_bboxes, class_precision, class_recall, class_TP, class_FP