In [380]:
import torch
from torchvision.ops import nms, box_iou


In [454]:
def non_maximum_suppression(pred, iou_threshold=0.5, C=20):
    """
    pred: list of processed yolo output predictions (num boxes, 7)
    (max_confidence, x1, y1, x2, y2, max_probability, class_idx)
    """
    nms_boxes = []
    
    # perform nms on each class independently
    for i in range(C):
        # get all predicted boxes belonging in this class
        boxes = pred[pred[:, -1] == i]
        
        if boxes.size(0) == 0:
            continue
        
        xyxy = boxes[:, 1:5]
        scores = boxes[:, 0]
        nms_indices = nms(xyxy, scores, iou_threshold=iou_threshold)
        nms_bboxes = boxes[nms_indices]
        nms_boxes.append(nms_bboxes)
        
    nms_boxes = torch.cat(nms_boxes)
    return nms_boxes

In [626]:
def process_yolo_output(output, 
                        confidence_threshold=0.5,
                        iou_threshold=0.5, S=7, B=2, C=20):
    """
    pred: (N, S, S, 5B+C)
    Process output of YOLO
    
    Returns: N x Boxes x tensor(confidence, x1, y1, x2, y2, probability, class)
    """
    N = output.size(0)
    assert output.shape == torch.Size([N, S, S, 5 * B + C])
    
    processed_output = []
    output = output.view(-1, S * S, 5 * B + C)
    
    for i in range(N):
        bboxes = []
        for cell_idx in range(S * S):
            yolo = output[i][cell_idx] # (5*B+C)
            boxes = yolo[:-C]
            probabilities = yolo[-C:]
            
            # select responsible box (box with the highest confidence)
            boxes = boxes.view(B, -1)            
            confidences = boxes[:, 0]
            max_confidence, max_confidence_idx = confidences.max(0) 
            responsible_box = boxes[max_confidence_idx]
            
            if max_confidence < confidence_threshold:
                continue
            
            # convert xywh to xyxy
            xywh = responsible_box[1:]
            gx = cell_idx % S
            gy = cell_idx // S
            
            x = xywh[0]
            y = xywh[1]
            w = xywh[2]
            h = xywh[3]
            
            x_c = (gx + x) / S
            y_c = (gy + y) / S

            
            x1 = x_c - w / 2
            y1 = y_c - h / 2
            x2 = x_c + w / 2
            y2 = y_c + h / 2
            
            # get class idx + probability
            max_probability, class_idx = probabilities.max(0)
            
            box = torch.stack([max_confidence, x1, y1, x2, y2, max_probability, class_idx])
            box[1:5] = box[1:5].clamp(0, 1) # make sure xys are between 0 and 1
            bboxes.append(box)
        
        bboxes = torch.stack(bboxes)
        
        # perform NMS on the bboxes
        nms_bboxes = non_maximum_suppression(bboxes, iou_threshold=iou_threshold, C=C)    
        processed_output.append(nms_bboxes)
            
    return processed_output

In [876]:
def get_bboxes(output, 
               confidence_threshold=0.5,
               iou_threshold=0.5,
               S=7, B=2, C=20):
    """
    output: SxSx(5B+C)
    confidence_threshold: select boxes above this threshold
    iou_threshold: nms is applied at this threshold
    """
    bboxes = []
    output = output.view(S*S, -1)
    
    # for each cell, get bounding boxes above confidence threshold
    for cell_idx in range(S * S):
        # get upper left corner of cell [0, S)
        gx = cell_idx % S
        gy = cell_idx // S
        
        cell = output[cell_idx] # (5B + C)
        
        # get probabilities + localization tensor
        probabilities = cell[-C:]
        boxes = cell[:-C]
        
        # get class idx and probability -> select max probability
        class_probability, class_idx = probabilities.max(0)
        
        # get bounding boxes above threshold
        boxes = boxes.view(B, -1)
        boxes = boxes[boxes[:, 0] >= confidence_threshold]
        
        # no bounding boxes have predicted confidence above threshold
        if len(boxes) == 0:
            continue
        
        # get properties of bounding box
        x_c = boxes[:, 1]
        y_c = boxes[:, 2]
        w = boxes[:, 3]
        h = boxes[:, 4]
        
        # convert to yolo format
        x = (gx + x_c) / S
        y = (gy + y_c) / S
        
        x1 = x - w / 2
        y1 = y - h / 2
        x2 = x + w / 2
        y2 = y + h / 2
        
        # construct bounding box tensor (confidence, x1, y1, x2, y2, class probability, class)
        x1y1x2y2 = torch.stack((x1, y1, x2, y2), dim=1).clamp(0, 1)
        confidences = boxes[:, 0:1]
        num_bboxes = confidences.size(0)
        class_probability = class_probability.repeat(num_bboxes, 1)
        class_idx = class_idx.repeat(num_bboxes, 1)
        bbox = torch.cat((confidences, x1y1x2y2, class_probability, class_idx), dim=1)
        bboxes.append(bbox)
        
    # concat bboxes into one tensor
    bboxes = torch.cat(bboxes, dim=0)
    
    # perform nms on bounding boxes to filter down number of bounding boxes
    bboxes = non_maximum_suppression(bboxes, iou_threshold=iou_threshold, C=C)
    
    return bboxes
        
        
        
        
        
        
        
        

In [879]:
pred = torch.tensor([
    [[[0.9, 0.5, 0.5, .1, .1, 0.8, 0.5, 0.5, 0.1, 0.1, 0.3, 0.7], [0.8, 0.5, 0.5, 0.1, 0.1, 0, 0.5, 0.5, 0.1, 0.1, 0.1, 0.9]],
     [[0, 0.5, 0.5, 0.1, 0.1, 0, 0.5, 0.5, 0.1, 0.1, 0.3, 0.7], [0, 0, 0, 1, 1, 0, 0.5, 0.5, 0.1, 0.1, 0.3, 0.7]]]
])

pred.squeeze_(0)
get_bboxes(pred, S=2, B=2, C=2)

0 0
1 0
0 1
1 1
tensor([[0.9000, 0.2000, 0.2000, 0.3000, 0.3000, 0.7000, 1.0000],
        [0.8000, 0.7000, 0.2000, 0.8000, 0.3000, 0.9000, 1.0000]])


In [870]:
# pred = torch.tensor([
#     [[[1, 0.5, 0.5, 0.1, 0.1, 0.8, 0.5, 0.5, 0.1, 0.1, 0.3, 0.7], [1, 0.5, 0.5, 0.1, 0.1, 0.8, 0.5, 0.5, 0.1, 0.1, 0.3, 0.7]],
#      [[1, 0.5, 0.5, 0.1, 0.1, 0.8, 0.5, 0.5, 0.1, 0.1, 0.3, 0.7], [1, 0.5, 0.5, 0.1, 0.1, 0.8, 0.5, 0.5, 0.1, 0.1, 0.3, 0.7]]]
# ])

pred = torch.tensor([
    [[[0.9, 0.5, 0.5, 1, 1, 0.8, 0.5, 0.5, 0.1, 0.1, 0.3, 0.7], [0.8, 0.5, 0.5, 0.1, 0.1, 0, 0.5, 0.5, 0.1, 0.1, 0.1, 0.9]],
     [[0, 0.5, 0.5, 0.1, 0.1, 0, 0.5, 0.5, 0.1, 0.1, 0.3, 0.7], [0, 0, 0, 1, 1, 0, 0.5, 0.5, 0.1, 0.1, 0.3, 0.7]]]
])

In [871]:
pred.shape

torch.Size([1, 2, 2, 12])

In [834]:
pred = process_yolo_output(pred, S=2, B=2, C=2)
pred

[tensor([[0.9000, 0.0000, 0.0000, 0.7500, 0.7500, 0.7000, 1.0000],
         [0.8000, 0.7000, 0.2000, 0.8000, 0.3000, 0.9000, 1.0000]])]

In [801]:
# def process_yolo_target(target, S=7, C=20):
#     return process_yolo_output(target, S=S, B=1, C=C)

In [785]:
target = [
    torch.tensor([[1.0000, 0.7000, 0.2000, 0.8000, 0.3000, 0.8000, 1.0000],
         [1.0000, 0.0000, 0.0000, 0.7500, 0.7500, 0.7000, 1.0000]])
]

target

[tensor([[1.0000, 0.7000, 0.2000, 0.8000, 0.3000, 0.8000, 1.0000],
         [1.0000, 0.0000, 0.0000, 0.7500, 0.7500, 0.7000, 1.0000]])]

In [786]:
def average_precision(pred, target, c, iou_threshold=0.5):
    # number of targets
    TP_FN = 0
    N = len(pred)
    TP_FP = [] # will be condensed to one-hot vector
    scores = [] # corresponding score for each TP_FN label
    
    # for each image, label predictions as TP or FP (store in a one-hot vector 1=TP, 0=FP)
    for i in range(N):
        pred_boxes = pred[i]
        target_boxes = target[i]
        
        # get boxes classified in the specified class
        pred_boxes = pred_boxes[pred_boxes[:, -1] == c]
        target_boxes = target_boxes[target_boxes[:, -1] == c]
        
        # continue if TP_FN for this image is 0
        if len(target_boxes) == 0:
            continue
        
        num_TP_FN = target_boxes.size(0)
        num_TP_FP = pred_boxes.size(0)
        TP_FP_vec = torch.zeros(num_TP_FP)

        # number of target boxes = TP_FN
        TP_FN += num_TP_FN

        # calculate pairwise IOU between predicted boxes and target boxes -> highest IOU above threshold gets classified
        # as TP, duplicate bounding boxes + boxes below IOU threshold are classified as FP
        pred_x1y1x2y2 = pred_boxes[:, 1:5]
        target_x1y1x2y2 = target_boxes[:, 1:5]
        
        ious = box_iou(target_x1y1x2y2, pred_x1y1x2y2)        
        max_ious, max_iou_indices = ious.max(dim=1)        
        for max_iou, max_iou_idx in zip(max_ious, max_iou_indices):
            # this is the best bounding box for each target, label TP if it is above IOU threshold,
            # rest of bounding boxes are duplicates
            if max_iou >= iou_threshold:
                TP_FP_vec[max_iou_idx] = 1
        
        # get scores for each bounding box
        confidences = pred_boxes[:, 0]
        
        # append TP_FP and scores
        TP_FP.append(TP_FP_vec)
        scores.append(confidences)
    
    # no targets; AP = 0
    if len(TP_FP) == 0:
        return 0
    
    # condense TP_FP and scores into one vector
    TP_FP = torch.cat(TP_FP)
    scores = torch.cat(scores)
    
    # sort by score
    _, sorted_score_indices = scores.sort(descending=True)
    TP_FP = TP_FP[sorted_score_indices]
    
    # get cumulative TPs
    TP = torch.cumsum(TP_FP, dim=0)
    
    # calculate precision and recall
    # precision = TP / (TP + FP) (predictions), recall = TP / (TP + FN) (ground truths)
    # P = torch.cumsum(torch.ones_like(TP), dim=0)
    P = torch.arange(1, TP.size(0) + 1)
    
    precision = TP / P
    recall = TP / TP_FN
    
    # add 1 in front of precision and 0 in front of recall
    precision = torch.cat((torch.tensor([1]), precision))
    recall = torch.cat((torch.tensor([0]), recall))
    
    # area under PR-curve
    auc = torch.trapezoid(precision, recall)
    
    return auc.item()

In [787]:
def mean_average_precision(pred, target, iou_threshold=0.5, C=20):
    """
    pred, target: N x bounding boxes x 7
    """
    assert len(pred) == len(target)
        
    total_ap = 0
    
    # for each class, calculate the average precision (AUC of PR curve)
    for i in range(C):
        print(i)
        ap = average_precision(pred, target, iou_threshold=iou_threshold, c=i)
        print(ap)
        total_ap += ap
    
    # compute the mean of the APs
    mAP = total_ap / C
    return mAP

In [788]:
mean_average_precision(pred, target, C=2)

0
0
1
1.0


0.5

In [789]:
def mean_average_precision(pred, target, iou_threshold=0.5, C=20):
    """
    pred, target: N x bounding boxes x 7
    """
    assert len(pred) == len(target)
    
    
    print(pred, target)
    aps = []
    
    for i in range(C):
        # find all boxes target is in the current class
        print(i)
        ground_truths = 0
        labels = []
        scores = []
        
        for j in range(len(pred)):
            pred_bboxes = pred[j]
            target_bboxes = target[j]
            
            pred_bboxes = pred_bboxes[pred_bboxes[:, -1] == i]
            target_bboxes = target_bboxes[target_bboxes[:, -1] == i]
            
            ground_truths += len(target_bboxes)
            
            if pred_bboxes.size(0) == 0:
                continue
                
            print(pred_bboxes[:, 1:5], target_bboxes[:, 1:5])
            
            tpfp_labels = torch.zeros(pred_bboxes.size(0))
            ious = box_iou(pred_bboxes[:, 1:5], target_bboxes[:, 1:5])
            max_ious, max_ious_indices = ious.max(0)
            
            print("max ious")
            print(ious)
            print(max_ious, max_ious_indices)
            print(tpfp_labels)
            
            # maybe there's a mistake here!
            for max_iou, max_iou_idx in zip(max_ious, max_ious_indices):
                if max_iou >= iou_threshold:
                    print(max_iou_idx)
                    tpfp_labels[max_iou_idx] = 1
            
            print(tpfp_labels)
            
            labels.append(tpfp_labels)
            score = pred_bboxes[:, 0]
            scores.append(score)
        
        if len(labels) == 0:
            aps.append(0)
            continue
            
        labels = torch.cat(labels)
        scores = torch.cat(scores)
        
        print("scores labels")
        print(labels, scores)
        
        # sort by confidence
        sorted_scores, scores_indices = scores.sort()
        print(sorted_scores, scores_indices)
        
        labels = labels[scores_indices]
        
        print(labels)
        
        TPs = torch.cumsum(labels, dim=0)
        rolling_predicted = torch.cumsum(torch.ones(len(TPs)), dim=0)
        recall = labels / ground_truths
        precision = TPs / rolling_predicted
        
        print(precision, recall)
        
        precision = torch.cat((torch.tensor([1]), precision))
        recall = torch.cat((torch.tensor([0]), recall))
        
        print(precision, recall)
        
        ap = torch.trapezoid(precision, recall)
        
        print("ap")
        print(ap)
        
        aps.append(ap.item())
        
    mean_ap = sum(aps) / len(aps)
    
    return mean_ap
        
        
        
    

In [790]:
mean_average_precision(pred, target, C=2)

[tensor([[0.9000, 0.0000, 0.0000, 0.7500, 0.7500, 0.7000, 1.0000],
        [0.8000, 0.7000, 0.2000, 0.8000, 0.3000, 0.9000, 1.0000]])] [tensor([[1.0000, 0.7000, 0.2000, 0.8000, 0.3000, 0.8000, 1.0000],
        [1.0000, 0.0000, 0.0000, 0.7500, 0.7500, 0.7000, 1.0000]])]
0
1
tensor([[0.0000, 0.0000, 0.7500, 0.7500],
        [0.7000, 0.2000, 0.8000, 0.3000]]) tensor([[0.7000, 0.2000, 0.8000, 0.3000],
        [0.0000, 0.0000, 0.7500, 0.7500]])
max ious
tensor([[0.0088, 1.0000],
        [1.0000, 0.0088]])
tensor([1., 1.]) tensor([1, 0])
tensor([0., 0.])
tensor(1)
tensor(0)
tensor([1., 1.])
scores labels
tensor([1., 1.]) tensor([0.9000, 0.8000])
tensor([0.8000, 0.9000]) tensor([1, 0])
tensor([1., 1.])
tensor([1., 1.]) tensor([0.5000, 0.5000])
tensor([1., 1., 1.]) tensor([0.0000, 0.5000, 0.5000])
ap
tensor(0.5000)


0.25

In [791]:
# this result above is incorrect! -> should be 0.5

In [760]:
a = torch.tensor([[1, 2, 3,], 
                  [4, 5, 6]])
a

tensor([[1, 2, 3],
        [4, 5, 6]])

In [761]:
a.max(1)

torch.return_types.max(
values=tensor([3, 6]),
indices=tensor([2, 2]))