In [1]:
import pandas as pd
from average_precision import Box, evaluate_detector

# Load bounding boxes

In [2]:
# Ground-truth annotations: every CSV row is counted as one bounding box.
G = pd.read_csv('../data_utils/voc07_test.csv')
print('number of ground truth boxes:', G.shape[0])

number of ground truth boxes: 12032


In [3]:
# Detector output: every CSV row is counted as one predicted bounding box.
D = pd.read_csv('../voc_test_results.csv')
print('number of predicted boxes:', D.shape[0])

number of predicted boxes: 10166


# Separate boxes by label (and, for ground truth, by image)

In [4]:
def to_dict(df, separate_by_image=False):
    """Group the rows of `df` into `Box` objects, keyed by label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a 'label' column. When `separate_by_image` is true it
        must also have an 'image' column. Each row is handed to `Box`
        unchanged.
    separate_by_image : bool, default False
        If true, the boxes of each label are further grouped by the value
        of the row's 'image' column.

    Returns
    -------
    dict
        `{label: [Box, ...]}` when `separate_by_image` is false, otherwise
        `{label: {image: [Box, ...]}}`. Labels appear in the order returned
        by `df['label'].unique()`; boxes keep the row order of `df`.
    """
    grouped = {}

    for current_label in df['label'].unique():
        # only the rows that carry this label
        label_rows = df.loc[df['label'] == current_label]

        if separate_by_image:
            # nested mapping: image name -> boxes found on that image
            per_image = {}
            for _, record in label_rows.iterrows():
                per_image.setdefault(record['image'], []).append(Box(record))
            grouped[current_label] = per_image
        else:
            # flat list of every box with this label
            grouped[current_label] = [
                Box(record) for _, record in label_rows.iterrows()
            ]

    return grouped

In [5]:
%%time
# Ground truth is grouped by label and then by image; predictions are kept
# as one flat list per label.
ground_truth_boxes_by_label = to_dict(df=G, separate_by_image=True)
predicted_boxes_by_label = to_dict(df=D, separate_by_image=False)

CPU times: user 4.48 s, sys: 12 ms, total: 4.5 s
Wall time: 4.49 s


# Compute average precision for each class

In [6]:
%%time
# Collect one (label, average precision) pair per ground-truth class.
APs = []
for label, ground_truth_boxes_by_img in ground_truth_boxes_by_label.items():

    # A class with ground truth but no predictions has no key in
    # predicted_boxes_by_label; fall back to an empty list instead of
    # raising KeyError so the class is still evaluated.
    # NOTE(review): confirm evaluate_detector accepts an empty prediction list.
    predicted_boxes = predicted_boxes_by_label.get(label, [])

    ap = evaluate_detector(ground_truth_boxes_by_img, predicted_boxes)
    APs.append((label, ap))

CPU times: user 120 ms, sys: 0 ns, total: 120 ms
Wall time: 116 ms


In [7]:
# Show the per-class results ordered by class label.
sorted(APs, key=lambda label_and_ap: label_and_ap[0])

[('aeroplane', 0.7038177319537617),
 ('bicycle', 0.7756083347391495),
 ('bird', 0.6736327788248895),
 ('boat', 0.5990263861667374),
 ('bottle', 0.32174624806179697),
 ('bus', 0.7756380000622487),
 ('car', 0.7654825762497467),
 ('cat', 0.84668875918242),
 ('chair', 0.48815697623397114),
 ('cow', 0.6790430480480868),
 ('diningtable', 0.7084242043726565),
 ('dog', 0.8193585703817695),
 ('horse', 0.8125456390979221),
 ('motorbike', 0.756661308329083),
 ('person', 0.649269763155247),
 ('pottedplant', 0.3506859776208424),
 ('sheep', 0.658439708782686),
 ('sofa', 0.7130594724919583),
 ('train', 0.8046170128233268),
 ('tvmonitor', 0.6508534808445335)]

In [8]:
# Mean average precision: unweighted mean of the per-class AP values.
mAP = sum(class_ap for _, class_ap in APs) / len(APs)
mAP

0.6776377988711416