In [1]:
import json

import numpy as np
import pandas as pd

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

In [2]:
# Number of ground-truth annotations kept for the reduced test set; the same
# count is used when drawing the fake prediction scores further down.
sample_size = 50

## COCO

### Inspect the annotation JSON

In [3]:
# Read the val2017 instance annotation file into `ann_dict`.
with open('./instances_val2017.json', mode='r') as ann_file:
    ann_dict = json.load(ann_file)

In [4]:
# List the top-level sections of the annotation file.
ann_dict.keys()

dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])

In [5]:
# Number of image records in the full annotation file.
len(ann_dict['images'])

5000

In [6]:
# id_list = []
# for i in range(sample_size):
#     id_list.append(ann_dict['annotations'][i]['id'])

In [7]:
# Number of annotation records in the full annotation file.
len(ann_dict['annotations'])

36781

In [8]:
# Tabulate the annotation records for inspection.
# NOTE: `df` is rebound to the image table in a later cell.
df = pd.DataFrame(ann_dict['annotations'])

In [9]:
# Annotation ids in ascending order, to see the range of id values.
df['id'].sort_values()

20437             283
20438             381
20439             567
20440             760
20441             810
             ...     
36648    908400571893
36718    908600097585
36633    908600370042
36756    908600434230
36742    908800474293
Name: id, Length: 36781, dtype: int64

### Create small GT set

In [10]:
# Truncate the ground truth to the first `sample_size` annotations and store
# the shortened list back into `ann_dict`.
small_ann_dict = ann_dict['annotations'][:sample_size]
ann_dict['annotations'] = small_ann_dict
assert sample_size == len(ann_dict['annotations'])

In [11]:
# Distinct image ids referenced by the sampled annotations.
image_ids = {ann['image_id'] for ann in small_ann_dict[:sample_size]}

In [12]:
# Tabulate the image records. This rebinds `df`, which previously held the
# annotation table.
df = pd.DataFrame(ann_dict['images'])

In [13]:
# Keep only the image records whose id belongs to a sampled annotation and
# convert them back to a list of dicts.
in_sample = df['id'].isin(image_ids)
small_images = df.loc[in_sample].to_dict(orient='records')

In [14]:
# Zero out the recorded dimensions of every sampled image record.
# NOTE(review): the reason for blanking height/width is not stated here;
# presumably the bbox evaluation does not read them - confirm.
# Iterate over the records themselves instead of range(len(image_ids)), so the
# loop cannot index past the end of `small_images` when an image id has no
# matching image record.
for image_record in small_images:
    image_record['height'] = 0
    image_record['width'] = 0

In [15]:
# Replace the full image list with the reduced one.
ann_dict['images'] = small_images

In [16]:
# Persist the reduced ground truth; the next section loads this file through
# the COCO API.
with open('./test.json', mode='w') as out_file:
    json.dump(ann_dict, out_file)

### Load GT

In [17]:
# Load the reduced ground truth written by the previous section.
# annFile = './instances_val2017.json'
annFile = './test.json'
cocoGt = COCO(annotation_file=annFile)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [18]:
# Collect the id of every annotation held in the COCO index.
all_id = list(cocoGt.anns)
# assert len(all_id) == 36781
assert sample_size == len(all_id)

In [19]:
# Fetch the annotation records for all collected ids.
all_gt = cocoGt.loadAnns(all_id)

In [20]:
# Tabulate the loaded ground-truth annotations.
gt_df = pd.DataFrame(all_gt)

In [21]:
# Preview the first rows of the ground-truth table.
gt_df.head()

Unnamed: 0,segmentation,area,iscrowd,image_id,bbox,category_id,id
0,"[[510.66, 423.01, 511.72, 420.03, 510.45, 416....",702.10575,0,289343,"[473.07, 395.93, 38.65, 28.67]",18,1768
1,"[[289.74, 443.39, 302.29, 445.32, 308.09, 427....",27718.4763,0,61471,"[272.1, 200.23, 151.97, 279.77]",18,1773
2,"[[147.76, 396.11, 158.48, 355.91, 153.12, 347....",78969.3169,0,472375,"[124.71, 196.18, 372.85, 356.81]",18,2551
3,"[[260.4, 231.26, 215.06, 274.01, 194.33, 307.6...",108316.66515,0,520301,"[112.71, 154.82, 367.29, 479.35]",18,3186
4,"[[200.61, 253.97, 273.19, 318.49, 302.43, 336....",75864.5353,0,579321,"[200.61, 89.65, 400.22, 251.02]",18,3419


In [22]:
# Each ground-truth box must carry its own unique annotation id.
assert gt_df['id'].is_unique

In [23]:
# image_id may repeat across rows: several boxes can belong to one image.
same_image = gt_df['image_id'] == 78823
gt_df.loc[same_image]

Unnamed: 0,segmentation,area,iscrowd,image_id,bbox,category_id,id
7,"[[251.24, 238.5, 252.06, 235.22, 252.06, 234.4...",16694.4047,0,78823,"[197.97, 117.22, 170.45, 222.07]",18,7544
29,"[[69.7, 142.39, 66.32, 134.52, 61.83, 128.9, 5...",9473.41725,0,78823,"[58.45, 81.69, 131.53, 125.9]",64,21771


### Generate fake predictions and save as JSON

In [24]:
# Annotation ids used as the basis for the fake predictions.
ids = all_id[:sample_size]

In [25]:
# Ground-truth annotation records that the fake predictions are derived from.
anns = cocoGt.loadAnns(ids)

In [26]:
# Seed NumPy's global RNG so the generated predictions are repeatable.
np.random.seed(seed=256)
# One confidence score per prediction, uniform in [0.4, 0.97).
scores = (0.97 - 0.4) * np.random.random((sample_size,)) + 0.4

fake_pred = []
imgIds = []
for i, gt_ann in enumerate(anns):
    # Shift every ground-truth bbox value by 2 * U[1, 3), i.e. a positive
    # offset in [2, 6).
    # NOTE(review): the score line follows the (hi - lo) * r + lo form; if a
    # symmetric jitter in [-1, 1) was intended here the trailing term would be
    # -1 rather than +1. Confirm before changing: it alters the AP values.
    offset = 2 * ((1 - (-1)) * np.random.random(4) + 1)
    fake_pred.append({
        'image_id': gt_ann['image_id'],
        'category_id': gt_ann['category_id'],
        'bbox': (gt_ann['bbox'] + offset).tolist(),
        'score': scores[i],
    })
    imgIds.append(gt_ann['image_id'])

In [27]:
# Write the fake predictions to disk so they can be loaded as COCO results.
with open('fake_pred.json', mode='wt') as out_file:
    json.dump(fake_pred, out_file)

### Load fake predictions

In [28]:
# Initialize the COCO detections API from the fake prediction file.
cocoDt=cocoGt.loadRes('./fake_pred.json')

Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!


In [29]:
# Run the reference COCO bbox evaluation on the fake detections.
cocoEval = COCOeval(cocoGt, cocoDt, iouType='bbox')
# Limit the evaluation to the images that received a fake prediction.
cocoEval.params.imgIds = imgIds
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.51098362554226495202414071172825
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.96558227251296557813731169517268
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.44306773534496302069385365030030


## FastEstimator (FE) mAP

### Create pred

In [31]:
# One batch entry per fake prediction, with a single predicted box each.
batch_size = len(fake_pred)
pred_bbox_per_image = 1

In [32]:
# Each prediction entry has `pred_bbox_per_image` rows of 6 values
# (4 bbox values, category id, score - filled in by the next cell).
pred_shape = (pred_bbox_per_image, 6)
# Build one independent array per batch entry. `[arr] * batch_size` would put
# the same array object in every slot, so an in-place write to one entry would
# silently show up in all of them.
pred = [np.zeros(pred_shape) for _ in range(batch_size)]

In [33]:
# Turn each fake prediction into a (1, 6) row: 4 bbox values, category id,
# score, and remember the image id it belongs to.
img_ids = []
for i, item in enumerate(fake_pred):
    img_ids.append(item['image_id'])
    row = np.concatenate([item['bbox'], [item['category_id']], [item['score']]])
    pred[i] = row[np.newaxis, :]

In [34]:
# There must be exactly one prediction entry per batch element.
assert len(pred) == batch_size

In [35]:
# Every prediction entry must have the expected (boxes, 6) shape.
assert all(entry.shape == pred_shape for entry in pred)

### Create bbox

#### small set

In [36]:
# Number of ground-truth annotations in the small set.
len(anns)

50

In [37]:
# Fields available on a single annotation record.
anns[0].keys()

dict_keys(['segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id', 'id', 'ignore', '_ignore'])

In [38]:
# Show the bbox and category id of the last annotation. Index it explicitly
# instead of through `i`, which is never set in this cell and only exists as a
# leftover loop variable from an earlier cell.
last_ann = anns[-1]
print(last_ann['bbox'])
print(last_ann['category_id'])

[97.28, 202.37, 60.28, 39.91]
72


In [39]:
# Sanity-check the fields copied into the ground-truth array: a 4-value bbox
# and an integer category id.
for i in range(batch_size):
    ann = anns[i]
    assert np.array(ann['bbox']).shape == (4,)
    assert isinstance(ann['category_id'], int)

In [40]:
# Number of ground-truth boxes stored per batch entry.
bbox_per_image = 1

In [41]:
# Zero-initialized ground-truth array: one slot per batch entry, each holding
# `bbox_per_image` rows of 5 values (4 bbox values plus the category id).
bbox_shape = (batch_size, bbox_per_image, 5)
bbox = np.zeros(bbox_shape)

In [42]:
# Fill row 0 of each batch entry with the annotation's bbox followed by its
# category id.
for i, item in enumerate(anns):
    gt_row = np.concatenate([item['bbox'], [item['category_id']]])
    bbox[i, 0] = gt_row

In [43]:
# Filling must not have changed the array's shape.
assert bbox.shape == bbox_shape

#### full set

### Calculate mAP

In [44]:
from fastestimator.trace.metric import MeanAveragePrecision

In [45]:
# Instantiate the FastEstimator mean-average-precision trace.
# NOTE(review): num_classes=90 presumably reflects the COCO category id range - confirm.
mAP = MeanAveragePrecision(num_classes=90)

In [46]:
# State dict handed to the trace hooks: predictions and ground-truth boxes.
data = {'pred': pred, 'bbox': bbox}

In [47]:
# Call the trace's epoch-begin hook with the shared data dict.
mAP.on_epoch_begin(data)

In [48]:
# Call the trace's batch-begin hook with the shared data dict.
mAP.on_batch_begin(data)

In [49]:
# Call the trace's batch-end hook with the shared data dict.
mAP.on_batch_end(data)

In [50]:
# Call the trace's epoch-end hook; afterwards `data` also carries the
# 'mAP', 'AP50' and 'AP75' entries shown in the next cell.
mAP.on_epoch_end(data)

In [51]:
# Inspect which entries the data dict holds after the hooks have run.
data.keys()

dict_keys(['pred', 'bbox', 'mAP', 'AP50', 'AP75'])

In [52]:
# Print the mAP with 32 decimals for comparison against the COCOeval summary.
# The value gets its own name instead of rebinding `mAP`, which holds the
# MeanAveragePrecision trace; reusing that name would make the trace-hook
# cells fail if they were re-run after this one.
mean_ap = data['mAP']
print(f"{mean_ap:.32f}")

0.51098362554226495202414071172825


In [53]:
# Print AP50 with 32 decimals for comparison against the COCOeval summary.
ap50 = data['AP50']
print(f"{ap50:.32f}")

0.96558227251296557813731169517268


In [54]:
# Print AP75 with 32 decimals for comparison against the COCOeval summary.
ap75 = data['AP75']
print(f"{ap75:.32f}")

0.44306773534496302069385365030030
