In [1]:
import json
import numpy as np
import pandas as pd

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

In [2]:
# Raise pandas' display.max_colwidth to 800 so wide cells (e.g. bbox lists)
# are not truncated when frames are displayed.
pd.set_option('display.max_colwidth', 800)

In [3]:
# --- Run configuration ---------------------------------------------------
# Images handed to the metric per batch.
batch_size = 8

# Window of ground-truth annotations to keep: `sample_size` records,
# starting at position `offset`.
offset = 0
sample_size = 36335

# Exclusive upper bound that every GT box area is asserted against later on.
area_thresh = 1e5 ** 2

## COCO

### Inspect original val2017 GT json

In [4]:
# Read the original val2017 instance annotations into a plain dict.
with open('./instances_val2017.json', 'r') as gt_file:
    ann_dict = json.load(gt_file)

In [5]:
# Sanity check: the loaded file lists exactly 5000 images.
assert len(ann_dict['images']) == 5000

In [6]:
# Sanity check: 36781 annotations are present before any filtering.
assert len(ann_dict['annotations']) == 36781

### Remove iscrowd annotations

In [7]:
# Keep only the annotations whose iscrowd flag is 0.
# NOTE: reset_index() is called without drop=True, so the old row label is
# written into every record under the key 'index'. The key-count assertion
# that follows the COCO evaluation relies on that extra key being present.
df = pd.DataFrame(ann_dict['annotations'])
is_not_crowd = df['iscrowd'] == 0
non_crowd = df.loc[is_not_crowd].reset_index()
ann_dict['annotations'] = non_crowd.to_dict('records')
assert len(ann_dict['annotations']) == 36335

In [8]:
# Rebuild the frame from the filtered annotation records.
df = pd.DataFrame(ann_dict['annotations'])

In [9]:
# The remaining annotations reference 4952 distinct images (fewer than the
# 5000 listed in the file).
assert len(df['image_id'].drop_duplicates()) == 4952

### Remove unnecessary key value pairs

In [10]:
# Top-level keys before pruning.
ann_dict.keys()

dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])

Remove the unneeded keys from the top level.

In [11]:
# Top-level keys to discard.
remove_base = ['info', 'licenses']

In [12]:
# Drop the listed top-level keys. pop() with a default is used instead of
# `del` so that re-running this cell after the keys are already gone does not
# raise KeyError.
for key in remove_base:
    ann_dict.pop(key, None)

In [13]:
# Top-level keys after pruning.
ann_dict.keys()

dict_keys(['images', 'annotations', 'categories'])

For 'images' sub-dict, keep only 'id'.

In [14]:
# Per-image keys to discard; only 'id' is left afterwards.
remove_images = ['license', 'file_name', 'coco_url', 'height', 'width', 'date_captured', 'flickr_url']

In [15]:
# Strip the listed keys from every image record. pop() with a default keeps
# the cell re-runnable: a second execution is a no-op instead of a KeyError.
for item in ann_dict['images']:
    for key in remove_images:
        item.pop(key, None)

In [16]:
# Spot-check one image record after pruning.
ann_dict['images'][0]

{'id': 397133}

For the 'annotations' sub-dict, drop 'segmentation'.

In [17]:
# Per-annotation keys to discard.
remove_anno = ['segmentation']

In [18]:
# Strip the listed keys from every annotation record. pop() with a default
# keeps the cell re-runnable: a second execution is a no-op instead of a
# KeyError.
for item in ann_dict['annotations']:
    for key in remove_anno:
        item.pop(key, None)

For the 'categories' sub-dict, drop 'supercategory'.

In [19]:
# Remove 'supercategory' from every category record. pop() with a default
# keeps the cell re-runnable: a second execution is a no-op instead of a
# KeyError.
for item in ann_dict['categories']:
    item.pop('supercategory', None)

### Create small GT set

Keep `sample_size` ground-truth (GT) annotations, starting at position `offset`.

In [20]:
# Keep a window of `sample_size` annotations beginning at `offset`.
# (Despite its name, small_ann_dict is a list of annotation records.)
window_start = offset
window_stop = offset + sample_size
small_ann_dict = ann_dict['annotations'][window_start:window_stop]
ann_dict['annotations'] = small_ann_dict
assert len(ann_dict['annotations']) == sample_size

From GT, we collect all the images involved.

In [21]:
# Distinct image ids referenced by the kept annotations, in ascending order.
image_ids = sorted({ann['image_id'] for ann in small_ann_dict[:sample_size]})
num_images = len(image_ids)
print(f'{num_images} images used.')

# Restrict the image records to the ids that still have annotations.
df = pd.DataFrame(ann_dict['images'])
has_annotation = df['id'].isin(image_ids)
small_images = df.loc[has_annotation].to_dict('records')

4952 images used.


In [22]:
# Replace the image list with the reduced one.
ann_dict['images'] = small_images

In [23]:
# Persist the reduced ground truth so it can be loaded through the COCO API.
with open('./test.json', 'w') as reduced_gt_file:
    json.dump(ann_dict, reduced_gt_file)

### Load GT

In [24]:
# Load the reduced ground truth written above rather than the original file.
# annFile = './instances_val2017.json'
annFile = './test.json'
cocoGt=COCO(annFile)

loading annotations into memory...
Done (t=0.16s)
creating index...
index created!


In [25]:
# Ids of every annotation held by the loaded COCO object.
all_gt_id = list(cocoGt.anns.keys())
# With the unfiltered val2017 file the count would be 36781 instead.
assert len(all_gt_id) == sample_size

In [26]:
# Fetch the annotation records for all of those ids.
all_gt = cocoGt.loadAnns(all_gt_id)

In [27]:
# Tabular view of the ground-truth records for inspection.
gt_df = pd.DataFrame(all_gt)

In [28]:
# Every GT box has its own unique id.
assert not gt_df['id'].duplicated().any()

In [29]:
# image_id may repeat: several boxes can belong to the same image.
gt_df[gt_df['image_id'] == 78823]

Unnamed: 0,index,area,iscrowd,image_id,bbox,category_id,id
7,7,16694.4047,0,78823,"[197.97, 117.22, 170.45, 222.07]",18,7544
29,29,9473.41725,0,78823,"[58.45, 81.69, 131.53, 125.9]",64,21771
372,372,91251.4839,0,78823,"[9.29, 160.0, 466.58, 314.84]",3,133097
379,379,26292.74015,0,78823,"[1.29, 150.59, 186.23, 261.24]",3,134130
447,447,4064.2254,0,78823,"[594.08, 175.0, 45.2, 128.07]",3,145780


### Generate fake pred and save as json

In [30]:
# Ground-truth records the fake predictions are derived from
# (at most `sample_size` of them).
anns = cocoGt.loadAnns(all_gt_id[0:sample_size])

In [31]:
# Seed NumPy's global RNG so the fake detections are reproducible.
np.random.seed(seed=256)
# One confidence score per annotation, drawn up front in a single call.
scores = np.random.random((sample_size,))

# Each fake detection copies the GT image/category and shifts all four bbox
# values by 2 * (2 * U + 1) with U drawn from [0, 1), i.e. by an amount in [2, 6).
# NOTE(review): (1 - (-1)) * U resembles the (b - a) * U + a recipe for a draw
# from [-1, 1), but the constant added is +1 rather than -1 — confirm the intent.
# It is left untouched here because the recorded AP values depend on it.
fake_pred = [
    {
        'image_id': gt['image_id'],
        'category_id': gt['category_id'],
        'bbox': (gt['bbox'] + 2 * ((1 - (-1)) * np.random.random(4) + 1)).tolist(),
        'score': scores[idx],
    }
    for idx, gt in enumerate(anns)
]

In [32]:
# Tabular view of the fake detections ordered by image_id. reset_index() is
# called without drop, so the pre-sort position is kept in an extra 'index' column.
fake_pred_df = pd.DataFrame(fake_pred).sort_values('image_id').reset_index()

In [33]:
# One detection per kept annotation, spread over the same set of images.
assert len(fake_pred_df) == sample_size
assert len(fake_pred_df['image_id'].drop_duplicates()) == num_images

In [34]:
# Save the detections in the list-of-dicts form that is later passed to loadRes.
with open('fake_pred.json', 'wt') as pred_file:
    json.dump(fake_pred, pred_file)

### mAP using fake pred

In [35]:
# The GT object still holds exactly `sample_size` annotations.
assert len(cocoGt.anns) == sample_size

In [36]:
# No crowd annotation may have survived the filtering step.
for gt_record in cocoGt.anns.values():
    assert gt_record['iscrowd'] == 0

In [37]:
# Every GT box area must lie below `area_thresh`.
# NOTE(review): this cell used to end with `cocoGt.anns[gt_id]['area'] == 0`,
# a comparison whose result was discarded, so it never changed anything. It
# has been removed. If zeroing the areas was the intent, it never took effect,
# and the results recorded below were produced without it.
for gt_record in cocoGt.anns.values():
    assert gt_record['area'] < area_thresh

In [38]:
# Initialize the COCO detections API from the fake predictions saved above.
cocoDt=cocoGt.loadRes('./fake_pred.json')

Loading and preparing results...
DONE (t=0.26s)
creating index...
index created!


In [39]:
# Run the reference evaluation: bbox-type comparison of the fake detections
# against the reduced GT, restricted to the images that still have annotations.
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
cocoEval.params.imgIds = image_ids
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=6.00s).
Accumulating evaluation results...
DONE (t=0.18s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.29160866139359581383772024310019
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.58110729438626751441887563487398
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.26501573488656765542614834885171


In [40]:
# After the evaluation every GT record is expected to carry exactly nine keys,
# with both 'ignore' and '_ignore' equal to 0.
for gt_record in cocoGt.anns.values():
    assert len(gt_record) == 9
    assert gt_record['ignore'] == 0
    assert gt_record['_ignore'] == 0

In [41]:
# sample_size = 300
# offset = 1

# Running per image evaluation...
# Evaluate annotation type *bbox*
# DONE (t=0.09s).
# Accumulating evaluation results...
# DONE (t=0.01s).
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.49353108110035337485754780573188
#  Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.83751546185462277804845143691637
#  Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.54370192869205802299603647043114

## FE mAP

### Create bbox

In [42]:
def generate_bbox(anns):
    """Pack the ground-truth boxes of every image into one zero-padded array.

    Reads the notebook globals `image_ids` and `num_images`.

    Args:
        anns: Annotation records; each needs 'image_id', 'bbox' (4 values)
            and 'category_id'.

    Returns:
        Float array of shape (num_images, largest box count of any image, 5).
        Slice i holds the boxes of image_ids[i]: columns 0-3 are the bbox
        values, column 4 is the category id. Unused rows stay all-zero.
    """
    gt_frame = pd.DataFrame(anns).sort_values('image_id').reset_index()
    # value_counts() orders counts descending, so the first entry is the largest.
    most_boxes = gt_frame['image_id'].value_counts().iloc[0]
    bbox = np.zeros((num_images, most_boxes, 5))

    for slot, img_id in enumerate(image_ids):
        boxes = gt_frame.loc[gt_frame['image_id'] == img_id, ['bbox', 'category_id']]
        count = len(boxes)
        bbox[slot, :count, :4] = boxes['bbox'].tolist()
        bbox[slot, :count, 4] = boxes['category_id'].values

    return bbox

In [43]:
def generate_batch_bbox(batch_image_ids):
    """Build the zero-padded ground-truth array for one batch of images.

    Reads the notebook global `anns`; the full annotation frame is rebuilt and
    sorted on every call before it is narrowed down to the batch.

    Args:
        batch_image_ids: Image ids that make up the batch, in output order.

    Returns:
        Float array of shape (len(batch_image_ids), largest box count within
        the batch, 5). Columns 0-3 are the bbox values, column 4 is the
        category id. Unused rows stay all-zero.
    """
    gt_frame = pd.DataFrame(anns).sort_values('image_id').reset_index()
    batch_frame = gt_frame[gt_frame['image_id'].isin(batch_image_ids)]
    # value_counts() orders counts descending, so the first entry is the largest.
    most_boxes = batch_frame['image_id'].value_counts().iloc[0]
    bbox = np.zeros((len(batch_image_ids), most_boxes, 5))

    for slot, img_id in enumerate(batch_image_ids):
        boxes = batch_frame.loc[batch_frame['image_id'] == img_id, ['bbox', 'category_id']]
        count = len(boxes)
        bbox[slot, :count, :4] = boxes['bbox'].tolist()
        bbox[slot, :count, 4] = boxes['category_id'].values

    return bbox

In [44]:
# df = pd.DataFrame(anns).sort_values('image_id').reset_index()

# max_num_bbox = df['image_id'].value_counts().values[0]

# bbox_shape = (num_images, max_num_bbox, 5)
# bbox = np.zeros(bbox_shape)

# for i, image_id in enumerate(image_ids):
#     temp = df[df['image_id'].isin([image_id])][['bbox', 'category_id']]
#     bbox[i, 0:len(temp), 0:4] = temp['bbox'].tolist()
#     bbox[i, 0:len(temp), 4] = temp['category_id'].values

### Create pred

In [45]:
def generate_pred(fake_pred_df):
    """Turn the detection frame into one array per image.

    Reads the notebook global `image_ids` to decide which images to emit and
    in which order.

    Args:
        fake_pred_df: Frame with 'image_id', 'bbox' (4 values), 'category_id'
            and 'score' columns.

    Returns:
        List with one float array of shape (detections in that image, 6) per
        entry of image_ids: columns 0-3 are the bbox values, column 4 the
        category id, column 5 the score.
    """
    per_image = []
    for img_id in image_ids:
        rows = fake_pred_df.loc[fake_pred_df['image_id'] == img_id,
                                ['bbox', 'category_id', 'score']]
        n_det = len(rows)
        detections = np.zeros((n_det, 6))
        detections[:n_det, :4] = rows['bbox'].tolist()
        detections[:n_det, 4] = rows['category_id'].values
        detections[:n_det, 5] = rows['score'].values
        per_image.append(detections)

    return per_image

In [46]:
def generate_batch_pred(batch_image_ids):
    """Collect the detections of one batch of images as a list of arrays.

    Reads the notebook global `fake_pred_df`.

    Args:
        batch_image_ids: Image ids that make up the batch, in output order.

    Returns:
        List with one float array of shape (detections in that image, 6) per
        batch entry: columns 0-3 are the bbox values, column 4 the category
        id, column 5 the score.
    """
    per_image = []
    for img_id in batch_image_ids:
        rows = fake_pred_df.loc[fake_pred_df['image_id'] == img_id,
                                ['bbox', 'category_id', 'score']]
        n_det = len(rows)
        detections = np.zeros((n_det, 6))
        detections[:n_det, :4] = rows['bbox'].tolist()
        detections[:n_det, 4] = rows['category_id'].values
        detections[:n_det, 5] = rows['score'].values
        per_image.append(detections)

    return per_image

### Calculate mAP

In [47]:
from fastestimator.trace.metric import MeanAveragePrecision

In [48]:
# Metric trace under test, configured with num_classes=90.
mAP = MeanAveragePrecision(num_classes=90)

In [49]:
# Full-dataset inputs built in one shot.
# NOTE: the batched loop further down rebinds `data` on every iteration, so
# this dict is not the one that reaches on_epoch_end once that loop has run.
data = {}
data['pred'] = generate_pred(fake_pred_df)
data['bbox'] = generate_bbox(anns)

In [50]:
# Start-of-epoch hook, called with None in place of trace data.
# NOTE(review): presumably resets the metric's accumulators — confirm against the trace.
mAP.on_epoch_begin(None)

In [51]:
# Feed the metric one batch of images at a time.
# Stepping through image_ids in strides of batch_size also evaluates a trailing
# partial batch. The previous range(int(num_images / batch_size)) silently
# dropped it whenever num_images was not a multiple of batch_size. For the
# current configuration (4952 images, batch_size 8) the batches are unchanged.
for batch_start in range(0, num_images, batch_size):
    batch_ids = image_ids[batch_start:batch_start + batch_size]
    data = {}
    data['pred'] = generate_batch_pred(batch_ids)
    data['bbox'] = generate_batch_bbox(batch_ids)
    mAP.on_batch_begin(None)
    mAP.on_batch_end(data)

In [52]:
# End-of-epoch hook; the cells below read 'mAP', 'AP50' and 'AP75' from `data` afterwards.
mAP.on_epoch_end(data)

In [53]:
# Print the metric's mAP with 32 decimals for a digit-by-digit comparison with
# the COCO summary above. The value is kept in its own name: rebinding `mAP`
# to a float would shadow the trace object and make the metric cells above
# fail when re-run.
mean_ap = data['mAP']
print(f"{mean_ap:.32f}")

0.29160866139359581383772024310019


In [54]:
# 'AP50' entry, printed with 32 decimals for comparison with the IoU=0.50 line above.
ap50 = data['AP50']
print(f"{ap50:.32f}")

0.58110729438626751441887563487398


In [55]:
# 'AP75' entry, printed with 32 decimals for comparison with the IoU=0.75 line above.
ap75 = data['AP75']
print(f"{ap75:.32f}")

0.26501573488656765542614834885171
