<h1>MOBDrone Dataset Evaluation</h1>
<h4>Code for reproducing the results of the paper <i>"MOBDrone: a Drone Video Dataset for Man OverBoard Rescue"</i>, accepted at ICIAP 2021 [<a href='https://arxiv.org/abs/2203.07973'>Pre-print</a>]</h4>

<h2>COCO mAP</h2>

In [23]:
from collections import defaultdict
import numpy as np
from pathlib import Path
import random
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval


# Detectors to evaluate: display name -> file stem of the predictions file,
# which is read from "./predictions/<stem>_results.json".
METHODS = {
    "VarifocalNet": "vfnet_X_101_64x4d",
    "TOOD": "tood_r101",
    "Deformable DETR": "deformable_detr_twostage_refine_r50_16x2_50e",
    "YOLOX": "yolox_x_8x8_300e",
    "Faster RCNN": "fasterrcnn_resnet50",
    "CenterNet": "centernet_resnet18_dcnv2_140e",
    "DETR": "detr_r50_8x2_150e",
    "Mask R-CNN": "mask_rcnn_x101_64x4d_fpn_mstrai_npoly_3x",
    "YOLOv3": "yolov3",
}

# Ground-truth annotations in COCO json format, loaded with pycocotools' COCO()
GT_COCO_JSON_FILE = "./annotations/annotations_person_coco_classes.json"

<h4>Let's compute COCO mAP on the entire dataset for all the considered methods</h4>

In [24]:
columns = ['Model', 'AP50', 'mAP@[0.50:0.95]']
metrics = []

# The ground truth is the same for every method, so it is loaded and indexed
# once here instead of once per loop iteration.
cocoGt = COCO(GT_COCO_JSON_FILE)
imgIds = sorted(cocoGt.getImgIds())
class_ids = [1]     # we consider only the person class

for method_name, file_name in METHODS.items():
    preds_json_file = "./predictions/{}_results.json".format(file_name)

    # Loading the detections of the current method
    cocoDt = cocoGt.loadRes(preds_json_file)

    # Running eval
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.params.imgIds = imgIds
    cocoEval.params.catIds = class_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    # stats[0] is AP@[0.50:0.95] and stats[1] is AP@0.50, in the same order as
    # the summary lines printed by summarize().
    # The result is called `mean_ap` (not `map`) so that the built-in `map`
    # is not shadowed for the rest of the kernel session.
    mean_ap = round(cocoEval.stats[0], 3)
    ap50 = round(cocoEval.stats[1], 3)
    metrics.append([method_name, ap50, mean_ap])

loading annotations into memory...
Done (t=0.72s)
creating index...
index created!
Loading and preparing results...
DONE (t=27.26s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=86.10s).
Accumulating evaluation results...
DONE (t=25.58s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.144
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.378
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.087
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.023
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.247
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.639
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.206
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.317
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxD

In [25]:
# Turn the [model, AP50, mAP] rows collected by the evaluation loop into a table
metrics_df = pd.DataFrame(metrics, columns=columns)
display(metrics_df)

Unnamed: 0,Model,AP50,mAP@[0.50:0.95]
0,VarifocalNet,0.378,0.144
1,TOOD,0.314,0.116
2,Deformable DETR,0.199,0.075
3,YOLOX,0.126,0.049
4,Faster RCNN,0.096,0.028
5,CenterNet,0.124,0.041
6,DETR,0.128,0.04
7,Mask R-CNN,0.109,0.033
8,YOLOv3,0.011,0.009


<h4>Let's now compute COCO AP50 at different altitudes for the best model</h4>

In [27]:
BEST_METHOD = {
    "VarifocalNet": "vfnet_X_101_64x4d",
}


columns = ['Altitude', 'AP50']
metrics = []

# Load the ground truth *before* querying it, so that this cell does not rely
# on a `cocoGt` object left over from a previously executed cell.
cocoGt = COCO(GT_COCO_JSON_FILE)
imgIds = sorted(cocoGt.getImgIds())

# Group the image ids by altitude tag (e.g. "30m"), which is the 4th
# "_"-separated token of each image file name.
imgIds_height = defaultdict(list)
for img_id in imgIds:
    img_name = cocoGt.loadImgs(img_id)[0]['file_name']
    img_height = img_name.split("_")[3]
    imgIds_height[img_height].append(img_id)


for method_name, file_name in BEST_METHOD.items():
    preds_json_file = "./predictions/{}_results.json".format(file_name)
    cocoDt = cocoGt.loadRes(preds_json_file)
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    # Person class only, consistent with the whole-dataset evaluation.
    # NOTE(review): numbers are unchanged if the ground truth contains only
    # person annotations, as its file name suggests -- confirm.
    cocoEval.params.catIds = [1]
    for img_height, img_ids in imgIds_height.items():
        print("Evaluating images at {}".format(img_height))
        # The same evaluator is re-run on each altitude subset
        cocoEval.params.imgIds = img_ids
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        # stats[1] is AP@0.50 (second line of the printed summary)
        ap50 = round(cocoEval.stats[1], 3)
        metrics.append([img_height, ap50])

loading annotations into memory...
Done (t=0.71s)
creating index...
index created!
Loading and preparing results...
DONE (t=31.25s)
creating index...
index created!
Evaluating images at 30m
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=101.06s).
Accumulating evaluation results...
DONE (t=17.74s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.148
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.400
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.086
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.062
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.229
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.267
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.363
 Average Recall     (AR) @[ IoU=0.5

In [28]:
# Turn the [altitude, AP50] rows collected by the per-altitude loop into a table
metrics_df = pd.DataFrame(metrics, columns=columns)
display(metrics_df)

Unnamed: 0,Altitude,AP50
0,30m,0.4
1,10m,0.973
2,20m,0.771
3,40m,0.54
4,50m,0.241
5,60m,0.205


<h2>Evaluation using FiftyOne</h2>

In [2]:
from pathlib import Path
from tqdm import tqdm
import json

import matplotlib.pyplot as plt
from sklearn import metrics

import fiftyone as fo
import fiftyone.utils.coco as fouc
from fiftyone import ViewField as F
import fiftyone.brain as fob


# Ground-truth annotations in COCO json format (used as `labels_path` on import)
GT_COCO_JSON_FILE = "./annotations/annotations_person_coco_classes.json"
# Images directory (used as `data_path` on import)
IMGS = "./data"

# Models analysed with FiftyOne: display name -> file stem of the prediction
# files stored under "./predictions/"
BEST_MODELS = {
    "VarifocalNet": "vfnet_X_101_64x4d",
    "TOOD": "tood_r101",
    "Deformable DETR": "deformable_detr_twostage_refine_r50_16x2_50e",
}



Migrating database to v0.15.1
Dropping 78 orphan collections that were unintentionally left behind when datasets were deleted


<h4>Prepare data to evaluate</h4>

Import the dataset and make it persistent, so that in future sessions it can simply be reloaded with the load_dataset() function.


In [None]:
# Import the dataset, or reuse it when a previous run already stored it.
# Creating a dataset under a name that already exists in the FiftyOne database
# fails, so without this guard the cell cannot be re-run once the dataset has
# been made persistent.
DATASET_NAME = "mob_drone_person"

if fo.dataset_exists(DATASET_NAME):
    dataset = fo.load_dataset(DATASET_NAME)
else:
    dataset = fo.Dataset.from_dir(
        dataset_type=fo.types.COCODetectionDataset,
        data_path=IMGS,
        labels_path=GT_COCO_JSON_FILE,
        name=DATASET_NAME,
        label_field="ground_truth",
        tags=['test']
    )

# Print summary of the dataset
# print(dataset)

In [None]:
# Mark the dataset as persistent so it can be loaded again in later sessions
dataset.persistent = True

In [None]:
# Reload the dataset by the name it was given at import time
dataset = fo.load_dataset("mob_drone_person")

Adding detections (concerning only person category) of specified models to the dataset

In [None]:
# One label field per model, filled from its person-only predictions file
for method_name, file_name in tqdm(BEST_MODELS.items()):
    PRED_COCO_JSON_FILE = f"./predictions/{file_name}_only_person_results.json"
    label_field = f"predictions_{method_name}_only_person"

    # Attach the model predictions to the dataset
    fouc.add_coco_labels(dataset, label_field, PRED_COCO_JSON_FILE)

    # Persist the new field before moving on to the next model
    dataset.save()

<h4>Add Evaluations</h4>

Evaluating and adding evaluation keys to the dataset

In [None]:
# Evaluation results of each model, keyed by display name
model_results = {}

# Evaluate using coco evaluator and default settings; specifically, classwise is set to True
for method_name in tqdm(BEST_MODELS):
    # Display names such as "Deformable DETR" contain a space, while FiftyOne
    # run keys are expected to be valid variable names, so spaces are replaced.
    # NOTE(review): key validation is performed by FiftyOne -- confirm against
    # the installed version.
    eval_key = "eval_{}".format(method_name).replace(" ", "_")

    model_results[method_name] = dataset.evaluate_detections(
        "predictions_{}_only_person".format(method_name),
        gt_field="ground_truth",
        eval_key=eval_key,
        compute_mAP=True,
        # classwise = False,
    )

    # Saving
    dataset.save()

<h4>Plotting some metrics</h4>

Let's compare PR curves of the three best models

In [None]:
# Line/fill colour of each model
colours = {'Deformable DETR': "royalblue", 'TOOD': "springgreen", "VarifocalNet": "orangered"}

fig, ax = plt.subplots(figsize=(14, 14))

for method_name in tqdm(BEST_MODELS.keys()):
    results = model_results[method_name]
    # element 0 concerns IoU=0.5, element 1 concerns class 'person'
    precision = results.precision[0][1]
    recall = results.recall
    # Area under the PR curve, reported in the legend as AP50
    auc = metrics.auc(recall, precision)

    color = colours[method_name]
    legend_entry = "{}: AP50 = {}".format(method_name, round(auc, 2))

    ax.plot(recall, precision, color=color, label=legend_entry, linewidth=2.0)
    ax.fill_between(recall, precision, alpha=0.25, color=color)

# Axes cosmetics
ax.set_xlabel('Recall', fontsize=33)
ax.set_ylabel('Precision', fontsize=33)
ax.legend(fontsize=28, fancybox=True, loc="upper center")

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.grid(linestyle='-.', linewidth=0.2)


plt.show()

Let's plot the F1-threshold curve (the F1-score is computed at each point of the PR curves above)

In [None]:
# Line/marker colour of each model
colours = {'Deformable DETR': "royalblue", 'TOOD': "springgreen", "VarifocalNet": "orangered"}

# (dx, dy) offset of the best-F1 annotation relative to its marker, tuned per
# model; models not listed use the default offset.
text_offsets = {'TOOD': (-0.11, 0.015), 'Deformable DETR': (-0.08, 0.015)}
default_text_offset = (-0.08, 0.02)

fig, ax = plt.subplots(figsize=(14, 14))

for method_name in tqdm(BEST_MODELS):
    precision = model_results[method_name].precision[0][1]     # element 0 concerns IoU=0.5, element 1 concerns class 'person'
    recall = model_results[method_name].recall

    color = colours[method_name]

    # F1 at every point of the PR curve. It is defined as 0 where
    # precision + recall is not positive, which avoids a division by zero
    # (and NaNs that would make the arg-max below unreliable).
    f1_score = [
        2 * (prec * rec) / (prec + rec) if (prec + rec) > 0 else 0.0
        for prec, rec in zip(precision, recall)
    ]

    index_max_f1 = max(range(len(f1_score)), key=f1_score.__getitem__)
    best_recall = recall[index_max_f1]
    best_f1 = f1_score[index_max_f1]

    ax.plot(recall, f1_score, color=color, label="{}".format(method_name), linewidth=3.5)
    ax.plot(best_recall, best_f1, marker="X", color="white", ms=19, mec=color)

    # The x coordinate is a recall value, not a confidence threshold, so the
    # annotation (and the axis label below) say so; it is rounded for display.
    dx, dy = text_offsets.get(method_name, default_text_offset)
    ax.text(
        best_recall + dx,
        best_f1 + dy,
        s="F1: {}, rec: {}".format(round(best_f1, 2), round(best_recall, 2)),
        color=color,
        fontweight='bold',
        fontsize=15,
    )

ax.set_ylabel('F1-score', fontsize=33)
ax.set_xlabel('Recall', fontsize=33)
ax.legend(fontsize=28, fancybox=True, loc="upper center")

ax.set_xlim([0.0, 1.0])
# Single y-limit; this is the value that was effectively applied before
ax.set_ylim([0.0, 0.65])

ax.grid(linestyle='-.', linewidth=0.2)

plt.show()

<h2>Utility Functions</h2>

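Filter the detections, keeping only the ones belonging to the person category, and save them in a new json file. Run this cell before the "Adding detections" step above, which reads the `*_only_person_results.json` files produced here.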

In [4]:
def filter_detections_by_category(src_json_file, dst_json_file, category_id=1):
    """Copy the detections of a single category from one COCO results file to another.

    Args:
        src_json_file: path of a json file containing a list of detections,
            each one a dict with a 'category_id' key.
        dst_json_file: path of the json file to write the kept detections to.
        category_id: category to keep (default 1, the person class).

    Returns:
        The number of detections written.
    """
    with open(Path(src_json_file)) as json_ann_file:
        json_ann = json.load(json_ann_file)

    dst_ann = [ann for ann in json_ann if ann['category_id'] == category_id]

    with open(dst_json_file, 'w') as fp:
        json.dump(dst_ann, fp)

    return len(dst_ann)


# NOTE: the loop runs over BEST_MODELS, as before. The local re-definition of
# METHODS was removed: it was never used here and it overwrote the full list of
# models defined at the top of the notebook.
for method_name, file_name in tqdm(BEST_MODELS.items()):
    filter_detections_by_category(
        "./predictions/{}_results.json".format(file_name),
        "./predictions/{}_only_person_results.json".format(file_name),
    )

print("Done")

100%|██████████| 3/3 [01:18<00:00, 25.73s/it]

Done



