In [None]:
import os
from ultralytics import YOLO
from PIL import Image
from pathlib import Path
from typing import List, Dict
from tqdm.auto import tqdm
import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from sane_coco.dataset import COCODataset
from sane_coco.metrics import MeanAveragePrecision


%load_ext autoreload
%autoreload 2

def ultralytics_batch_detect(
    image_paths: List[str], 
    batch_size: int = 16, 
    conf: float = 0.25
) -> List[List[Dict]]:
    model = YOLO('yolov8n.pt', verbose=False)
    model.to('cpu')
    results = []
    
    for i in tqdm(range(0, len(image_paths), batch_size)):
        batch = image_paths[i:i + batch_size]
        preds = model(batch, conf=conf, verbose=False)
        batch_results = [
            [
                {'bbox': box.xyxy[0].tolist(), 
                 'conf': float(box.conf), 
                 'class': int(box.cls)}
                for box in pred.boxes
            ]
            for pred in preds
        ]
        results.extend(batch_results)
    return results


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [184]:
yolo_classes = {
    0: "person",
    1: "bicycle", 
    2: "car",
    3: "motorcycle",
    4: "airplane",
    5: "bus",
    6: "train",
    7: "truck", 
    8: "boat",
    9: "traffic light",
    10: "fire hydrant",
    11: "stop sign",
    12: "parking meter",
    13: "bench",
    14: "bird",
    15: "cat",
    16: "dog",
    17: "horse",
    18: "sheep",
    19: "cow",
    20: "elephant",
    21: "bear",
    22: "zebra",
    23: "giraffe",
    24: "backpack",
    25: "umbrella",
    26: "handbag",
    27: "tie",
    28: "suitcase",
    29: "frisbee",
    30: "skis",
    31: "snowboard",
    32: "sports ball",
    33: "kite",
    34: "baseball bat",
    35: "baseball glove",
    36: "skateboard",
    37: "surfboard",
    38: "tennis racket",
    39: "bottle",
    40: "wine glass",
    41: "cup",
    42: "fork",
    43: "knife",
    44: "spoon",
    45: "bowl",
    46: "banana",
    47: "apple",
    48: "sandwich",
    49: "orange",
    50: "broccoli",
    51: "carrot",
    52: "hot dog",
    53: "pizza",
    54: "donut",
    55: "cake",
    56: "chair",
    57: "couch",
    58: "potted plant",
    59: "bed",
    60: "dining table",
    61: "toilet",
    62: "tv",
    63: "laptop",
    64: "mouse",
    65: "remote",
    66: "keyboard",
    67: "cell phone",
    68: "microwave",
    69: "oven",
    70: "toaster",
    71: "sink",
    72: "refrigerator",
    73: "book",
    74: "clock",
    75: "vase",
    76: "scissors",
    77: "teddy bear",
    78: "hair drier",
    79: "toothbrush"
}


In [177]:
images_dir = 'COCO/DIR/val2017'
image_filenames = os.listdir(images_dir)
image_paths = [f'{images_dir}/{image}' for image in image_filenames]
results = ultralytics_batch_detect(image_paths, batch_size=10)

100%|██████████| 500/500 [04:27<00:00,  1.87it/s]


# Eval using pycocotools

In [183]:
annotations_fpath = '/Users/boris/Documents/datasets/coco/annotations/instances_val2017.json'

predictions_fpath = './predictions.json'

In [188]:
anno = COCO(str(annotations_fpath)) 

yolo_classes_reverse = {v: k for k, v in yolo_classes.items()}
yolo_id_to_coco_id = {}
for cat in anno.cats:
    coco_id = anno.cats[cat]['id']
    name = anno.cats[cat]['name']
    yolo_id = yolo_classes_reverse[name]
    yolo_id_to_coco_id[yolo_id] = coco_id

predictions = []
for i, (image_filename, result) in enumerate(zip(image_filenames, results)):
    image_id = int(Path(image_filename).stem)
    for detection in result:
        predictions.append({
            'image_id': image_id,
            'bbox': detection['bbox'],
            'score': detection['conf'],
            'category_id': yolo_id_to_coco_id[detection['class']]
        })

with open(predictions_fpath, 'w') as f:
    json.dump(predictions, f)

pred = anno.loadRes(str(predictions_fpath))

val = COCOeval(anno, pred, "bbox")
val.params.imgIds = [int(Path(x).stem) for x in image_filenames]
val.evaluate()
val.accumulate()
val.summarize()
stats = {}
stats["map_50_95"] = val.stats[0]
stats["map_50"] = val.stats[1]

print('MAP@0.5|0.95:', stats['map_50_95'])
print('MAP@0.5:', stats['map_50'])

loading annotations into memory...
Done (t=0.29s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.04s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=3.89s).
Accumulating evaluation results...
DONE (t=0.57s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.010
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.031
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.006
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.005
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.020
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.031
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.036
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets

# Eval using sane coco

In [190]:
with open(annotations_fpath, 'r') as f:
    annotations = json.load(f)

dataset = COCODataset.from_dict(annotations)
annotations = dataset.get_annotation_dicts()

included_images = [
    i for i in range(len(dataset.images))
    if dataset.images[i].id in [int(Path(x).stem) for x in image_filenames]
]
included_image_ids = [dataset.images[i].id for i in included_images]
included_annotations = [annotations[i] for i in included_images]

In [191]:
predicted_image_ids = [int(Path(x).stem) for x in image_filenames]
predictions = {}
for i, (image_id, result) in enumerate(zip(predicted_image_ids, results)):
    image_predictions = []
    for detection in result:
        category_id = yolo_id_to_coco_id[detection['class']]
        category = dataset.get_category_by_id(category_id)

        image_predictions.append({
            'score': detection['conf'],
            'category': category.name,
            'bbox': detection['bbox'],
        })
    predictions[image_id] = image_predictions

predictions = [predictions[i] for i in included_image_ids]

In [193]:
metrics = MeanAveragePrecision()
metrics.update(included_annotations, predictions)
stats = metrics.compute()
print('MAP@0.5|0.95:', stats['map'])
print('MAP@0.5:', stats['ap'][0.5])

MAP@0.5|0.95: 0.008281212656477256
MAP@0.5: 0.024394058646580518
