# Model evaluation

In [None]:
from ultralytics import YOLO
import json
import os

import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import json, cv2, os
import pandas as pd

### YOLO11x

In [None]:
model = YOLO('yolo11x/train/weights/best.pt')

img_dir = '/home/nikitina.alina8/WB/trash/hand_keypoint_dataset_26k/hand_keypoint_dataset_26k/images/val'
filenames = sorted(os.listdir(img_dir))

# COCOeval pairs detections with ground truth by image id, so the ids are read
# from the annotation file (keyed by file name) instead of being derived from
# the position of a file in the directory listing.
ann_path = '/home/nikitina.alina8/WB/trash/hand_keypoint_dataset_26k/hand_keypoint_dataset_26k/coco_annotation/val/fixed_annotations.coco.json'
with open(ann_path, 'r') as f:
    name_to_image_id = {
        os.path.basename(img['file_name']): img['id']
        for img in json.load(f)['images']
    }

preds = []
unannotated = []  # files present in img_dir but absent from the annotation file
for fname in filenames:
    img_id = name_to_image_id.get(fname)
    if img_id is None:
        unannotated.append(fname)
        continue

    img_path = os.path.join(img_dir, fname)
    # NOTE(review): 'keypoint' is not among the task names documented by
    # Ultralytics (the pose task is called 'pose') — confirm this argument is needed.
    results = model.predict(img_path, task='keypoint', imgsz=640, conf=0.25, verbose=False)
    r = results[0]

    # One prediction per detected instance: a box (x1, y1, x2, y2), its
    # confidence, and a list of (x, y, confidence) keypoint triples.
    for box, conf, kps in zip(r.boxes.xyxy.cpu().tolist(),
                              r.boxes.conf.cpu().tolist(),
                              r.keypoints.cpu().data.tolist()):
        # box = [x1, y1, x2, y2] -> COCO bbox = [x, y, w, h]
        x1, y1, x2, y2 = box
        bbox = [x1, y1, x2 - x1, y2 - y1]

        kp_flat = []
        for x, y, v in kps:
            # Visibility flag: 2 when the keypoint confidence is >= 0.5, otherwise 0.
            kp_flat += [x, y, 0 if v < 0.5 else 2]

        preds.append({
            'image_id': img_id,
            'category_id': 1,
            'bbox': bbox,
            'score': conf,
            'keypoints': kp_flat
        })

if unannotated:
    print(f'Skipped {len(unannotated)} image(s) that are missing from the annotation file')

with open('yolo11x_preds.json', 'w') as f:
    json.dump(preds, f, indent=2, ensure_ascii=False)

In [None]:
# Ground-truth annotations and the detections file written by the prediction cell.
coco_gt = COCO('./hand_keypoint_dataset_26k/hand_keypoint_dataset_26k/coco_annotation/val/fixed_annotations.coco.json')
coco_dt = coco_gt.loadRes('yolo11x_preds.json')

# Keypoint evaluation with 21 OKS sigmas, all set to 0.1.
coco_eval_kpt = COCOeval(coco_gt, coco_dt, iouType='keypoints')
coco_eval_kpt.params.kpt_oks_sigmas = np.full(21, 0.1)

# Per-image evaluation, then accumulation, then the printed AP/AR summary.
for step in (coco_eval_kpt.evaluate, coco_eval_kpt.accumulate, coco_eval_kpt.summarize):
    step()

loading annotations into memory...
Done (t=0.06s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.12s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *keypoints*
DONE (t=1.09s).
Accumulating evaluation results...
DONE (t=0.09s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.803
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.919
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.840
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.862
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.768
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.833
 Average Recall     (AR) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.934
 Average Recall     (AR) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.862
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | ma

Выведем топ-10 изображений, в которых ответ модели сильнее всего не совпал с исходной аннотацией

In [None]:
# Run the per-image keypoint evaluation to obtain the raw OKS matrices.
coco_eval = COCOeval(coco_gt, coco_dt, iouType='keypoints')
coco_eval.params.kpt_oks_sigmas = np.full(21, 0.1)
coco_eval.evaluate()

records = []
for (img_id, cat_id), oks_list in coco_eval.ious.items():
    oks_mat = np.asarray(oks_list)
    if oks_mat.size > 0:  # entries with an empty OKS matrix are skipped
        # Column-wise maximum (axis 0), averaged into a single score per image.
        # NOTE(review): pycocotools appears to lay this matrix out as
        # [detection, ground truth], which would make this the best detection
        # for each annotated instance — confirm.
        image_meta = coco_gt.loadImgs([img_id])[0]
        records.append({
            'image_id': img_id,
            'file_name': image_meta['file_name'],
            'mean_oks': oks_mat.max(axis=0).mean(),
        })

# The ten images with the lowest mean OKS.
df = pd.DataFrame(records)
df_worst = df.sort_values('mean_oks').head(10)
df_worst

loading annotations into memory...
Done (t=0.37s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.13s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *keypoints*
DONE (t=1.14s).


Unnamed: 0,image_id,file_name,mean_oks
4379,4381,IMG_00021930.jpg,1.377963e-159
3040,3041,IMG_00014544.jpg,6.005941e-113
3037,3038,IMG_00014530.jpg,1.6251539999999998e-63
2108,2109,IMG_00009541.jpg,1.569595e-55
4207,4209,IMG_00020957.jpg,3.0978200000000002e-52
3493,3494,IMG_00017027.jpg,2.478255e-49
3976,3978,IMG_00019685.jpg,3.797629e-48
4199,4201,IMG_00020925.jpg,5.3368360000000005e-43
4340,4342,IMG_00021716.jpg,8.102191e-32
798,798,IMG_00003003.jpg,4.285396e-27


### YOLO11n (https://github.com/chrismuntean/YOLO11n-pose-hands)

In [None]:
model = YOLO('best.pt')

# COCOeval pairs detections with ground truth by image id, so the ids are read
# from the annotation file (keyed by file name) instead of being derived from
# the position of a file in the directory listing.
ann_path = '/home/nikitina.alina8/WB/trash/hand_keypoint_dataset_26k/hand_keypoint_dataset_26k/coco_annotation/val/fixed_annotations.coco.json'
with open(ann_path, 'r') as f:
    name_to_image_id = {
        os.path.basename(img['file_name']): img['id']
        for img in json.load(f)['images']
    }

preds = []
unannotated = []  # files present in img_dir but absent from the annotation file
for fname in filenames:
    img_id = name_to_image_id.get(fname)
    if img_id is None:
        unannotated.append(fname)
        continue

    img_path = os.path.join(img_dir, fname)
    results = model.predict(img_path, task='keypoint', imgsz=640, conf=0.25, verbose=False)
    r = results[0]

    # One prediction per detected instance: a box (x1, y1, x2, y2), its
    # confidence, and a list of (x, y, confidence) keypoint triples.
    for box, conf, kps in zip(r.boxes.xyxy.cpu().tolist(),
                              r.boxes.conf.cpu().tolist(),
                              r.keypoints.cpu().data.tolist()):
        # box = [x1, y1, x2, y2] -> COCO bbox = [x, y, w, h]
        x1, y1, x2, y2 = box
        bbox = [x1, y1, x2 - x1, y2 - y1]

        kp_flat = []
        for x, y, v in kps:
            # Visibility flag: 2 when the keypoint confidence is >= 0.5, otherwise 0.
            kp_flat += [x, y, 0 if v < 0.5 else 2]

        preds.append({
            'image_id': img_id,
            'category_id': 1,
            'bbox': bbox,
            'score': conf,
            'keypoints': kp_flat
        })

if unannotated:
    print(f'Skipped {len(unannotated)} image(s) that are missing from the annotation file')

with open('yolo_preds_gitmodel.json', 'w') as f:
    json.dump(preds, f, indent=2, ensure_ascii=False)

In [None]:
# Ground-truth annotations and the detections file written by the prediction cell.
coco_gt = COCO('/home/nikitina.alina8/WB/trash/hand_keypoint_dataset_26k/hand_keypoint_dataset_26k/coco_annotation/val/fixed_annotations.coco.json')
coco_dt = coco_gt.loadRes('yolo_preds_gitmodel.json')

# Keypoint evaluation with 21 OKS sigmas, all set to 0.1.
coco_eval_kpt = COCOeval(coco_gt, coco_dt, iouType='keypoints')
coco_eval_kpt.params.kpt_oks_sigmas = np.full(21, 0.1)

# Per-image evaluation, then accumulation, then the printed AP/AR summary.
for step in (coco_eval_kpt.evaluate, coco_eval_kpt.accumulate, coco_eval_kpt.summarize):
    step()

loading annotations into memory...
Done (t=0.16s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.12s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *keypoints*
DONE (t=1.15s).
Accumulating evaluation results...
DONE (t=0.07s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.764
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.894
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.795
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.825
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.640
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.794
 Average Recall     (AR) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.911
 Average Recall     (AR) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.824
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | ma

### YOLO11n (local)

In [None]:
model = YOLO('yolo11n/train/weights/best.pt')

# COCOeval pairs detections with ground truth by image id, so the ids are read
# from the annotation file (keyed by file name) instead of being derived from
# the position of a file in the directory listing.
ann_path = '/home/nikitina.alina8/WB/trash/hand_keypoint_dataset_26k/hand_keypoint_dataset_26k/coco_annotation/val/fixed_annotations.coco.json'
with open(ann_path, 'r') as f:
    name_to_image_id = {
        os.path.basename(img['file_name']): img['id']
        for img in json.load(f)['images']
    }

preds = []
unannotated = []  # files present in img_dir but absent from the annotation file
for fname in filenames:
    img_id = name_to_image_id.get(fname)
    if img_id is None:
        unannotated.append(fname)
        continue

    img_path = os.path.join(img_dir, fname)
    results = model.predict(img_path, task='keypoint', imgsz=640, conf=0.25, verbose=False)
    r = results[0]

    # One prediction per detected instance: a box (x1, y1, x2, y2), its
    # confidence, and a list of (x, y, confidence) keypoint triples.
    for box, conf, kps in zip(r.boxes.xyxy.cpu().tolist(),
                              r.boxes.conf.cpu().tolist(),
                              r.keypoints.cpu().data.tolist()):
        # box = [x1, y1, x2, y2] -> COCO bbox = [x, y, w, h]
        x1, y1, x2, y2 = box
        bbox = [x1, y1, x2 - x1, y2 - y1]

        kp_flat = []
        for x, y, v in kps:
            # Visibility flag: 2 when the keypoint confidence is >= 0.5, otherwise 0.
            kp_flat += [x, y, 0 if v < 0.5 else 2]

        preds.append({
            'image_id': img_id,
            'category_id': 1,
            'bbox': bbox,
            'score': conf,
            'keypoints': kp_flat
        })

if unannotated:
    print(f'Skipped {len(unannotated)} image(s) that are missing from the annotation file')

with open('yolo11n_preds.json', 'w') as f:
    json.dump(preds, f, indent=2, ensure_ascii=False)

In [None]:
# Ground-truth annotations and the detections file written by the prediction cell.
coco_gt = COCO('/home/nikitina.alina8/WB/trash/hand_keypoint_dataset_26k/hand_keypoint_dataset_26k/coco_annotation/val/fixed_annotations.coco.json')
coco_dt = coco_gt.loadRes('yolo11n_preds.json')

# Keypoint evaluation with 21 OKS sigmas, all set to 0.1.
coco_eval_kpt = COCOeval(coco_gt, coco_dt, iouType='keypoints')
coco_eval_kpt.params.kpt_oks_sigmas = np.full(21, 0.1)

# Per-image evaluation, then accumulation, then the printed AP/AR summary.
for step in (coco_eval_kpt.evaluate, coco_eval_kpt.accumulate, coco_eval_kpt.summarize):
    step()

loading annotations into memory...
Done (t=0.41s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.13s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *keypoints*
DONE (t=1.10s).
Accumulating evaluation results...
DONE (t=0.09s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.759
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.885
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.786
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.823
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.677
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.791
 Average Recall     (AR) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.909
 Average Recall     (AR) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.820
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | ma

### MediaPipe Hands

In [None]:
base = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(base_options=base, num_hands=2)
detector = vision.HandLandmarker.create_from_options(options)

mp_img_dir = './hand_keypoint_dataset_26k/hand_keypoint_dataset_26k/images/val'

# COCOeval pairs detections with ground truth by image id, so the ids are read
# from the annotation file (keyed by file name) instead of being derived from
# the position of a file in the directory listing.
ann_path = './hand_keypoint_dataset_26k/hand_keypoint_dataset_26k/coco_annotation/val/fixed_annotations.coco.json'
with open(ann_path, 'r') as f:
    name_to_image_id = {
        os.path.basename(img['file_name']): img['id']
        for img in json.load(f)['images']
    }

preds = []
unannotated = []  # files present in the image directory but absent from the annotation file
for fname in sorted(os.listdir(mp_img_dir)):
    img_id = name_to_image_id.get(fname)
    if img_id is None:
        unannotated.append(fname)
        continue

    img_path = os.path.join(mp_img_dir, fname)
    # Detection
    mp_img = mp.Image.create_from_file(img_path)
    res = detector.detect(mp_img)

    for handedness, hand_kps in zip(res.handedness, res.hand_landmarks):
        # The bounding box is the extent of all landmarks of the hand. The
        # coordinates are used as returned here; the next cell multiplies them
        # by the image width/height.
        xs = [kp.x for kp in hand_kps]
        ys = [kp.y for kp in hand_kps]
        bbox = [min(xs), min(ys), max(xs) - min(xs), max(ys) - min(ys)]

        kp_flat = []
        for kp in hand_kps:
            # MediaPipe reports coordinates even for landmarks it cannot see.
            # A landmark whose relative x or y falls outside [0, 1] is replaced
            # by [0, 0, 0], matching how the dataset annotations mark missing
            # keypoints; in-range landmarks get visibility 2.
            in_frame = 0 <= kp.x <= 1 and 0 <= kp.y <= 1
            kp_flat += [kp.x, kp.y, 2] if in_frame else [0, 0, 0]

        preds.append({
            'image_id': img_id,
            'category_id': 1,
            'bbox': bbox,
            # The score of the first handedness category serves as the detection score.
            'score': handedness[0].score,
            'keypoints': kp_flat
        })

if unannotated:
    print(f'Skipped {len(unannotated)} image(s) that are missing from the annotation file')

In [None]:
# Convert MediaPipe's normalised coordinates into pixel coordinates.
# The result is written to a new list and `preds` is left untouched, so
# re-running this cell does not rescale coordinates that are already in pixels.
with open('./hand_keypoint_dataset_26k/hand_keypoint_dataset_26k/coco_annotation/val/fixed_annotations.coco.json', 'r') as f:
    coco = json.load(f)

# image_id -> (width, height)
img_info = {img['id']: (img['width'], img['height']) for img in coco['images']}

preds_abs = []
for pred in preds:
    width, height = img_info[pred['image_id']]

    x_rel, y_rel, w_rel, h_rel = pred['bbox']
    abs_bbox = [x_rel * width, y_rel * height, w_rel * width, h_rel * height]

    # Keypoints are stored flat as [x, y, v, x, y, v, ...]; only x and y are scaled.
    kp = pred['keypoints']
    abs_kp = []
    for i in range(0, len(kp), 3):
        abs_kp.extend([kp[i] * width, kp[i + 1] * height, kp[i + 2]])

    # Overriding existing keys keeps the original key order in the JSON output.
    preds_abs.append({**pred, 'bbox': abs_bbox, 'keypoints': abs_kp})

with open('mp_preds.json', 'w') as f:
    json.dump(preds_abs, f, indent=2, ensure_ascii=False)

In [None]:
# Ground-truth annotations and the detections file written by the conversion cell.
coco_gt = COCO('/home/nikitina.alina8/WB/trash/hand_keypoint_dataset_26k/hand_keypoint_dataset_26k/coco_annotation/val/fixed_annotations.coco.json')
coco_dt = coco_gt.loadRes('mp_preds.json')

# Keypoint evaluation with 21 OKS sigmas, all set to 0.1.
coco_eval_kpt = COCOeval(coco_gt, coco_dt, iouType='keypoints')
coco_eval_kpt.params.kpt_oks_sigmas = np.full(21, 0.1)

# Per-image evaluation, then accumulation, then the printed AP/AR summary.
for step in (coco_eval_kpt.evaluate, coco_eval_kpt.accumulate, coco_eval_kpt.summarize):
    step()

loading annotations into memory...
Done (t=0.15s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.13s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *keypoints*
DONE (t=1.24s).
Accumulating evaluation results...
DONE (t=0.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.759
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.901
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.802
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.805
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.786
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.848
 Average Recall     (AR) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.938
 Average Recall     (AR) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.878
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | ma

Выведем топ-10 изображений, в которых ответ модели сильнее всего не совпал с исходной аннотацией

In [None]:
# Run the per-image keypoint evaluation to obtain the raw OKS matrices.
coco_eval = COCOeval(coco_gt, coco_dt, iouType='keypoints')
coco_eval.params.kpt_oks_sigmas = np.full(21, 0.1)
coco_eval.evaluate()

records = []
for (img_id, cat_id), oks_list in coco_eval.ious.items():
    oks_mat = np.asarray(oks_list)
    if oks_mat.size > 0:  # entries with an empty OKS matrix are skipped
        # Column-wise maximum (axis 0), averaged into a single score per image.
        # NOTE(review): pycocotools appears to lay this matrix out as
        # [detection, ground truth], which would make this the best detection
        # for each annotated instance — confirm.
        image_meta = coco_gt.loadImgs([img_id])[0]
        records.append({
            'image_id': img_id,
            'file_name': image_meta['file_name'],
            'mean_oks': oks_mat.max(axis=0).mean(),
        })

# The ten images with the lowest mean OKS.
df = pd.DataFrame(records)
df_worst = df.sort_values('mean_oks').head(10)
df_worst

loading annotations into memory...
Done (t=0.06s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.41s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *keypoints*
DONE (t=1.08s).


Unnamed: 0,image_id,file_name,mean_oks
6086,6253,IMG_00029571.jpg,0.019517
817,828,IMG_00003183.jpg,0.02292
1838,1860,IMG_00007887.jpg,0.026231
4067,4200,IMG_00020923.jpg,0.02916
1837,1859,IMG_00007885.jpg,0.034639
3723,3842,IMG_00018857.jpg,0.039532
1836,1858,IMG_00007884.jpg,0.041554
4580,4735,IMG_00023827.jpg,0.041565
1818,1840,IMG_00007803.jpg,0.044391
4154,4297,IMG_00021467.jpg,0.046785
