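En este notebook se generan las predicciones de los modelos YOLOv8 ya entrenados sobre el conjunto de evaluación (imágenes de `val2017` de COCO) y se calculan sus métricas: precisión, recall, F1 y mAP.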

Comenzamos importando las librerías necesarias.

In [1]:
import os
import cv2
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import io
from contextlib import redirect_stdout
from ultralytics import YOLO
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from tqdm import tqdm
from pathlib import Path

In [6]:
# Project paths, all resolved relative to the directory the notebook runs from.

ROOT_DIR = Path.cwd()
DATASET_DIR = ROOT_DIR / "dataset"
IMAGES_DIR = DATASET_DIR / "val2017"
ANNOTATIONS_FILE = DATASET_DIR / "annotations" / "instances_val2017.json"
RESULTS_DIR = ROOT_DIR / "validation_results"

# parents=True so the cell does not fail when an intermediate directory is missing.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Load the annotations a single time; the same COCO object is reused for every model.
coco = COCO(ANNOTATIONS_FILE)

# Keep only the annotated images that are actually present in IMAGES_DIR.
# The check uses the annotation's own 'file_name' (the name that is opened
# later during evaluation) and a set, so each lookup is O(1).
existing_img_ids = os.listdir(IMAGES_DIR)
existing_img_names = set(existing_img_ids)
img_ids = [
    img_info["id"]
    for img_info in coco.loadImgs(coco.getImgIds())
    if img_info["file_name"] in existing_img_names
]

print(len(img_ids))

loading annotations into memory...
Done (t=0.45s)
creating index...
index created!
100


In [3]:
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two boxes.

    Both boxes are indexable as (x1, y1, x2, y2). The result is
    ``intersection / union``, or 0 when the union area is not positive.
    """
    # Corners of the overlapping rectangle (if any).
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])

    # A negative extent means the boxes do not overlap along that axis.
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    overlap = overlap_w * overlap_h

    first_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    second_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    total = first_area + second_area - overlap

    if total > 0:
        return overlap / total
    return 0

In [None]:
def _match_to_ground_truth(p_box, coco_cat_id, gt_data, iou_threshold):
    """Claim the best-overlapping free ground-truth box for one detection.

    Only entries of ``gt_data`` that are still unmatched and whose ``'cat_id'``
    equals ``coco_cat_id`` are candidates. Among those, the one with the
    highest IoU that reaches ``iou_threshold`` is flagged ``'matched'`` in
    place (ties keep the earliest candidate).

    Returns:
        True if a ground-truth box was claimed (true positive), False
        otherwise (false positive).
    """
    best_gt = None
    best_iou = 0.0
    for gt in gt_data:
        if gt['matched'] or gt['cat_id'] != coco_cat_id:
            continue
        iou = calculate_iou(p_box, gt['bbox'])
        if iou >= iou_threshold and (best_gt is None or iou > best_iou):
            best_gt, best_iou = gt, iou

    if best_gt is None:
        return False
    best_gt['matched'] = True
    return True


def evaluate_model(model_name, img_ids, coco, images_dir, conf_threshold_manual=0.25,
                   iou_threshold_manual=0.5):
    """Evaluate one YOLO checkpoint on a set of COCO images.

    Two families of metrics are produced:

    * Manual precision / recall / F1: detections with score
      ``>= conf_threshold_manual`` are matched one-to-one against same-class
      ground-truth boxes at IoU ``>= iou_threshold_manual``.
    * COCO mAP: every detection returned at ``conf=0.001`` is passed to
      ``COCOeval`` (bbox mode), restricted to ``img_ids``.

    Args:
        model_name: Checkpoint name/path handed to ``YOLO``.
        img_ids: COCO image ids to evaluate.
        coco: ``COCO`` object holding the ground-truth annotations.
        images_dir: Directory that contains the image files.
        conf_threshold_manual: Minimum score for a detection to count in the
            manual precision/recall/F1 computation.
        iou_threshold_manual: Minimum IoU for a detection to match a
            ground-truth box in the manual computation.

    Returns:
        Tuple ``(precision, recall, f1, map50, map50_95)``.
    """
    model = YOLO(model_name)

    # YOLO class index -> COCO category id: the i-th class index is mapped to
    # the i-th category id in ascending order (COCO ids are not contiguous).
    yolo_to_coco_map = dict(enumerate(sorted(coco.getCatIds())))

    # Counters for the manual metrics, accumulated over all images.
    all_tp = 0
    all_fp = 0
    all_fn = 0

    # Detections in COCO results format, fed to COCOeval at the end.
    results_list = []

    for img_id in tqdm(img_ids, desc=f"Evaluando {model_name}"):
        img_info = coco.loadImgs(img_id)[0]
        img_path = os.path.join(images_dir, img_info['file_name'])

        # Ground truth: convert COCO xywh boxes to xyxy for the IoU helper.
        # NOTE(review): annotations flagged iscrowd are counted as ordinary
        # ground truth here; COCOeval appears to treat them as ignore
        # regions — confirm whether the manual metrics should skip them too.
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        gt_data = []
        for ann in anns:
            x, y, w, h = ann['bbox']
            gt_data.append({
                'bbox': [x, y, x + w, y + h],
                'cat_id': ann['category_id'],
                'matched': False
            })

        # Inference with a very low confidence threshold so that the mAP
        # computation sees the low-scoring detections as well.
        results = model(img_path, verbose=False, conf=0.001)[0]
        boxes = results.boxes

        # Move the predictions to numpy once per image, not once per box.
        xyxy_all = boxes.xyxy.cpu().numpy()
        conf_all = boxes.conf.cpu().numpy()
        cls_all = boxes.cls.cpu().numpy()

        # Greedy matching is only well defined if higher-scoring detections
        # claim ground-truth boxes first, so process them by descending score.
        for det_idx in np.argsort(-conf_all, kind="stable"):
            cls_idx = int(cls_all[det_idx])
            # Skip classes that have no COCO category (e.g. a custom model).
            if cls_idx not in yolo_to_coco_map:
                continue
            coco_real_id = yolo_to_coco_map[cls_idx]

            # Native Python floats keep results_list JSON-serialisable.
            x1, y1, x2, y2 = (float(v) for v in xyxy_all[det_idx])
            conf = float(conf_all[det_idx])

            # COCO results use the mapped category id and xywh boxes.
            results_list.append({
                'image_id': img_id,
                'category_id': coco_real_id,
                'bbox': [x1, y1, x2 - x1, y2 - y1],
                'score': conf
            })

            # Manual metrics only consider sufficiently confident detections.
            if conf >= conf_threshold_manual:
                if _match_to_ground_truth([x1, y1, x2, y2], coco_real_id,
                                          gt_data, iou_threshold_manual):
                    all_tp += 1
                else:
                    all_fp += 1

        # Ground-truth boxes nobody claimed in this image are false negatives.
        all_fn += sum(1 for gt in gt_data if not gt['matched'])

    # Final manual metrics, guarded against empty denominators.
    precision = all_tp / (all_tp + all_fp) if (all_tp + all_fp) > 0 else 0
    recall = all_tp / (all_tp + all_fn) if (all_tp + all_fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    map50 = 0.0
    map50_95 = 0.0

    if results_list:
        # loadRes prints progress too, so it runs inside the redirect along
        # with the evaluation itself.
        with redirect_stdout(io.StringIO()):
            coco_dt = coco.loadRes(results_list)
            coco_eval = COCOeval(coco, coco_dt, 'bbox')
            coco_eval.params.imgIds = img_ids
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()

        # stats[0] is AP@[.50:.95], stats[1] is AP@.50.
        map50_95 = float(coco_eval.stats[0])
        map50 = float(coco_eval.stats[1])

    return precision, recall, f1, map50, map50_95

In [None]:
model_variants = ['n', 's', 'm', 'l', 'x']
benchmark_results = []

for var in model_variants:
    m_name = f"yolov8{var}.pt"
    # The manual confidence threshold can be tuned here (the function default is 0.25).
    p, r, f1, map50, map50_95 = evaluate_model(m_name, img_ids, coco, IMAGES_DIR, conf_threshold_manual=0.25)

    # Store the unrounded metrics: rounding first would create artificial
    # ties and make the ranking below depend on the sort algorithm.
    benchmark_results.append({
        "Modelo": m_name,
        "Precision": float(p),
        "Recall": float(r),
        "F1-Score": float(f1),
        "mAP@50": float(map50),
        "mAP@50-95": float(map50_95)
    })

# Rank by the exact mAP@50-95 (stable sort, no in-place mutation), then round
# to three decimals for display and export.
df_results = (
    pd.DataFrame(benchmark_results)
    .sort_values(by="mAP@50-95", ascending=False, kind="stable")
    .round(3)
)
display(df_results)

Evaluando yolov8n.pt: 100%|██████████| 100/100 [00:06<00:00, 16.51it/s]


Loading and preparing results...
DONE (t=0.02s)
creating index...
index created!


Evaluando yolov8s.pt: 100%|██████████| 100/100 [00:10<00:00,  9.24it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!


Evaluando yolov8m.pt: 100%|██████████| 100/100 [00:23<00:00,  4.34it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!


Evaluando yolov8l.pt: 100%|██████████| 100/100 [00:40<00:00,  2.47it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!


Evaluando yolov8x.pt: 100%|██████████| 100/100 [01:01<00:00,  1.63it/s]


Loading and preparing results...
DONE (t=0.02s)
creating index...
index created!


Unnamed: 0,Modelo,Precision,Recall,F1-Score,mAP@50,mAP@50-95
4,yolov8x.pt,0.722,0.683,0.702,0.805,0.617
3,yolov8l.pt,0.734,0.66,0.695,0.794,0.617
2,yolov8m.pt,0.688,0.665,0.677,0.777,0.596
1,yolov8s.pt,0.693,0.608,0.648,0.694,0.537
0,yolov8n.pt,0.713,0.553,0.623,0.624,0.456


OSError: Cannot save file into a non-existent directory: 'c:\Users\carlo\Desktop\practicas-aprendizaje\profundo\validation_results'

In [8]:
# Create the output directory here as well, so this cell does not depend on
# an earlier cell having created it (to_csv raises OSError when it is missing).
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

df_results.to_csv(RESULTS_DIR / "metrics_comparison.csv", index=False)
# NOTE(review): recent pandas versions appear to need jinja2 for to_latex — confirm it is installed.
df_results.to_latex(RESULTS_DIR / "metrics_comparison.tex", index=False)