In [7]:
import os
import numpy as np
import cv2
from ultralytics import YOLO
from pathlib import Path
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

# === CONFIG ===
MODEL_PATH = '/kaggle/input/simplified-yolo-v8/pytorch/default/1/best_tb_model.pt'
DATA_DIR = '/kaggle/input/yolo-tbx11k-simplified/yolo-tbx11k-simplified'
IMG_DIR = f"{DATA_DIR}/images/val"
LABEL_DIR = f"{DATA_DIR}/labels/val"
IMG_SIZE = 512
IOU_THRESHOLD = 0.5
CONF_THRESHOLD = 0.05

# === Load model (explicitly specify task) ===
model = YOLO(MODEL_PATH, task='detect')

# === Helper Functions ===
def load_ground_truth(label_path):
    boxes = []
    with open(label_path, 'r') as file:
        for line in file:
            parts = line.strip().split()
            _, cx, cy, w, h = map(float, parts)
            x1 = (cx - w / 2) * IMG_SIZE
            y1 = (cy - h / 2) * IMG_SIZE
            x2 = (cx + w / 2) * IMG_SIZE
            y2 = (cy + h / 2) * IMG_SIZE
            boxes.append([x1, y1, x2, y2])
    return np.array(boxes)

def compute_iou(box1, box2):
    xA = max(box1[0], box2[0])
    yA = max(box1[1], box2[1])
    xB = min(box1[2], box2[2])
    yB = min(box1[3], box2[3])
    interArea = max(0, xB - xA) * max(0, yB - yA)
    box1Area = max(1e-6, (box1[2] - box1[0]) * (box1[3] - box1[1]))
    box2Area = max(1e-6, (box2[2] - box2[0]) * (box2[3] - box2[1]))
    iou = interArea / float(box1Area + box2Area - interArea)
    return iou

def bbox_to_array(bbox):
    x1, y1, x2, y2 = bbox
    return [x1, y1, x2 - x1, y2 - y1]

# === Evaluation ===
ious, recalls, mses = [], [], []

img_files = sorted(list(Path(IMG_DIR).glob("*.jpg")) + list(Path(IMG_DIR).glob("*.png")))

for img_file in tqdm(img_files, desc="Evaluating"):
    img = cv2.imread(str(img_file))
    if img is None:
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Predict
    results = model.predict(source=img, conf=CONF_THRESHOLD, iou=0.01, verbose=False)
    pred_boxes = [box.tolist() for box in results[0].boxes.xyxy.cpu().numpy()]

    # Ground Truth
    label_file = LABEL_DIR + "/" + img_file.stem + ".txt"
    if not os.path.exists(label_file):
        continue
    gt_boxes = load_ground_truth(label_file)

    for gt_box in gt_boxes:
        ious_per_gt = [compute_iou(gt_box, pred_box) for pred_box in pred_boxes]
        max_iou = max(ious_per_gt) if ious_per_gt else 0
        ious.append(max_iou)
        recalls.append(1 if max_iou >= IOU_THRESHOLD else 0)

        if max_iou >= IOU_THRESHOLD:
            best_pred_idx = np.argmax(ious_per_gt)
            mse = mean_squared_error(bbox_to_array(gt_box), bbox_to_array(pred_boxes[best_pred_idx]))
            mses.append(mse)

# === Results ===
print(f"\nTotal Samples Evaluated: {len(ious)}")
print(f"Average IoU: {np.mean(ious):.4f}")
print(f"Recall (IoU ≥ {IOU_THRESHOLD}): {np.mean(recalls) * 100:.2f}%")
print(f"Mean MSE of Matching Boxes: {np.mean(mses):.4f}" if mses else "No matches for MSE")


Evaluating: 100%|██████████| 200/200 [00:11<00:00, 17.90it/s]


Total Samples Evaluated: 200
Average IoU: 0.5847
Recall (IoU ≥ 0.5): 70.50%
Mean MSE of Matching Boxes: 333.1193



