# YOLOv11 Model Training and Evaluation for Maize Disease Detection

This notebook provides a complete pipeline for training and evaluating YOLOv11 object detection models on the **maize lesion dataset**, structured across multiple dataset splits: `SID01`, `SID02`, and `SID03`.

Key features of this pipeline include:

- **Automated training** across different YOLO variants (`n`, `m`, `l`), batch sizes, and splits.
- Support for:
  - `SID02` subtype-specific training (`boom`, `drone`, `handheld`)
  - `SID03` K-Fold cross-validation.
- **Custom evaluation using the RAAD metric** (Relative Affected Area Difference), assessing bounding box area agreement between predictions and ground truth.
- **Integration with Weights & Biases (wandb)** for:
  - Tracking training runs and hyperparameters.
  - Logging test metrics and qualitative predictions.
  - Uploading per-image evaluation tables and **collage visualizations** for best/worst cases.



In [1]:
!pip install -U ultralytics wandb
!pip install dotenv
!pip install shapely
!pip install wandb opencv-python



In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import wandb
from ultralytics import YOLO
from tqdm.notebook import tqdm
from shapely.geometry import box, MultiPolygon
import yaml
import torch
import pandas as pd
import logging
from pathlib import Path
from dotenv import load_dotenv
from typing import Dict, List, Tuple
import traceback
import random
import shutil
from pathlib import Path
from PIL import Image, ImageOps

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [3]:
!yolo settings wandb=True

✅ Updated 'wandb=True'
JSONDict("/home/jovyan/.config/Ultralytics/settings.json"):
{
  "settings_version": "0.0.6",
  "datasets_dir": "/home/jovyan/DSPRO2/M-AI-ZE-Maize-diseases-detection/notebooks/datasets",
  "weights_dir": "weights",
  "runs_dir": "runs",
  "uuid": "8a115bbf5049f0fe55cf2ccd8be54ca8bfded6b963fd272724a959bb525556d2",
  "sync": true,
  "api_key": "",
  "openai_api_key": "",
  "clearml": true,
  "comet": true,
  "dvc": true,
  "hub": true,
  "mlflow": true,
  "neptune": true,
  "raytune": true,
  "tensorboard": false,
  "wandb": true,
  "vscode_msg": true,
  "openvino_msg": true
}
💡 Learn more about Ultralytics Settings at https://docs.ultralytics.com/quickstart/#ultralytics-settings


In [4]:
load_dotenv()

wandb_api_key = os.getenv("WANDB_API_KEY")
print(f"WANDB_API_KEY: [{wandb_api_key[:4]}...]")

WANDB_API_KEY: [69ca...]


In [5]:
wandb.login(key=wandb_api_key)

[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/jovyan/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mrueedi-tobias[0m ([33mrueedi-tobias-hochschule-luzern[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

## Get Data and set Parameter

In [6]:
BATCH_SIZES = [8, 16, 32]
DEFAULT_EPOCHS = 40
DEFAULT_KFOLDS = 5
BASE_PATH = Path("/exchange/dspro2/M-AI-ZE/data/adjusted/1.2/splits")
MODEL_WEIGHTS = {"s": "yolo11s.pt", "n": "yolo11n.pt", "m": "yolo11m.pt", "l": "yolo11l.pt"}
SUBTYPES = ["boom", "drone", "handheld"]
IMG_SIZE  = 640
TRAIN_PROJECT_PREFIX = "V1_2-maize_disease_detection_train"
EVAL_PROJECT_PREFIX  = "V1_2-maize_disease_detection_eval"

## Helper functions

In [7]:
def area_coverage_score(pred_boxes, gt_boxes):
    """
    Returns a score from 0 to 100 based on how well the predicted boxes cover the ground truth boxes.
    Full coverage (prediction area == label area) gives 100.
    Over-prediction (prediction area > label area) is penalized.
    Under-prediction (prediction area < label area) is also penalized.
    Args:
        pred_boxes: list or np.array of [ymin, xmin, ymax, xmax] (normalized 0-1)
        gt_boxes: list or np.array of [ymin, xmin, ymax, xmax] (normalized 0-1)
    Returns:
        float: score between 0 and 100
    """
    def total_area(boxes):
        if len(boxes) == 0:
            return 0.0
        boxes = np.array(boxes)
        heights = np.clip(boxes[:, 2] - boxes[:, 0], 0, 1)
        widths = np.clip(boxes[:, 3] - boxes[:, 1], 0, 1)
        return np.sum(heights * widths)
 
    area_pred = total_area(pred_boxes)
    area_gt = total_area(gt_boxes)
 
    if area_gt == 0:
        return 0.0 if area_pred > 0 else 100.0
 
    # Score is 100 if areas match, penalized for over/under
    ratio = area_pred / area_gt
    if ratio <= 1:
        score = ratio * 100
    else:
        # Over-prediction: penalize more as prediction exceeds label
        score = max(0, 100 - (ratio - 1) * 100)
    return score

def compute_iou(box1, box2):
    x1_min, y1_min, x1_max, y1_max = box1
    x2_min, y2_min, x2_max, y2_max = box2

    inter_x_min = max(x1_min, x2_min)
    inter_y_min = max(y1_min, y2_min)
    inter_x_max = min(x1_max, x2_max)
    inter_y_max = min(y1_max, y2_max)

    inter_w = max(0, inter_x_max - inter_x_min)
    inter_h = max(0, inter_y_max - inter_y_min)
    inter_area = inter_w * inter_h

    area1 = (x1_max - x1_min) * (y1_max - y1_min)
    area2 = (x2_max - x2_min) * (y2_max - y2_min)
    union_area = area1 + area2 - inter_area

    return inter_area / union_area if union_area > 0 else 0.0

def mean_precision_at_iou(pred_boxes_list, gt_boxes_list, iou_threshold=0.5):
    aps = []
    for pred_boxes, gt_boxes in zip(pred_boxes_list, gt_boxes_list):
        ap = average_precision(pred_boxes, gt_boxes, iou_threshold)
        aps.append(ap)
    return np.mean(aps)

def average_precision(pred_boxes, gt_boxes, iou_threshold=0.5):
    if len(pred_boxes) == 0:
        return 0.0
    matched = set()
    tp = 0
    for pb in pred_boxes:
        for i, gb in enumerate(gt_boxes):
            if i in matched:
                continue
            if compute_iou(pb, gb) >= iou_threshold:
                tp += 1
                matched.add(i)
                break
    fp = len(pred_boxes) - tp
    fn = len(gt_boxes) - tp
    precision = tp / (tp + fp + 1e-8)
    recall = tp / (tp + fn + 1e-8)
    return precision 
    

def calculate_raad(pred_boxes, true_boxes, img_w=IMG_SIZE, img_h=IMG_SIZE,
                   epsilon=1e-6, normalize=True):
    if not pred_boxes and not true_boxes:
        return 0.0, 0.0, 0.0
    if not pred_boxes:
        true_area = sum((b[2]-b[0])*(b[3]-b[1]) for b in true_boxes)
        return 1.0, 0.0, true_area
    if not true_boxes:
        pred_area = sum((b[2]-b[0])*(b[3]-b[1]) for b in pred_boxes)
        return 1.0, pred_area, 0.0

    if normalize:
        pred_boxes = [[b[0]*img_w, b[1]*img_h, b[2]*img_w, b[3]*img_h] for b in pred_boxes]
        true_boxes = [[b[0]*img_w, b[1]*img_h, b[2]*img_w, b[3]*img_h] for b in true_boxes]

    pred_poly = MultiPolygon([box(*b) for b in pred_boxes]).buffer(0)
    true_poly = MultiPolygon([box(*b) for b in true_boxes]).buffer(0)

    pred_area = pred_poly.area
    true_area = true_poly.area
    raad = abs(pred_area - true_area) / max(true_area, epsilon)
    return raad, pred_area, true_area


def load_bbox_csv(csv_path: Path) -> Dict[str, List[Tuple[int,int,int,int]]]:
    df = pd.read_csv(csv_path, header=None, skiprows=1)
    out = {}
    for _, row in df.iterrows():
        out.setdefault(row[0], []).append(tuple(map(int, row[1:5])))
    return out


def suppress_yolo_logging():
    """Suppress the YOLO logging temporarily."""
    logger = logging.getLogger("ultralytics")
    original_level = logger.level
    logger.setLevel(logging.ERROR)
    return logger, original_level

def restore_yolo_logging(logger, original_level):
    """Restore the YOLO logging to its original level."""
    logger.setLevel(original_level)

def _make_sid02_subset(root: Path, subtype: str) -> Path:
    """
    Creates subfolder for split SID02, because Yolo cant use subfolder.
    Example ../train/boom need to be ../train 
    Therefore a temporary Folder gets created.
    """
    tmp = root.parent / f"{root.name}_{subtype}"
    if tmp.exists():
        return tmp

    for p in ("images", "labels"):
        for split in ("train", "val"):
            (tmp / p / split).mkdir(parents=True, exist_ok=True)
    (tmp / "images" / "test").symlink_to(root / "images" / "test")
    (tmp / "labels" / "test").symlink_to(root / "labels" / "test")

    for phase in ("train", "val"):
        lbl_src_root = root / "labels" / phase / subtype
        img_src_root = root / "images" / phase
        for lbl in lbl_src_root.glob("*.txt"):
            shutil.copy(lbl, tmp / "labels" / phase / lbl.name)
            img_src = img_src_root / f"{lbl.stem}.jpg"
            if img_src.exists():
                (tmp / "images" / phase / img_src.name).symlink_to(img_src)

    yaml_dict = {
        "train": str(tmp / "images" / "train"),
        "val":   str(tmp / "images" / "val"),
        "test":  str(tmp / "images" / "test"),
        "nc": 1,
        "names": ["lesion"],
    }
    (tmp / "data.yaml").write_text(yaml.safe_dump(yaml_dict))
    return tmp

## Training 

In [8]:
def train_model(
        model_size="n",    
        split="SID01",         
        subtype=None,           
        fold_id=None,            
        epochs=10,
        batch=16,
        lr=0.01,
        project = "maize_disease_detection",
        run_name = "not_set"):

    split_root = BASE_PATH / split


    if split == "SID02" and subtype:
        split_root = _make_sid02_subset(split_root, subtype)

    if split == "SID03" and fold_id is not None:
        split_root = split_root.parent / f"{split}_kfold" / f"fold{fold_id}"

    dataset_yaml = split_root / "data.yaml"
    if not dataset_yaml.exists():
        raise FileNotFoundError(f"YAML nicht gefunden: {dataset_yaml}")

    model_file = MODEL_WEIGHTS[model_size]

    try:
        model = YOLO(model_file)
        results = model.train(
            data=str(dataset_yaml),
            epochs=epochs,
            imgsz=IMG_SIZE,
            lr0=lr,
            batch=batch,
            name=run_name,
            project=project,
            exist_ok=True
        )

        ckpt_dir = Path("runs") / "detect" / run_name / "weights"
        weights_path = next((ckpt_dir / f).as_posix() for f in ("best.pt", "last.pt") if (ckpt_dir / f).exists())

        if wandb.run is not None:
            wandb.finish()

        cfg = dict(model=model_file, split=split, subtype=subtype,
                   fold_id=fold_id, epochs=epochs, batch=batch,
                   lr=lr, weights_path=weights_path)
        wb = wandb.init(project=project,
                        name=run_name, config=cfg, reinit=True)

        mAP50     = results.results_dict.get("metrics/mAP50", 0)
        mAP50_95  = results.results_dict.get("metrics/mAP50-95", 0)
        wb.log({"mAP50": mAP50, "mAP50-95": mAP50_95})

        val_img_dir = split_root / "images" / "val"
        for i, img_file in enumerate(list(val_img_dir.glob("*.jpg"))[:5]):
            pred = model.predict(str(img_file), conf=0.25)[0]
            img = cv2.cvtColor(cv2.imread(str(img_file)), cv2.COLOR_BGR2RGB)
            for b in pred.boxes:
                x1, y1, x2, y2 = map(int, b.xyxy[0])
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            wb.log({f"val_image_{i}": wandb.Image(img, caption=img_file.name)})

        wb.finish()
        return weights_path

    except Exception as e:
        print("Training Error:", e)
        traceback.print_exc()
        return MODEL_WEIGHTS[model_size] 


## Evaluate RAAD on Testdata

In [13]:
def evaluate_test_raad(model_path, split="SID01", split_root=None,
                       project="maize_disease_detection_eval", run_name=None):
    target_size = 640

    if split_root is None:
        split_root = BASE_PATH / split
    else:
        split_root = Path(split_root)

    test_img_dir = split_root / "images" / "test"
    test_csv_path = split_root / "labels" / "test" / "bboxes_test.csv"

    if not test_csv_path.exists():
        raise FileNotFoundError(f"Missing: {test_csv_path}")
    if not test_img_dir.exists():
        raise FileNotFoundError(f"Missing: {test_img_dir}")

    bounding_boxes = load_bbox_csv(test_csv_path)
    model_name = run_name
    model_variant = run_name.split("_")[0]


    run = wandb.init(
        project=project,
        name=run_name or f"test_eval_{model_name}_{split_root.name}",
        config={
            "model_path": str(model_path),
            "model_name": model_name,
            "model_variant": model_variant,
            "split": split_root.name    
        }
    )

    try:
        model = YOLO(model_path)

        results = {
            "image": [],
            "raad": [],
            "pred_area": [],
            "true_area": [],
            "prediction_count": [],
            "truth_count": [],
            "area_score": [],
            "model_name": [],
            "model_variant": [],
            "split": []
        }

        pred_boxes_all = []
        true_boxes_all = []

        test_imgs = sorted([f for f in os.listdir(test_img_dir) if f.endswith(('.jpg', '.png'))])
        for img_file in tqdm(test_imgs, desc=f"Evaluating {split_root.name}"):
            img_path = test_img_dir / img_file

            if img_file not in bounding_boxes:
                print(f"[!] Missing GT: {img_file}")
                continue

            img = Image.open(img_path)
            img = ImageOps.exif_transpose(img).convert("RGB")
            w, h = img.size
            img_resized = img.resize((target_size, target_size), Image.BILINEAR)
            img_resized_np = np.array(img_resized)

            logger, original_level = suppress_yolo_logging()
            try:
                preds = model.predict(img_resized_np, save=False, imgsz=target_size, conf=0.1)[0]
            finally:
                restore_yolo_logging(logger, original_level)

            pred_boxes = preds.boxes.xyxy.cpu().numpy().tolist()
            true_boxes = bounding_boxes[img_file]

            scale_x = target_size / w
            scale_y = target_size / h
            true_boxes_scaled = [
                [x1 * scale_x, y1 * scale_y, x2 * scale_x, y2 * scale_y]
                for (x1, y1, x2, y2) in true_boxes
            ]

            raad, pred_area, true_area = calculate_raad(pred_boxes, true_boxes_scaled, target_size, target_size, normalize=False)

            norm_pred = [[b[0]/target_size, b[1]/target_size, b[2]/target_size, b[3]/target_size] for b in pred_boxes]
            norm_true = [[b[0]/target_size, b[1]/target_size, b[2]/target_size, b[3]/target_size] for b in true_boxes_scaled]
            area_score = area_coverage_score(norm_pred, norm_true)

            pred_boxes_all.append(pred_boxes)
            true_boxes_all.append(true_boxes_scaled)

            results["image"].append(img_file)
            results["raad"].append(raad)
            results["pred_area"].append(pred_area)
            results["true_area"].append(true_area)
            results["prediction_count"].append(len(pred_boxes))
            results["truth_count"].append(len(true_boxes_scaled))
            results["area_score"].append(area_score)
            results["model_name"].append(model_name)
            results["model_variant"].append(model_variant)
            results["split"].append(split)

        approx_mAP50 = mean_precision_at_iou(pred_boxes_all, true_boxes_all)
        df = pd.DataFrame(results)

        run.log({
            "avg_raad": df["raad"].mean(),
            "median_raad": df["raad"].median(),
            "max_raad": df["raad"].max(),
            "avg_pred_area": df["pred_area"].mean(),
            "avg_true_area": df["true_area"].mean(),
            "avg_pred_count": df["prediction_count"].mean(),
            "avg_truth_count": df["truth_count"].mean(),
            "avg_count_ratio": df["prediction_count"].mean() / max(df["truth_count"].mean(), 1e-6),
            "avg_area_score": df["area_score"].mean(),
            "mean_precision@0.5": approx_mAP50,
            "model_name": model_name,
            "model_variant": model_variant,
            "split": split
        })

        table = wandb.Table(columns=[
            "image", "raad", "pred_area", "true_area",
            "prediction_count", "truth_count", "count_ratio", "area_score",
            "model_name", "model_variant", "split"
        ])
        for _, row in df.iterrows():
            count_ratio = row["prediction_count"] / max(row["truth_count"], 1e-6)
            table.add_data(
                row["image"], row["raad"], row["pred_area"], row["true_area"],
                row["prediction_count"], row["truth_count"], count_ratio, row["area_score"],
                row["model_name"], row["model_variant"], row["split"]
            )
        run.log({"per_image_results": table})
        run.finish()

        return df, df["raad"].mean()

    except Exception as e:
        print(f"[!] Evaluation error: {e}")
        run.finish()
        return None, None


In [10]:
def generate_kfold_yamls(split_dir: Path, k: int = 5):
    assert split_dir.name.startswith("SID03"), "Only for SID03 Split."

    train_img_dir = split_dir / "images" / "train"
    all_imgs = list(train_img_dir.glob("*.jpg"))
    random.shuffle(all_imgs)

    fold_size = len(all_imgs) // k
    fold_root = split_dir.parent / f"{split_dir.name}_kfold"
    fold_root.mkdir(exist_ok=True)

    for fold in range(k):
        fold_dir = fold_root / f"fold{fold}"

        for phase in ["train", "val", "test"]:
            (fold_dir / "images" / phase).mkdir(parents=True, exist_ok=True)
            (fold_dir / "labels" / phase).mkdir(parents=True, exist_ok=True)

        val_set = set(all_imgs[fold * fold_size : (fold + 1) * fold_size])

        for img_path in all_imgs:
            phase = "val" if img_path in val_set else "train"
            shutil.copy(img_path, fold_dir / "images" / phase / img_path.name)

            label_found = False
            for subtype in SUBTYPES:
                lbl_src = split_dir / "labels" / "train" / subtype / f"{img_path.stem}.txt"
                if lbl_src.exists():
                    shutil.copy(lbl_src, fold_dir / "labels" / phase / lbl_src.name)
                    label_found = True
                    break
            if not label_found:
                print(f"[!] No Labe data for: {img_path.name}")

        for phase in ["images", "labels"]:
            src = split_dir / phase / "test"
            dst = fold_dir / phase / "test"
            if not dst.exists():
                dst.symlink_to(src)

        yaml_dict = {
            "train": str((fold_dir / "images/train").resolve()),
            "val": str((fold_dir / "images/val").resolve()),
            "test": str((fold_dir / "images/test").resolve()),
            "nc": 1,
            "names": ["lesion"]
        }
        with open(fold_dir / "data.yaml", "w") as f:
            yaml.safe_dump(yaml_dict, f)


        csv_src = split_dir / "labels" / "test" / "bboxes_test.csv"
        csv_dst = fold_dir / "labels" / "test" / "bboxes_test.csv"
        if csv_src.exists():
            shutil.copy(csv_src, csv_dst)
        else:
            print(f"[Didnt found bboxes_test.csv: {csv_src}")
        
        print(f"Fold {fold} created: {fold_dir}")

# Start Train and Eval

In [11]:
def run_training_and_eval(
    splits=["SID01", "SID02"],            
    models=["n", "m", "l"],                
    batches=[16, 32],                     
    epochs=40,
    lr=0.01,
    train_project="maize_train",
    eval_project="maize_eval",
    sid02_subtypes=[None, "drone", "boom", "handheld"],  
    sid03_folds=None,                     
    device="0"
):
    for split in splits:
        if split == "SID01":
            for model in models:
                for batch in batches:
                    run_name = f"yolo11{model}_{split}_e{epochs}_b{batch}"
                    weights = train_model(
                        model_size=model,
                        split=split,
                        epochs=epochs,
                        batch=batch,
                        lr=lr,
                        project=train_project,
                        run_name = run_name
                    )
                    evaluate_test_raad(
                        model_path=weights,
                        split=split,
                        split_root=BASE_PATH / split,
                        project =  eval_project,
                        run_name=run_name
                    )

        elif split == "SID02":
            for subtype in sid02_subtypes:
                subtype_tag = subtype if subtype else "all"
                subset_root = _make_sid02_subset(BASE_PATH / "SID02", subtype) if subtype else BASE_PATH / "SID02"

                for model in models:
                    for batch in batches:
                        run_name = f"yolo11{model}_{split}_{subtype}_e{epochs}_b{batch}"
                        weights = train_model(
                            model_size=model,
                            split="SID02",
                            subtype=subtype,
                            epochs=epochs,
                            batch=batch,
                            lr=lr,
                            project=train_project,
                            run_name=run_name
                        )
                        evaluate_test_raad(
                            model_path=weights,
                            split="SID02",
                            split_root=subset_root,
                            project =  eval_project,
                            run_name=run_name
                        )

        elif split == "SID03":
            sid03_kfold_root = BASE_PATH / "SID03_kfold"
            if not sid03_kfold_root.exists():
                print("SID03_kfold not found – generate K-Folds...")
                generate_kfold_yamls(BASE_PATH / "SID03", k=5)
        
            folds = sid03_folds if sid03_folds else [None]

            for fold in folds:
                for model in models:
                    for batch in batches:
                        run_name = f"yolo11{model}_{split}_f{fold}_e{epochs}_b{batch}"
                        weights = train_model(
                            model_size=model,
                            split="SID03",
                            fold_id=fold,
                            epochs=epochs,
                            batch=batch,
                            lr=lr,
                            project=train_project,
                            run_name=run_name
                        )
                        fold_path = (BASE_PATH / "SID03_kfold" / f"fold{fold}") if fold is not None else BASE_PATH / "SID03"
                        sid03_test_root = BASE_PATH / "SID03"
                        evaluate_test_raad(
                            model_path=weights,
                            split="SID03",
                            split_root=sid03_test_root,
                            project=eval_project,
                            run_name=run_name
                        )


In [11]:
run_training_and_eval(
    splits=["SID03"],
    models=["s","n","m","l"],
    batches=[8, 16],
    epochs=25,
    train_project=TRAIN_PROJECT_PREFIX,
    eval_project=EVAL_PROJECT_PREFIX,
    sid02_subtypes=["boom", "drone","handheld"],
    sid03_folds=[3, 4],
    device="0"
)

Ultralytics 8.3.154 🚀 Python-3.12.10 torch-2.7.1+cu126 CUDA:0 (NVIDIA A16, 14891MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/exchange/dspro2/M-AI-ZE/data/adjusted/1.2/splits/SID03_kfold/fold3/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=25, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolo11s_SID03_f3_e25_b8, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, ove

Freezing layer 'model.23.dfl.conv.weight'
[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks...
[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1966.6±931.6 MB/s, size: 51.9 KB)


[34m[1mtrain: [0mScanning /exchange/dspro2/M-AI-ZE/data/adjusted/1.2/splits/SID03_kfold/fold3/labels/train.cache... 8693 images, 0 backgrounds, 0 corrupt: 100%|██████████| 8693/8693 [00:00<?, ?it/s]


[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1871.0±1022.9 MB/s, size: 54.6 KB)


[34m[1mval: [0mScanning /exchange/dspro2/M-AI-ZE/data/adjusted/1.2/splits/SID03_kfold/fold3/labels/val.cache... 2173 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2173/2173 [00:00<?, ?it/s]
Exception ignored in: <function _releaseLock at 0x7f6b6cd5aac0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.12/logging/__init__.py", line 243, in _releaseLock
    def _releaseLock():
    
KeyboardInterrupt: 
Exception in thread Thread-9 (_pin_memory_loop):
Traceback (most recent call last):
  File "/opt/conda/lib/python3.12/threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "/opt/conda/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "/opt/conda/lib/python3.12/threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.12/site-packages/torch/utils/data/_utils/pin_memory.py", line 61, in _pin_memory_loop
    do_one_step()
  File "/opt/con

Plotting labels to V1_2-maize_disease_detection_train/yolo11s_SID03_f3_e25_b8/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mV1_2-maize_disease_detection_train/yolo11s_SID03_f3_e25_b8[0m
Starting training for 25 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/1087 [00:00<?, ?it/s]

Training Error: Pin memory thread exited unexpectedly



Traceback (most recent call last):
  File "/tmp/ipykernel_747/3208005418.py", line 29, in train_model
    results = model.train(
              ^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/ultralytics/engine/model.py", line 797, in train
    self.trainer.train()
  File "/opt/conda/lib/python3.12/site-packages/ultralytics/engine/trainer.py", line 227, in train
    self._do_train(world_size)
  File "/opt/conda/lib/python3.12/site-packages/ultralytics/engine/trainer.py", line 388, in _do_train
    for i, batch in pbar:
                    ^^^^
  File "/opt/conda/lib/python3.12/site-packages/tqdm/std.py", line 1181, in __iter__
    for obj in iterable:
               ^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/ultralytics/data/build.py", line 67, in __iter__
    yield next(self.iterator)
          ^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 733, in __next__
    data = self._next_data()
           ^^^



Evaluating SID03:   0%|          | 0/2717 [00:00<?, ?it/s]

Evaluation error: CUDA out of memory. Tried to allocate 8.23 GiB. GPU 0 has a total capacity of 14.54 GiB of which 5.15 GiB is free. Process 490049 has 9.38 GiB memory in use. Of the allocated memory 9.22 GiB is allocated by PyTorch, and 62.86 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)


Ultralytics 8.3.154 🚀 Python-3.12.10 torch-2.7.1+cu126 CUDA:0 (NVIDIA A16, 14891MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/exchange/dspro2/M-AI-ZE/data/adjusted/1.2/splits/SID03_kfold/fold3/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=25, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolo11s_SID03_f3_e25_b16, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, o

Freezing layer 'model.23.dfl.conv.weight'
[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks...
[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2321.5±774.4 MB/s, size: 51.9 KB)


[34m[1mtrain: [0mScanning /exchange/dspro2/M-AI-ZE/data/adjusted/1.2/splits/SID03_kfold/fold3/labels/train.cache... 8693 images, 0 backgrounds, 0 corrupt: 100%|██████████| 8693/8693 [00:00<?, ?it/s]


[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 785.8±235.7 MB/s, size: 54.6 KB)


[34m[1mval: [0mScanning /exchange/dspro2/M-AI-ZE/data/adjusted/1.2/splits/SID03_kfold/fold3/labels/val.cache... 2173 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2173/2173 [00:00<?, ?it/s]


Plotting labels to V1_2-maize_disease_detection_train/yolo11s_SID03_f3_e25_b16/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mV1_2-maize_disease_detection_train/yolo11s_SID03_f3_e25_b16[0m
Starting training for 25 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


Exception in thread Thread-13 (_pin_memory_loop):
Traceback (most recent call last):
  File "/opt/conda/lib/python3.12/threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "/opt/conda/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 766, in run_closure
  0%|          | 0/544 [00:00<?, ?it/s]    _threading_Thread_run(self)
  File "/opt/conda/lib/python3.12/threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.12/site-packages/torch/utils/data/_utils/pin_memory.py", line 61, in _pin_memory_loop
    do_one_step()
  File "/opt/conda/lib/python3.12/site-packages/torch/utils/data/_utils/pin_memory.py", line 37, in do_one_step
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/pyth

KeyboardInterrupt: 

### New Eval after correcting the img load function.

In [None]:
detect_root = Path("V1_2-maize_disease_detection_train")
eval_project = "V1_2-maize_disease_detection_eval_3"

for run_dir in detect_root.glob("*/"):
    run_name = run_dir.name
    weights_path = run_dir / "weights" / "best.pt"
    
    if not weights_path.exists():
        print(f"[!] Skipping {run_name} – no weights found.")
        continue

    parts = run_name.split("_")
    if len(parts) < 4:
        print(f"[!] Skipping invalid run_name: {run_name}")
        continue

    model_id = parts[0]
    split = parts[1]
    subtype = None
    fold_id = None

    if split == "SID02" and len(parts) >= 5:
        subtype = parts[2]
        split_root = _make_sid02_subset(BASE_PATH / "SID02", subtype)
    elif split == "SID03" and "f" in parts[2]:
        fold_id = int(parts[2][1:])
        split_root = BASE_PATH / "SID03"
    else:
        split_root = BASE_PATH / split

    print(f"[i] Re-evaluating {run_name} ...")
    evaluate_test_raad(
        model_path=weights_path,
        split=split,
        split_root=split_root,
        project=eval_project,
        run_name=run_name
    )

[i] Re-evaluating yolo11s_SID01_e25_b8 ...


Evaluating SID01:   0%|          | 0/1359 [00:00<?, ?it/s]

0,1
avg_area_score,▁
avg_count_ratio,▁
avg_pred_area,▁
avg_pred_count,▁
avg_raad,▁
avg_true_area,▁
avg_truth_count,▁
max_raad,▁
mean_precision@0.5,▁
median_raad,▁

0,1
avg_area_score,41.15236
avg_count_ratio,0.98708
avg_pred_area,13156.83313
avg_pred_count,7.47535
avg_raad,0.95299
avg_true_area,18183.19095
avg_truth_count,7.57322
max_raad,123.42963
mean_precision@0.5,0.06942
median_raad,0.5011


[i] Re-evaluating yolo11s_SID01_e25_b16 ...


Evaluating SID01:   0%|          | 0/1359 [00:00<?, ?it/s]

0,1
avg_area_score,▁
avg_count_ratio,▁
avg_pred_area,▁
avg_pred_count,▁
avg_raad,▁
avg_true_area,▁
avg_truth_count,▁
max_raad,▁
mean_precision@0.5,▁
median_raad,▁

0,1
avg_area_score,41.07896
avg_count_ratio,1.03012
avg_pred_area,13633.61746
avg_pred_count,7.80132
avg_raad,1.00135
avg_true_area,18187.41672
avg_truth_count,7.57322
max_raad,89.09292
mean_precision@0.5,0.07154
median_raad,0.50646


[i] Re-evaluating yolo11n_SID01_e25_b8 ...


Evaluating SID01:   0%|          | 0/1359 [00:00<?, ?it/s]