# YOLOv11 Model Training and Evaluation for Maize Disease Detection

This notebook provides a complete pipeline for training and evaluating YOLOv11 object detection models on the **maize lesion dataset**, structured across multiple dataset splits: `SID01`, `SID02`, and `SID03`.

Key features of this pipeline include:

- **Automated training** across different YOLO variants (`s`, `n`, `m`, `l`), batch sizes, and splits.
- Support for:
  - `SID02` subtype-specific training (`boom`, `drone`, `handheld`)
  - `SID03` K-Fold cross-validation.
- **Custom evaluation using the RAAD metric** (Relative Affected Area Difference), assessing bounding box area agreement between predictions and ground truth.
- **Integration with Weights & Biases (wandb)** for:
  - Tracking training runs and hyperparameters.
  - Logging test metrics and qualitative predictions.
  - Uploading per-image evaluation tables and **collage visualizations** for best/worst cases.



In [1]:
!pip install -U ultralytics wandb
!pip install dotenv
!pip install shapely
!pip install wandb opencv-python



In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import wandb
from ultralytics import YOLO
from tqdm.notebook import tqdm
from shapely.geometry import box, MultiPolygon
import yaml
import torch
import pandas as pd
import logging
from pathlib import Path
from dotenv import load_dotenv
from typing import Dict, List, Tuple
import traceback
import random
import shutil

In [3]:
# Switch on the Weights & Biases flag in the persistent Ultralytics settings file
# (the resulting settings are echoed in the cell output below).
!yolo settings wandb=True

JSONDict("/home/jovyan/.config/Ultralytics/settings.json"):
{
  "settings_version": "0.0.6",
  "datasets_dir": "/home/jovyan/DSPRO2/M-AI-ZE-Maize-diseases-detection/notebooks/datasets",
  "weights_dir": "weights",
  "runs_dir": "runs",
  "uuid": "8a115bbf5049f0fe55cf2ccd8be54ca8bfded6b963fd272724a959bb525556d2",
  "sync": true,
  "api_key": "",
  "openai_api_key": "",
  "clearml": true,
  "comet": true,
  "dvc": true,
  "hub": true,
  "mlflow": true,
  "neptune": true,
  "raytune": true,
  "tensorboard": false,
  "wandb": true,
  "vscode_msg": true,
  "openvino_msg": true
}
💡 Learn more about Ultralytics Settings at https://docs.ultralytics.com/quickstart/#ultralytics-settings


In [4]:
# Pull variables from a local .env file (if there is one) into the process environment.
load_dotenv()

wandb_api_key = os.getenv("WANDB_API_KEY")
if not wandb_api_key:
    # Fail early with a clear message instead of a TypeError on the slice below.
    raise RuntimeError(
        "WANDB_API_KEY is not set. Define it in the environment or in a .env file."
    )
# Show only a short prefix so the full secret never ends up in the notebook output.
print(f"WANDB_API_KEY: [{wandb_api_key[:4]}...]")

WANDB_API_KEY: [69ca...]


In [5]:
# Authenticate against Weights & Biases with the key read from the environment above.
wandb.login(key=wandb_api_key)

[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/jovyan/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mrueedi-tobias[0m ([33mrueedi-tobias-hochschule-luzern[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

## Dataset paths and parameters

In [6]:
# --- Dataset location -------------------------------------------------------
# Root directory; each split (e.g. SID01) is addressed as BASE_PATH / <split>.
BASE_PATH = Path("/exchange/dspro2/M-AI-ZE/data/adjusted/1.2/splits")
SUBTYPES = ["boom", "drone", "handheld"]

# --- Model / training defaults ----------------------------------------------
# Maps the one-letter model size to the pretrained checkpoint file name.
MODEL_WEIGHTS = {size: f"yolo11{size}.pt" for size in ("s", "n", "m", "l")}
IMG_SIZE = 640
BATCH_SIZES = [8, 16, 32]
DEFAULT_EPOCHS = 40
DEFAULT_KFOLDS = 5

# --- Weights & Biases project names -----------------------------------------
TRAIN_PROJECT_PREFIX = "V1_2-maize_disease_detection_train"
EVAL_PROJECT_PREFIX = "V1_2-maize_disease_detection_eval"

## Helper functions

In [7]:
def calculate_raad(pred_boxes, true_boxes, img_w=IMG_SIZE, img_h=IMG_SIZE,
                   epsilon=1e-6, normalize=True):
    """
    Compute the Relative Affected Area Difference (RAAD) between two box sets.

    The affected area of a box set is the area of the *union* of its boxes, so
    overlapping boxes are not counted twice.

    Args:
        pred_boxes: Iterable of predicted boxes as (x1, y1, x2, y2).
        true_boxes: Iterable of ground-truth boxes as (x1, y1, x2, y2).
        img_w, img_h: Image size used to scale the boxes when ``normalize`` is True.
        epsilon: Lower bound for the denominator to avoid division by zero.
        normalize: If True, box coordinates are multiplied by ``img_w`` / ``img_h``
            before the area is computed; if False they are used as given.

    Returns:
        Tuple ``(raad, pred_area, true_area)``:
        - both sets empty: ``(0.0, 0.0, 0.0)``
        - exactly one set empty: ``raad`` is ``1.0``
        - otherwise: ``abs(pred_area - true_area) / max(true_area, epsilon)``
        Areas are computed the same way (scaling + union) in every branch.
    """
    # Local import: only this function needs it and the file-level imports stay untouched.
    from shapely.ops import unary_union

    # Accept any iterable (including numpy arrays, whose truthiness is ambiguous).
    pred_boxes = [] if pred_boxes is None else list(pred_boxes)
    true_boxes = [] if true_boxes is None else list(true_boxes)

    def _union_area(boxes):
        """Area covered by the union of ``boxes`` (0.0 for an empty list)."""
        if not boxes:
            return 0.0
        if normalize:
            boxes = [[b[0] * img_w, b[1] * img_h, b[2] * img_w, b[3] * img_h] for b in boxes]
        # unary_union merges overlapping rectangles into a valid geometry.
        return float(unary_union([box(*b) for b in boxes]).area)

    if not pred_boxes and not true_boxes:
        return 0.0, 0.0, 0.0

    pred_area = _union_area(pred_boxes)
    true_area = _union_area(true_boxes)

    # A completely missing side counts as the maximal penalty of 1.0.
    if not pred_boxes or not true_boxes:
        return 1.0, pred_area, true_area

    raad = abs(pred_area - true_area) / max(true_area, epsilon)
    return raad, pred_area, true_area


def load_bbox_csv(csv_path: Path) -> Dict[str, List[Tuple[int,int,int,int]]]:
    """
    Read a bounding-box CSV and group the boxes by the value of the first column.

    The first line of the file is skipped. For every remaining line the first
    column is used as the key and columns 2-5 are converted to ``int`` and stored
    as one tuple. Keys keep the order of their first appearance.
    """
    frame = pd.read_csv(csv_path, header=None, skiprows=1)
    boxes_by_image = {}
    for record in frame.itertuples(index=False, name=None):
        key = record[0]
        coords = tuple(int(value) for value in record[1:5])
        if key not in boxes_by_image:
            boxes_by_image[key] = []
        boxes_by_image[key].append(coords)
    return boxes_by_image


def suppress_yolo_logging():
    """
    Raise the "ultralytics" logger to ERROR level.

    Returns the logger together with the level it had before, so the caller can
    hand both to ``restore_yolo_logging`` afterwards.
    """
    yolo_logger = logging.getLogger("ultralytics")
    previous_level = yolo_logger.level
    yolo_logger.setLevel(logging.ERROR)
    return yolo_logger, previous_level

def restore_yolo_logging(logger, original_level):
    """Set ``logger`` back to ``original_level`` (as returned by ``suppress_yolo_logging``)."""
    logger.setLevel(level=original_level)

def _make_sid02_subset(root: Path, subtype: str) -> Path:
    """
    Build a flat, subtype-specific copy of the SID02 split next to ``root``.

    Labels are read from ``root/labels/<phase>/<subtype>/*.txt`` and copied to
    ``<subset>/labels/<phase>/``; the matching ``<stem>.jpg`` from
    ``root/images/<phase>/`` is symlinked into ``<subset>/images/<phase>/``.
    The test images and labels are symlinked unchanged, and a ``data.yaml``
    describing the subset is written.

    Args:
        root: Directory of the full SID02 split.
        subtype: Name of the label sub-folder to extract (e.g. "boom").

    Returns:
        Path of the subset directory ``<root.parent>/<root.name>_<subtype>``.
    """
    tmp = root.parent / f"{root.name}_{subtype}"
    yaml_path = tmp / "data.yaml"
    # data.yaml is written last, so its presence marks a completely built subset.
    # A bare directory check would accept a half-built folder from an interrupted run.
    if yaml_path.exists():
        return tmp

    def _link(dst: Path, src: Path) -> None:
        """Create the symlink dst -> src unless something already sits at dst."""
        # lexists also detects dangling symlinks, which Path.exists() would miss.
        if not os.path.lexists(dst):
            dst.symlink_to(src)

    for p in ("images", "labels"):
        for split in ("train", "val"):
            (tmp / p / split).mkdir(parents=True, exist_ok=True)
        _link(tmp / p / "test", root / p / "test")

    for phase in ("train", "val"):
        lbl_src_root = root / "labels" / phase / subtype
        img_src_root = root / "images" / phase
        for lbl in lbl_src_root.glob("*.txt"):
            shutil.copy(lbl, tmp / "labels" / phase / lbl.name)
            img_src = img_src_root / f"{lbl.stem}.jpg"
            if img_src.exists():
                _link(tmp / "images" / phase / img_src.name, img_src)

    yaml_dict = {
        "train": str(tmp / "images" / "train"),
        "val":   str(tmp / "images" / "val"),
        "test":  str(tmp / "images" / "test"),
        "nc": 1,
        "names": ["lesion"],
    }
    yaml_path.write_text(yaml.safe_dump(yaml_dict))
    return tmp

## Training 

In [8]:
def train_model(
        model_size="n",
        split="SID01",
        subtype=None,
        fold_id=None,
        epochs=10,
        batch=16,
        lr=0.01,
        project = "maize_disease_detection",
        run_name = "not_set"):
    """
    Train one YOLO model on a dataset split and log a summary run to wandb.

    Args:
        model_size: Key into ``MODEL_WEIGHTS`` selecting the pretrained checkpoint.
        split: Name of the split directory below ``BASE_PATH``.
        subtype: For ``split == "SID02"`` only; trains on the subtype subset
            produced by ``_make_sid02_subset``.
        fold_id: For ``split == "SID03"`` only; trains on
            ``<split>_kfold/fold<fold_id>``.
        epochs, batch, lr: Forwarded to ``model.train`` (``lr`` as ``lr0``).
        project: Used both as Ultralytics ``project`` and as wandb project.
        run_name: Used both as Ultralytics ``name`` and as wandb run name.

    Returns:
        POSIX path string of ``best.pt`` (or ``last.pt`` if ``best.pt`` is missing).
        If anything fails after the data.yaml check, the error is printed and the
        pretrained checkpoint name ``MODEL_WEIGHTS[model_size]`` is returned instead.

    Raises:
        FileNotFoundError: If ``data.yaml`` of the selected split does not exist.
    """
    split_root = BASE_PATH / split

    if split == "SID02" and subtype:
        split_root = _make_sid02_subset(split_root, subtype)

    if split == "SID03" and fold_id is not None:
        split_root = split_root.parent / f"{split}_kfold" / f"fold{fold_id}"

    dataset_yaml = split_root / "data.yaml"
    if not dataset_yaml.exists():
        raise FileNotFoundError(f"YAML nicht gefunden: {dataset_yaml}")

    model_file = MODEL_WEIGHTS[model_size]
    wb = None  # summary wandb run; closed in `finally` on every path

    try:
        model = YOLO(model_file)
        results = model.train(
            data=str(dataset_yaml),
            epochs=epochs,
            imgsz=IMG_SIZE,
            lr0=lr,
            batch=batch,
            name=run_name,
            project=project,
            exist_ok=True
        )

        # Ask the trainer where it saved the run instead of assuming
        # "runs/detect/<name>": `project=` was passed to train(), so that
        # hard-coded location is not where the weights were written.
        trainer = getattr(model, "trainer", None)
        save_dir = getattr(trainer, "save_dir", None) or getattr(results, "save_dir", None)
        run_dir = Path(save_dir) if save_dir else Path(project) / run_name
        ckpt_dir = run_dir / "weights"
        weights_path = next(
            ((ckpt_dir / f).as_posix() for f in ("best.pt", "last.pt") if (ckpt_dir / f).exists()),
            None,
        )
        if weights_path is None:
            raise FileNotFoundError(f"No best.pt / last.pt found in {ckpt_dir}")

        # Close a run that may still be active before opening the summary run.
        if wandb.run is not None:
            wandb.finish()

        cfg = dict(model=model_file, split=split, subtype=subtype,
                   fold_id=fold_id, epochs=epochs, batch=batch,
                   lr=lr, weights_path=weights_path)
        wb = wandb.init(project=project,
                        name=run_name, config=cfg, reinit=True)

        # Detection metrics are keyed with a "(B)" suffix; keep the plain keys as fallback.
        metrics = getattr(results, "results_dict", None) or {}
        mAP50 = metrics.get("metrics/mAP50(B)", metrics.get("metrics/mAP50", 0))
        mAP50_95 = metrics.get("metrics/mAP50-95(B)", metrics.get("metrics/mAP50-95", 0))
        wb.log({"mAP50": mAP50, "mAP50-95": mAP50_95})

        # Log up to five validation images with the predicted boxes drawn in.
        val_img_dir = split_root / "images" / "val"
        for i, img_file in enumerate(sorted(val_img_dir.glob("*.jpg"))[:5]):
            raw = cv2.imread(str(img_file))
            if raw is None:
                # Unreadable image: skip the preview instead of aborting the run.
                print(f"[!] Could not read image: {img_file}")
                continue
            pred = model.predict(str(img_file), conf=0.25)[0]
            img = cv2.cvtColor(raw, cv2.COLOR_BGR2RGB)
            for b in pred.boxes:
                x1, y1, x2, y2 = map(int, b.xyxy[0])
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            wb.log({f"val_image_{i}": wandb.Image(img, caption=img_file.name)})

        return weights_path

    except Exception as e:
        print("Training Error:", e)
        traceback.print_exc()
        print(f"[!] Falling back to pretrained weights: {model_file}")
        return MODEL_WEIGHTS[model_size]

    finally:
        if wb is not None:
            wb.finish()


## Evaluate RAAD on test data

In [9]:
def evaluate_test_raad(model_path, split="SID01", split_root=None, project="maize_disease_detection_eval", run_name=None):
    """
    Evaluate a YOLO model on the test images of a split using the RAAD metric.

    For every test image that has an entry in ``labels/test/bboxes_test.csv`` the
    model is run, RAAD and the covered areas are computed with
    ``calculate_raad(..., normalize=False)``, and the per-image values plus their
    aggregates are logged to a wandb run.

    Args:
        model_path: Weights file passed to ``YOLO``.
        split: Split name; only used to derive ``split_root`` when that is None.
        split_root: Directory containing ``images/test`` and ``labels/test``.
            Defaults to ``BASE_PATH / split``.
        project: wandb project name.
        run_name: wandb run name; a name derived from the model file and split
            directory is used when None.

    Returns:
        ``(df, avg_raad)`` with one DataFrame row per evaluated image, or
        ``(None, None)`` if the evaluation failed (the error is printed).

    Raises:
        FileNotFoundError: If the test image directory or the CSV is missing.
    """
    if split_root is None:
        split_root = BASE_PATH / split
    else:
        split_root = Path(split_root)

    test_img_dir = split_root / "images" / "test"
    test_csv_path = split_root / "labels" / "test" / "bboxes_test.csv"

    if not test_csv_path.exists():
        raise FileNotFoundError(f"Missing: {test_csv_path}")
    if not test_img_dir.exists():
        raise FileNotFoundError(f"Missing: {test_img_dir}")

    bounding_boxes = load_bbox_csv(test_csv_path)

    run = wandb.init(
        project=project,
        name=run_name or f"test_eval_{Path(model_path).stem}_{split_root.name}",
        config={"model_path": str(model_path), "split": split_root.name},
        reinit=True
    )

    result_columns = ["image", "raad", "pred_area", "true_area",
                      "prediction_count", "truth_count", "area_score"]

    try:
        model = YOLO(model_path)
        records = []

        test_imgs = sorted(f for f in os.listdir(test_img_dir) if f.endswith(('.jpg', '.png')))
        for img_file in tqdm(test_imgs, desc=f"Evaluating {split_root.name}"):
            img_path = test_img_dir / img_file

            if img_file not in bounding_boxes:
                print(f"[!] Missing GT: {img_file}")
                continue

            logger, original_level = suppress_yolo_logging()
            try:
                preds = model.predict(str(img_path), save=False)[0]
            finally:
                restore_yolo_logging(logger, original_level)

            pred_boxes = preds.boxes.xyxy.cpu().numpy().tolist()
            true_boxes = bounding_boxes[img_file]

            # Boxes are used as given (normalize=False), so no image size is needed.
            raad, pred_area, true_area = calculate_raad(pred_boxes, true_boxes, normalize=False)
            # Guard against zero-area ground truth; NaN is skipped by DataFrame.mean().
            area_score = (pred_area / true_area) * 100 if true_area > 0 else float("nan")

            records.append({
                "image": img_file,
                "raad": raad,
                "pred_area": pred_area,
                "true_area": true_area,
                "prediction_count": len(pred_boxes),
                "truth_count": len(true_boxes),
                "area_score": area_score,
            })

        df = pd.DataFrame(records, columns=result_columns)
        avg_raad = df["raad"].mean()
        median_raad = df["raad"].median()
        max_raad = df["raad"].max()

        avg_area_score = df["area_score"].mean()
        avg_pred_area = df["pred_area"].mean()
        avg_true_area = df["true_area"].mean()
        avg_pred_count = df["prediction_count"].mean()
        avg_truth_count = df["truth_count"].mean()
        avg_count_ratio = avg_pred_count / max(avg_truth_count, 1e-6)

        # NOTE(review): no `split=` is passed to model.val, so Ultralytics' default
        # split is evaluated — presumably "val", not "test"; confirm this is intended.
        # A failing validation must not throw away the RAAD results computed above.
        try:
            val_result = model.val(data=str(split_root / "data.yaml"), imgsz=IMG_SIZE)
            val_metrics = getattr(val_result, "results_dict", None) or {}
            # Detection metrics are keyed with a "(B)" suffix; keep the plain key as fallback.
            mAP50 = val_metrics.get("metrics/mAP50(B)", val_metrics.get("metrics/mAP50", 0))
        except Exception as val_error:
            print(f"[!] model.val failed, mAP@0.5 not available: {val_error}")
            mAP50 = float("nan")

        run.log({
            "avg_raad": avg_raad,
            "median_raad": median_raad,
            "max_raad" : max_raad,
            "avg_pred_area": avg_pred_area,
            "avg_true_area": avg_true_area,
            "avg_pred_count": avg_pred_count,
            "avg_truth_count": avg_truth_count,
            "avg_count_ratio": avg_count_ratio,
            "avg_area_score": avg_area_score,
            "mAP@0.5": mAP50
        })

        table = wandb.Table(columns=["image", "raad", "pred_area", "true_area",
                                     "prediction_count", "truth_count", "count_ratio", "area_score"])
        for row in df.itertuples(index=False):
            count_ratio = row.prediction_count / max(row.truth_count, 1e-6)
            table.add_data(row.image, row.raad, row.pred_area, row.true_area,
                           row.prediction_count, row.truth_count, count_ratio, row.area_score)
        run.log({"per_image_results": table})

        return df, avg_raad
    except Exception as e:
        print(f"Evaluation error: {e}")
        traceback.print_exc()
        return None, None
    finally:
        # Close the wandb run on success and on failure alike.
        run.finish()

In [10]:
def generate_kfold_yamls(split_dir: Path, k: int = 5, seed=42):
    """
    Create ``k`` cross-validation folds from the training images of a SID03 split.

    For every fold a directory ``<split_dir.name>_kfold/fold<i>`` is created next
    to ``split_dir``. Training images are copied into ``images/train`` or
    ``images/val``; the matching label is taken from the first sub-folder in
    ``SUBTYPES`` under ``labels/train`` that contains it. The test images and
    labels of ``split_dir`` are symlinked and a ``data.yaml`` is written.

    Args:
        split_dir: Directory of the split; its name must start with "SID03".
        k: Number of folds (at least 2).
        seed: Seed for the shuffle. The image list is sorted first, so the same
            seed yields the same folds on every run. Pass None for a random split.

    Raises:
        ValueError: If ``k < 2`` or there are fewer training images than folds.
    """
    assert split_dir.name.startswith("SID03"), "Only for SID03 Split."
    if k < 2:
        raise ValueError(f"k must be at least 2, got {k}")

    train_img_dir = split_dir / "images" / "train"
    # Sort before shuffling: glob order is not guaranteed to be stable.
    all_imgs = sorted(train_img_dir.glob("*.jpg"))
    if len(all_imgs) < k:
        raise ValueError(f"Need at least {k} images in {train_img_dir}, found {len(all_imgs)}")
    # A private RNG keeps the global `random` state untouched.
    random.Random(seed).shuffle(all_imgs)

    fold_root = split_dir.parent / f"{split_dir.name}_kfold"
    fold_root.mkdir(exist_ok=True)

    for fold in range(k):
        fold_dir = fold_root / f"fold{fold}"

        # Only train/val are real directories; test is symlinked further down.
        for phase in ["train", "val"]:
            (fold_dir / "images" / phase).mkdir(parents=True, exist_ok=True)
            (fold_dir / "labels" / phase).mkdir(parents=True, exist_ok=True)

        # Strided slicing assigns every image to exactly one validation fold,
        # including the remainder that integer division would leave out.
        val_set = set(all_imgs[fold::k])

        for img_path in all_imgs:
            phase = "val" if img_path in val_set else "train"
            shutil.copy(img_path, fold_dir / "images" / phase / img_path.name)

            label_found = False
            for subtype in SUBTYPES:
                lbl_src = split_dir / "labels" / "train" / subtype / f"{img_path.stem}.txt"
                if lbl_src.exists():
                    shutil.copy(lbl_src, fold_dir / "labels" / phase / lbl_src.name)
                    label_found = True
                    break
            if not label_found:
                print(f"[!] No Labe data for: {img_path.name}")

        for kind in ["images", "labels"]:
            src = split_dir / kind / "test"
            dst = fold_dir / kind / "test"
            if dst.is_symlink():
                continue
            # An empty real directory (e.g. left by an earlier run) would block the link.
            if dst.is_dir() and not any(dst.iterdir()):
                dst.rmdir()
            if not dst.exists():
                dst.symlink_to(src)

        yaml_dict = {
            "train": str((fold_dir / "images/train").resolve()),
            "val": str((fold_dir / "images/val").resolve()),
            "test": str((fold_dir / "images/test").resolve()),
            "nc": 1,
            "names": ["lesion"]
        }
        with open(fold_dir / "data.yaml", "w") as f:
            yaml.safe_dump(yaml_dict, f)

        csv_src = split_dir / "labels" / "test" / "bboxes_test.csv"
        csv_dst = fold_dir / "labels" / "test" / "bboxes_test.csv"
        if not csv_src.exists():
            print(f"[Didnt found bboxes_test.csv: {csv_src}")
        elif not (csv_dst.exists() and csv_dst.samefile(csv_src)):
            # With labels/test symlinked, source and destination are the same file
            # and need no copy; a real directory still gets its own copy.
            shutil.copy(csv_src, csv_dst)

        print(f"Fold {fold} created: {fold_dir}")

## Start training and evaluation

In [11]:
def _train_and_evaluate(model, split, batch, epochs, lr, train_project, eval_project,
                        run_name, eval_root, subtype=None, fold_id=None):
    """Train one configuration, then evaluate the returned weights on ``eval_root``."""
    weights = train_model(
        model_size=model,
        split=split,
        subtype=subtype,
        fold_id=fold_id,
        epochs=epochs,
        batch=batch,
        lr=lr,
        project=train_project,
        run_name=run_name
    )
    evaluate_test_raad(
        model_path=weights,
        split=split,
        split_root=eval_root,
        project=eval_project,
        run_name=run_name
    )


def run_training_and_eval(
    splits=["SID01", "SID02"],
    models=["n", "m", "l"],
    batches=[16, 32],
    epochs=40,
    lr=0.01,
    train_project="maize_train",
    eval_project="maize_eval",
    sid02_subtypes=[None, "drone", "boom", "handheld"],
    sid03_folds=None,
    device="0"
):
    """
    Train and evaluate every combination of split variant, model size and batch size.

    Args:
        splits: Splits to process; "SID01", "SID02" and "SID03" are supported,
            anything else is reported and skipped.
        models: Model size keys (see ``MODEL_WEIGHTS``).
        batches: Batch sizes to try.
        epochs, lr: Forwarded to ``train_model``.
        train_project, eval_project: wandb project names for training / evaluation.
        sid02_subtypes: Subtypes to train for SID02; ``None`` means the full split.
        sid03_folds: Fold ids to train for SID03; ``None`` trains on the plain split.
        device: Accepted for compatibility; currently not forwarded to training.

    Run names follow ``yolo11<model>_<split>[_<variant>]_e<epochs>_b<batch>``,
    where the variant is the subtype (or "all") for SID02 and ``f<fold>``
    (or "all") for SID03.
    """
    for split in splits:
        # Each variant is (subtype, fold_id, name_tag).
        if split == "SID01":
            variants = [(None, None, "")]

        elif split == "SID02":
            variants = [(subtype, None, f"_{subtype if subtype else 'all'}")
                        for subtype in sid02_subtypes]

        elif split == "SID03":
            folds = sid03_folds if sid03_folds else [None]
            wants_folds = any(fold is not None for fold in folds)
            sid03_kfold_root = BASE_PATH / "SID03_kfold"
            # Fold directories are only needed when a fold was actually requested.
            if wants_folds and not sid03_kfold_root.exists():
                print("SID03_kfold not found – generate K-Folds...")
                generate_kfold_yamls(BASE_PATH / "SID03", k=DEFAULT_KFOLDS)
            variants = [(None, fold, f"_f{fold}" if fold is not None else "_all")
                        for fold in folds]

        else:
            print(f"[!] Unknown split skipped: {split}")
            continue

        for subtype, fold, tag in variants:
            # SID02 subtypes are evaluated on their subset directory; everything
            # else (including SID03 folds) uses the test set of the split itself.
            if split == "SID02" and subtype:
                eval_root = _make_sid02_subset(BASE_PATH / split, subtype)
            else:
                eval_root = BASE_PATH / split

            for model in models:
                for batch in batches:
                    run_name = f"yolo11{model}_{split}{tag}_e{epochs}_b{batch}"
                    _train_and_evaluate(
                        model=model,
                        split=split,
                        batch=batch,
                        epochs=epochs,
                        lr=lr,
                        train_project=train_project,
                        eval_project=eval_project,
                        run_name=run_name,
                        eval_root=eval_root,
                        subtype=subtype,
                        fold_id=fold
                    )


In [12]:
# Launch the sweep: 4 model sizes x 2 batch sizes on SID01, 25 epochs each.
# Only "SID01" is listed in `splits`, so `sid02_subtypes` and `sid03_folds`
# have no effect for this invocation.
run_training_and_eval(
    splits=["SID01"],
    models=["s","n","m","l"],
    batches=[8, 16],
    epochs=25,
    train_project=TRAIN_PROJECT_PREFIX,
    eval_project=EVAL_PROJECT_PREFIX,
    sid02_subtypes=["boom", "drone","handheld"],
    sid03_folds=[3, 4],
    device="0"
)

FileNotFoundError: YAML nicht gefunden: /exchange/dspro2/M-AI-ZE/data/adjusted/1.2/splits/SID01/data.yaml