# YOLO Training & Testing

### GPUs, HF Cache

In [1]:
import os
%matplotlib inline 

# Model
%env CUDA_VISIBLE_DEVICES=1
#%env PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:32
!yolo settings wandb=True

# HF Cache
os.environ["HF_HOME"] = "../../.cache"
!echo $HF_HOME
!huggingface-cli whoami

env: CUDA_VISIBLE_DEVICES=1
JSONDict("/home/stu235269/.config/Ultralytics/settings.json"):
{
  "settings_version": "0.0.6",
  "datasets_dir": "/data22/stu235269/datasets",
  "weights_dir": "/data22/stu235269/TinyML-MT/weights",
  "runs_dir": "/data22/stu235269/TinyML-MT/runs",
  "uuid": "99dfe128dbd9c89f330abe5241cca75f9c72d113cae99d1255fe4ea3855718b6",
  "sync": true,
  "api_key": "",
  "openai_api_key": "",
  "clearml": true,
  "comet": true,
  "dvc": true,
  "hub": true,
  "mlflow": true,
  "neptune": true,
  "raytune": true,
  "tensorboard": false,
  "wandb": true,
  "vscode_msg": true,
  "openvino_msg": false
}
💡 Learn more about Ultralytics Settings at https://docs.ultralytics.com/quickstart/#ultralytics-settings
../../.cache
Maats
[1morgs: [0m DBD-research-group,Basket-AEye


### HP Settings

In [2]:
IMG_SIZE = 500
EPOCHS = 1
DATASET_PATH = "../../huggingface/mvtec_yolo/dataset.yaml"
DATASET_PATH = os.path.abspath(DATASET_PATH)
NAME = "mvtec" # For WANDB (YOLO11n-COCO11-on_mvtec_train_with_augmented)

In [None]:
def log_class_metrics_heatmap(val_results, null_classes=[], wandb_key="class_metrics_heatmap"):
    """
    Erstellt eine Heatmap aus den Klassenspezifischen Metriken (Precision, Recall, F1, AP@0.5)
    aus den val_results eines YOLOv8-Modells und loggt sie zu Weights & Biases.
    Diese Version ist robust gegenüber fehlenden Klassen im Val-Set.
    
    Parameter: 
        val_results: Das Ergebnisobjekt von model.val()
        wandb_key (str): Der Key unter dem das Bild bei W&B geloggt wird
    """
    import numpy as np
    import matplotlib.pyplot as plt
    import wandb

    # Klassennamen sortiert
    names_dict = val_results.names
    sorted_class_ids_and_names = sorted(names_dict.items())
    print("sorted_class_ids_and_names ", sorted_class_ids_and_names)
    
    map_id_on_result_id = {}
    count = 0
    for i, name in sorted_class_ids_and_names:
        if name in null_classes:
            map_id_on_result_id[i] = None 
        else:
            map_id_on_result_id[i] = count
            count += 1
        
    names = [name for _, name in sorted_class_ids_and_names if name not in null_classes]
    class_ids = [i for i, _ in sorted_class_ids_and_names]

    # Zugriff auf Metriken
    p = val_results.box.p if hasattr(val_results.box, 'p') else []
    r = val_results.box.r if hasattr(val_results.box, 'r') else []
    f1 = val_results.box.f1 if hasattr(val_results.box, 'f1') else []
    ap = val_results.box.all_ap if hasattr(val_results.box, 'all_ap') else []

    # Hilfsfunktion zum sicheren Zugriff
    def safe_get(metric_list, idx, default=0.0):
        return metric_list[idx] if idx < len(metric_list) else default

    def safe_ap0(metric_list, idx):
        return metric_list[idx][0] if idx < len(metric_list) and len(metric_list[idx]) > 0 else 0.0

    # Metriken extrahieren pro Klasse
    precisions = [safe_get(p, map_id_on_result_id[i]) for i in class_ids if map_id_on_result_id[i] != None]
    recalls = [safe_get(r, map_id_on_result_id[i]) for i in class_ids if map_id_on_result_id[i] != None]
    f1s = [safe_get(f1, map_id_on_result_id[i]) for i in class_ids if map_id_on_result_id[i] != None]
    ap50s = [safe_ap0(ap, map_id_on_result_id[i]) for i in class_ids if map_id_on_result_id[i] != None]

    metrics_matrix = np.array([
        precisions,
        recalls,
        f1s,
        ap50s
    ])

    metric_names = ['Precision', 'Recall', 'F1', 'AP@0.5']

    # Heatmap erzeugen
    fig, ax = plt.subplots(figsize=(max(8, len(names) * 0.8), 4))
    im = ax.imshow(metrics_matrix, cmap='viridis', vmin=0, vmax=1)

    ax.set_xticks(np.arange(len(names)))
    ax.set_xticklabels(names, rotation=45, ha="right")
    ax.set_yticks(np.arange(len(metric_names)))
    ax.set_yticklabels(metric_names)

    for i in range(metrics_matrix.shape[0]):
        for j in range(metrics_matrix.shape[1]):
            ax.text(j, i, f"{metrics_matrix[i, j]:.2f}", ha="center", va="center",
                    color="white" if metrics_matrix[i, j] < 0.5 else "black")

    plt.colorbar(im, ax=ax)
    plt.title("Metriken pro Klasse")
    plt.tight_layout()

    wandb.log({wandb_key: wandb.Image(fig)})

    plt.close(fig)


## Training YOLO

In [4]:
from ultralytics import YOLO
import wandb
from datetime import datetime
import wandb
NAME_RUN = NAME+datetime.now().strftime("%d%b-%H:%M:%S")
# WANB Init with custom name 
run = wandb.init(
    project="Yolo-Training",
    entity="maats",
    name=NAME_RUN,
    config={  # alle Hyperparameter sauber abspeichern
        "epochs": EPOCHS,
        "imgsz": IMG_SIZE,
        "model": "yolo11n.pt",
        "dataset": DATASET_PATH,
    },
    sync_tensorboard=True,
)
print(f"Run ID: {run.id}")
try:
    # YOLO-Model load
    model = YOLO("yolo11n.pt")

    # Training
    results = model.train(
        data=DATASET_PATH,
        epochs=EPOCHS,
        imgsz=IMG_SIZE,
        project="Yolo-Training",
        name=NAME_RUN,
        verbose=True,
        val=True,
        save=True,
        save_period=3,
        mode="wandb",
        batch=0.70, # 70% ? Check this
        patience=3, # Early Stopping Patience
        pretrained=True, #! Pretrained Model
        multi_scale=False, #! Test this
        cos_lr=False, #! Test this
        freeze=None, #! Test this
        #hsv_h=0.1,
        #degrees=180,
        #shear=10,
        #perspective=0.0003,
        #mixup = 0.3, # das was Jannek meinte 
        #cutmix = 0.3,
        #copy_paste = 0.1, # weis ja nicht ...
    )
    wandb.init(id=run.id, resume="allow", project="Yolo-Training")
    wandb.config.update(model.args) # Log all settings to WANDB

    #best_model_path = f"Yolo-Training/{NAME_RUN}/weights/best.pt"
    #model = YOLO(best_model_path)

    #val_results = model.val(data=DATASET_PATH, imgsz=IMG_SIZE) Already done in train
    log_class_metrics_heatmap(results)
except Exception as e:
    print(f"An error occurred: {e}")

finally:
    wandb.finish()
    print("done")

[34m[1mwandb[0m: Currently logged in as: [33mmatsaustralia[0m ([33mmaats[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Run ID: bgnxulh1
New https://pypi.org/project/ultralytics/8.3.156 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.144 🚀 Python-3.10.12 torch-2.5.1+cu124 CUDA:0 (NVIDIA TITAN Xp, 12183MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=0.7, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/data22/stu235269/TinyML-MT/huggingface/mvtec_yolo/dataset.yaml, degrees=0.0, deterministic=True, device=1, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=1, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=500, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_

[34m[1mtrain: [0mScanning /data22/stu235269/TinyML-MT/huggingface/mvtec_yolo/labels/train.cache... 11380 images, 0 backgrounds, 0 corrupt: 100%|██████████| 11380/11380 [00:00<?, ?it/s]

[34m[1mAutoBatch: [0mComputing optimal batch size for imgsz=512 at 70.0% CUDA memory utilization.
[34m[1mAutoBatch: [0mCUDA:1 (NVIDIA TITAN Xp) 11.90G total, 0.12G reserved, 0.06G allocated, 11.72G free





      Params      GFLOPs  GPU_mem (GB)  forward (ms) backward (ms)                   input                  output
     2601540       4.163         0.484         38.93         122.5        (1, 3, 512, 512)                    list
     2601540       8.325         0.644         28.71         83.73        (2, 3, 512, 512)                    list
     2601540       16.65         0.921         31.04         103.9        (4, 3, 512, 512)                    list
     2601540        33.3         1.321         34.66         69.75        (8, 3, 512, 512)                    list
     2601540        66.6         2.261         38.73         94.49       (16, 3, 512, 512)                    list
[34m[1mAutoBatch: [0mUsing batch-size 65 for CUDA:1 8.13G/11.90G (68%) ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2271.6±1134.7 MB/s, size: 293.0 KB)


[34m[1mtrain: [0mScanning /data22/stu235269/TinyML-MT/huggingface/mvtec_yolo/labels/train.cache... 11380 images, 0 backgrounds, 0 corrupt: 100%|██████████| 11380/11380 [00:00<?, ?it/s]


[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1597.0±810.3 MB/s, size: 248.9 KB)


[34m[1mval: [0mScanning /data22/stu235269/TinyML-MT/huggingface/mvtec_yolo/labels/val.cache... 6600 images, 0 backgrounds, 0 corrupt: 100%|██████████| 6600/6600 [00:00<?, ?it/s]


Plotting labels to Yolo-Training/mvtec20Jun-00:35:39/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000156, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005078125), 87 bias(decay=0.0)
Image sizes 512 train, 512 val
Using 8 dataloader workers
Logging results to [1mYolo-Training/mvtec20Jun-00:35:39[0m
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/1      6.26G     0.8173      4.501      1.112         40        512: 100%|██████████| 176/176 [01:31<00:00,  1.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:40<00:00,  1.27it/s]


                   all       6600      38422      0.198      0.113        0.1     0.0898

1 epochs completed in 0.039 hours.
Optimizer stripped from Yolo-Training/mvtec20Jun-00:35:39/weights/last.pt, 5.5MB
Optimizer stripped from Yolo-Training/mvtec20Jun-00:35:39/weights/best.pt, 5.5MB

Validating Yolo-Training/mvtec20Jun-00:35:39/weights/best.pt...
Ultralytics 8.3.144 🚀 Python-3.10.12 torch-2.5.1+cu124 CUDA:1 (NVIDIA TITAN Xp, 12183MiB)
YOLO11n summary (fused): 100 layers, 2,593,852 parameters, 0 gradients, 6.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 51/51 [00:37<00:00,  1.35it/s]


                   all       6600      38422      0.198      0.112        0.1     0.0898
adelholzener_alpenquelle_classic_075        539        552      0.349      0.109       0.21        0.2
adelholzener_alpenquelle_naturell_075        526        553      0.113      0.599       0.21      0.198
adelholzener_classic_bio_apfelschorle_02        572        598     0.0309     0.0368     0.0125     0.0105
adelholzener_classic_naturell_02        511        707     0.0329     0.0679     0.0223      0.018
adelholzener_gourmet_mineralwasser_02        688        737          0          0    0.00973    0.00864
augustiner_lagerbraeu_hell_05        540        565          0          0      0.005    0.00333
augustiner_weissbier_05        662        779        0.1     0.0385     0.0415     0.0387
          coca_cola_05        478        494          0          0     0.0286     0.0251
    coca_cola_light_05        633        684     0.0115     0.0395     0.0183     0.0159
suntory_gokuri_lemonade       

0,1
lr/pg0,▁
lr/pg1,▁
lr/pg2,▁
metrics/mAP50(B),█▁
metrics/mAP50-95(B),█▁
metrics/precision(B),█▁
metrics/recall(B),█▁
model/GFLOPs,▁▁
model/parameters,▁▁
model/speed_PyTorch(ms),▁█

0,1
lr/pg0,5e-05
lr/pg1,5e-05
lr/pg2,5e-05
metrics/mAP50(B),0.1001
metrics/mAP50-95(B),0.08982
metrics/precision(B),0.19758
metrics/recall(B),0.11247
model/GFLOPs,6.504
model/parameters,2601540.0
model/speed_PyTorch(ms),1.504


sorted_class_ids_and_names  [(0, 'adelholzener_alpenquelle_classic_075'), (1, 'adelholzener_alpenquelle_naturell_075'), (2, 'adelholzener_classic_bio_apfelschorle_02'), (3, 'adelholzener_classic_naturell_02'), (4, 'adelholzener_gourmet_mineralwasser_02'), (5, 'augustiner_lagerbraeu_hell_05'), (6, 'augustiner_weissbier_05'), (7, 'coca_cola_05'), (8, 'coca_cola_light_05'), (9, 'suntory_gokuri_lemonade'), (10, 'tegernseer_hell_03'), (11, 'corny_nussvoll'), (12, 'corny_nussvoll_single'), (13, 'corny_schoko_banane'), (14, 'corny_schoko_banane_single'), (15, 'dr_oetker_vitalis_knuspermuesli_klassisch'), (16, 'koelln_muesli_fruechte'), (17, 'koelln_muesli_schoko'), (18, 'caona_cocoa'), (19, 'cocoba_cocoa'), (20, 'cafe_wunderbar_espresso'), (21, 'douwe_egberts_professional_ground_coffee'), (22, 'gepa_bio_caffe_crema'), (23, 'gepa_italienischer_bio_espresso'), (24, 'apple_braeburn_bundle'), (25, 'apple_golden_delicious'), (26, 'apple_granny_smith'), (27, 'apple_red_boskoop'), (28, 'avocado'), (

0,1
lr/pg0,5e-05
lr/pg1,5e-05
lr/pg2,5e-05
metrics/mAP50(B),0.1001
metrics/mAP50-95(B),0.08982
metrics/precision(B),0.19758
metrics/recall(B),0.11247
model/GFLOPs,6.504
model/parameters,2601540.0
model/speed_PyTorch(ms),1.504


done


In [14]:
# Load best models
best_model_path = f"runs/train/{NAME_RUN}/weights/best.pt"
model = YOLO(best_model_path)

## MVTEC Grids

In [None]:
from ultralytics import YOLO
# Load best model
MODEL_PATH = "runs/train/artificial_created_mult_back_rotated_big_yolol18Jun-00:13:53/weights/best.pt"
model = YOLO(MODEL_PATH)

In [6]:
import wandb

# Deine Run-ID, z. B. "ls3jwotb" aus der URL oder dem lokalen Log
run_id = "h399va08"

# Reaktiviere den Run
wandb.init(
    project="Yolo-Training",
    entity="maats",
    id=run_id,
    resume="allow"
)

[34m[1mwandb[0m: Currently logged in as: [33mtorge-schwark[0m ([33mmaats[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [15]:
import wandb
from ultralytics import YOLO
import find_usefull_images_scripts as im_script
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Modell laden (ggf. Pfad anpassen)
#model = YOLO("runs/train/YOLO11n-COCO11-first_artificial_created_dataset/weights/best.pt")

# Alle Beispielbilder laden
# image_paths, _ = im_script.get_mvtec_images_for_first_artificial_dataset_classes()


# 1.
# image_paths, _ = im_script.get_mvtec_images_for_first_artificial_dataset_classes_trained_on_10_clases()


image_paths, _ = im_script.get_mvtec_images_for_10classes_dataset()


batch_size = 20
num_grids = 10

for grid_idx in range(num_grids):
    start_idx = grid_idx * batch_size
    end_idx = start_idx + batch_size
    selected_paths = image_paths[start_idx:end_idx]

    # Vorhersagen durchführen (Batch)
    preds = model.predict(
        selected_paths,
        imgsz=IMG_SIZE,
        save=False,
        stream=False
    )

    # Bilder vorbereiten
    images_drawn = []
    for img_path, pred in zip(selected_paths, preds):
        img = cv2.imread(str(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        h, w, _ = img.shape
        for box, cls, conf in zip(pred.boxes.xyxy, pred.boxes.cls, pred.boxes.conf):
            x1, y1, x2, y2 = map(int, box)
            class_name = model.names[int(cls)]
            label = f"{class_name} {conf:.2f}"

            # Rechteck zeichnen
            cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=2)

            # Textgröße bestimmen
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_scale = 1.2
            thickness = 2
            (text_w, text_h), baseline = cv2.getTextSize(label, font, font_scale, thickness)

            # Textposition
            text_x = x1
            if y1 - text_h - baseline > 0:
                text_y = y1 - 5
                # Hintergrundrechteck für Text (oben)
                cv2.rectangle(img, (text_x, text_y - text_h - baseline), (text_x + text_w, text_y + baseline), (0, 255, 0), cv2.FILLED)
                cv2.putText(img, label, (text_x, text_y), font, font_scale, (0, 0, 0), thickness)
            else:
                text_y = y2 + text_h + 5
                if text_y > h:
                    text_y = y2 - 5
                # Hintergrundrechteck für Text (unten)
                cv2.rectangle(img, (text_x, text_y - text_h - baseline), (text_x + text_w, text_y + baseline), (0, 255, 0), cv2.FILLED)
                cv2.putText(img, label, (text_x, text_y), font, font_scale, (0, 0, 0), thickness)

        images_drawn.append(img)

    # 5x4 Grid erstellen
    rows, cols = 5, 4
    fig, axs = plt.subplots(rows, cols, figsize=(12, 15), dpi=300)
    plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05, hspace=0.05, wspace=0.05)

    for i, ax in enumerate(axs.flat):
        if i < len(images_drawn):
            ax.imshow(images_drawn[i])
            ax.axis('off')
        else:
            ax.axis('off')

    # Grid als Bild speichern
    grid_img_path = f"prediction_grid_{grid_idx+1}.jpg"
    fig.savefig(grid_img_path, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

    # Bild bei wandb loggen
    wandb.log({f"mvtec/grids/prediction_grid{grid_idx+1}": wandb.Image(grid_img_path)})


FileNotFoundError: [Errno 2] No such file or directory: '../../huggingface/mvtec_annotated/labels'

## MVTEC Metrics

In [None]:
from ultralytics import YOLO

# Modell laden
path = "../../huggingface/full_classes_trained_on_10classes/dataset.yaml"
# null_classes for big 
null_clases = ["lemon", "oatmeal", "tomato sauce"]
# for small dataset 
# null_clases = ["coffee", "lemon", "oatmeal", "pasta", "tomato sauce"]


absolute_path = os.path.abspath(path)
# Evaluation auf dem 'test' Teil des Datasets
metrics = model.val(
    data=absolute_path,  
    split='test',              
    imgsz=IMG_SIZE               
)

print(np.mean(metrics.box.p),np.mean(metrics.box.r), np.mean(metrics.box.f1))
log_class_metrics_heatmap(metrics, null_classes= null_clases, wandb_key="mvtec/heatmap")
wandb.log({
    "mvtec/mAP50_class_normal": float(metrics.box.map50),
    "mvtec/precision_class_normal": float(np.mean(metrics.box.p)),
    "mvtec/recall_class_normal": float(np.mean(metrics.box.r)),
    "mvtec/f1_class_normal": float(np.mean(metrics.box.f1)),
    "mvtec/mAP50-95_class_normal": float(metrics.box.map),
})



Ultralytics 8.3.129 🚀 Python-3.10.12 torch-2.5.1+cu124 CUDA:0 (NVIDIA TITAN Xp, 12183MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1946.2±996.5 MB/s, size: 142.6 KB)


[34m[1mval: [0mScanning /data22/stu236894/GitRepos/TinyML-MT/huggingface/full_classes_trained_on_10classes/labels/test.cache... 2124 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2124/2124 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 133/133 [00:13<00:00, 10.22it/s]


                   all       2124       3884      0.375       0.52      0.349      0.131
                 apple        230        656      0.411      0.514      0.434       0.15
               avocado        150        270      0.374      0.784       0.37      0.127
                banana         53         54     0.0822      0.537     0.0982     0.0304
                coffee        604        879      0.286      0.734       0.36      0.165
              cucumber         91         91      0.531       0.56       0.58      0.169
             fruit tea        767       1538      0.507      0.269      0.304      0.133
                 pasta        391        396      0.435      0.245      0.295      0.145
Speed: 0.1ms preprocess, 1.4ms inference, 0.0ms loss, 1.5ms postprocess per image
Results saved to [1m/data22/stu236894/GitRepos/TinyML-MT/runs/detect/val67[0m
0.37501995669451427 0.5204666616041824 0.38957328338065833
sorted_class_ids_and_names  [(0, 'apple'), (1, 'avocado'), (2, 'ban

## CUSTOM Grid

In [None]:
import wandb
from ultralytics import YOLO
import find_usefull_images_scripts as im_script
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Alle Beispielbilder laden
image_paths, _ = im_script.get_custom_10class_class_dataset()

#image_paths, _ = im_script.get_custom_small_class_dataset()

batch_size = 20
num_grids = 10

for grid_idx in range(num_grids):
    start_idx = grid_idx * batch_size
    end_idx = start_idx + batch_size
    selected_paths = image_paths[start_idx:end_idx]

    # Vorhersagen durchführen (Batch)
    preds = model.predict(
        selected_paths,
        imgsz=IMG_SIZE,
        save=False,
        stream=False
    )

    # Bilder vorbereiten
    images_drawn = []
    for img_path, pred in zip(selected_paths, preds):
        img = cv2.imread(str(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        h, w, _ = img.shape
        for box, cls, conf in zip(pred.boxes.xyxy, pred.boxes.cls, pred.boxes.conf):
            x1, y1, x2, y2 = map(int, box)

            class_name = model.names[int(cls)]
            print("mapping" , class_name, cls)
            label = f"{class_name} {conf:.2f}"

            # Rechteck zeichnen
            cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=2)

            # Textgröße bestimmen
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_scale = 1.2
            thickness = 2
            (text_w, text_h), baseline = cv2.getTextSize(label, font, font_scale, thickness)

            # Textposition
            text_x = x1
            if y1 - text_h - baseline > 0:
                text_y = y1 - 5
                # Hintergrundrechteck für Text (oben)
                cv2.rectangle(img, (text_x, text_y - text_h - baseline), (text_x + text_w, text_y + baseline), (0, 255, 0), cv2.FILLED)
                cv2.putText(img, label, (text_x, text_y), font, font_scale, (0, 0, 0), thickness)
            else:
                text_y = y2 + text_h + 5
                if text_y > h:
                    text_y = y2 - 5
                # Hintergrundrechteck für Text (unten)
                cv2.rectangle(img, (text_x, text_y - text_h - baseline), (text_x + text_w, text_y + baseline), (0, 255, 0), cv2.FILLED)
                cv2.putText(img, label, (text_x, text_y), font, font_scale, (0, 0, 0), thickness)

        images_drawn.append(img)

    # 5x4 Grid erstellen
    rows, cols = 5, 4
    fig, axs = plt.subplots(rows, cols, figsize=(12, 15), dpi=300)
    plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05, hspace=0.05, wspace=0.05)

    for i, ax in enumerate(axs.flat):
        if i < len(images_drawn):
            ax.imshow(images_drawn[i])
            ax.axis('off')
        else:
            ax.axis('off')

    # Grid als Bild speichern
    grid_img_path = f"prediction_grid_{grid_idx+1}.jpg"
    fig.savefig(grid_img_path, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

    # Bild bei wandb loggen
    wandb.log({f"/custom/grids/prediction_grid_{grid_idx+1}": wandb.Image(grid_img_path)})



0: 384x512 2 coffees, 1.1ms
1: 384x512 1 lemon, 1 tomato sauce, 1.1ms
2: 384x512 1 oatmeal, 1 pasta, 1.1ms
3: 384x512 2 coffees, 1.1ms
4: 384x512 1 apple, 1.1ms
5: 384x512 2 coffees, 1.1ms
6: 384x512 2 bananas, 1 coffee, 1 pasta, 1.1ms
7: 384x512 (no detections), 1.1ms
8: 384x512 1 avocado, 1 lemon, 1.1ms
9: 384x512 1 apple, 1 avocado, 1.1ms
10: 384x512 2 apples, 1.1ms
11: 384x512 1 avocado, 1 tomato sauce, 1.1ms
12: 384x512 1 coffee, 2 lemons, 1.1ms
13: 384x512 1 coffee, 1.1ms
14: 384x512 1 banana, 1.1ms
15: 384x512 1 coffee, 2 tomato sauces, 1.1ms
16: 384x512 1 coffee, 1 lemon, 1.1ms
17: 384x512 1 avocado, 1 coffee, 1 oatmeal, 1.1ms
18: 384x512 1 apple, 1 coffee, 1.1ms
19: 384x512 1 apple, 1 banana, 1 tomato sauce, 1.1ms
Speed: 3.4ms preprocess, 1.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 512)
mapping coffee tensor(3., device='cuda:0')
mapping coffee tensor(3., device='cuda:0')
mapping lemon tensor(6., device='cuda:0')
mapping tomato sauce tensor(9., device='cu

## CUSTOM Metrics

In [None]:
import torch
import wandb
import numpy as np
import find_usefull_images_scripts as im_script

# # Mapping von Model-Output-Klasse → GT-Klasse
label_translation_trained_on_10classes = {
    0: 1, 1: 3, 2: 4, 3: 13, 4: 48, 5: 26, 6: 2, 7:42, 8: 9, 9: 5
}


# label_translation_trained_on_small_set = {
#     0: 1, 1: 3, 2: 4, 3: 48, 4: 26, 5: 2, 6: 5
# }

def compute_classnorm_metrics(gt_dicts, pred_dicts):
    """
    Berechnet class-normalisierte Precision, Recall, F1,
    wobei Klassen ohne Vorkommen ignoriert werden.
    """
    all_classes = sorted(set().union(*[d.keys() for d in gt_dicts + pred_dicts]))
    
    classwise_precisions = []
    classwise_recalls = []
    classwise_f1s = []
    classwise_gt_count = []
    classwise_pred_count = []
    classwise_fp = []
    classwise_fn = []
    classwise_tp = []

    for cls in all_classes:
        tp, fp, fn = 0, 0, 0
        gt_count_class = 0
        pred_count_class = 0
        for gt, pred in zip(gt_dicts, pred_dicts):
            gt_count = gt.get(cls, 0)
            pred_count = pred.get(cls, 0)
            gt_count_class += gt_count
            pred_count_class += pred_count

            tp += min(gt_count, pred_count)
            fp += max(0, pred_count - gt_count)
            fn += max(0, gt_count - pred_count)


        # Skip class if both gt and pred are zero
        if cls == 3:
            print("avocado", tp, fp, fn)
        if (tp + fp + fn) == 0:
            continue

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

        classwise_gt_count.append(gt_count_class)
        classwise_pred_count.append(pred_count_class)
        classwise_fp.append(fp)
        classwise_fn.append(fn)
        classwise_tp.append(tp)
        classwise_precisions.append(precision)
        classwise_recalls.append(recall)
        classwise_f1s.append(f1)
    
    mean_precision = np.mean(classwise_precisions) if len(classwise_precisions) > 0 else 0.0
    mean_recall = np.mean(classwise_recalls) if len(classwise_recalls) > 0 else 0.0
    mean_f1 = np.mean(classwise_f1s) if len(classwise_f1s) > 0 else 0.0

    results = {"class_norm_precision": mean_precision, "class_norm_recall": mean_recall, "class_norm_f1": mean_f1, 
               "classwise_gt_count": classwise_gt_count, "classwise_pred_count": classwise_pred_count, "classwise_fp": classwise_fp, "classwise_fn": classwise_fn,
               "classwise_tp": classwise_tp, "classwise_precisions": classwise_precisions, "classwise_recalls": classwise_recalls, "classwise_f1s" :classwise_f1s }

    return results



def compute_global_metrics(gt_dicts, pred_dicts):
    all_classes = sorted(set().union(*[d.keys() for d in gt_dicts + pred_dicts]))

    def dict_to_vec(d, classes):
        return np.array([d.get(c, 0) for c in classes], dtype=np.float32)

    gt_arr = np.stack([dict_to_vec(d, all_classes) for d in gt_dicts])
    pred_arr = np.stack([dict_to_vec(d, all_classes) for d in pred_dicts])
    print(gt_arr, pred_arr)

    tp = np.minimum(gt_arr, pred_arr).sum()
    fp = np.maximum(pred_arr - gt_arr, 0).sum()
    fn = np.maximum(gt_arr - pred_arr, 0).sum()


    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

    return precision, recall, f1


def translate_prediction_counts(pred_classes, translation_dict):
    """
    Zählt vorhergesagte Klassen und übersetzt sie in Zielklassen.
    Gibt dict {gt_class_id: count} zurück.
    """
    pred_counts = {}
    for c in pred_classes:
        mapped = translation_dict[c]
        if mapped is not None:
            # get is a cool trick standard value of 0 allows to access even though its not initialised
            pred_counts[mapped] = pred_counts.get(mapped, 0) + 1
    return pred_counts

# === Main ===

image_paths, label_lines = im_script.get_custom_10class_class_dataset()
batch_size = 20

all_gt_counts = []
all_pred_counts = []

for i in range(0, len(image_paths), batch_size):
    batch_paths = image_paths[i:i + batch_size]
    batch_labels = label_lines[i:i + batch_size]  # dicts: class_id -> count (GT-Klassen)

    # GT-Labels direkt übernehmen
    all_gt_counts.extend(batch_labels)

    # Model Predictions holen
    preds_raw = model.predict(batch_paths, imgsz=IMG_SIZE, stream=False)

    for i, pred in enumerate(preds_raw):
        pred_classes = pred.boxes.cls.cpu().tolist()
        translated_pred = translate_prediction_counts(pred_classes, label_translation_trained_on_10classes)
        all_pred_counts.append(translated_pred)


precision, recall, f1 = compute_global_metrics(all_gt_counts, all_pred_counts)


# Zusätzlich: class-normalisierte Metriken berechnen
results = compute_classnorm_metrics(all_gt_counts, all_pred_counts)



print(precision, recall, f1, results["class_norm_precision"], results["class_norm_recall"], results["class_norm_f1"])
wandb.log({
    "custom/test/precision_counts": float(precision),
    "custom/test/recall_counts": float(recall),
    "custom/test/f1_score_counts": float(f1),
    "custom/test/precision_classnorm_CARE": float(results["class_norm_precision"]),
    "custom/test/recall_classnorm_CARE": float( results["class_norm_recall"]),
    "custom/test/f1_classnorm_CARE": float( results["class_norm_f1"])
}, step=wandb.run.step)



ValueError: Sample larger than population or is negative

In [27]:
import wandb


# Deine Run-ID, z. B. "ls3jwotb" aus der URL oder dem lokalen Log
# Deine Run-ID, z. B. "ls3jwotb" aus der URL oder dem lokalen Log
run_id = "havg57rg"

# Reaktiviere den Run
wandb.init(
    project="Yolo-Training",
    entity="maats",
    id=run_id,
    resume="allow"
)

## CUSTOM Heatmap

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# === Labels vorbereiten ===
model_names = model.names  # z. B. {0: "apple", 1: "banana", ...}
translation = label_translation_trained_on_10classes

translated_class_labels = {
    gt_id: model_names[pred_id]
    for pred_id, gt_id in translation.items()
}

# === Werte aus results extrahieren ===
class_ids = sorted(translated_class_labels.keys())  # Nur GT-Klassen, die in der Übersetzung vorkommen
prec_list = [results["classwise_precisions"][class_ids.index(c)] for c in class_ids]
recall_list = [results["classwise_recalls"][class_ids.index(c)] for c in class_ids]
f1_list = [results["classwise_f1s"][class_ids.index(c)] for c in class_ids]

# === Heatmap zeichnen ===
metric_matrix = np.array([prec_list, recall_list, f1_list])
metric_labels = ["Precision", "Recall", "F1"]

fig, ax = plt.subplots(figsize=(max(8, len(class_ids)), 4))
sns.heatmap(
    metric_matrix,
    annot=True,
    fmt=".2f",
    cmap="YlGnBu",
    vmin=0.0,
    vmax=1.0,
    xticklabels=[translated_class_labels.get(c, str(c)) for c in class_ids],
    yticklabels=metric_labels,
    ax=ax
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.title("Per-Class Precision / Recall / F1")
plt.xlabel("Klasse")
plt.ylabel("Metrik")
plt.tight_layout()

wandb.log({"custom/per_class_metrics_heatmap": wandb.Image(fig)})
plt.close(fig)


## Finish WandB

In [7]:
wandb.finish()