In [2]:
# %% Cell 1: Imports & User Inputs
import os
import json
import pandas as pd
import torch
import shutil
import ray.cloudpickle as pickle
from tqdm import tqdm
from Helper.ml_models import *  # Importiert alle benötigten ML-Modelle (z. B. MapillaryTrainedModel, MapillaryDataLoader, …)

# === Basis-Pfade und Parameter ===
BASE_PATH = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation"
MAIN_FOLDER_HYP = os.path.abspath(os.path.join(BASE_PATH, "HyperparameterLOG_small"))
TARGET_DIR = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_map_small"

# Evaluationseinstellungen
NUM_CLASSES = 66  # Neue Klassenanzahl

# Speicherpfade
EVAL_RESULTS_SAVE_PATH = os.path.join(BASE_PATH, "FINAL_DATEN", "evaluation_hyperparameter_Mapillary_small.json")
BEST_CHECKPOINTS_SAVE_PATH = os.path.join(BASE_PATH, "FINAL_DATEN", "best_checkpoints_Mapillary_small.json")

# Debug: Ausgabe der HyperparameterLOG-Unterordner
print("DEBUG: MAIN_FOLDER_HYP =", MAIN_FOLDER_HYP)
print("DEBUG: Exists on disk? =", os.path.isdir(MAIN_FOLDER_HYP))
print("\nDEBUG: Subfolders in HyperparameterLOG:")
for item in os.listdir(MAIN_FOLDER_HYP):
    if os.path.isdir(os.path.join(MAIN_FOLDER_HYP, item)):
        print(" -", item)


DEBUG: MAIN_FOLDER_HYP = /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small
DEBUG: Exists on disk? = True

DEBUG: Subfolders in HyperparameterLOG:
 - fcn_resnet101
 - deeplabv3_resnet50
 - fcn_resnet50
 - deeplabv3_resnet101


In [3]:
# %% Cell 2: Funktionendefinitionen (kollabierbarer Bereich)
def load_hyperparameter_runs_as_dict(base_folder: str):
    """
    Lädt alle Hyperparameter-Runs aus der Ordnerstruktur als Dictionary.
    """
    runs_data = {}
    if not os.path.isdir(base_folder):
        print(f"ERROR: Base folder does not exist: {base_folder}")
        return runs_data

    for model_folder in os.listdir(base_folder):
        model_path = os.path.join(base_folder, model_folder)
        if not os.path.isdir(model_path):
            print(f"Skipping invalid model path: {model_path}")
            continue

        runs_data[model_folder] = {}
        for train_folder in os.listdir(model_path):
            train_folder_path = os.path.join(model_path, train_folder)
            if not train_folder.startswith("train_hyper_") or not os.path.isdir(train_folder_path):
                print(f"Skipping non-training folder: {train_folder_path}")
                continue

            params_file = os.path.join(train_folder_path, "params.json")
            progress_file = os.path.join(train_folder_path, "progress.csv")
            result_file = os.path.join(train_folder_path, "result.json")

            if not (os.path.isfile(params_file) and os.path.isfile(progress_file) and os.path.isfile(result_file)):
                print(f"Skipping incomplete run: {train_folder_path}")
                continue

            with open(params_file, "r", encoding="utf-8") as f:
                params_dict = json.load(f)
            progress_df = pd.read_csv(progress_file)
            progress_records = progress_df.to_dict(orient="records")
            result_records = []
            with open(result_file, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if line:
                        result_records.append(json.loads(line))

            run_dict = {
                "id": train_folder,
                **params_dict,
                "result": result_records,
                "progress": progress_records,
            }
            runs_data[model_folder][train_folder] = run_dict

    return runs_data


def compute_confusion_matrix(predicted, ground_truth, num_classes):
    mask = (ground_truth >= 0) & (ground_truth < num_classes)
    label = num_classes * ground_truth[mask] + predicted[mask]
    count = torch.bincount(label, minlength=num_classes**2)
    confusion_matrix = count.reshape(num_classes, num_classes)
    return confusion_matrix


def compute_miou(confusion_matrix):
    intersection = torch.diag(confusion_matrix)
    ground_truth_set = confusion_matrix.sum(1)
    predicted_set = confusion_matrix.sum(0)
    union = ground_truth_set + predicted_set - intersection
    IoU = intersection / (union + 1e-6)
    mIoU = torch.mean(IoU)
    return mIoU.item(), IoU


def compute_mean_pixel_accuracy(confusion_matrix):
    true_positive = torch.diag(confusion_matrix)
    total_pixels = confusion_matrix.sum(1)
    pixel_accuracy = true_positive / (total_pixels + 1e-6)
    mPA = torch.mean(pixel_accuracy)
    return mPA.item(), pixel_accuracy


def compute_fwiou(confusion_matrix):
    total_pixels = confusion_matrix.sum()
    ground_truth_set = confusion_matrix.sum(1)
    intersection = torch.diag(confusion_matrix)
    union = ground_truth_set + confusion_matrix.sum(0) - intersection
    IoU = intersection / (union + 1e-6)
    FWIoU = (ground_truth_set * IoU) / total_pixels
    return FWIoU.sum().item()


def compute_dice_coefficient(confusion_matrix):
    intersection = torch.diag(confusion_matrix)
    ground_truth_set = confusion_matrix.sum(1)
    predicted_set = confusion_matrix.sum(0)
    dice = (2 * intersection) / (ground_truth_set + predicted_set + 1e-6)
    mean_dice = torch.mean(dice)
    return mean_dice.item(), dice


def load_checkpointed_model_ray(model_name, checkpoint_path, num_classes=None):
    """
    Lädt ein checkpointed Model mithilfe von Ray Cloudpickle.
    """
    loaded_model = MapillaryTrainedModel(
        model_name=model_name,
        width=520,
        height=520,
        weights_name='',
        skip_local_load=True  # WICHTIG!
    )
    with open(checkpoint_path, "rb") as fp:
        checkpoint_data = pickle.load(fp)
    loaded_model.model.load_state_dict(checkpoint_data["model_state"], strict=True)
    if "optimizer_state" in checkpoint_data:
        loaded_model.optimizer.load_state_dict(checkpoint_data["optimizer_state"])
    return loaded_model


def evaluate_model(model: MapillaryTrainedModel, dataset, num_classes: int) -> dict:
    """
    Bewertet das Modell auf dem angegebenen Datensatz und berechnet diverse Metriken.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.model.to(device)
    confusion_matrix_total = torch.zeros((num_classes, num_classes), dtype=torch.int64).to(device)
    
    with torch.no_grad():
        for i in tqdm(range(len(dataset)), desc="Evaluating Dataset"):
            image, annotation = dataset[i]
            image = image.to(device)
            annotation = annotation.to(device)
            output = model.inference(image)
            predicted = output.argmax(1).squeeze(0)
            conf_mat = compute_confusion_matrix(predicted.cpu(), annotation.cpu(), num_classes)
            confusion_matrix_total += conf_mat.to(device)
    
    miou, iou_per_class = compute_miou(confusion_matrix_total)
    mpa, pa_per_class = compute_mean_pixel_accuracy(confusion_matrix_total)
    fwiou = compute_fwiou(confusion_matrix_total)
    dice_mean, dice_per_class = compute_dice_coefficient(confusion_matrix_total)
    
    metrics = {
        "mIoU": miou,
        "mPA": mpa,
        "FWIoU": fwiou,
        "Dice_Mean": dice_mean,
        "IoU_per_class": iou_per_class.tolist(),
        "PA_per_class": pa_per_class.tolist(),
        "Dice_per_class": dice_per_class.tolist()
    }
    return metrics


In [4]:
# %% Cell 3A: Daten laden & Checkpoint-Extraktion
# 1. Hyperparameter-Runs laden
hyperparameter_data = load_hyperparameter_runs_as_dict(MAIN_FOLDER_HYP)

# 2. Checkpoints finden und nach bestmöglicher Validierungsgenauigkeit sortieren
sorted_hyperparameter_data = {}

for model_name, runs_dict in hyperparameter_data.items():
    runs_list = []
    for run_name, run_data in runs_dict.items():
        # Bestimme die höchste Validierungsgenauigkeit aus der "progress"-CSV
        if "progress" in run_data and run_data["progress"]:
            best_val_acc = max((r.get("val_acc", float("-inf")) for r in run_data["progress"]), default=float("-inf"))
        else:
            best_val_acc = float("-inf")
        
        # Absoluter Pfad zum Run-Ordner
        run_folder_path = os.path.abspath(os.path.join(MAIN_FOLDER_HYP, model_name, run_name))
        
        # Checkpoint-Unterordner suchen
        if os.path.isdir(run_folder_path):
            checkpoint_dirs = [d for d in os.listdir(run_folder_path)
                               if d.startswith("checkpoint_") and os.path.isdir(os.path.join(run_folder_path, d))]
        else:
            checkpoint_dirs = []
        
        print(f"\n[DEBUG] For model='{model_name}' run='{run_name}', found checkpoint dirs:")
        print("       ", checkpoint_dirs)
        
        # Numerische Sortierung der Checkpoint-Ordner und Auswahl des letzten
        if checkpoint_dirs:
            checkpoint_dirs.sort(key=lambda x: int(x.split("_")[1]))
            last_checkpoint_dir = checkpoint_dirs[-1]
            last_checkpoint = os.path.abspath(os.path.join(run_folder_path, last_checkpoint_dir, "checkpoint.pkl"))
        else:
            last_checkpoint = None
        
        runs_list.append((run_name, run_data, best_val_acc, last_checkpoint))
    
    # Sortiere Runs absteigend nach Validierungsgenauigkeit
    sorted_runs = sorted(runs_list, key=lambda x: x[2], reverse=True)
    
    # Erstelle ein neues Dictionary mit fortlaufenden Schlüsseln ("0", "1", …)
    sorted_hyperparameter_data[model_name] = {
        str(i): {
            **run_data,
            "max_validation_accuracy": best_val_acc,
            "path": last_checkpoint
        }
        for i, (run_name, run_data, best_val_acc, last_checkpoint) in enumerate(sorted_runs)
    }

# Optionaler Debug-Output
if "deeplabv3_resnet50" in sorted_hyperparameter_data:
    print("\nDEBUG: Keys for deeplabv3_resnet50 run '0':", sorted_hyperparameter_data["deeplabv3_resnet50"]['0'].keys())
    print("DEBUG: Checkpoint path:", sorted_hyperparameter_data["deeplabv3_resnet50"]['0']['path'])
print("DEBUG: Models found:", list(sorted_hyperparameter_data.keys()))


Skipping non-training folder: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/fcn_resnet101/tuner.pkl
Skipping non-training folder: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/fcn_resnet101/searcher-state-2025-03-11_12-03-54.pkl
Skipping non-training folder: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/fcn_resnet101/search_gen_state-2025-03-11_12-03-54.json
Skipping non-training folder: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/fcn_resnet101/experiment_state-2025-03-11_12-03-54.json
Skipping non-training folder: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/fcn_resnet101/.validate_storage_marker
Skipping non-training folder: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/deeplabv3_resnet50/tuner.pkl
Skipping no

In [5]:
# %% Cell 3B: Datensatz laden & Modelle evaluieren
# Datensatz laden (Mapillary)
mapillary_loader = MapillaryDataLoader(
    train_images_dir=os.path.join(BASE_PATH, "Mapillary_Vistas/training/images"),
    train_annotations_dir=os.path.join(BASE_PATH, "Mapillary_Vistas/training/v2.0/labels_small"),
    val_images_dir=os.path.join(BASE_PATH, "Mapillary_Vistas/validation/images"),
    val_annotations_dir=os.path.join(BASE_PATH, "Mapillary_Vistas/validation/v2.0/labels_small")
)
test_dataset = mapillary_loader.test_dataset
print(f"Dataset loaded with length {len(test_dataset)}")

# Modelle evaluieren (verwende jeweils den besten Run "0")
evaluation_results = {}

for model_name, runs_dict in sorted_hyperparameter_data.items():
    best_run_info = runs_dict.get("0")
    checkpoint_path = best_run_info.get("path", None) if best_run_info else None
    if not checkpoint_path:
        print(f"\n[WARNING] No checkpoint path found for {model_name} run '0'. Skipping.")
        continue
    
    print(f"\nEvaluating model: {model_name}")
    print(f"Checkpoint path: {checkpoint_path}")
    
    try:
        model_loaded = load_checkpointed_model_ray(model_name, checkpoint_path)
    except FileNotFoundError as e:
        print("[WARNING] File not found error:", e)
        continue
    
    metrics = evaluate_model(model_loaded, test_dataset, NUM_CLASSES)
    evaluation_results[model_name] = metrics
    
    print(f"Results for {model_name}:")
    for k, v in metrics.items():
        if isinstance(v, list):
            print(f"  {k}: [list of length {len(v)}]")
        else:
            print(f"  {k}: {v:.4f}" if isinstance(v, float) else f"  {k}: {v}")


Dataset loaded with length 2000

Evaluating model: fcn_resnet101
Checkpoint path: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/fcn_resnet101/train_hyper_7adb91eb_1_auto_cast=True,batch_size=6,learning_rate=0.0001,max_epochs=100,weight_decay=0_2025-03-11_12-03-54/checkpoint_000099/checkpoint.pkl
Using CUDA GPU
Model loaded: fcn_resnet101 | Device: cuda 
Error loading Model with Epoch latest: Error(s) in loading state_dict for FCN:
	size mismatch for classifier.4.weight: copying a param with shape torch.Size([20, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([124, 512, 1, 1]).
	size mismatch for classifier.4.bias: copying a param with shape torch.Size([20]) from checkpoint, the shape in current model is torch.Size([124]).
Skipping local .pth load due to error above.


Evaluating Dataset: 100%|██████████| 2000/2000 [03:25<00:00,  9.71it/s]


Results for fcn_resnet101:
  mIoU: 0.2044
  mPA: 0.2459
  FWIoU: 0.8601
  Dice_Mean: 0.2665
  IoU_per_class: [list of length 66]
  PA_per_class: [list of length 66]
  Dice_per_class: [list of length 66]

Evaluating model: deeplabv3_resnet50
Checkpoint path: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/deeplabv3_resnet50/train_hyper_a8ae06ed_1_auto_cast=True,batch_size=6,learning_rate=0.0001,max_epochs=100,weight_decay=0_2025-03-07_20-02-56/checkpoint_000099/checkpoint.pkl
Using CUDA GPU
Model loaded: deeplabv3_resnet50 | Device: cuda 


Evaluating Dataset: 100%|██████████| 2000/2000 [02:51<00:00, 11.69it/s]


Results for deeplabv3_resnet50:
  mIoU: 0.2009
  mPA: 0.2478
  FWIoU: 0.8614
  Dice_Mean: 0.2604
  IoU_per_class: [list of length 66]
  PA_per_class: [list of length 66]
  Dice_per_class: [list of length 66]

Evaluating model: fcn_resnet50
Checkpoint path: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/fcn_resnet50/train_hyper_bc32f73a_1_auto_cast=True,batch_size=6,learning_rate=0.0001,max_epochs=100,weight_decay=0_2025-03-10_11-25-21/checkpoint_000099/checkpoint.pkl
Using CUDA GPU
Model loaded: fcn_resnet50 | Device: cuda 


Evaluating Dataset: 100%|██████████| 2000/2000 [02:47<00:00, 11.94it/s]


Results for fcn_resnet50:
  mIoU: 0.2012
  mPA: 0.2426
  FWIoU: 0.8544
  Dice_Mean: 0.2632
  IoU_per_class: [list of length 66]
  PA_per_class: [list of length 66]
  Dice_per_class: [list of length 66]

Evaluating model: deeplabv3_resnet101
Checkpoint path: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/deeplabv3_resnet101/train_hyper_ae167f80_1_auto_cast=True,batch_size=6,learning_rate=0.0001,max_epochs=100,weight_decay=0_2025-03-09_00-01-13/checkpoint_000099/checkpoint.pkl
Using CUDA GPU
Model loaded: deeplabv3_resnet101 | Device: cuda 


Evaluating Dataset: 100%|██████████| 2000/2000 [03:15<00:00, 10.24it/s]

Results for deeplabv3_resnet101:
  mIoU: 0.2087
  mPA: 0.2562
  FWIoU: 0.8661
  Dice_Mean: 0.2697
  IoU_per_class: [list of length 66]
  PA_per_class: [list of length 66]
  Dice_per_class: [list of length 66]





In [6]:
# %% Cell 3C: Ergebnisse speichern & Best Checkpoints kopieren
# Speichere die Evaluationsergebnisse als JSON
os.makedirs(os.path.dirname(EVAL_RESULTS_SAVE_PATH), exist_ok=True)
with open(EVAL_RESULTS_SAVE_PATH, "w") as f:
    json.dump(evaluation_results, f, indent=4)
print(f"Evaluation results saved to: {EVAL_RESULTS_SAVE_PATH}")

# Best Checkpoints extrahieren
best_checkpoints = {}
for model_name, runs_dict in sorted_hyperparameter_data.items():
    best_run_info = runs_dict.get("0")
    best_checkpoint_path = best_run_info.get("path", None) if best_run_info else None
    best_checkpoints[model_name] = best_checkpoint_path

# Speichere die Best Checkpoints als JSON
os.makedirs(os.path.dirname(BEST_CHECKPOINTS_SAVE_PATH), exist_ok=True)
with open(BEST_CHECKPOINTS_SAVE_PATH, "w", encoding="utf-8") as f:
    json.dump(best_checkpoints, f, indent=4)
print(f"Best checkpoint paths saved to: {BEST_CHECKPOINTS_SAVE_PATH}")

# Kopiere die besten Checkpoints in das Zielverzeichnis
os.makedirs(TARGET_DIR, exist_ok=True)
for model_name, checkpoint_path in best_checkpoints.items():
    if checkpoint_path and os.path.isfile(checkpoint_path):
        dest_checkpoint = os.path.join(TARGET_DIR, f"{model_name}_best_checkpoint.pkl")
        shutil.copy2(checkpoint_path, dest_checkpoint)
        print(f"✅ Checkpoint for {model_name} saved: {dest_checkpoint}")
    else:
        print(f"⚠️ No valid checkpoint for {model_name} found!")
print("✅ All checkpoints have been saved!")


Evaluation results saved to: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/evaluation_hyperparameter_Mapillary_small.json
Best checkpoint paths saved to: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/best_checkpoints_Mapillary_small.json
✅ Checkpoint for fcn_resnet101 saved: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_map_small/fcn_resnet101_best_checkpoint.pkl
✅ Checkpoint for deeplabv3_resnet50 saved: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_map_small/deeplabv3_resnet50_best_checkpoint.pkl
✅ Checkpoint for fcn_resnet50 saved: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_map_small/fcn_resnet50_best_checkpoint.pkl
✅ Checkpoint for deeplabv3_resnet101 saved: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_map_small/deeplabv3_resnet101_best_checkpoint.pkl
✅ 