In [None]:
import os
import json
import pandas as pd
import torch
import ray.cloudpickle as pickle
from Helper.ml_models import *
from tqdm import tqdm


In [8]:
# Basisverzeichnis des Projekts
BASE_PATH = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation"

# Verzeichnis zur Speicherung der Confusion Matrices
CONF_MATRIX_DIR = os.path.join(BASE_PATH, "FINAL_DATEN/confusion_matrices_hyper_small")
os.makedirs(CONF_MATRIX_DIR, exist_ok=True)

# Dataset-Pfade (Mapillary)
TRAIN_IMAGES_DIR = os.path.join(BASE_PATH, "Mapillary_Vistas/training/images")
TRAIN_ANNOTATIONS_DIR = os.path.join(BASE_PATH, "Mapillary_Vistas/training/v2.0/labels_small")
VAL_IMAGES_DIR = os.path.join(BASE_PATH, "Mapillary_Vistas/validation/images")
VAL_ANNOTATIONS_DIR = os.path.join(BASE_PATH, "Mapillary_Vistas/validation/v2.0/labels_small")

# Evaluationseinstellungen
# Hier wird die originale Anzahl der Klassen definiert, die das Modell ausgibt.
# Für gerelabelte Datensätze kann dieser Wert variieren.
NUM_CLASSES_ORIGINAL = 124

# Pfad zum Ordner, in dem die Hyperparameter-Runs (inkl. Checkpoints) gespeichert sind
HYPER_FOLDER = os.path.join(BASE_PATH, "HyperparameterLOG_small")
HYPER_FOLDER = os.path.abspath(HYPER_FOLDER)

# Steuerelement: Sollen alle Modelle automatisch evaluiert werden?
run_all = True

# Ausgabe der gesetzten Parameter zur Kontrolle:
print("BASE_PATH:", BASE_PATH)
print("CONF_MATRIX_DIR:", CONF_MATRIX_DIR)
print("\nDataset-Pfade:")
print("  Train Images      :", TRAIN_IMAGES_DIR)
print("  Train Annotations :", TRAIN_ANNOTATIONS_DIR)
print("  Validation Images :", VAL_IMAGES_DIR)
print("  Validation Annotations:", VAL_ANNOTATIONS_DIR)
print("\nHyperparameter Folder:", HYPER_FOLDER)
print("NUM_CLASSES_ORIGINAL:", NUM_CLASSES_ORIGINAL)
print("run_all =", run_all)


BASE_PATH: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation
CONF_MATRIX_DIR: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/confusion_matrices_hyper_small

Dataset-Pfade:
  Train Images      : /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/Mapillary_Vistas/training/images
  Train Annotations : /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/Mapillary_Vistas/training/v2.0/labels_small
  Validation Images : /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/Mapillary_Vistas/validation/images
  Validation Annotations: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/Mapillary_Vistas/validation/v2.0/labels_small

Hyperparameter Folder: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small
NUM_CLASSES_ORIGINAL: 124
run_all = True


In [9]:
def load_hyperparameter_runs_as_dict(base_folder: str):
    """
    Lädt alle Hyperparameter-Runs aus der Ordnerstruktur als Dictionary.
    """
    runs_data = {}
    if not os.path.isdir(base_folder):
        print(f"ERROR: Base folder does not exist: {base_folder}")
        return runs_data

    for model_folder in os.listdir(base_folder):
        model_path = os.path.join(base_folder, model_folder)
        if not os.path.isdir(model_path):
            continue

        runs_data[model_folder] = {}

        for train_folder in os.listdir(model_path):
            train_folder_path = os.path.join(model_path, train_folder)
            # Nur Trainingsordner berücksichtigen (müssen mit "train_hyper_" beginnen)
            if not train_folder.startswith("train_hyper_") or not os.path.isdir(train_folder_path):
                continue

            # Benötigte Dateien: params.json, progress.csv, result.json
            params_file = os.path.join(train_folder_path, "params.json")
            progress_file = os.path.join(train_folder_path, "progress.csv")
            result_file = os.path.join(train_folder_path, "result.json")

            if not (os.path.isfile(params_file) and os.path.isfile(progress_file) and os.path.isfile(result_file)):
                continue

            with open(params_file, "r", encoding="utf-8") as f:
                params_dict = json.load(f)

            progress_df = pd.read_csv(progress_file)
            progress_records = progress_df.to_dict(orient="records")

            result_records = []
            with open(result_file, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if line:
                        result_records.append(json.loads(line))

            run_dict = {
                "id": train_folder,
                **params_dict,
                "result": result_records,
                "progress": progress_records,
            }
            runs_data[model_folder][train_folder] = run_dict

    return runs_data

# Lade die Hyperparameter-Daten
hyperparameter_data = load_hyperparameter_runs_as_dict(HYPER_FOLDER)

# Sortiere die Runs pro Modell und extrahiere den Checkpoint des besten Runs
sorted_hyperparameter_data = {}
for model_name, runs_dict in hyperparameter_data.items():
    runs_list = []
    
    for run_name, run_data in runs_dict.items():
        # Bestimme die höchste Validierungsgenauigkeit aus der "progress"-Liste (falls vorhanden)
        if "progress" in run_data and run_data["progress"]:
            best_val_acc = max((r.get("val_acc", float("-inf")) for r in run_data["progress"]), default=float("-inf"))
        else:
            best_val_acc = float("-inf")
        
        # Absoluter Pfad zum Run-Ordner
        run_folder_path = os.path.join(HYPER_FOLDER, model_name, run_name)
        run_folder_path = os.path.abspath(run_folder_path)
        
        # Suche nach Checkpoint-Verzeichnissen in diesem Run-Ordner
        if os.path.isdir(run_folder_path):
            checkpoint_dirs = [
                d for d in os.listdir(run_folder_path)
                if d.startswith("checkpoint_") and os.path.isdir(os.path.join(run_folder_path, d))
            ]
        else:
            checkpoint_dirs = []
        
        # Wähle den letzten Checkpoint numerisch sortiert aus
        if checkpoint_dirs:
            checkpoint_dirs.sort(key=lambda x: int(x.split("_")[1]))
            last_checkpoint_dir = checkpoint_dirs[-1]
            last_checkpoint = os.path.join(run_folder_path, last_checkpoint_dir, "checkpoint.pkl")
            last_checkpoint = os.path.abspath(last_checkpoint)
        else:
            last_checkpoint = None
        
        runs_list.append((run_name, run_data, best_val_acc, last_checkpoint))
    
    # Sortiere Runs absteigend nach der Validierungsgenauigkeit
    sorted_runs = sorted(runs_list, key=lambda x: x[2], reverse=True)
    
    # Erstelle ein neues Dictionary mit fortlaufenden Schlüsseln ("0", "1", …)
    sorted_hyperparameter_data[model_name] = {
        str(i): {
            "max_validation_accuracy": best_val_acc,
            "path": last_checkpoint
        }
        for i, (run_name, run_data, best_val_acc, last_checkpoint) in enumerate(sorted_runs)
    }

# Debug: Ausgabe einiger Informationen
print("Modelle mit extrahierten besten Checkpoints:")
for model_name, runs in sorted_hyperparameter_data.items():
    best_checkpoint = runs.get("0", {}).get("path", "Kein Checkpoint gefunden")
    print(f"  {model_name}: {best_checkpoint}")


Modelle mit extrahierten besten Checkpoints:
  fcn_resnet101: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/fcn_resnet101/train_hyper_7adb91eb_1_auto_cast=True,batch_size=6,learning_rate=0.0001,max_epochs=100,weight_decay=0_2025-03-11_12-03-54/checkpoint_000099/checkpoint.pkl
  deeplabv3_resnet50: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/deeplabv3_resnet50/train_hyper_a8ae06ed_1_auto_cast=True,batch_size=6,learning_rate=0.0001,max_epochs=100,weight_decay=0_2025-03-07_20-02-56/checkpoint_000099/checkpoint.pkl
  fcn_resnet50: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/fcn_resnet50/train_hyper_bc32f73a_1_auto_cast=True,batch_size=6,learning_rate=0.0001,max_epochs=100,weight_decay=0_2025-03-10_11-25-21/checkpoint_000099/checkpoint.pkl
  deeplabv3_resnet101: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_smal

In [11]:
def load_checkpointed_model_ray(model_name, checkpoint_path, num_classes=None):
    """
    Lädt ein Modell aus einem Checkpoint, wobei das Modell über MapillaryTrainedModel instanziiert wird.
    Der Parameter 'skip_local_load' sorgt dafür, dass keine lokalen Gewichte geladen werden.
    """
    # Instanziiere das Modell; passe 'width' und 'height' bei Bedarf an.
    loaded_model = MapillaryTrainedModel(
        model_name=model_name,
        width=520,
        height=520,
        weights_name='',
        skip_local_load=True  # WICHTIG!
    )
    # Lade den Checkpoint mit ray.cloudpickle (alias pickle)
    with open(checkpoint_path, "rb") as fp:
        checkpoint_data = pickle.load(fp)
    # Lade die Modellgewichte
    loaded_model.model.load_state_dict(checkpoint_data["model_state"], strict=True)
    # Falls vorhanden, lade auch den Optimizer-Zustand
    if "optimizer_state" in checkpoint_data:
        loaded_model.optimizer.load_state_dict(checkpoint_data["optimizer_state"])
    return loaded_model


In [12]:
# Laden des Datensatzes (hier verwenden wir den Validierungsdatensatz als Testset)
mapillary_loader = MapillaryDataLoader(
    train_images_dir=TRAIN_IMAGES_DIR,
    train_annotations_dir=TRAIN_ANNOTATIONS_DIR,
    val_images_dir=VAL_IMAGES_DIR,
    val_annotations_dir=VAL_ANNOTATIONS_DIR
)
test_dataset = mapillary_loader.test_dataset
print(f"Test dataset loaded with {len(test_dataset)} samples.")

# Funktion zur Berechnung der Confusion Matrix für ein einzelnes Bild
def compute_confusion_matrix(predicted, ground_truth, num_classes):
    mask = (ground_truth >= 0) & (ground_truth < num_classes)
    label = num_classes * ground_truth[mask] + predicted[mask]
    count = torch.bincount(label, minlength=num_classes**2)
    confusion_matrix = count.reshape(num_classes, num_classes)
    return confusion_matrix

# Evaluierungsfunktion, die über den gesamten Datensatz läuft, die Confusion Matrix aufsummiert und speichert
def evaluate_and_store_confusion_matrix(model, dataset, num_classes, save_path):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.model.to(device)
    confusion_matrix_total = torch.zeros((num_classes, num_classes), dtype=torch.int64).to(device)
    
    with torch.no_grad():
        for i in tqdm(range(len(dataset)), desc="Evaluating Dataset"):
            image, annotation = dataset[i]
            image = image.to(device)
            annotation = annotation.to(device)
            output = model.inference(image)
            predicted = output.argmax(1).squeeze(0)
            conf_mat = compute_confusion_matrix(predicted.cpu(), annotation.cpu(), num_classes)
            confusion_matrix_total += conf_mat.to(device)
    
    torch.save(confusion_matrix_total, save_path)
    print(f"Confusion Matrix saved to: {save_path}")

# Iteriere über alle Modelle aus sorted_hyperparameter_data und berechne deren Confusion Matrix
for model_name, runs_dict in sorted_hyperparameter_data.items():
    best_run_info = runs_dict.get("0", {})
    checkpoint_path = best_run_info.get("path", None)
    
    if not checkpoint_path or not os.path.isfile(checkpoint_path):
        print(f"[WARNING] No valid checkpoint for {model_name}. Skipping.")
        continue
    
    print(f"\nEvaluating model: {model_name}")
    print(f"Using checkpoint: {checkpoint_path}")
    
    try:
        model_loaded = load_checkpointed_model_ray(model_name, checkpoint_path)
    except Exception as e:
        print(f"[ERROR] Could not load model {model_name}: {e}")
        continue
    
    # Definiere den Speicherpfad für die Confusion Matrix
    conf_matrix_save_path = os.path.join(CONF_MATRIX_DIR, f"{model_name}_confusion_matrix.pt")
    evaluate_and_store_confusion_matrix(model_loaded, test_dataset, NUM_CLASSES_ORIGINAL, conf_matrix_save_path)


Test dataset loaded with 2000 samples.

Evaluating model: fcn_resnet101
Using checkpoint: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/fcn_resnet101/train_hyper_7adb91eb_1_auto_cast=True,batch_size=6,learning_rate=0.0001,max_epochs=100,weight_decay=0_2025-03-11_12-03-54/checkpoint_000099/checkpoint.pkl
Using CUDA GPU
Model loaded: fcn_resnet101 | Device: cuda 
Error loading Model with Epoch latest: Error(s) in loading state_dict for FCN:
	size mismatch for classifier.4.weight: copying a param with shape torch.Size([20, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([124, 512, 1, 1]).
	size mismatch for classifier.4.bias: copying a param with shape torch.Size([20]) from checkpoint, the shape in current model is torch.Size([124]).
Skipping local .pth load due to error above.


Evaluating Dataset: 100%|██████████| 2000/2000 [03:29<00:00,  9.52it/s]


Confusion Matrix saved to: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/confusion_matrices_hyper_small/fcn_resnet101_confusion_matrix.pt

Evaluating model: deeplabv3_resnet50
Using checkpoint: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/deeplabv3_resnet50/train_hyper_a8ae06ed_1_auto_cast=True,batch_size=6,learning_rate=0.0001,max_epochs=100,weight_decay=0_2025-03-07_20-02-56/checkpoint_000099/checkpoint.pkl
Using CUDA GPU
Model loaded: deeplabv3_resnet50 | Device: cuda 


Evaluating Dataset: 100%|██████████| 2000/2000 [02:53<00:00, 11.55it/s]


Confusion Matrix saved to: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/confusion_matrices_hyper_small/deeplabv3_resnet50_confusion_matrix.pt

Evaluating model: fcn_resnet50
Using checkpoint: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/fcn_resnet50/train_hyper_bc32f73a_1_auto_cast=True,batch_size=6,learning_rate=0.0001,max_epochs=100,weight_decay=0_2025-03-10_11-25-21/checkpoint_000099/checkpoint.pkl
Using CUDA GPU
Model loaded: fcn_resnet50 | Device: cuda 


Evaluating Dataset: 100%|██████████| 2000/2000 [02:43<00:00, 12.24it/s]


Confusion Matrix saved to: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/confusion_matrices_hyper_small/fcn_resnet50_confusion_matrix.pt

Evaluating model: deeplabv3_resnet101
Using checkpoint: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_small/deeplabv3_resnet101/train_hyper_ae167f80_1_auto_cast=True,batch_size=6,learning_rate=0.0001,max_epochs=100,weight_decay=0_2025-03-09_00-01-13/checkpoint_000099/checkpoint.pkl
Using CUDA GPU
Model loaded: deeplabv3_resnet101 | Device: cuda 


Evaluating Dataset: 100%|██████████| 2000/2000 [03:15<00:00, 10.25it/s]

Confusion Matrix saved to: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/confusion_matrices_hyper_small/deeplabv3_resnet101_confusion_matrix.pt





In [13]:
# %% Cell 5: Summary of Saved Confusion Matrices
import glob

print("Listing all saved Confusion Matrix files in:", CONF_MATRIX_DIR)
conf_files = glob.glob(os.path.join(CONF_MATRIX_DIR, "*_confusion_matrix.pt"))
summary = {}
for file in conf_files:
    cm = torch.load(file)
    shape = cm.shape
    model_name = os.path.basename(file).split("_confusion_matrix.pt")[0]
    summary[model_name] = {"path": file, "shape": shape}
    print(f"Model: {model_name}, Confusion Matrix shape: {shape}")

# Optionally, save summary to JSON file for reference
summary_save_path = os.path.join(CONF_MATRIX_DIR, "confusion_matrices_summary.json")
with open(summary_save_path, "w", encoding="utf-8") as f:
    # Konvertiere die Shape in eine Liste, damit es JSON-kompatibel ist
    json.dump({k: {"path": v["path"], "shape": list(v["shape"])} for k, v in summary.items()}, f, indent=4)

print(f"Summary saved to: {summary_save_path}")


Listing all saved Confusion Matrix files in: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/confusion_matrices_hyper_small
Model: fcn_resnet101, Confusion Matrix shape: torch.Size([124, 124])
Model: deeplabv3_resnet101, Confusion Matrix shape: torch.Size([124, 124])
Model: deeplabv3_resnet50, Confusion Matrix shape: torch.Size([124, 124])
Model: fcn_resnet50, Confusion Matrix shape: torch.Size([124, 124])
Summary saved to: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/confusion_matrices_hyper_small/confusion_matrices_summary.json
