In [1]:
import os
import json
import pandas as pd
import torch
import ray.cloudpickle as pickle  # alias für pickle
from Helper.ml_models import *   # Importiere deine Modelle und DataLoader
from tqdm import tqdm

# Base directory of the project.
# The original machine-specific location stays the default; set the
# SEGMENTATION_BASE_PATH environment variable to run the notebook elsewhere
# without editing this cell. An unset or empty variable falls back to the default.
BASE_PATH = (
    os.environ.get("SEGMENTATION_BASE_PATH")
    or "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation"
)

# Directory in which the confusion matrices (for Mapillary) are stored
CONF_MATRIX_DIR = os.path.join(BASE_PATH, "FINAL_DATEN/confusion_matrices_hyper_big")
os.makedirs(CONF_MATRIX_DIR, exist_ok=True)

# Paths to the Mapillary datasets
TRAIN_IMAGES_DIR = os.path.join(BASE_PATH, "Mapillary_Vistas/training/images")
TRAIN_ANNOTATIONS_DIR = os.path.join(BASE_PATH, "Mapillary_Vistas/training/v2.0/labels_big")
VAL_IMAGES_DIR = os.path.join(BASE_PATH, "Mapillary_Vistas/validation/images")
VAL_ANNOTATIONS_DIR = os.path.join(BASE_PATH, "Mapillary_Vistas/validation/v2.0/labels_big")

# Evaluation settings: for Mapillary with more labels (e.g. 124 classes)
NUM_CLASSES_MAPILLARY = 124

# Path to the file listing the best checkpoints (Mapillary)
BEST_CHECKPOINTS_JSON = os.path.join(BASE_PATH, "FINAL_DATEN/best_checkpoints_Mapillary.json")

# Print the resolved configuration as a sanity check
print("BASE_PATH:", BASE_PATH)
print("CONF_MATRIX_DIR:", CONF_MATRIX_DIR)
print("\nDataset-Pfade:")
print("  Train Images      :", TRAIN_IMAGES_DIR)
print("  Train Annotations :", TRAIN_ANNOTATIONS_DIR)
print("  Validation Images :", VAL_IMAGES_DIR)
print("  Validation Annotations:", VAL_ANNOTATIONS_DIR)
print("\nBest Checkpoints JSON:", BEST_CHECKPOINTS_JSON)
print("NUM_CLASSES_MAPILLARY:", NUM_CLASSES_MAPILLARY)


2025-03-22 11:53:13.292438: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


BASE_PATH: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation
CONF_MATRIX_DIR: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/confusion_matrices_hyper_big

Dataset-Pfade:
  Train Images      : /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/Mapillary_Vistas/training/images
  Train Annotations : /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/Mapillary_Vistas/training/v2.0/labels_big
  Validation Images : /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/Mapillary_Vistas/validation/images
  Validation Annotations: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/Mapillary_Vistas/validation/v2.0/labels_big

Best Checkpoints JSON: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/best_checkpoints_Mapillary.json
NUM_CLASSES_MAPILLARY: 124


In [2]:
# The JSON file holds the best configuration per model.
# A value may be the checkpoint path itself (a plain string) instead of a dict.
with open(BEST_CHECKPOINTS_JSON, "r", encoding="utf-8") as f:
    best_checkpoints_mapillary = json.load(f)

print("Gefundene Modelle:")
for model_name, info in best_checkpoints_mapillary.items():
    # Dict entries carry the path under "path" (with a placeholder text when
    # the key is missing); any other entry is taken to be the path itself.
    checkpoint_path = (
        info.get("path", "Kein Checkpoint") if isinstance(info, dict) else info
    )
    print(f"  {model_name}: {checkpoint_path}")


Gefundene Modelle:
  fcn_resnet101: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_Big/fcn_resnet101/train_hyper_c75247d6_1_auto_cast=True,batch_size=4,learning_rate=0.0001,max_epochs=100,weight_decay=0.0000_2025-02-22_22-23-45/checkpoint_000099/checkpoint.pkl
  deeplabv3_resnet50: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_Big/deeplabv3_resnet50/train_hyper_56ebb659_2_auto_cast=True,batch_size=8,learning_rate=0.0000,max_epochs=100,weight_decay=0.0007_2025-02-16_19-30-53/checkpoint_000099/checkpoint.pkl
  fcn_resnet50: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_Big/fcn_resnet50/train_hyper_4b822bcf_2_auto_cast=True,batch_size=8,learning_rate=0.0000,max_epochs=100,weight_decay=0.0007_2025-02-20_17-09-00/checkpoint_000099/checkpoint.pkl
  deeplabv3_resnet101: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG_Big/deeplabv3_resnet1

In [3]:
# Assumes a MapillaryDataLoader is defined in Helper.ml_models.
# It supplies the test set on which the confusion matrix is computed.
mapillary_dirs = {
    "train_images_dir": TRAIN_IMAGES_DIR,
    "train_annotations_dir": TRAIN_ANNOTATIONS_DIR,
    "val_images_dir": VAL_IMAGES_DIR,
    "val_annotations_dir": VAL_ANNOTATIONS_DIR,
}
mapillary_loader = MapillaryDataLoader(**mapillary_dirs)

# Use the loader's test split (or whichever attribute is appropriate)
test_dataset = mapillary_loader.test_dataset
print(f"Test-Datensatz geladen: {len(test_dataset)} Samples")


Test-Datensatz geladen: 2000 Samples


In [4]:
def load_checkpointed_model_ray(model_name, checkpoint_path, num_classes=None):
    """
    Create a MapillaryTrainedModel and restore its state from a checkpoint file.

    Args:
        model_name: Forwarded to MapillaryTrainedModel as ``model_name``.
        checkpoint_path: Path of the checkpoint file; it is opened in binary
            mode and deserialized with ``pickle.load`` (in this notebook
            ``pickle`` is an alias for ``ray.cloudpickle``).
        num_classes: Accepted for the caller's convenience but not used here.

    Returns:
        The MapillaryTrainedModel whose ``model`` received the checkpoint's
        "model_state" (strict loading) and whose ``optimizer`` received
        "optimizer_state" when the checkpoint contains that key.
    """
    # Fixed 520x520 size (adjust width/height if needed). skip_local_load=True
    # is important: it is meant to keep local weights from being loaded before
    # the checkpoint is applied -- the actual handling lives in
    # MapillaryTrainedModel.
    constructor_args = dict(
        model_name=model_name,
        width=520,
        height=520,
        weights_name='',
        skip_local_load=True,
    )
    restored = MapillaryTrainedModel(**constructor_args)

    # NOTE(review): unpickling can execute arbitrary code -- only open
    # checkpoints from a trusted source.
    with open(checkpoint_path, "rb") as fp:
        checkpoint_data = pickle.load(fp)

    # Model weights are mandatory; strict=True rejects missing/unexpected keys.
    restored.model.load_state_dict(checkpoint_data["model_state"], strict=True)

    # The optimizer state is optional in the checkpoint.
    if "optimizer_state" not in checkpoint_data:
        return restored
    restored.optimizer.load_state_dict(checkpoint_data["optimizer_state"])
    return restored

def compute_confusion_matrix(predicted, ground_truth, num_classes):
    """
    Compute the confusion matrix for a single image.

    Args:
        predicted: Integer tensor of predicted class indices. Values are
            expected to lie in ``[0, num_classes)``; this is not validated.
        ground_truth: Integer tensor of reference class indices with the same
            number of elements as ``predicted``. Entries outside
            ``[0, num_classes)`` are ignored.
        num_classes: Number of classes ``C``.

    Returns:
        An int64 tensor of shape ``(C, C)`` where entry ``[g, p]`` counts the
        elements with ground-truth class ``g`` and predicted class ``p``.
    """
    # Widen to int64 BEFORE the index arithmetic: a Python int times a uint8
    # tensor stays uint8, so `num_classes * ground_truth` would wrap around at
    # 256 and the counts would land in the wrong cells.
    # Flattening lets inputs that differ only by singleton dimensions
    # (e.g. (1, H, W) vs. (H, W)) be compared element by element.
    truth = ground_truth.reshape(-1).to(torch.int64)
    guess = predicted.reshape(-1).to(torch.int64)

    valid = (truth >= 0) & (truth < num_classes)
    # Encode each (ground truth, prediction) pair as one flat index g * C + p.
    flat_index = num_classes * truth[valid] + guess[valid]
    counts = torch.bincount(flat_index, minlength=num_classes ** 2)
    return counts.reshape(num_classes, num_classes)

def evaluate_and_store_confusion_matrix(model, dataset, num_classes, save_path):
    """
    Evaluate the model on every sample of the dataset, sum the per-sample
    confusion matrices and save the total with ``torch.save``.

    Args:
        model: Wrapper object exposing ``model`` (moved to the device and put
            into eval mode) and ``inference(image)``.
        dataset: Indexable collection with ``len()`` whose items are
            ``(image, annotation)`` tensor pairs.
        num_classes: Number of classes; the matrix is ``num_classes`` squared.
        save_path: Destination file for the summed confusion matrix.

    Returns:
        The summed confusion matrix as an int64 CPU tensor (the same tensor
        that is written to ``save_path``).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.model.to(device)
    model.model.eval()

    # Accumulate on the CPU: the per-sample matrices are computed from CPU
    # tensors anyway, so this avoids copying each one back to the GPU, and the
    # saved file stays loadable on machines without CUDA.
    confusion_matrix_total = torch.zeros((num_classes, num_classes), dtype=torch.int64)

    with torch.no_grad():
        for i in tqdm(range(len(dataset)), desc="Evaluating Dataset"):
            image, annotation = dataset[i]
            output = model.inference(image.to(device))
            # argmax over dim 1, then drop dim 0 -- presumably the output is
            # (1, classes, H, W); TODO confirm against model.inference.
            predicted = output.argmax(1).squeeze(0)
            confusion_matrix_total += compute_confusion_matrix(
                predicted.cpu(), annotation.cpu(), num_classes
            )

    torch.save(confusion_matrix_total, save_path)
    print(f"Confusion Matrix gespeichert: {save_path}")
    return confusion_matrix_total


In [6]:
# Run the evaluation for every model listed in best_checkpoints_mapillary
for model_name, info in best_checkpoints_mapillary.items():
    # A dict entry carries the checkpoint path under "path"; any other entry
    # is used as the path directly.
    checkpoint_path = info.get("path", None) if isinstance(info, dict) else info

    # Nothing to evaluate when the path is empty or does not point to a file.
    if not (checkpoint_path and os.path.isfile(checkpoint_path)):
        print(f"[WARNING] Kein gültiger Checkpoint für {model_name}. Überspringe.")
        continue

    print(f"\nEvaluierung von Modell: {model_name}")
    print(f"Verwende Checkpoint: {checkpoint_path}")

    try:
        model_loaded = load_checkpointed_model_ray(model_name, checkpoint_path)
    except Exception as e:
        # A model that cannot be restored is reported and skipped.
        print(f"[ERROR] Modell {model_name} konnte nicht geladen werden: {e}")
    else:
        # Destination file for this model's confusion matrix
        conf_matrix_save_path = os.path.join(CONF_MATRIX_DIR, f"{model_name}_confusion_matrix.pt")
        evaluate_and_store_confusion_matrix(
            model_loaded, test_dataset, NUM_CLASSES_MAPILLARY, conf_matrix_save_path
        )




In [7]:
import glob

print("Liste aller gespeicherten Confusion Matrix-Dateien in:", CONF_MATRIX_DIR)
# glob returns matches in arbitrary order; sorting keeps the listing and the
# JSON summary stable across runs.
conf_files = sorted(glob.glob(os.path.join(CONF_MATRIX_DIR, "*_confusion_matrix.pt")))
if not conf_files:
    # Make the empty case visible instead of silently writing an empty summary.
    print("[WARNING] Keine Confusion Matrix-Dateien gefunden.")

summary = {}
for file in conf_files:
    # map_location="cpu": matrices that were saved from a CUDA device remain
    # loadable on machines without a GPU.
    cm = torch.load(file, map_location="cpu")
    shape = cm.shape
    # The model name is the file name up to the fixed suffix.
    model_name = os.path.basename(file).split("_confusion_matrix.pt")[0]
    summary[model_name] = {"path": file, "shape": list(shape)}
    print(f"Modell: {model_name}, Matrix-Shape: {shape}")

# Store the summary as JSON
summary_save_path = os.path.join(CONF_MATRIX_DIR, "confusion_matrices_summary.json")
with open(summary_save_path, "w", encoding="utf-8") as f:
    json.dump(summary, f, indent=4)

print(f"Summary gespeichert unter: {summary_save_path}")


Liste aller gespeicherten Confusion Matrix-Dateien in: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/confusion_matrices_hyper_big
Summary gespeichert unter: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/confusion_matrices_hyper_big/confusion_matrices_summary.json
