In [1]:
import pandas as pd
import os
import json

In [2]:
main_folder_hyper = "HyperparameterLOG"

for folder_name in os.listdir(main_folder_hyper):
    folder_path = os.path.join(main_folder_hyper, folder_name)
    if os.path.isdir(folder_path):
        print(f"Subfolder name: {folder_name}")

Subfolder name: lraspp_mobilenet_v3_large
Subfolder name: fcn_resnet101
Subfolder name: deeplabv3_resnet50
Subfolder name: fcn_resnet50
Subfolder name: deeplabv3_mobilenet_v3_large
Subfolder name: deeplabv3_resnet101


In [3]:
def load_hyperparameter_runs_as_dict(base_folder: str):
    """
    Lädt die Hyperparameter-Runs und organisiert sie nach Modellen.
    """
    runs_data = {}

    # Durchlaufe alle Modellordner
    for model_folder in os.listdir(base_folder):
        model_path = os.path.join(base_folder, model_folder, "Hyperparameter_Tuning_Deeplabv3")
        
        # Überspringe, wenn der Pfad ungültig ist
        if not os.path.isdir(model_path):
            print(f"Skipping invalid model path: {model_path}")
            continue
        
        # Initialisiere die Modellgruppe
        if model_folder not in runs_data:
            runs_data[model_folder] = {}

        # Durchlaufe alle 'train_hyper_*'-Ordner innerhalb des Modells
        for train_folder in os.listdir(model_path):
            train_folder_path = os.path.join(model_path, train_folder)
            
            # Prüfe, ob es sich um einen 'train_hyper_*'-Ordner handelt
            if not (os.path.isdir(train_folder_path) and train_folder.startswith("train_hyper_")):
                continue
            
            # Pfade zu den benötigten Dateien
            params_file = os.path.join(train_folder_path, "params.json")
            progress_file = os.path.join(train_folder_path, "progress.csv")
            result_file = os.path.join(train_folder_path, "result.json")
            
            # Debugging: Drucke die überprüften Pfade
            #print(f"Checking files in: {train_folder_path}")
            #print(f"  params.json: {'Exists' if os.path.isfile(params_file) else 'Missing'}")
            #print(f"  progress.csv: {'Exists' if os.path.isfile(progress_file) else 'Missing'}")
            #print(f"  result.json: {'Exists' if os.path.isfile(result_file) else 'Missing'}")
            
            # Überprüfe, ob alle Dateien vorhanden sind
            if not (
                os.path.isfile(params_file)
                and os.path.isfile(progress_file)
                and os.path.isfile(result_file)
            ):
                print(f"Skipping folder '{train_folder}' due to missing files.")
                continue
            
            # Lade params.json
            with open(params_file, "r", encoding="utf-8") as f:
                params_dict = json.load(f)
            
            # Lade progress.csv
            progress_df = pd.read_csv(progress_file)
            progress_records = progress_df.to_dict(orient="records")
            
            # Lade result.json
            result_records = []
            with open(result_file, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if line:
                        result_records.append(json.loads(line))
            
            # Erstelle das Dictionary für diesen Lauf
            run_dict = {
                "id": train_folder,  # Der Name des 'train_hyper_*'-Ordners
                **params_dict,  # Füge alle Parameter aus params.json hinzu
                "result": result_records,  # Ergebnisse aus result.json
                "progress": progress_records,  # Fortschritte aus progress.csv
            }
            
            # Speichere die Daten unter dem Modellnamen und train_hyper_*
            runs_data[model_folder][train_folder] = run_dict

    return runs_data


# Nutzung
main_folder_hyper = "HyperparameterLOG"

# Rufe die Funktion auf
hyperparameter_data = load_hyperparameter_runs_as_dict(main_folder_hyper)

# print(len(hyperparameter_data.keys()))
# print(hyperparameter_data["deeplabv3_mobilenet_v3_large"].keys())


Skipping folder 'train_hyper_659805fc_2_auto_cast=True,batch_size=8,learning_rate=0.0040,max_epochs=100,weight_decay=0.0076_2025-01-21_10-27-28' due to missing files.
Skipping folder 'train_hyper_457ebcd4_2_auto_cast=True,batch_size=8,learning_rate=0.0040,max_epochs=100,weight_decay=0.0076_2025-01-16_10-24-54' due to missing files.


In [4]:
# Neuer Dictionary für sortierte Daten
sorted_hyperparameter_data = {}

# Iteriere über jedes Modell
for model_name, runs in hyperparameter_data.items():
    # Extrahiere die Runs und deren Validation Accuracy
    runs_list = []
    for run_name, run_data in runs.items():
        # Finde den besten Validation Accuracy-Wert im 'progress'
        if "progress" in run_data and run_data["progress"]:
            best_val_acc = max(
                (record.get("val_acc", float("-inf")) for record in run_data["progress"]),
                default=float("-inf")
            )
        else:
            best_val_acc = float("-inf")
        
        # Bestimme den Pfad zum letzten Checkpoint
        run_folder_path = os.path.join(
            "HyperparameterLOG", model_name, "Hyperparameter_Tuning_Deeplabv3", run_name
        )
        checkpoint_dirs = [
            d for d in os.listdir(run_folder_path)
            if d.startswith("checkpoint_") and os.path.isdir(os.path.join(run_folder_path, d))
        ]
        
        if checkpoint_dirs:
            # Sortiere die Checkpoints numerisch und finde den letzten
            checkpoint_dirs.sort(key=lambda x: int(x.split("_")[1]))
            last_checkpoint = os.path.join(run_folder_path, checkpoint_dirs[-1], "checkpoint.pkl")
        else:
            last_checkpoint = None  # Kein Checkpoint verfügbar
        
        # Füge den Run und seine Validation Accuracy zur Liste hinzu
        runs_list.append((run_name, run_data, best_val_acc, last_checkpoint))
    
    # Sortiere die Runs basierend auf der Validation Accuracy absteigend
    sorted_runs = sorted(runs_list, key=lambda x: x[2], reverse=True)
    
    # Erstelle ein neues Dictionary mit neuen Keys (0 bis n-1)
    sorted_hyperparameter_data[model_name] = {
        str(i): {
            **run_data,
            "max_validation_accuracy": best_val_acc,
            "path": last_checkpoint
        }
        for i, (run_name, run_data, best_val_acc, last_checkpoint) in enumerate(sorted_runs)
    }

# # Überprüfung der Struktur
# print(sorted_hyperparameter_data["deeplabv3_mobilenet_v3_large"]["0"].keys())

# # Überprüfung des Typs von 'result'
# print(type(sorted_hyperparameter_data["deeplabv3_mobilenet_v3_large"]["0"]["result"]))

# # Überprüfung der max_validation_accuracy
# print(sorted_hyperparameter_data["deeplabv3_mobilenet_v3_large"]["0"]["max_validation_accuracy"])

# print(sorted_hyperparameter_data.keys())

# for i in range(len(sorted_hyperparameter_data["deeplabv3_resnet50"].keys())):
#     print(sorted_hyperparameter_data["deeplabv3_mobilenet_v3_large"][str(i)]["max_validation_accuracy"])


for key in sorted_hyperparameter_data.keys():
    if key == "fcn_resnet101":
        continue
    print(f'\n {key}')
    for i in range(3):
        print(sorted_hyperparameter_data[key][str(i)]["max_validation_accuracy"])



 lraspp_mobilenet_v3_large
90.23178158656506
90.08631829743304
89.88125971116598

 deeplabv3_resnet50
91.306187742114
91.26811930633008
91.2159976161083

 fcn_resnet50
91.02365533608616
90.8788871269848
90.56566876676172

 deeplabv3_mobilenet_v3_large
89.87057476799626
89.85961711400111
89.78379219701162

 deeplabv3_resnet101
91.31428991422248
91.26546602954323
91.22656150227746


In [5]:
print(sorted_hyperparameter_data["deeplabv3_resnet50"]["0"].keys())

dict_keys(['id', 'auto_cast', 'batch_size', 'learning_rate', 'max_epochs', 'weight_decay', 'result', 'progress', 'max_validation_accuracy', 'path'])


In [6]:
import torch
import ray.cloudpickle as pickle  # important for reading the checkpoint.pkl
import pandas as pd

from Helper.ml_models import TrainedModel, K_Fold_Dataset

2025-01-21 15:38:44.038831: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
# STEP 2: DEFINING YOUR METRIC COMPUTATION FUNCTIONS
# --------------------------------------------------
# We'll reuse your previously defined metrics exactly as given:

def compute_confusion_matrix(predicted, ground_truth, num_classes):
    mask = (ground_truth >= 0) & (ground_truth < num_classes)
    label = num_classes * ground_truth[mask] + predicted[mask]
    count = torch.bincount(label, minlength=num_classes**2)
    confusion_matrix = count.reshape(num_classes, num_classes)
    return confusion_matrix

def compute_miou(confusion_matrix):
    intersection = torch.diag(confusion_matrix)
    ground_truth_set = confusion_matrix.sum(1)
    predicted_set = confusion_matrix.sum(0)
    union = ground_truth_set + predicted_set - intersection
    IoU = intersection / (union + 1e-6)  # Avoid division by zero
    mIoU = torch.mean(IoU)
    return mIoU.item(), IoU

def compute_mean_pixel_accuracy(confusion_matrix):
    true_positive = torch.diag(confusion_matrix)
    total_pixels = confusion_matrix.sum(1)
    pixel_accuracy = true_positive / (total_pixels + 1e-6)
    mPA = torch.mean(pixel_accuracy)
    return mPA.item(), pixel_accuracy

def compute_fwiou(confusion_matrix):
    total_pixels = confusion_matrix.sum()
    ground_truth_set = confusion_matrix.sum(1)
    intersection = torch.diag(confusion_matrix)
    union = ground_truth_set + confusion_matrix.sum(0) - intersection
    IoU = intersection / (union + 1e-6)
    FWIoU = (ground_truth_set * IoU) / total_pixels
    FWIoU = FWIoU.sum()
    return FWIoU.item()

def compute_dice_coefficient(confusion_matrix):
    intersection = torch.diag(confusion_matrix)
    ground_truth_set = confusion_matrix.sum(1)
    predicted_set = confusion_matrix.sum(0)
    dice = (2 * intersection) / (ground_truth_set + predicted_set + 1e-6)
    mean_dice = torch.mean(dice)
    return mean_dice.item(), dice


In [8]:
# STEP 3: FUNCTION TO LOAD A CHECKPOINT USING RAY.CLOUDPICKLE
# -----------------------------------------------------------
# This is the key part: your hyperparameter search used ray.cloudpickle to store
# checkpoint.pkl files. Below, we replicate how you loaded them in your training loop.

def load_checkpointed_model_ray(model_name: str, checkpoint_path: str) -> TrainedModel:
    """
    Loads a model from the given Ray Tune 'checkpoint.pkl' (created with ray.cloudpickle).
    It returns an instance of TrainedModel with its model state_dict restored.

    :param model_name: The string identifying which segmentation model to instantiate 
                       (e.g., 'deeplabv3_resnet101', etc.).
    :param checkpoint_path: Full path to the 'checkpoint.pkl' file produced by Ray Tune.
    :return: A TrainedModel object with the loaded state.
    """
    if not os.path.isfile(checkpoint_path):
        raise FileNotFoundError(f"Checkpoint not found at: {checkpoint_path}")

    # 1) Create the TrainedModel object (as you did in your training code).
    #    Adjust the constructor arguments if needed. 
    #    (Here we use placeholders for image size & folder_path, which you can adapt.)
    loaded_model = TrainedModel(
        model_name=model_name,
        height=2048,
        width=1024,
        weights_name="",  # Not relevant if we load from checkpoint
        folder_path="",   # Not relevant here
        start_epoch="latest", 
        skip_local_load=True
    )

    # 2) Load the checkpoint using ray.cloudpickle
    with open(checkpoint_path, "rb") as fp:
        checkpoint_data = pickle.load(fp)

    # 3) Restore the model and optimizer states
    #    (If you only need the model for inference, you don't have to restore the optimizer.)
    loaded_model.model.load_state_dict(checkpoint_data["model_state"])
    if "optimizer_state" in checkpoint_data:
        loaded_model.optimizer.load_state_dict(checkpoint_data["optimizer_state"])
    
    # 4) Switch to evaluation mode
    loaded_model.model.eval()
    
    # Optionally, return the checkpointed epoch if you need it:
    # epoch = checkpoint_data.get("epoch", 0)
    
    return loaded_model


In [9]:
# STEP 4: PREPARE (OR LOAD) THE DATASET FOR EVALUATION
# ----------------------------------------------------
# We'll assume you have a K_Fold_Dataset or similar dataset for evaluating your model.

k_fold_dataset_eval = K_Fold_Dataset(
    image_dir="CityscapesDaten/images",
    annotation_dir="CityscapesDaten/semantic",
    k_fold_csv_dir="Daten/CityscapesDaten",
    leave_out_fold=0
)
k_fold_dataset_eval.check_for_data_leaks()

# For the final evaluation, let's assume we use the "test_dataset"
test_dataset = k_fold_dataset_eval.test_dataset


No data leaks found.


In [10]:
# STEP 5: DEFINE AN EVALUATION FUNCTION
# -------------------------------------
# This function runs inference over the test dataset and computes your desired metrics.

def evaluate_model(model: TrainedModel, dataset, num_classes: int) -> dict:
    """
    Runs inference on the given dataset and computes segmentation metrics.
    Returns a dictionary with these metrics.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.model.to(device)
    
    confusion_matrix_total = torch.zeros((num_classes, num_classes), dtype=torch.int64)
    
    # Inference loop
    with torch.no_grad():
        for i in range(len(dataset)):
            image, annotation = dataset[i]
            
            # Move data to device
            image = image.to(device)
            annotation = annotation.to(device)
            
            # Model inference -> shape [1, num_classes, H, W]
            output = model.inference(image)
            predicted = output.argmax(1).squeeze(0)
            
            # Update confusion matrix
            conf_mat = compute_confusion_matrix(predicted.cpu(), annotation.cpu(), num_classes)
            confusion_matrix_total += conf_mat
    
    # Compute the final metrics
    miou, iou_per_class = compute_miou(confusion_matrix_total)
    mpa, pa_per_class = compute_mean_pixel_accuracy(confusion_matrix_total)
    fwiou = compute_fwiou(confusion_matrix_total)
    dice_mean, dice_per_class = compute_dice_coefficient(confusion_matrix_total)
    
    metrics = {
        "mIoU": miou,
        "mPA": mpa,
        "FWIoU": fwiou,
        "Dice_Mean": dice_mean,
        "IoU_per_class": iou_per_class.tolist(),
        "PA_per_class": pa_per_class.tolist(),
        "Dice_per_class": dice_per_class.tolist()
    }
    return metrics


In [11]:
# STEP 6: USE YOUR 'sorted_hyperparameter_data' DICTIONARY
# ---------------------------------------------------------
# We'll assume that 'sorted_hyperparameter_data' is exactly as you described:
# {
#   "deeplabv3_mobilenet_v3_large": {
#       "0": {
#           "path": ".../checkpoint.pkl",
#           "max_validation_accuracy": ...,
#           ...
#       },
#       "1": { ... },
#       ...
#   },
#   "deeplabv3_resnet50": { ... },
#   ...
# }
#
# We'll now iterate through the dictionary, load each model checkpoint, and evaluate it.

# Suppose you have:
# NUM_CLASSES in Cityscapes typically is 19 or 20
NUM_CLASSES = 20

evaluation_results = {}

for model_name, runs_dict in sorted_hyperparameter_data.items():
    # If you want to skip certain models (like "fcn_resnet101"), do it here:
    if model_name == "fcn_resnet101":
        continue
    
    # We'll evaluate just the best run (key = "0") for demonstration,
    # but you can easily loop over runs_dict.keys() to evaluate all.
    best_run_info = runs_dict["0"]  # Best run is at index "0"
    checkpoint_path = best_run_info.get("path", None)
    if not checkpoint_path:
        print(f"No checkpoint path found for {model_name} in run '0'. Skipping.")
        continue
    
    print(f"\nEvaluating model: {model_name}")
    print(f"Checkpoint path: {checkpoint_path}")
    
    # STEP 6a: Load the model
    try:
        model_loaded = load_checkpointed_model_ray(model_name, checkpoint_path)
    except FileNotFoundError:
        print(f"Checkpoint file not found at {checkpoint_path}. Skipping.")
        continue
    
    # STEP 6b: Evaluate the model
    metrics = evaluate_model(model_loaded, test_dataset, NUM_CLASSES)
    
    # STEP 6c: Print or store the results
    evaluation_results[model_name] = metrics
    print(f"Results for {model_name}:")
    for k, v in metrics.items():
        if isinstance(v, list):
            # For lists (per-class metrics), you might just show length:
            print(f"  {k}: [list of length {len(v)}]")
        else:
            print(f"  {k}: {v:.4f}" if isinstance(v, float) else f"  {k}: {v}")



Evaluating model: lraspp_mobilenet_v3_large
Checkpoint path: HyperparameterLOG/lraspp_mobilenet_v3_large/Hyperparameter_Tuning_Deeplabv3/train_hyper_f045fc27_12_auto_cast=True,batch_size=8,learning_rate=0.0008,max_epochs=100,weight_decay=0.0000_2025-01-19_15-06-55/checkpoint_000099/checkpoint.pkl
Using CUDA GPU
Model loaded: lraspp_mobilenet_v3_large | Device: cuda 
Checkpoint file not found at HyperparameterLOG/lraspp_mobilenet_v3_large/Hyperparameter_Tuning_Deeplabv3/train_hyper_f045fc27_12_auto_cast=True,batch_size=8,learning_rate=0.0008,max_epochs=100,weight_decay=0.0000_2025-01-19_15-06-55/checkpoint_000099/checkpoint.pkl. Skipping.

Evaluating model: deeplabv3_resnet50
Checkpoint path: HyperparameterLOG/deeplabv3_resnet50/Hyperparameter_Tuning_Deeplabv3/train_hyper_339081a1_34_auto_cast=True,batch_size=8,learning_rate=0.0001,max_epochs=100,weight_decay=0.0001_2025-01-15_14-54-45/checkpoint_000099/checkpoint.pkl
Using CUDA GPU
Model loaded: deeplabv3_resnet50 | Device: cuda 
Chec