In [1]:
import os
import json
import pandas as pd
import torch
import ray.cloudpickle as pickle

In [2]:
main_folder_hyper = "HyperparameterLOG"

for folder_name in os.listdir(main_folder_hyper):
    folder_path = os.path.join(main_folder_hyper, folder_name)
    if os.path.isdir(folder_path):
        print(f"Subfolder name: {folder_name}")

Subfolder name: lraspp_mobilenet_v3_large
Subfolder name: fcn_resnet101
Subfolder name: deeplabv3_resnet50
Subfolder name: fcn_resnet50
Subfolder name: deeplabv3_mobilenet_v3_large
Subfolder name: deeplabv3_resnet101


In [3]:
# -----------------------------
# 1) DEFINE ABSOLUTE BASE PATH
# -----------------------------
BASE_PATH = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation"
# Construct absolute path to your HyperparameterLOG
main_folder_hyper = os.path.join(BASE_PATH, "HyperparameterLOG")
main_folder_hyper = os.path.abspath(main_folder_hyper)
print("DEBUG: main_folder_hyper =", main_folder_hyper)
print("DEBUG: Exists on disk?   =", os.path.isdir(main_folder_hyper))

# (Optional) Print top-level subfolders for confirmation:
print("\nDEBUG: Subfolders in HyperparameterLOG:")
for item in os.listdir(main_folder_hyper):
    if os.path.isdir(os.path.join(main_folder_hyper, item)):
        print(" -", item)

# -----------------------------
# 2) LOAD HYPERPARAMETER RUNS
# -----------------------------
def load_hyperparameter_runs_as_dict(base_folder: str):
    runs_data = {}
    for model_folder in os.listdir(base_folder):
        model_path = os.path.join(base_folder, model_folder, "Hyperparameter_Tuning_Deeplabv3")
        if not os.path.isdir(model_path):
            print(f"Skipping invalid model path: {model_path}")
            continue
        
        if model_folder not in runs_data:
            runs_data[model_folder] = {}
        
        for train_folder in os.listdir(model_path):
            train_folder_path = os.path.join(model_path, train_folder)
            # Looking for folders that start with "train_hyper_"
            if not (os.path.isdir(train_folder_path) and train_folder.startswith("train_hyper_")):
                continue
            
            # Check if required files exist
            params_file = os.path.join(train_folder_path, "params.json")
            progress_file = os.path.join(train_folder_path, "progress.csv")
            result_file = os.path.join(train_folder_path, "result.json")
            
            if not (
                os.path.isfile(params_file)
                and os.path.isfile(progress_file)
                and os.path.isfile(result_file)
            ):
                print(f"Skipping folder '{train_folder}' due to missing params/progress/result.")
                continue
            
            with open(params_file, "r", encoding="utf-8") as f:
                params_dict = json.load(f)

            progress_df = pd.read_csv(progress_file)
            progress_records = progress_df.to_dict(orient="records")

            result_records = []
            with open(result_file, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if line:
                        result_records.append(json.loads(line))
            
            run_dict = {
                "id": train_folder,
                **params_dict,
                "result": result_records,
                "progress": progress_records,
            }
            
            runs_data[model_folder][train_folder] = run_dict

    return runs_data

hyperparameter_data = load_hyperparameter_runs_as_dict(main_folder_hyper)



DEBUG: main_folder_hyper = /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG
DEBUG: Exists on disk?   = True

DEBUG: Subfolders in HyperparameterLOG:
 - lraspp_mobilenet_v3_large
 - fcn_resnet101
 - deeplabv3_resnet50
 - fcn_resnet50
 - deeplabv3_mobilenet_v3_large
 - deeplabv3_resnet101
Skipping folder 'train_hyper_457ebcd4_2_auto_cast=True,batch_size=8,learning_rate=0.0040,max_epochs=100,weight_decay=0.0076_2025-01-16_10-24-54' due to missing params/progress/result.


In [4]:
# -----------------------------
# 3) SORT + FIND CHECKPOINTS
# -----------------------------
sorted_hyperparameter_data = {}

for model_name, runs_dict in hyperparameter_data.items():
    runs_list = []
    
    for run_name, run_data in runs_dict.items():
        # Determine best validation accuracy from the 'progress' CSV
        if "progress" in run_data and run_data["progress"]:
            best_val_acc = max(
                (r.get("val_acc", float("-inf")) for r in run_data["progress"]),
                default=float("-inf")
            )
        else:
            best_val_acc = float("-inf")
        
        # Build absolute path to this particular run folder
        run_folder_path = os.path.join(
            main_folder_hyper, model_name, "Hyperparameter_Tuning_Deeplabv3", run_name
        )
        run_folder_path = os.path.abspath(run_folder_path)

        # DEBUG: Print out the run folder path
        # print(f"DEBUG: run_folder_path for {run_name} = {run_folder_path}")
        
        # Gather checkpoint directories
        if os.path.isdir(run_folder_path):
            checkpoint_dirs = [
                d for d in os.listdir(run_folder_path)
                if d.startswith("checkpoint_") 
                   and os.path.isdir(os.path.join(run_folder_path, d))
            ]
        else:
            checkpoint_dirs = []
        
        # Debug printing
        print(f"\n[DEBUG] For model='{model_name}' run='{run_name}', found checkpoint dirs:")
        print("       ", checkpoint_dirs)

        # Pick the *last* checkpoint folder numerically
        if checkpoint_dirs:
            checkpoint_dirs.sort(key=lambda x: int(x.split("_")[1]))  # numeric sort by the suffix
            last_checkpoint_dir = checkpoint_dirs[-1]
            last_checkpoint = os.path.join(run_folder_path, last_checkpoint_dir, "checkpoint.pkl")
            last_checkpoint = os.path.abspath(last_checkpoint)
        else:
            last_checkpoint = None
        
        runs_list.append((run_name, run_data, best_val_acc, last_checkpoint))
    
    # Sort all runs by best_val_acc descending
    sorted_runs = sorted(runs_list, key=lambda x: x[2], reverse=True)
    
    # Build a new dictionary with simple int-string keys ("0", "1", ...)
    sorted_hyperparameter_data[model_name] = {
        str(i): {
            **run_data,
            "max_validation_accuracy": best_val_acc,
            "path": last_checkpoint
        }
        for i, (run_name, run_data, best_val_acc, last_checkpoint) in enumerate(sorted_runs)
    }



[DEBUG] For model='lraspp_mobilenet_v3_large' run='train_hyper_db3db980_37_auto_cast=True,batch_size=8,learning_rate=0.0046,max_epochs=100,weight_decay=0.0000_2025-01-20_02-39-29', found checkpoint dirs:
        ['checkpoint_000003', 'checkpoint_000002', 'checkpoint_000004']

[DEBUG] For model='lraspp_mobilenet_v3_large' run='train_hyper_fa4365d7_50_auto_cast=True,batch_size=8,learning_rate=0.0004,max_epochs=100,weight_decay=0.0005_2025-01-20_05-29-34', found checkpoint dirs:
        ['checkpoint_000003', 'checkpoint_000002', 'checkpoint_000004']

[DEBUG] For model='lraspp_mobilenet_v3_large' run='train_hyper_d290127c_49_auto_cast=True,batch_size=8,learning_rate=0.0008,max_epochs=100,weight_decay=0.0000_2025-01-20_05-24-05', found checkpoint dirs:
        ['checkpoint_000014', 'checkpoint_000013', 'checkpoint_000012']

[DEBUG] For model='lraspp_mobilenet_v3_large' run='train_hyper_cf20ac8c_9_auto_cast=False,batch_size=16,learning_rate=0.0000,max_epochs=100,weight_decay=0.0000_2025-01-

In [5]:

# -----------------------------
# 4) PRINT EXAMPLE CHECKPOINT PATH
# -----------------------------
# Example: print the best run's path for 'lraspp_mobilenet_v3_large'
# (Change "1" to "0" or "2" as needed, depending on how many runs you have.)
if "lraspp_mobilenet_v3_large" in sorted_hyperparameter_data:
    if "1" in sorted_hyperparameter_data["lraspp_mobilenet_v3_large"]:
        print("\nDEBUG: 'lraspp_mobilenet_v3_large' run #1 checkpoint path:")
        print(sorted_hyperparameter_data['lraspp_mobilenet_v3_large']['1']['path'])
    else:
        print("\nDEBUG: 'lraspp_mobilenet_v3_large' run #1 does not exist in dictionary.")

# You can print or debug other model-run combos similarly:
# print(sorted_hyperparameter_data["deeplabv3_resnet50"]["0"]["path"])



DEBUG: 'lraspp_mobilenet_v3_large' run #1 checkpoint path:
/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG/lraspp_mobilenet_v3_large/Hyperparameter_Tuning_Deeplabv3/train_hyper_6b07f155_24_auto_cast=True,batch_size=8,learning_rate=0.0009,max_epochs=100,weight_decay=0.0000_2025-01-19_21-40-50/checkpoint_000099/checkpoint.pkl


In [6]:
print(sorted_hyperparameter_data["deeplabv3_resnet50"]["0"].keys())

dict_keys(['id', 'auto_cast', 'batch_size', 'learning_rate', 'max_epochs', 'weight_decay', 'result', 'progress', 'max_validation_accuracy', 'path'])


In [7]:
# -----------------------------
# 5) REMAINDER OF YOUR EVAL CODE
# -----------------------------
import pandas as pd
from Helper.ml_models import TrainedModel, K_Fold_Dataset

def compute_confusion_matrix(predicted, ground_truth, num_classes):
    mask = (ground_truth >= 0) & (ground_truth < num_classes)
    label = num_classes * ground_truth[mask] + predicted[mask]
    count = torch.bincount(label, minlength=num_classes**2)
    confusion_matrix = count.reshape(num_classes, num_classes)
    return confusion_matrix

def compute_miou(confusion_matrix):
    intersection = torch.diag(confusion_matrix)
    ground_truth_set = confusion_matrix.sum(1)
    predicted_set = confusion_matrix.sum(0)
    union = ground_truth_set + predicted_set - intersection
    IoU = intersection / (union + 1e-6)
    mIoU = torch.mean(IoU)
    return mIoU.item(), IoU

def compute_mean_pixel_accuracy(confusion_matrix):
    true_positive = torch.diag(confusion_matrix)
    total_pixels = confusion_matrix.sum(1)
    pixel_accuracy = true_positive / (total_pixels + 1e-6)
    mPA = torch.mean(pixel_accuracy)
    return mPA.item(), pixel_accuracy

def compute_fwiou(confusion_matrix):
    total_pixels = confusion_matrix.sum()
    ground_truth_set = confusion_matrix.sum(1)
    intersection = torch.diag(confusion_matrix)
    union = ground_truth_set + confusion_matrix.sum(0) - intersection
    IoU = intersection / (union + 1e-6)
    FWIoU = (ground_truth_set * IoU) / total_pixels
    FWIoU = FWIoU.sum()
    return FWIoU.item()

def compute_dice_coefficient(confusion_matrix):
    intersection = torch.diag(confusion_matrix)
    ground_truth_set = confusion_matrix.sum(1)
    predicted_set = confusion_matrix.sum(0)
    dice = (2 * intersection) / (ground_truth_set + predicted_set + 1e-6)
    mean_dice = torch.mean(dice)
    return mean_dice.item(), dice


2025-02-06 10:52:49.496042: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
def load_checkpointed_model_ray(model_name: str, checkpoint_path: str) -> TrainedModel:
    print("DEBUG => In load_checkpointed_model_ray")
    print("        model_name:", model_name)
    print("        checkpoint_path:", checkpoint_path)
    print("        file exists?", os.path.isfile(checkpoint_path))

    if not os.path.isfile(checkpoint_path):
        print(f"\n[ERROR] load_checkpointed_model_ray: File not found => {checkpoint_path}")
        raise FileNotFoundError(f"Checkpoint not found at: {checkpoint_path}")

    loaded_model =  TrainedModel(
        model_name=model_name,
        width=2048,
        height=1024,
        weights_name="debug_weights",
        folder_path="/tmp/debug_model",
        start_epoch="latest",
        skip_local_load=True  # skip .pth loading
    )

    with open(checkpoint_path, "rb") as fp:
        checkpoint_data = pickle.load(fp)

    loaded_model.model.load_state_dict(checkpoint_data["model_state"])
    if "optimizer_state" in checkpoint_data:
        loaded_model.optimizer.load_state_dict(checkpoint_data["optimizer_state"])
    
    loaded_model.model.eval()
    return loaded_model


In [9]:
k_fold_dataset_eval = K_Fold_Dataset(
    image_dir="CityscapesDaten/images",
    annotation_dir="CityscapesDaten/semantic",
    k_fold_csv_dir="Daten/CityscapesDaten",
    leave_out_fold=0
)
k_fold_dataset_eval.check_for_data_leaks()
test_dataset = k_fold_dataset_eval.test_dataset

No data leaks found.


In [10]:
def evaluate_model(model: TrainedModel, dataset, num_classes: int) -> dict:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.model.to(device)
    confusion_matrix_total = torch.zeros((num_classes, num_classes), dtype=torch.int64)
    
    with torch.no_grad():
        for i in range(len(dataset)):
            image, annotation = dataset[i]
            image = image.to(device)
            annotation = annotation.to(device)
            output = model.inference(image)
            predicted = output.argmax(1).squeeze(0)
            conf_mat = compute_confusion_matrix(predicted.cpu(), annotation.cpu(), num_classes)
            confusion_matrix_total += conf_mat
    
    miou, iou_per_class = compute_miou(confusion_matrix_total)
    mpa, pa_per_class = compute_mean_pixel_accuracy(confusion_matrix_total)
    fwiou = compute_fwiou(confusion_matrix_total)
    dice_mean, dice_per_class = compute_dice_coefficient(confusion_matrix_total)
    
    metrics = {
        "mIoU": miou,
        "mPA": mpa,
        "FWIoU": fwiou,
        "Dice_Mean": dice_mean,
        "IoU_per_class": iou_per_class.tolist(),
        "PA_per_class": pa_per_class.tolist(),
        "Dice_per_class": dice_per_class.tolist()
    }
    return metrics



In [11]:
NUM_CLASSES = 20
evaluation_results = {}

# Example loop over models in 'sorted_hyperparameter_data'
for model_name, runs_dict in sorted_hyperparameter_data.items():
    # # (Optional) skip certain models
    # if model_name == "fcn_resnet101":
    #     continue

    # Evaluate only the top run ("0") as an example
    best_run_info = runs_dict["0"]  # the best run
    checkpoint_path = best_run_info.get("path", None)
    if not checkpoint_path:
        print(f"\n[WARNING] No checkpoint path found for {model_name} run '0'. Skipping.")
        continue
    
    print(f"\nEvaluating model: {model_name}")
    print(f"Checkpoint path:  {checkpoint_path}")

    try:
        model_loaded = load_checkpointed_model_ray(model_name, checkpoint_path)
    except FileNotFoundError as e:
        print("[WARNING] The exact error was:", e)  # Print the actual error message
        break


    # Evaluate the model on the test set
    metrics = evaluate_model(model_loaded, test_dataset, NUM_CLASSES)
    evaluation_results[model_name] = metrics
    print(f"Results for {model_name}:")
    for k, v in metrics.items():
        if isinstance(v, list):
            print(f"  {k}: [list of length {len(v)}]")
        else:
            print(f"  {k}: {v:.4f}" if isinstance(v, float) else f"  {k}: {v}")





Evaluating model: lraspp_mobilenet_v3_large
Checkpoint path:  /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG/lraspp_mobilenet_v3_large/Hyperparameter_Tuning_Deeplabv3/train_hyper_f045fc27_12_auto_cast=True,batch_size=8,learning_rate=0.0008,max_epochs=100,weight_decay=0.0000_2025-01-19_15-06-55/checkpoint_000099/checkpoint.pkl
DEBUG => In load_checkpointed_model_ray
        model_name: lraspp_mobilenet_v3_large
        checkpoint_path: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/HyperparameterLOG/lraspp_mobilenet_v3_large/Hyperparameter_Tuning_Deeplabv3/train_hyper_f045fc27_12_auto_cast=True,batch_size=8,learning_rate=0.0008,max_epochs=100,weight_decay=0.0000_2025-01-19_15-06-55/checkpoint_000099/checkpoint.pkl
        file exists? True
Using CUDA GPU
Model loaded: lraspp_mobilenet_v3_large | Device: cuda 
Skipping local .pth load logic (likely using external Ray checkpoint).
Results for lraspp_mobilenet_v3_large:
  mIoU: 0

In [12]:
import json

save_path = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/evaluation_hyperparameter_cityscapes.json"

# 2) Ensure the directory exists. If not, create it.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# 3) Write the evaluation_results dictionary to JSON.
with open(save_path, "w") as f:
    json.dump(evaluation_results, f, indent=4)

print(f"Evaluation results saved to: {save_path}")

Evaluation results saved to: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/evaluation_hyperparameter_cityscapes.json


In [13]:
best_checkpoints = {}
for model_name, runs_dict in sorted_hyperparameter_data.items():
    # The best run is at index "0"
    best_run_info = runs_dict["0"]
    best_checkpoint_path = best_run_info.get("path", None)
    
    # Store it in our dictionary
    best_checkpoints[model_name] = best_checkpoint_path

# 2) Specify where you want to save the JSON
save_path = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/best_checkpoints.json"

# 3) Ensure the directory exists
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# 4) Write the dictionary to JSON
with open(save_path, "w", encoding="utf-8") as f:
    json.dump(best_checkpoints, f, indent=4)

print(f"Best checkpoint paths saved to: {save_path}")


Best checkpoint paths saved to: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/best_checkpoints.json


In [14]:
import os
import shutil
import json

# Definiere das Zielverzeichnis
target_dir = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_city"
os.makedirs(target_dir, exist_ok=True)

# Lade die JSON-Datei mit den besten Checkpoints
best_checkpoints_path = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/best_checkpoints.json"

with open(best_checkpoints_path, "r", encoding="utf-8") as f:
    best_checkpoints = json.load(f)

# Kopiere die besten Checkpoints in das Zielverzeichnis
for model_name, checkpoint_path in best_checkpoints.items():
    if checkpoint_path and os.path.isfile(checkpoint_path):
        # Bestimme den Zielpfad
        dest_checkpoint = os.path.join(target_dir, f"{model_name}_best_checkpoint.pkl")

        # Kopiere die Datei
        shutil.copy2(checkpoint_path, dest_checkpoint)
        print(f"✅ Checkpoint für {model_name} gesichert: {dest_checkpoint}")
    else:
        print(f"⚠️ Kein gültiger Checkpoint für {model_name} gefunden!")

print("✅ Alle Checkpoints wurden gesichert!")


✅ Checkpoint für lraspp_mobilenet_v3_large gesichert: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_city/lraspp_mobilenet_v3_large_best_checkpoint.pkl
✅ Checkpoint für fcn_resnet101 gesichert: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_city/fcn_resnet101_best_checkpoint.pkl
✅ Checkpoint für deeplabv3_resnet50 gesichert: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_city/deeplabv3_resnet50_best_checkpoint.pkl
✅ Checkpoint für fcn_resnet50 gesichert: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_city/fcn_resnet50_best_checkpoint.pkl
✅ Checkpoint für deeplabv3_mobilenet_v3_large gesichert: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_city/deeplabv3_mobilenet_v3_large_best_checkpoint.pkl
✅ Checkpoint für deeplabv3_resnet101 gesichert: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_