In [1]:
import pandas as pd
import os
import json

In [2]:
main_folder_hyper = "HyperparameterLOG"

# Collect the direct children of the log root that are directories
# (one per model), then report them one per line.
subfolder_names = [
    entry
    for entry in os.listdir(main_folder_hyper)
    if os.path.isdir(os.path.join(main_folder_hyper, entry))
]
for folder_name in subfolder_names:
    print(f"Subfolder name: {folder_name}")

Subfolder name: lraspp_mobilenet_v3_large
Subfolder name: fcn_resnet101
Subfolder name: deeplabv3_resnet50
Subfolder name: fcn_resnet50
Subfolder name: deeplabv3_mobilenet_v3_large
Subfolder name: deeplabv3_resnet101


In [None]:
def _read_json_lines(path: str) -> list:
    """Parse a file holding one JSON document per line, ignoring blank lines."""
    records = []
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if stripped:
                records.append(json.loads(stripped))
    return records


def _read_progress_records(path: str) -> list:
    """Read a progress CSV into a list of row dicts; a file without data yields []."""
    try:
        progress_df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # pandas raises this when the file has no header row at all (e.g. zero
        # bytes). Treat it as "no progress rows" rather than aborting the load
        # of every other run.
        return []
    return progress_df.to_dict(orient="records")


def load_hyperparameter_runs_as_dict(
    base_folder: str,
    tuning_subfolder: str = "Hyperparameter_Tuning_Deeplabv3",
    run_prefix: str = "train_hyper_",
) -> dict:
    """
    Load hyperparameter-tuning runs from disk and group them by model.

    Expected directory layout::

        <base_folder>/<model>/<tuning_subfolder>/<run_prefix>*/
            params.json    one JSON object with the run's hyperparameters
            progress.csv   CSV table, one row per reported step
            result.json    one JSON document per line

    Entries of ``base_folder`` without a ``tuning_subfolder`` directory are
    skipped with a message. Run folders that lack any of the three files are
    skipped with a message. Folders not starting with ``run_prefix`` are
    ignored silently.

    Args:
        base_folder: Root directory containing one subfolder per model.
        tuning_subfolder: Name of the directory inside each model folder that
            holds the run folders.
        run_prefix: Only directories whose name starts with this prefix are
            treated as runs.

    Returns:
        ``{model_folder: {run_folder: run_dict}}`` where ``run_dict`` contains
        ``"id"`` (the run folder name), every key of ``params.json``,
        ``"result"`` (list of parsed ``result.json`` lines) and ``"progress"``
        (list of row dicts from ``progress.csv``; empty if the file has no
        data). A ``params.json`` key named ``"id"`` replaces the folder name;
        ``"result"`` and ``"progress"`` always hold the loaded file contents.

    Raises:
        OSError: If ``base_folder`` cannot be listed.
        json.JSONDecodeError: If ``params.json`` or a line of ``result.json``
            is not valid JSON.
    """
    runs_data = {}

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # insertion order of the returned dicts reproducible across machines.
    for model_folder in sorted(os.listdir(base_folder)):
        model_path = os.path.join(base_folder, model_folder, tuning_subfolder)

        if not os.path.isdir(model_path):
            print(f"Skipping invalid model path: {model_path}")
            continue

        model_runs = runs_data.setdefault(model_folder, {})

        for train_folder in sorted(os.listdir(model_path)):
            train_folder_path = os.path.join(model_path, train_folder)

            # Only directories carrying the run prefix are runs
            if not os.path.isdir(train_folder_path):
                continue
            if not train_folder.startswith(run_prefix):
                continue

            params_file = os.path.join(train_folder_path, "params.json")
            progress_file = os.path.join(train_folder_path, "progress.csv")
            result_file = os.path.join(train_folder_path, "result.json")

            # A run is only usable when all three files are present
            required_files = (params_file, progress_file, result_file)
            if not all(os.path.isfile(path) for path in required_files):
                print(f"Skipping folder '{train_folder}' due to missing files.")
                continue

            with open(params_file, "r", encoding="utf-8") as f:
                params_dict = json.load(f)

            model_runs[train_folder] = {
                "id": train_folder,  # name of the run folder
                **params_dict,  # all hyperparameters from params.json
                "result": _read_json_lines(result_file),
                "progress": _read_progress_records(progress_file),
            }

    return runs_data


# Usage
main_folder_hyper = "HyperparameterLOG"

# Load every run below the log root, grouped by model folder name
hyperparameter_data = load_hyperparameter_runs_as_dict(base_folder=main_folder_hyper)

# print(len(hyperparameter_data.keys()))
# print(hyperparameter_data["deeplabv3_mobilenet_v3_large"].keys())


Skipping folder 'train_hyper_659805fc_2_auto_cast=True,batch_size=8,learning_rate=0.0040,max_epochs=100,weight_decay=0.0076_2025-01-21_10-27-28' due to missing files.
Skipping folder 'train_hyper_457ebcd4_2_auto_cast=True,batch_size=8,learning_rate=0.0040,max_epochs=100,weight_decay=0.0076_2025-01-16_10-24-54' due to missing files.
6
dict_keys(['train_hyper_f103f0c0_46_auto_cast=True,batch_size=8,learning_rate=0.0003,max_epochs=100,weight_decay=0.0000_2025-01-19_06-39-52', 'train_hyper_15b7d5e1_16_auto_cast=True,batch_size=8,learning_rate=0.0007,max_epochs=100,weight_decay=0.0000_2025-01-18_17-58-10', 'train_hyper_9b3b8a48_11_auto_cast=False,batch_size=16,learning_rate=0.0021,max_epochs=100,weight_decay=0.0007_2025-01-18_14-49-09', 'train_hyper_433a06cf_4_auto_cast=False,batch_size=16,learning_rate=0.0000,max_epochs=100,weight_decay=0.0014_2025-01-18_12-04-12', 'train_hyper_f1121c9a_41_auto_cast=False,batch_size=8,learning_rate=0.0009,max_epochs=100,weight_decay=0.0085_2025-01-19_04-14

In [4]:
def _best_val_acc(progress_records) -> float:
    """
    Return the highest usable ``val_acc`` among the progress rows.

    Rows without a ``val_acc`` key and rows whose ``val_acc`` is missing
    (None/NaN) are ignored. The progress rows are produced by ``pd.read_csv``,
    which turns blank cells into NaN, and NaN makes ``max()`` and ``sorted()``
    order-dependent. Returns ``-inf`` when no usable value exists, including
    for an empty or missing progress list.
    """
    usable_values = [
        record["val_acc"]
        for record in (progress_records or [])
        if pd.notna(record.get("val_acc"))
    ]
    return max(usable_values, default=float("-inf"))


# Per model: runs ranked by their best validation accuracy (best first)
sorted_hyperparameter_data = {}

for model_name, runs in hyperparameter_data.items():
    # Pair every run with its best validation accuracy
    scored_runs = [
        (run_data, _best_val_acc(run_data.get("progress")))
        for run_data in runs.values()
    ]

    # Stable descending sort: ties keep their original relative order
    scored_runs.sort(key=lambda scored: scored[1], reverse=True)

    # Re-key the runs by rank ("0" is the best run) and attach the score
    sorted_hyperparameter_data[model_name] = {
        str(rank): {**run_data, "max_validation_accuracy": best_val_acc}
        for rank, (run_data, best_val_acc) in enumerate(scored_runs)
    }

# # Überprüfung der Struktur
# print(sorted_hyperparameter_data["deeplabv3_mobilenet_v3_large"]["0"].keys())

# # Überprüfung des Typs von 'result'
# print(type(sorted_hyperparameter_data["deeplabv3_mobilenet_v3_large"]["0"]["result"]))

# # Überprüfung der max_validation_accuracy
# print(sorted_hyperparameter_data["deeplabv3_mobilenet_v3_large"]["0"]["max_validation_accuracy"])

# print(sorted_hyperparameter_data.keys())

# Print the best validation accuracy of every run of one model, best first.
# The same model name drives both the loop length and the lookup; taking the
# length from a different model raises KeyError or truncates the listing
# whenever the two models have different run counts.
inspected_model = "deeplabv3_mobilenet_v3_large"
inspected_runs = sorted_hyperparameter_data[inspected_model]

for rank in range(len(inspected_runs)):
    print(inspected_runs[str(rank)]["max_validation_accuracy"])


89.87057476799626
89.85961711400111
89.78379219701162
89.75867220424844
89.73367060789238
89.72295972393682
89.33979103699289
89.2675392969648
89.18345589800349
89.1674138499851
89.15679475650249
89.00109616448853
88.9146601357967
88.81714273551573
87.936804254821
87.79769113703121
87.53370506257716
87.37950105891618
87.28338645651526
87.05404263867013
86.82957502341323
85.74887190839044
84.99778306053382
84.97365214337405
84.6110219073262
84.43229987016304
84.37916116810693
84.37767389638584
84.35738554127113
84.30558777829808
84.22380645779235
84.01644113703121
83.81390745391852
83.64821500787535
83.60331882423056
83.48727172108467
83.00345810629602
82.88424755757524
82.43775275637478
82.25089728513048
81.12496208654379
81.11013459942106
80.35634258865097
79.93279740113235
79.7027624941467
79.46586857732748
65.58378474735005
55.90803924907412
42.31113578072453
41.13738838810608
