In [1]:
import pandas as pd
from pathlib import Path

# Directory with one exported CSV per experiment/metric combination.
metrics_dir = Path("experiments/Metrics")
assert metrics_dir.exists(), f"Metrics directory not found: {metrics_dir.resolve()}"
# Directory with one metadata CSV per experiment.
metas_dir = Path("experiments/Meta")
assert metas_dir.exists(), f"Meta directory not found: {metas_dir.resolve()}"

In [2]:
import re
from typing import Literal



# Metrics for which a CSV may exist in metrics_dir.
metric_names = [
    "LPIPS", "PSNR", "SSIM",
    "Ref_LPIPS", "Ref_PSNR", "Ref_SSIM",
    "FID_0", "FID_4", "FID_8",
]

# Every experiment handled by this notebook.
experiment_names = [
    "Finetune_Lora",
    "Neurad_Base_Diffusion",
    "Neurad_Base_Diffusion_Sweep",
    "Neurad_Checkpoint_Training",
    "Neurad_Baseline",
    "Neurad_Finetuned_Diffusion",
]

# Rank values that appear in the Finetune_Lora file names ("-Rank<rank>-").
ranks = ["4", "128"]

# Experiment groups; clean_metrics selects the column-name pattern based on
# membership in neurad_experiments.
neurad_experiments = {
    "Neurad_Base_Diffusion_Sweep",
    "Neurad_Base_Diffusion",
    "Neurad_Checkpoint_Training",
    "Neurad_Baseline",
    "Neurad_Finetuned_Diffusion",
}
finetune_experiments = {"Finetune_Lora"}


def get_all_metrics_infos(experiment_name):
    """List the metric CSV files that exist for an experiment.

    Experiments in ``finetune_experiments`` store one file per rank and metric
    (``<experiment>-Rank<rank>-<metric>.csv``); all other experiments store one
    file per metric (``<experiment>-<metric>.csv``).

    Returns:
        A list of dicts with the keys ``"path"`` and ``"metric"``; entries of
        finetune experiments additionally carry ``"rank"``. Only files that
        exist on disk are included.
    """
    if experiment_name in finetune_experiments:
        candidates = [
            {
                "path": metrics_dir / f"{experiment_name}-Rank{rank}-{metric}.csv",
                "rank": rank,
                "metric": metric,
            }
            for rank in ranks
            for metric in metric_names
        ]
    else:
        candidates = [
            {"path": metrics_dir / f"{experiment_name}-{metric}.csv", "metric": metric}
            for metric in metric_names
        ]

    return [info for info in candidates if info["path"].exists()]

    

def get_metric_by_name(experiment_name, metric, rank=None):
    """Read the CSV of a single metric for an experiment.

    Args:
        experiment_name: Name of the experiment.
        metric: Metric name used in the file name.
        rank: Only relevant for experiments in ``finetune_experiments``. When
            given, exactly that rank's file is read. When ``None`` (default),
            the file of the FIRST rank in ``ranks`` that exists is read, so
            only one rank is returned even if several files exist.

    Returns:
        The CSV content as a DataFrame, or ``None`` if no matching file exists.
    """
    if experiment_name in finetune_experiments:
        candidate_ranks = ranks if rank is None else [rank]
        for candidate in candidate_ranks:
            path = metrics_dir / f"{experiment_name}-Rank{candidate}-{metric}.csv"
            if path.exists():
                return pd.read_csv(path)
        if rank is not None:
            # An explicitly requested rank has no file; do not fall back.
            return None

    # Un-ranked file name; also the fallback when no ranked file was found.
    path = metrics_dir / f"{experiment_name}-{metric}.csv"
    if not path.exists():
        return None
    return pd.read_csv(path)


# Column-name patterns used to extract the run id from raw metric columns.
# Pattern 1: "<digits>_<digits>_<run>..." -- the run id is the third number.
parse_run_pattern1 = re.compile(r"\d+_\d+_(?P<run>\d+).*")
# Pattern 2: "<run>_<digits>_<digits>..." -- the run id is the first number.
parse_run_pattern2 = re.compile(r"(?P<run>\d+)_\d+_\d+.*")

def filter_column_name(col_name, pattern, value_name = "run"):
    """Extract a named group from a column name.

    Args:
        col_name: Column name to inspect.
        pattern: Compiled regular expression, matched from the start of
            ``col_name``.
        value_name: Name of the group whose value is returned.

    Returns:
        The value of the named group, or ``col_name`` unchanged when the
        pattern does not match.

    Raises:
        KeyError: If the pattern matches but has no group called ``value_name``.
    """
    pattern_match = pattern.match(col_name)
    if not pattern_match:
        return col_name

    # Reuse the match object instead of running the regex a second time.
    return pattern_match.groupdict()[value_name]

def parse_run_from_column_name(col_name, pattern = parse_run_pattern1):
    """Turn a raw metric column name into ``"run_<id>"``.

    The pattern is matched from the start of ``col_name`` and must define a
    group named ``run``. Column names that do not match are returned unchanged.
    """
    found = pattern.match(col_name)
    if found:
        return "run_" + found.group("run")
    return col_name

def clean_metrics(experiment_name, metrics):
    """Normalise a raw metric frame.

    Steps:
      1. Drop every column whose name ends in ``__MIN`` or ``__MAX``.
      2. Forward-fill missing values down each column.
      3. Rename run columns to ``run_<id>``, using ``parse_run_pattern1`` for
         experiments in ``neurad_experiments`` and ``parse_run_pattern2``
         otherwise. Columns that do not match keep their name.

    Returns a new frame.
    """
    envelope_suffixes = ("__MIN", "__MAX")
    envelope_columns = [
        name for name in metrics.columns if name.endswith(envelope_suffixes)
    ]
    cleaned = metrics.drop(envelope_columns, axis=1).ffill(axis=0)

    pattern = (
        parse_run_pattern1
        if experiment_name in neurad_experiments
        else parse_run_pattern2
    )
    cleaned.columns = [
        parse_run_from_column_name(name, pattern) for name in cleaned.columns
    ]
    return cleaned

# Example: load and clean one metric of one experiment.
# ex_name, ex_metric and metrics are reused by later cells.
ex_name = "Neurad_Finetuned_Diffusion"
ex_metric = "LPIPS"

metrics = get_metric_by_name(ex_name, ex_metric)
metrics = clean_metrics(ex_name, metrics)
metrics

Unnamed: 0,Step,run_12146255,run_12146254,run_12146251,run_12146253,run_12146252,run_12146250,run_12146249,run_12146248,run_12146247,run_12146246,run_12146245,run_12146244
0,5000,0.164532,0.229889,0.221506,0.184695,0.156776,0.189177,0.1665,0.222872,0.198615,0.195498,0.229031,0.200974
1,10000,0.156522,0.210406,0.212082,0.181136,0.159866,0.177039,0.364607,0.211682,0.180468,0.353962,0.212767,0.179689
2,15000,0.14407,0.201924,0.200672,0.172626,0.143719,0.168676,0.554299,0.199954,0.167611,0.353962,0.200343,0.170008
3,20000,0.165186,0.195418,0.197309,0.166059,0.16612,0.164382,0.572561,0.197184,0.1659,0.353962,0.195899,0.166445
4,25000,0.370172,0.193881,0.192417,0.161945,0.191269,0.162209,0.637893,0.19015,0.162846,0.353962,0.190852,0.161966
5,30000,0.377656,0.191825,0.483814,0.163782,0.191269,0.162221,0.594238,0.192888,0.159024,0.353962,0.190852,0.159033
6,35000,0.368122,0.18962,0.483814,0.164174,0.191269,0.157841,0.594238,0.192694,0.162861,0.353962,0.188724,0.157851
7,40000,0.368122,0.188671,0.483814,0.163171,0.191269,0.15768,0.594238,0.186542,0.404629,0.353962,0.188499,0.158182


In [3]:
# Matches the "run_<id>" column names produced by parse_run_from_column_name.
run_column_pattern = re.compile(r"run_(?P<run>\d+)")

def select_columns_matching_pattern(df, pattern):
    """Return the columns of ``df`` whose name matches the regex ``pattern``.

    ``pattern`` is handed to ``DataFrame.filter(regex=...)`` as is.
    """
    matching_columns = df.filter(regex=pattern)
    return matching_columns

In [4]:
# Direction in which each metric improves: "max" means higher is better,
# "min" means lower is better. find_best_step uses this to choose between
# idxmax and idxmin.
improve_direction = {
    "SSIM": "max",
    "PSNR": "max",
    "LPIPS": "min",
    "Ref_SSIM": "max",
    "Ref_PSNR": "max",
    "Ref_LPIPS": "min",
    "FID_0": "min",
    "FID_4": "min",
    "FID_8": "min"
}

def find_best_step(metrics, metric_name):
    """Find, for every run column, the row index of its best value.

    "Best" is the maximum or minimum, as given by
    ``improve_direction[metric_name]``.

    Returns:
        A Series indexed by run column name holding index labels of ``metrics``.

    Raises:
        KeyError: If ``metric_name`` is not a key of ``improve_direction``.
    """
    run_values = select_columns_matching_pattern(metrics, run_column_pattern)
    higher_is_better = improve_direction[metric_name] == "max"
    return run_values.idxmax() if higher_is_better else run_values.idxmin()

# Example: per-run index of the best ex_metric value in the example frame.
best_steps = find_best_step(metrics, ex_metric)
best_steps

run_12146255    2
run_12146254    7
run_12146251    4
run_12146253    4
run_12146252    2
run_12146250    7
run_12146249    0
run_12146248    7
run_12146247    5
run_12146246    0
run_12146245    7
run_12146244    6
dtype: int64

In [5]:
def get_best_values(metrics, best_steps):
    """Read each run's value at its best step.

    Args:
        metrics: Frame with ``run_<id>`` columns; other columns are ignored.
        best_steps: Series of index labels of ``metrics``. It is consumed BY
            POSITION: entry ``i`` belongs to the ``i``-th run column of
            ``metrics``, so the caller must keep both in the same run order.

    Returns:
        Dict mapping run column name to the value (as ``float``) at that run's
        best step.

    Raises:
        TypeError, ValueError: If a selected value cannot be converted to
            float; the offending value is printed first.
    """
    subset = select_columns_matching_pattern(metrics, run_column_pattern)

    # Separate name so the ``metrics`` parameter is not shadowed.
    best_values = {}

    for i_col, run in enumerate(subset.columns):
        best_step = best_steps.iloc[i_col]
        best_value = subset.loc[best_step, run]

        try:
            best_values[str(run)] = float(best_value)
        except (TypeError, ValueError) as e:
            # Only conversion failures are reported; KeyboardInterrupt and
            # SystemExit are no longer intercepted.
            print(f"Error: {e}")
            print("Best value", best_value)
            raise

    return best_values


# Example: each run's value at its best step.
get_best_values(metrics, best_steps)


{'run_12146255': 0.1440704315900802,
 'run_12146254': 0.1886714845895767,
 'run_12146251': 0.192417487502098,
 'run_12146253': 0.1619447767734527,
 'run_12146252': 0.1437189579010009,
 'run_12146250': 0.157680481672287,
 'run_12146249': 0.1665003001689911,
 'run_12146248': 0.1865417063236236,
 'run_12146247': 0.1590242981910705,
 'run_12146246': 0.1954975128173828,
 'run_12146245': 0.1884991377592086,
 'run_12146244': 0.1578506976366043}

In [6]:
def get_running_average(metrics: pd.DataFrame, window_size: int):
    """Average every column over a sliding window of consecutive rows.

    Row ``i`` of the result is the mean of rows ``i .. i + window_size - 1`` of
    ``metrics``. Every column is averaged, including non-run columns such as
    ``Step``. The result has ``len(metrics) - window_size + 1`` rows and is
    empty (with the original columns) when the window is longer than the frame.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        # A zero or negative window would silently yield NaN or garbage rows.
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    n_windows = len(metrics) - window_size + 1
    window_means = [
        metrics.iloc[start:start + window_size].mean()
        for start in range(n_windows)
    ]

    return pd.DataFrame(window_means, columns=metrics.columns)

# Example: running average over windows of 3 rows of the example frame.
get_running_average(metrics, 3)

Unnamed: 0,Step,run_12146255,run_12146254,run_12146251,run_12146253,run_12146252,run_12146250,run_12146249,run_12146248,run_12146247,run_12146246,run_12146245,run_12146244
0,10000.0,0.155042,0.214073,0.21142,0.179486,0.153453,0.178297,0.361802,0.211503,0.182232,0.301141,0.214047,0.183557
1,15000.0,0.155259,0.202582,0.203354,0.173273,0.156568,0.170032,0.497156,0.20294,0.171326,0.353962,0.203003,0.172047
2,20000.0,0.226476,0.197074,0.196799,0.166877,0.167036,0.165089,0.588251,0.195763,0.165452,0.353962,0.195698,0.166139
3,25000.0,0.304338,0.193708,0.29118,0.163928,0.182886,0.162937,0.601564,0.193408,0.16259,0.353962,0.192534,0.162481
4,30000.0,0.371983,0.191775,0.386682,0.1633,0.191269,0.160757,0.608789,0.191911,0.161577,0.353962,0.190143,0.159617
5,35000.0,0.3713,0.190039,0.483814,0.163709,0.191269,0.159248,0.594238,0.190708,0.242171,0.353962,0.189358,0.158355


In [7]:
def get_best_metrics(experiment_name, reference_metric: str, neurad_window_size: int = 1, finetune_window_size: int = 1):
    """Collect, per run, the value of every metric at the run's best step.

    The best step of a run is the row at which ``reference_metric`` is best
    (after cleaning and running-average smoothing). All other metrics of that
    run are read at the same row.

    Best steps are matched to metric columns BY RUN NAME. Metric files may list
    runs in a different order or contain different runs (for example the
    per-rank files of a finetune experiment), so positional matching would
    read values at another run's best step. Runs for which the reference
    metric is not available are skipped.

    Args:
        experiment_name: Name of the experiment.
        reference_metric: Metric that determines the best step.
        neurad_window_size: Running-average window for experiments in
            ``neurad_experiments``.
        finetune_window_size: Running-average window for all other experiments.

    Returns:
        DataFrame with an integer ``Run`` column and one column per metric.

    Raises:
        FileNotFoundError: If no file for ``reference_metric`` exists.
    """
    window_size = neurad_window_size if experiment_name in neurad_experiments else finetune_window_size

    def _load_smoothed(path):
        # Read one metric file, clean it and apply the running average.
        cleaned = clean_metrics(experiment_name, pd.read_csv(path))
        return get_running_average(cleaned, window_size=window_size)

    # Load every available file once: (metric name, smoothed frame).
    loaded = [
        (info["metric"], _load_smoothed(info["path"]))
        for info in get_all_metrics_infos(experiment_name)
    ]

    # Best step per run, gathered from all files of the reference metric.
    reference_steps = [
        find_best_step(metric_frame, reference_metric)
        for metric_name, metric_frame in loaded
        if metric_name == reference_metric
    ]
    if not reference_steps:
        raise FileNotFoundError(
            f"No '{reference_metric}' metrics file found for experiment '{experiment_name}'"
        )
    best_steps = pd.concat(reference_steps)
    # Should a run occur in several reference files, keep its first entry.
    best_steps = best_steps[~best_steps.index.duplicated()]

    merged_metrics = {}

    for metric_name, metric_frame in loaded:
        run_columns = select_columns_matching_pattern(metric_frame, run_column_pattern).columns
        known_runs = [run for run in run_columns if run in best_steps.index]

        # Pass columns and steps in the same run order, so get_best_values
        # (which pairs them by position) reads each run at its own best step.
        values = get_best_values(metric_frame[known_runs], best_steps.loc[known_runs])

        for run, value in values.items():
            merged_metrics.setdefault(run, {})[metric_name] = value

    result = pd.DataFrame.from_dict(merged_metrics, orient="index")
    result = result.reset_index(drop=False).rename(columns={"index": "Run"})
    # "run_<id>" -> integer id, so the frame can be merged with the metas.
    result["Run"] = result["Run"].apply(lambda x: int(x.split("_")[1]))

    return result


# Example: best metrics per run of the Finetune_Lora experiment, with LPIPS
# as the reference metric and the default window sizes.
ex_experiment_name = "Finetune_Lora"
ex_metric = "LPIPS"
best_metrics = get_best_metrics(ex_experiment_name, ex_metric)
best_metrics

Unnamed: 0,Run,LPIPS,PSNR,SSIM,Ref_LPIPS,Ref_PSNR,Ref_SSIM
0,12139546,0.126905,23.91737,0.726831,0.651216,17.515772,0.626276
1,12139541,0.11273,24.471294,0.752596,0.639482,17.801876,0.635347
2,12139540,0.097804,25.030251,0.777955,0.570712,18.130219,0.634849
3,12139536,0.161586,24.860737,0.684128,0.71415,20.247663,0.633088
4,12139535,0.150293,25.359369,0.7071,0.682804,20.393532,0.643994
5,12139532,0.132117,25.839212,0.731585,0.583201,20.574081,0.644031
6,12139528,0.085118,26.40057,0.843583,0.74867,19.491974,0.488186
7,12139525,0.076501,26.971552,0.856741,0.707513,19.122231,0.48325
8,12139524,0.067387,27.579357,0.871178,0.649067,18.767595,0.469647
9,12139520,0.106738,23.471973,0.76045,0.700647,16.885237,0.477032


In [8]:
def get_metas_for_experiment(experiment_name):
    """Load the metadata CSV of an experiment from ``metas_dir``.

    If the file has a ``Scene`` column, its values are formatted as
    zero-padded three-digit strings.

    Returns:
        The metadata frame, or ``None`` if ``<experiment_name>.csv`` does not
        exist.
    """
    meta_path = metas_dir / f"{experiment_name}.csv"
    if meta_path.exists():
        metas = pd.read_csv(meta_path)
        if "Scene" in metas.columns:
            metas["Scene"] = metas["Scene"].apply(lambda x: f"{x:03d}")
        return metas
    return None

# Print the metadata columns available for each experiment.
for experiment_name in experiment_names:
    metas = get_metas_for_experiment(experiment_name)
    if metas is None:
        print(f"No metas for {experiment_name}")
        continue
    print(experiment_name + " - " + str(list(metas.columns)))

# Example: metadata of the example experiment.
ex_metas = get_metas_for_experiment(ex_experiment_name)
ex_metas

Finetune_Lora - ['Run', 'Scene', 'Rank', 'Noise Strength', 'Controlnet']
Neurad_Base_Diffusion - ['Run', 'Scene']
Neurad_Base_Diffusion_Sweep - ['Run', 'Augment Loss Mult', 'Augment Phase Step', 'Noise Start Phase Step', 'Noise Strength']
Neurad_Checkpoint_Training - ['Run', 'Scene']
Neurad_Baseline - ['Run', 'Scene']
Neurad_Finetuned_Diffusion - ['Run', 'Model', 'Scene']


Unnamed: 0,Run,Scene,Rank,Noise Strength,Controlnet
0,12139481,53,128,0.3,Yes
1,12139557,53,128,0.2,Yes
2,12139552,53,128,0.1,Yes
3,12139539,28,128,0.3,Yes
4,12139537,28,128,0.1,Yes
5,12139531,16,128,0.3,Yes
6,12139530,16,128,0.2,Yes
7,12139529,16,128,0.1,Yes
8,12139523,11,128,0.3,Yes
9,12139522,11,128,0.2,Yes


In [9]:
def combine_metrics_and_metas(experiment_name, metrics, metas):
    """Join per-run metadata with per-run metrics on the ``Run`` column.

    This is an inner join: only runs present in both frames are kept, in the
    order of ``metas``, with the metadata columns first.

    Args:
        experiment_name: Not used by the join; kept for a uniform signature.
        metrics: Frame with a ``Run`` column and metric columns.
        metas: Frame with a ``Run`` column and metadata columns, or ``None``
            when the experiment has no metadata file.

    Returns:
        The merged frame, or a copy of ``metrics`` when ``metas`` is ``None``.
    """
    if metas is None:
        # Without metadata there is nothing to join; return the metrics alone
        # instead of letting pd.merge fail with an opaque TypeError.
        return metrics.copy()

    return pd.merge(metas, metrics, on="Run")

# Example: attach the metadata columns to the best metrics of the example experiment.
ex_combined = combine_metrics_and_metas(ex_experiment_name, best_metrics, ex_metas)
ex_combined

Unnamed: 0,Run,Scene,Rank,Noise Strength,Controlnet,LPIPS,PSNR,SSIM,Ref_LPIPS,Ref_PSNR,Ref_SSIM
0,12139481,53,128,0.3,Yes,0.109444,24.853533,0.767564,0.671253,17.365459,0.633548
1,12139557,53,128,0.2,Yes,0.098762,25.195759,0.78399,0.662765,17.43428,0.635507
2,12139552,53,128,0.1,Yes,0.087015,25.491392,0.795274,0.611114,17.939899,0.640921
3,12139539,28,128,0.3,Yes,0.149095,25.755817,0.722999,0.783546,19.831226,0.648649
4,12139537,28,128,0.1,Yes,0.120281,26.351002,0.754334,0.626471,20.467089,0.6513
5,12139531,16,128,0.3,Yes,0.072403,27.304478,0.865597,0.788669,19.489737,0.489755
6,12139530,16,128,0.2,Yes,0.062232,27.713093,0.87499,0.809753,19.494808,0.491727
7,12139529,16,128,0.1,Yes,0.058544,28.12764,0.882384,0.704644,19.134699,0.480142
8,12139523,11,128,0.3,Yes,0.092616,24.511366,0.79772,0.786416,16.566675,0.499723
9,12139522,11,128,0.2,Yes,0.083238,24.878605,0.809434,0.734226,16.903486,0.487635


In [10]:
def load_experiment(experiment_name, ref_metric: str = "LPIPS"):
    """Build the combined metadata + best-metrics table of one experiment.

    The best metrics are selected with ``ref_metric`` as the reference metric
    and the default window sizes of ``get_best_metrics``.
    """
    return combine_metrics_and_metas(
        experiment_name,
        get_best_metrics(experiment_name, ref_metric),
        get_metas_for_experiment(experiment_name),
    )

# Load every experiment into a dict keyed by experiment name.
# NOTE: the "Success" line is only reached if load_experiment did not raise.
experiments = {}
for experiment_name in experiment_names:
    print(f"Loading {experiment_name}")
    loaded_experiment = load_experiment(experiment_name)
    print(f"Success: {isinstance(loaded_experiment, pd.DataFrame)}")

    experiments[experiment_name] = loaded_experiment

Loading Finetune_Lora
Success: True
Loading Neurad_Base_Diffusion
Success: True
Loading Neurad_Base_Diffusion_Sweep
Success: True
Loading Neurad_Checkpoint_Training
Success: True
Loading Neurad_Baseline
Success: True
Loading Neurad_Finetuned_Diffusion
Success: True


In [11]:
# Write each combined experiment table to its own CSV file.
experiment_dir = Path("experiments/Experiment")
# parents=True so a missing parent directory does not abort the export.
experiment_dir.mkdir(parents=True, exist_ok=True)

for experiment_name, experiment in experiments.items():
    experiment.to_csv(experiment_dir / f"{experiment_name}.csv", index=False)

In [12]:
# Example: load a single experiment with the default reference metric (LPIPS).
ex_experiment = load_experiment("Neurad_Finetuned_Diffusion")
ex_experiment

Unnamed: 0,Run,Model,Scene,LPIPS,PSNR,SSIM,FID_0,FID_4,FID_8
0,12146255,un128cn128,16,0.14407,26.845678,0.857691,24.571753,126.704224,212.924774
1,12146254,un128cn128,1,0.188671,26.460135,0.77062,27.273657,135.326385,167.926819
2,12146251,un128,1,0.192417,26.393488,0.767565,29.886395,141.200394,171.432648
3,12146253,un128,53,0.161945,26.79475,0.807522,25.695992,125.256935,306.546906
4,12146252,un128,16,0.143719,26.994045,0.857989,21.985817,121.031403,216.127594
5,12146250,un4cn4,53,0.15768,26.847706,0.807583,25.85463,126.677979,313.162262
6,12146249,un4cn4,16,0.1665,26.42989,0.8453,28.542702,120.682373,212.349533
7,12146248,un4cn4,1,0.186542,26.452417,0.769359,28.561502,136.320328,172.818756
8,12146247,un4,53,0.159024,26.945248,0.809565,26.366367,127.570503,315.077148
9,12146246,un4,16,0.195498,26.037884,0.83281,38.403362,134.535217,216.990784


In [13]:
# Shortcuts to two of the loaded experiment tables.
base_experiment = experiments["Neurad_Base_Diffusion"]
finetuned_experiment = experiments["Neurad_Finetuned_Diffusion"]

In [14]:
#base_experiment[base_experiment["Scene"] == "053"].sort_values("Model")

In [15]:
def format_experiment(experiment_name, experiment: pd.DataFrame, sort_by = ["Scene", "Model"], filter_condition = lambda x: True):
    """Prepare an experiment table for display.

    Drops the ``Run`` column, sorts by ``sort_by`` and keeps only the rows for
    which ``filter_condition(row)`` is true. The input frame is not modified.

    Args:
        experiment_name: Not used; kept for a uniform signature.
        experiment: Table containing a ``Run`` column and the ``sort_by`` columns.
        sort_by: Column names to sort by. The default list is never mutated.
        filter_condition: Called with each row (a Series); rows for which it
            returns a true value are kept.

    Returns:
        The sorted and filtered frame, keeping the original index labels.
    """
    experiment = experiment.drop(columns=["Run"]).sort_values(sort_by)

    if experiment.empty:
        # A row-wise apply on an empty frame does not yield a boolean mask;
        # there is nothing to filter anyway.
        return experiment

    keep_row = experiment.apply(filter_condition, axis=1)
    return experiment[keep_row]

def experiment_to_latex(experiment_name, experiment: pd.DataFrame):
    return experiment.to_latex(index=False,
                  formatters={"name": str.upper},
                  float_format="{:.2f}".format)

In [16]:
# Baseline results, ordered by scene.
format_experiment("Neurad_Baseline", experiments["Neurad_Baseline"], sort_by=["Scene"])

Unnamed: 0,Scene,LPIPS,PSNR,SSIM,FID_0,FID_4,FID_8
4,1,0.187058,26.473171,0.769444,29.101103,138.505737,168.963104
0,11,0.172812,24.843683,0.794248,19.369232,85.830978,102.774582
3,16,0.149292,26.892334,0.855997,25.172848,125.246475,210.984634
2,28,0.236377,26.172037,0.736835,36.219589,156.354034,297.856018
1,53,0.161413,27.012257,0.811754,26.420893,126.777039,302.288757


In [17]:
# Base-diffusion results, ordered by scene.
format_experiment("Neurad_Base_Diffusion", experiments["Neurad_Base_Diffusion"], sort_by=["Scene"])

Unnamed: 0,Scene,LPIPS,PSNR,SSIM,FID_0,FID_4,FID_8
1,1,0.189962,26.445934,0.768263,29.650841,139.832245,169.838226
0,16,0.151992,26.745672,0.852949,24.637953,125.702805,217.759552
2,53,0.159633,26.884113,0.808451,25.547682,126.698143,316.849335


In [18]:
# Example experiment with the default ordering (Scene, then Model).
# ex_name and ex_experiment are defined in earlier cells.
format_experiment(ex_name, ex_experiment)

Unnamed: 0,Model,Scene,LPIPS,PSNR,SSIM,FID_0,FID_4,FID_8
2,un128,1,0.192417,26.393488,0.767565,29.886395,141.200394,171.432648
1,un128cn128,1,0.188671,26.460135,0.77062,27.273657,135.326385,167.926819
10,un4,1,0.188499,26.518902,0.770376,27.9314,138.242523,173.212738
7,un4cn4,1,0.186542,26.452417,0.769359,28.561502,136.320328,172.818756
4,un128,16,0.143719,26.994045,0.857989,21.985817,121.031403,216.127594
0,un128cn128,16,0.14407,26.845678,0.857691,24.571753,126.704224,212.924774
9,un4,16,0.195498,26.037884,0.83281,38.403362,134.535217,216.990784
6,un4cn4,16,0.1665,26.42989,0.8453,28.542702,120.682373,212.349533
3,un128,53,0.161945,26.79475,0.807522,25.695992,125.256935,306.546906
11,un128cn128,53,0.157851,26.910961,0.809345,25.067616,127.675423,313.634064


In [19]:
# LaTeX table of the finetuned-diffusion experiment (unsorted, Run column included).
print(experiment_to_latex("Neurad_Finetuned_Diffusion", experiments["Neurad_Finetuned_Diffusion"]))

\begin{tabular}{rllrrrrrr}
\toprule
Run & Model & Scene & LPIPS & PSNR & SSIM & FID_0 & FID_4 & FID_8 \\
\midrule
12146255 & un128cn128 & 016 & 0.14 & 26.85 & 0.86 & 24.57 & 126.70 & 212.92 \\
12146254 & un128cn128 & 001 & 0.19 & 26.46 & 0.77 & 27.27 & 135.33 & 167.93 \\
12146251 & un128 & 001 & 0.19 & 26.39 & 0.77 & 29.89 & 141.20 & 171.43 \\
12146253 & un128 & 053 & 0.16 & 26.79 & 0.81 & 25.70 & 125.26 & 306.55 \\
12146252 & un128 & 016 & 0.14 & 26.99 & 0.86 & 21.99 & 121.03 & 216.13 \\
12146250 & un4cn4 & 053 & 0.16 & 26.85 & 0.81 & 25.85 & 126.68 & 313.16 \\
12146249 & un4cn4 & 016 & 0.17 & 26.43 & 0.85 & 28.54 & 120.68 & 212.35 \\
12146248 & un4cn4 & 001 & 0.19 & 26.45 & 0.77 & 28.56 & 136.32 & 172.82 \\
12146247 & un4 & 053 & 0.16 & 26.95 & 0.81 & 26.37 & 127.57 & 315.08 \\
12146246 & un4 & 016 & 0.20 & 26.04 & 0.83 & 38.40 & 134.54 & 216.99 \\
12146245 & un4 & 001 & 0.19 & 26.52 & 0.77 & 27.93 & 138.24 & 173.21 \\
12146244 & un128cn128 & 053 & 0.16 & 26.91 & 0.81 & 25.07 & 127.6

In [20]:
# Sweep results, ordered by the swept hyper-parameter columns.
format_experiment("Neurad_Base_Diffusion_Sweep", experiments["Neurad_Base_Diffusion_Sweep"], sort_by=["Noise Strength", "Augment Loss Mult", "Augment Phase Step", "Noise Start Phase Step", ])

Unnamed: 0,Augment Loss Mult,Augment Phase Step,Noise Start Phase Step,Noise Strength,LPIPS,PSNR,SSIM,FID_0,FID_4,FID_8
34,20,0,0,0.1,0.187207,26.344543,0.768848,27.100138,136.062027,166.219208
25,20,0,20000,0.1,0.188045,26.433567,0.769166,28.781672,138.139511,171.745346
30,20,20000,0,0.1,0.187101,26.522787,0.771463,27.832098,134.175186,169.085861
29,20,20000,20000,0.1,0.186767,26.49559,0.771589,28.790785,134.339569,170.837936
27,40,0,0,0.1,0.186012,26.497669,0.771275,29.035374,139.166031,174.145065
24,40,0,20000,0.1,0.186411,26.452581,0.77003,28.020338,134.892349,167.696136
13,40,20000,0,0.1,0.18963,26.417114,0.768747,28.991943,137.328644,167.271133
17,40,20000,20000,0.1,0.18776,26.402874,0.769401,27.781132,135.202332,168.250015
15,60,0,0,0.1,0.187957,26.403851,0.767455,28.435768,137.917908,170.176361
19,60,0,20000,0.1,0.188177,26.483786,0.77101,29.022547,136.381912,164.161301


In [21]:
# Finetuned-diffusion runs of scene "001" only, ordered by FID_4.
format_experiment("Neurad_Finetuned_Diffusion", experiments["Neurad_Finetuned_Diffusion"], sort_by=["FID_4"], filter_condition=lambda x: x["Scene"] == "001")

Unnamed: 0,Model,Scene,LPIPS,PSNR,SSIM,FID_0,FID_4,FID_8
1,un128cn128,1,0.188671,26.460135,0.77062,27.273657,135.326385,167.926819
7,un4cn4,1,0.186542,26.452417,0.769359,28.561502,136.320328,172.818756
10,un4,1,0.188499,26.518902,0.770376,27.9314,138.242523,173.212738
2,un128,1,0.192417,26.393488,0.767565,29.886395,141.200394,171.432648


In [22]:
# Base-diffusion runs of scene "001" only, ordered by FID_4.
format_experiment("Neurad_Base_Diffusion", experiments["Neurad_Base_Diffusion"], sort_by=["FID_4"], filter_condition=lambda x: x["Scene"] == "001")

Unnamed: 0,Scene,LPIPS,PSNR,SSIM,FID_0,FID_4,FID_8
1,1,0.189962,26.445934,0.768263,29.650841,139.832245,169.838226


In [None]:
# LaTeX table of the base-diffusion experiment (unsorted, Run column included).
print(experiment_to_latex("Neurad_Base_Diffusion", experiments["Neurad_Base_Diffusion"]))