In [1]:
import pandas as pd
import os

# Load experiment results

In [2]:
# Definitions: result locations and the experiment grid used by the cells below.
metrics_path = "../results/influence_of_n_samples/performance/"
timing_path = "../results/influence_of_n_samples/timing/"
output_path = "../results/influence_of_n_samples/"
metrics = ['CRPSValidation', 'Gaussian NLLValidation',
    'CoverageValidation', 'IntervalWidthValidation', 'MSETest',
    'EnergyScoreTest', 'CRPSTest', 'Gaussian NLLTest', 'CoverageTest',
    'IntervalWidthTest']
agg_groups = ["uncertainty_quantification", "n_samples"]
n_samples = [5, 10, 20, 50]
methods = ["scoring-rule-dropout", "scoring-rule-reparam"]


# Load experiment results.
# Every sub-folder of metrics_path that holds a "test.csv" contributes its
# columns. The frames are collected in a list and concatenated once, instead
# of re-concatenating a growing frame on every iteration.
run_frames = []
folders = os.listdir(metrics_path)
for sf in folders:
    run_dir = os.path.join(metrics_path, sf)
    if not os.path.isdir(run_dir):
        continue
    file = os.path.join(run_dir, "test.csv")
    # Read file if it exists
    if os.path.exists(file):
        run_frames.append(pd.read_csv(file, index_col=0))
if not run_frames:
    # Fail here with a clear message rather than with a KeyError further down.
    raise FileNotFoundError(f"No test.csv found in any sub-folder of {metrics_path}")
results = pd.concat(run_frames, axis=1)

# Keep only the metric rows plus the rows that identify the configuration.
rows = metrics + agg_groups
results = results.loc[rows]
results.loc[metrics] = results.loc[metrics].astype("float32")
results.loc["n_samples"] = results.loc["n_samples"].astype("int32")
# After transposing, each row is one loaded column and each metric is a column.
results = results.transpose()

# Mean and standard deviation per (uncertainty quantification method, n_samples).
grouped = results.groupby(agg_groups)
mean = grouped.mean().astype("float32")
mean.insert(0, "Statistic", "Mean")
std = grouped.std().astype("float32")
std.insert(0, "Statistic", "Std")

# Metrics become rows again; every (method, n_samples) key now appears twice
# in the columns, once for the mean and once for the std.
results_df = pd.concat([mean.transpose(), std.transpose()], axis = 1)
# Reorder by the sorted column keys so the two columns of a key sit together.
results_df = results_df[results_df.columns.sort_values().unique()]

In [3]:
# Add training time results as an extra "training_time" row of results_df.
n_epochs = 25  # Number of epochs; t_training is divided by this to get a per-epoch time
results_df.loc["training_time"] = 0

# Collect the timing frames first and concatenate once, instead of
# re-concatenating a growing frame on every iteration.
timing_frames = []
folders = os.listdir(timing_path)
for sf in folders:
    run_dir = os.path.join(timing_path, sf)
    if not os.path.isdir(run_dir):
        continue
    file = os.path.join(run_dir, "test.csv")
    # Read file if it exists
    if os.path.exists(file):
        timing_frames.append(pd.read_csv(file, index_col=0))
if not timing_frames:
    # Fail here with a clear message rather than with a KeyError further down.
    raise FileNotFoundError(f"No test.csv found in any sub-folder of {timing_path}")
results_timing = pd.concat(timing_frames, axis=1)

# Keep the training time plus the rows that identify the configuration,
# then transpose so that these become columns.
rows = ["t_training"] + agg_groups
results_timing = results_timing.loc[rows].transpose()
results_timing["t_training"] = results_timing["t_training"].astype("float32")
results_timing["n_samples"] = results_timing["n_samples"].astype("int32")

for method in methods:
    for n in n_samples:
        is_config = (
            (results_timing["uncertainty_quantification"] == method)
            & (results_timing["n_samples"] == n)
        )
        t_training = results_timing.loc[is_config, "t_training"]
        if t_training.empty:
            # Same exception type as the bare .values[0] lookup, but it names
            # the configuration that has no timing result.
            raise IndexError(
                f"No timing result for method={method!r}, n_samples={n} in {timing_path}"
            )
        # Only the first matching entry is used.
        results_df.loc["training_time", (method, n)] = t_training.values[0] / n_epochs

# Reassign instead of sorting in place so the cell stays easy to re-run.
results_df = results_df.sort_values(by = ["uncertainty_quantification", "n_samples"], axis = 1)
results_df.to_csv(f"{output_path}aggregated_results.csv")

  results_df.loc["training_time", (method, n)]  = results_timing[
  results_df.loc["training_time", (method, n)]  = results_timing[
  results_df.loc["training_time", (method, n)]  = results_timing[
  results_df.loc["training_time", (method, n)]  = results_timing[
  results_df.loc["training_time", (method, n)]  = results_timing[
  results_df.loc["training_time", (method, n)]  = results_timing[
  results_df.loc["training_time", (method, n)]  = results_timing[
  results_df.loc["training_time", (method, n)]  = results_timing[
  results_df.loc["training_time", (method, n)]  = results_timing[
  results_df.loc["training_time", (method, n)]  = results_timing[
  results_df.loc["training_time", (method, n)]  = results_timing[


# Create LaTeX table

In [4]:
# Display the aggregated results table
results_df

uncertainty_quantification,scoring-rule-dropout,scoring-rule-dropout,scoring-rule-dropout,scoring-rule-dropout,scoring-rule-dropout,scoring-rule-dropout,scoring-rule-dropout,scoring-rule-reparam,scoring-rule-reparam,scoring-rule-reparam,scoring-rule-reparam,scoring-rule-reparam,scoring-rule-reparam,scoring-rule-reparam,scoring-rule-reparam,scoring-rule-reparam,scoring-rule-reparam
n_samples,3,3.1,5,5.1,10,20,50,3,3.1,5,5.1,10,10.1,20,20.1,50,50.1
Statistic,Mean,Std,Mean,Std,,,,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std
CRPSValidation,0.548556,0.009714,0.55685,0.007986,,,,0.47347,0.003678,0.474363,0.003353,0.472898,0.003952,0.474704,0.005357,0.472522,0.002642
Gaussian NLLValidation,3.032448,0.585035,5.293431,1.894548,,,,1.225241,0.008222,1.371462,0.44014,1.216473,0.016517,1.218104,0.018621,1.212391,0.008541
CoverageValidation,0.765348,0.01877,0.754921,0.018761,,,,0.940477,0.002839,0.942409,0.002722,0.943954,0.00194,0.945616,0.002759,0.9452,0.002848
IntervalWidthValidation,3.091159,0.041356,3.110771,0.054213,,,,3.258196,0.029861,3.273296,0.027697,3.269374,0.024599,3.283218,0.023714,3.269583,0.022393
MSETest,0.879336,0.007243,0.878956,0.005881,,,,0.863959,0.003844,0.866071,0.003566,0.864933,0.00363,0.868047,0.005331,0.864934,0.003505
EnergyScoreTest,0.619538,0.004923,0.619132,0.003831,,,,0.608104,0.002693,0.609589,0.002497,0.608789,0.002545,0.610973,0.003745,0.608789,0.002462
CRPSTest,0.549622,0.009735,0.557674,0.007961,,,,0.474298,0.003695,0.475138,0.003164,0.473709,0.003834,0.475572,0.005107,0.473303,0.002694
Gaussian NLLTest,3.038941,0.587175,5.287218,1.880209,,,,1.226809,0.008312,1.376768,0.450959,1.218008,0.015787,1.219801,0.018079,1.213822,0.008529
CoverageTest,0.764044,0.01913,0.754054,0.018918,,,,0.940139,0.002973,0.942123,0.002708,0.943722,0.001972,0.945274,0.002708,0.944911,0.002794


In [17]:
# Initialize an empty DataFrame to store the formatted values
formatted_df = pd.DataFrame()
# Last seven rows of results_df; the column reordering below expects
# "training_time" to be the final one.
metrics = results_df.index[-7:]
# Unique column keys of results_df; each key is expected to select two values
# per row (mean first, then std).
methods = list(results_df.columns.unique())

# Create a new DataFrame with the column key as the index and metrics as columns
for metric in metrics:
    cells = []
    for column_key in methods:
        # Position 0 is the mean and position 1 the std. The previous version
        # formatted position 0 twice, so the table showed "mean ± mean".
        mean_value, std_value = results_df.loc[metric, column_key].values[:2]
        cells.append(
            f"\\makecell{{{mean_value:.4f} \\\\ ($\\pm$ {std_value:.4f})}}"
        )
    formatted_df[metric] = cells
formatted_df.index = methods
# Move last column to beginning
cols = ['training_time'] + list(formatted_df.columns[:-1])
formatted_df = formatted_df[cols]
# escape=False keeps the LaTeX commands in the cells intact
latex_table = formatted_df.to_latex(escape=False)
# Save to a file
with open(f"{output_path}aggregated_results.tex", "w") as f:
    f.write(latex_table)