In [1]:
# Presumably the directory generated graphs are saved to; it is not referenced
# by the cells visible here.
output_path = "./outputs/graphs/"
# Directory that is scanned for the experiment's JSON result files.
results_path = "outputs/california_housing/"

In [2]:
import json
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
import graph_code.graphing_utils as plotting

In [3]:
# Figure widths for a single column and for the full page.
# NOTE(review): presumably in inches (matplotlib's figsize unit) — confirm.
colwidth, pagewidth = 4.22716535, 8.7598425

In [4]:
# Load every `.json` file found directly inside `results_path` into
# `results_list` (one parsed JSON document per file).
#
# The file names are sorted because `os.listdir` returns them in arbitrary
# order; a fixed order keeps any later processing that depends on load order
# reproducible across machines and runs.
results_list = []

for file in sorted(
    name for name in os.listdir(results_path) if name.endswith(".json")
):
    # os.path.join works whether or not `results_path` ends with a separator
    with open(os.path.join(results_path, file)) as f:
        results_temp = json.load(f)
    results_list.append(results_temp)


In [5]:
# Merge all loaded files into a single nested mapping:
#   results[dataset][corruption_type][run][depression] -> list of epoch records
# When the same (dataset, corruption_type, run, depression) appears in more
# than one file, the file processed last wins.
results = {}

for results_dict in results_list:
    for dataset, by_corruption in results_dict.items():
        dataset_bucket = results.setdefault(dataset, {})

        for corruption_type, by_run in by_corruption.items():
            corruption_bucket = dataset_bucket.setdefault(corruption_type, {})

            for run, by_depression in by_run.items():
                run_bucket = corruption_bucket.setdefault(run, {})

                for depression, by_epoch in by_depression.items():
                    # Every key except 'corrupt_sources' is read as an epoch
                    # number; its metrics are flattened into one record.
                    run_bucket[depression] = [
                        dict(epoch=int(key), **values)
                        for key, values in by_epoch.items()
                        if key != 'corrupt_sources'
                    ]

In [6]:
# Flatten the nested `results` mapping into one wide DataFrame: one row per
# epoch record, tagged with the keys it was stored under.
run_frames = []

for dataset, by_corruption in results.items():
    for corruption_type, by_run in by_corruption.items():
        for run, by_depression in by_run.items():
            for depression, records in by_depression.items():
                run_frame = pd.json_normalize(records).assign(
                    dataset=dataset,
                    corruption_type=corruption_type,
                    run=run,
                    depression=depression,
                    # shift the stored epoch number up by one
                    epoch=lambda frame: frame['epoch'] + 1,
                )
                run_frames.append(run_frame)

results_df = pd.concat(run_frames)
# The `depression` keys arrive as the strings "true"/"false"; turn them into
# real booleans before fixing the column dtypes.
results_df = results_df.replace({"depression": {"true": True, "false": False}})
results_df = results_df.astype({"run": "int64", "depression": "bool"})

In [7]:
# Reshape to long format: every non-identifier column becomes a
# (metric, value) row.
# NOTE: this rebinds `results_df`, so the cell is meant to run once on the
# freshly built wide frame.
id_columns = ['dataset', 'corruption_type', 'run', 'depression', 'epoch']
results_df = pd.melt(
    results_df,
    id_vars=id_columns,
    var_name='metric',
    value_name='value',
)

In [8]:
# Lowest test loss reached by each run, shown with one column per
# `depression` value.
test_loss_rows = results_df[results_df["metric"] == "test_loss"]
(
    test_loss_rows
    .groupby(["depression", "corruption_type", "run"])[["value"]]
    .min()
    .unstack("depression")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,value,value
Unnamed: 0_level_1,depression,False,True
corruption_type,run,Unnamed: 2_level_2,Unnamed: 3_level_2
original,1,0.436656,0.450649
original,2,0.454133,0.454254
original,3,0.402523,0.411935
original,4,0.458108,0.441117
original,5,0.445344,0.435897
random_label,1,0.631144,0.450391
random_label,2,0.628551,0.41906
random_label,3,0.61603,0.460128
random_label,4,0.577974,0.450867
random_label,5,0.634682,0.466275


In [9]:
def bold_min_value_latex(x, model_names):
    r"""Return a copy of row ``x`` with its lowest-mean model cell in bold.

    The last ``len(model_names)`` entries of ``x`` are expected to be strings
    of the form ``"<mean> ± <std>"``. The mean part is parsed as a float so
    that the comparison is numeric (``9.5`` beats ``10.2``) instead of a
    lexicographic comparison of the text.

    Parameters
    ----------
    x : pandas.Series
        One table row; the model cells must be its trailing entries.
    model_names : sequence
        Only its length is used: the number of trailing model cells.

    Returns
    -------
    pandas.Series
        Copy of ``x`` whose winning cell is wrapped in ``\textbf{...}``.
        On a tie the first of the tied cells is chosen. The row is returned
        unchanged when there are no model cells or no mean could be read.

    Raises
    ------
    ValueError
        If a mean is present but cannot be parsed as a float.
    """
    x = x.copy()
    n_models = len(model_names)
    if n_models == 0:
        return x

    # position of the first model cell within the row
    first_model_pos = x.shape[0] - n_models

    means = (
        x
        .iloc[first_model_pos:]
        .str.replace(" ", "")
        .str.split("±")
        .str[0]
        .astype(float)  # numeric, not string, comparison
    )
    if means.isna().all():
        # nothing comparable in this row; leave it untouched
        return x

    idx_bold = first_model_pos + int(means.argmin())
    x.iloc[idx_bold] = '\\textbf{' + x.iloc[idx_bold] + '}'

    return x


# Display label for each corruption-type key used in the results.
corruption_types = dict(
    original="Original Data",
    random_noise="Random Noise",
    random_label="Random Label",
)

# Order in which the corruption types appear as table rows.
corruption_order = ["original", "random_noise", "random_label"]

# Model columns, in the order they appear in the table.
model_order = ['Standard', 'LAP (Ours)']

# Build the summary table: for every noise type, the mean ± std (across runs)
# of each run's lowest test loss, one column per model, best model in bold.
#
# NOTE(review): the per-run minimum is taken over the epochs of the *test*
# loss, and rows are not grouped by `dataset` — confirm both are intended.
# NOTE(review): bolding compares the rounded values shown in the table, so
# models that tie at two decimals resolve to the first model column.
results_final_df = (
    results_df
    .loc[lambda df: df.metric == 'test_loss']
    # lowest test loss of every individual run
    .groupby(["depression", 'corruption_type', "run"])
    [['value']]
    .min()
    .reset_index()
    # aggregate the per-run minima across runs
    .groupby(['corruption_type', "depression"])
    ['value']
    .agg(['mean', 'std'])
    .assign(
        # fixed two-decimal formatting keeps trailing zeros ("0.40", not
        # "0.4") so every cell has the same precision
        mean_std=lambda x:
            x['mean'].map("{:.2f}".format)
            + " ± "
            + x['std'].map("{:.2f}".format),
    )
    ['mean_std']
    .to_frame()
    .reset_index()
    .rename(columns={
        "depression": "LAP",
        'corruption_type': "Noise Type",
        "mean_std": "MSE Loss"
    })
    .replace(
        {
            "LAP": {
                False: "Standard",
                True: "LAP (Ours)"
            }
        }
    )
    # one row per noise type, one column per model
    .pivot(
        index="Noise Type",
        columns="LAP",
        values="MSE Loss"
    )
    .reset_index()
    # column order follows `model_order`, the same list handed to the
    # bolding helper below
    [['Noise Type', *model_order]]
    .loc[
        lambda x: x['Noise Type'].isin(corruption_order)
    ]
    .sort_values(
        "Noise Type", key=lambda x: x.map(corruption_order.index)
    )
    .replace(
        {
            "Noise Type": corruption_types
        }
    )
    # makes bold with latex:
    .apply(
        bold_min_value_latex,
        model_names=model_order,
        axis=1
    )
)

results_final_df

LAP,Noise Type,Standard,LAP (Ours)
0,Original Data,\textbf{0.44 ± 0.02},0.44 ± 0.02
1,Random Label,0.62 ± 0.02,\textbf{0.45 ± 0.02}


In [10]:
# Emit the table as LaTeX source. `escape=False` is passed explicitly so the
# `\textbf{...}` markup in the cells is written verbatim; the default escaping
# behaviour of `to_latex` differs between pandas versions.
print(
    results_final_df.to_latex(index=False, escape=False)
)

\begin{tabular}{lll}
\toprule
Noise Type & Standard & LAP (Ours) \\
\midrule
Original Data & \textbf{0.44 ± 0.02} & 0.44 ± 0.02 \\
Random Label & 0.62 ± 0.02 & \textbf{0.45 ± 0.02} \\
\bottomrule
\end{tabular}

