In [182]:
# Directory intended for generated graphs (not referenced in the cells shown here).
output_path = './outputs/graphs/'
# Directory containing the JSON result files that are read into `results_dict`.
results_path = 'outputs/imdb_random_label/'

In [183]:
import json
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
import graph_code.graphing_utils as plotting
from copy import deepcopy

In [184]:
# Figure-sizing constants; presumably the column and page widths of the target
# document in inches -- TODO confirm units. Not referenced in the cells shown here.
colwidth = 4.22716535
pagewidth = 8.7598425

In [185]:
# Load every JSON results file found in `results_path`, keyed by its file name.
results_dict = {}

# os.listdir returns names in arbitrary order; sorting makes the dict (and
# everything built from it) come out in the same order on every run.
for file in sorted(os.listdir(results_path)):
    # Match the real extension so names that merely end in "json" are skipped.
    if not file.endswith(".json"):
        continue
    # os.path.join works whether or not `results_path` ends with a separator;
    # JSON is UTF-8 by specification, so do not rely on the locale default.
    with open(os.path.join(results_path, file), encoding="utf-8") as f:
        results_dict[file] = json.load(f)


In [186]:
def _nest_under_method(raw_results, method):
    """Return a copy of ``raw_results`` with ``method`` inserted below each run.

    Input layout:  {dataset: {corruption_type: {run: payload}}}
    Output layout: {dataset: {corruption_type: {run: {method: payload}}}}
    """
    nested = {}
    for ds, by_ctype in raw_results.items():
        nested[ds] = {}
        for ctype, by_run in by_ctype.items():
            nested[ds][ctype] = {
                run: {method: payload} for run, payload in by_run.items()
            }
    return nested


# The IDPA and co-teaching files have no method level, so add one to give them
# the same nesting that `combine_results_dict` expects.
results_idpa = _nest_under_method(results_dict['results_idpa.json'], 'idpa')
results_cot = _nest_under_method(results_dict['results_cot.json'], 'cot')

results_dict['results_idpa.json'] = results_idpa
results_dict['results_cot.json'] = results_cot

In [187]:
def combine_results_dict(results_dict_in, results_out):
    """Merge one nested results dict into ``results_out`` in place.

    ``results_dict_in`` is expected to look like
    ``{dataset: {corruption_type: {run: {method: {epoch: metrics}}}}}``.

    For every method, the ``{epoch: metrics}`` mapping is flattened into a list
    of records ``{"epoch": int(epoch), **metrics}``; the ``'corrupt_sources'``
    entry is skipped. Missing dataset / corruption type / run levels are
    created in ``results_out``, and an existing entry for the same method is
    overwritten. Nothing is returned.
    """
    for dataset, by_corruption in results_dict_in.items():
        out_dataset = results_out.setdefault(dataset, {})

        for corruption_type, by_run in by_corruption.items():
            out_corruption = out_dataset.setdefault(corruption_type, {})

            for run, by_method in by_run.items():
                out_run = out_corruption.setdefault(run, {})

                for method, per_epoch in by_method.items():
                    records = []
                    for epoch, metrics in per_epoch.items():
                        # This key sits next to the epochs but is not an epoch.
                        if epoch == 'corrupt_sources':
                            continue
                        records.append(dict(epoch=int(epoch), **metrics))
                    out_run[method] = records

In [188]:
# Fold every loaded results file into a single nested dict.
combined_results = {}

for file_results in results_dict.values():
    combine_results_dict(file_results, combined_results)

In [189]:
# One frame per (dataset, corruption_type, run, method): the per-epoch records
# become rows, tagged with the keys they were stored under. Epoch numbers are
# shifted by one (presumably from 0-based to 1-based -- TODO confirm).
per_method_frames = [
    pd.json_normalize(epoch_records)
    .assign(
        dataset=dataset,
        corruption_type=corruption_type,
        run=run,
        method=method,
    )
    .assign(epoch=lambda frame: frame['epoch'] + 1)
    for dataset, by_corruption in combined_results.items()
    for corruption_type, by_run in by_corruption.items()
    for run, by_method in by_run.items()
    for method, epoch_records in by_method.items()
]

# Stack everything, turn the "true"/"false" method labels into booleans and
# make the run identifier an integer.
combined_results_df = (
    pd.concat(per_method_frames)
    .replace({"method": {"true": True, "false": False}})
    .astype({"run": "int64"})
)

In [191]:
# Reshape to long format: every non-identifier column becomes a
# (metric, value) pair on its own row.
combined_results_df = pd.melt(
    combined_results_df,
    id_vars=['dataset', 'corruption_type', 'run', 'method', 'epoch'],
    var_name='metric',
    value_name='value',
)

In [193]:
# Highest test top-1 accuracy over all epochs for each (corruption_type, run),
# with one column per method.
(
    combined_results_df
    .loc[combined_results_df['metric'].eq('test_top1acc')]
    .groupby(["method", 'corruption_type', "run"])
    [['value']]
    .max()
    .unstack("method")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,value,value,value,value
Unnamed: 0_level_1,method,False,True,cot,idpa
corruption_type,run,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
original,1,0.81528,0.81892,0.85656,0.83604
original,2,0.82716,0.83252,0.85984,0.83076
original,3,0.83332,0.83248,0.84316,0.82356
original,4,0.82936,0.84192,0.84232,0.83804
original,5,0.83556,0.83396,0.854,0.83356
random_label,1,0.63964,0.6816,0.66232,0.6362
random_label,2,0.65568,0.71568,0.6756,0.65004
random_label,3,0.65348,0.75684,0.68072,0.65072
random_label,4,0.64804,0.70436,0.6706,0.64116
random_label,5,0.65348,0.73912,0.66988,0.65256


In [194]:
def bold_max_value_latex(x, model_names):
    r"""Wrap the best model cell of one table row in LaTeX ``\textbf{...}``.

    Parameters
    ----------
    x : pandas.Series
        One row of the results table. The last ``len(model_names)`` entries
        are the model cells, each formatted as ``"<mean> ± <std>"``.
    model_names : sequence
        Only its length is used: it says how many trailing cells of ``x``
        are model results.

    Returns
    -------
    pandas.Series
        A copy of ``x`` in which the model cell with the largest mean is
        wrapped in ``\textbf{}``. The row is returned unchanged when there are
        no model cells or none of them can be parsed as a number.
    """
    x = x.copy()
    n_models = len(model_names)
    if n_models == 0:
        return x

    # Position of the first model cell; clamped so a short row cannot produce
    # a negative offset.
    first_model_pos = max(x.shape[0] - n_models, 0)

    # Compare the means as numbers. Comparing the text instead would rank
    # "9.5" above "85.12" and "85.12" above "100.0".
    means = pd.to_numeric(
        x.iloc[first_model_pos:]
        .astype(str)
        .str.split("±")
        .str[0]
        .str.strip(),
        errors="coerce",
    )
    if means.isna().all():
        return x

    # Series.argmax is positional and skips NaN (missing/unparseable cells).
    idx_bold = first_model_pos + int(means.argmax())
    x.iloc[idx_bold] = '\\textbf{' + str(x.iloc[idx_bold]) + '}'

    return x


# Display label for each corruption-type key found in the results.
corruption_types = {
    "original": "Original Data",
    "random_permute" : "Random Permute",
    "random_label": "Random Label",
}

# Row order of the summary table; corruption types not listed here are dropped
# by the `isin(corruption_order)` filter in the table pipeline.
corruption_order = [
    "original",
    "random_permute",
    "random_label",
]


# Model display names handed to `bold_max_value_latex`, which only uses the
# length of this list; the table's column order is set separately in the
# pipeline.
model_order = [
    'Co-teaching',
    'IDPA',
    'Standard',
    'LAP (Ours)',
]

# Summary table: for every (corruption type, method), the mean ± std over runs
# of each run's best test top-1 accuracy, in percent.
# NOTE(review): the best epoch is picked with the test metric itself (max over
# epochs of 'test_top1acc'), so the reported numbers are optimistic.
combined_results_final_df = (
    combined_results_df
    .loc[lambda df: df.metric == 'test_top1acc']
    # best epoch per run
    .groupby(["method", 'corruption_type', "run"])
    [['value']]
    .max()
    .assign(value = lambda x: x['value']*100)
    .reset_index()
    # aggregate over runs
    .groupby(['corruption_type', "method"])
    ['value']
    .agg(['mean', 'std'])
    .assign(
        # Fixed two-decimal formatting keeps trailing zeros ("0.80", "83.20"),
        # so every cell of the table has the same number of decimals.
        mean_std = lambda x:
            x['mean'].map("{:.2f}".format)
            + " ± "
            + x['std'].map("{:.2f}".format),
    )
    ['mean_std']
    .to_frame()
    .reset_index()
    .rename(columns={
        "method": "LAP",
        'corruption_type': "Noise Type",
        "mean_std": "Top-1 Accuracy"
    })
    .replace(
        {
            "LAP": {
                False: "Standard",
                True: "LAP (Ours)",
                'cot': 'Co-teaching',
                'idpa': 'IDPA'
            }
        }
    )
    # one row per noise type, one column per method
    .pivot(
        index="Noise Type",
        columns="LAP",
        values="Top-1 Accuracy"
    )
    .reset_index()
    [[
        'Noise Type',  'Standard', 'IDPA', 'Co-teaching', 'LAP (Ours)',
    ]]
    .loc[
        lambda x: x['Noise Type'].isin(corruption_order)
    ]
    .sort_values(
        "Noise Type", key=lambda x: x.map(corruption_order.index)
    )
    .replace(
        {
            "Noise Type": corruption_types
        }
    )
    # makes bold with latex:
    .apply(
        bold_max_value_latex,
        model_names = model_order,
        axis=1
    )
)

combined_results_final_df

LAP,Noise Type,Standard,IDPA,Co-teaching,LAP (Ours)
0,Original Data,82.81 ± 0.79,83.24 ± 0.56,\textbf{85.12 ± 0.8},83.2 ± 0.83
2,Random Permute,83.46 ± 0.91,83.28 ± 0.79,\textbf{85.6 ± 0.15},83.26 ± 1.09
1,Random Label,65.01 ± 0.65,64.61 ± 0.71,67.18 ± 0.69,\textbf{71.95 ± 2.94}


In [195]:
# Emit the summary table as LaTeX source, without the row index.
latex_table = combined_results_final_df.to_latex(index=False)
print(latex_table)

\begin{tabular}{lllll}
\toprule
Noise Type & Standard & IDPA & Co-teaching & LAP (Ours) \\
\midrule
Original Data & 82.81 ± 0.79 & 83.24 ± 0.56 & \textbf{85.12 ± 0.8} & 83.2 ± 0.83 \\
Random Permute & 83.46 ± 0.91 & 83.28 ± 0.79 & \textbf{85.6 ± 0.15} & 83.26 ± 1.09 \\
Random Label & 65.01 ± 0.65 & 64.61 ± 0.71 & 67.18 ± 0.69 & \textbf{71.95 ± 2.94} \\
\bottomrule
\end{tabular}

