In [1]:
output_path = './outputs/graphs/'
results_path = 'outputs/difficult_data/'

In [2]:
import json
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
import graph_code.graphing_utils as plotting
from copy import deepcopy

In [3]:
colwidth = 4.22716535
pagewidth = 8.7598425

In [4]:
results_dicts = {}

for file in [
    f 
    for f in os.listdir(results_path) 
    if f.endswith("json")
]:
    with open(results_path + file) as f:
        results_temp = json.load(f)
    results_dicts[file] = results_temp


In [5]:
results_df = []

for results_file, results_dict in results_dicts.items():
    for dataset in results_dict:
        for corruption_type in results_dict[dataset]:
            for run in results_dict[dataset][corruption_type]:
                for method in results_dict[dataset][corruption_type][run]:
                    results_df.append(
                        pd.DataFrame(results_dict[dataset][corruption_type][run][method])
                            .T
                            .reset_index()
                            .rename(columns={'index': 'epoch'})
                            .assign(epoch=lambda x: x['epoch'].apply(int))
                        .assign(
                            results_file=results_file,
                            dataset=dataset,
                            corruption_type=corruption_type,
                            run=run,
                            method=method
                        )
                        .assign(epoch=lambda x: x['epoch']+1)

                    )
                    

results_df = pd.concat(results_df)
results_df = (
    results_df
    .replace({"method": {"true": True, "false": False}})
    .astype(
        {
            "run": "int64",
        }
    )
)

In [6]:
accuracy_results_df = (
    results_df
    [[
        "dataset", "corruption_type", "run", "method", "epoch", "accuracy"
    ]]
    .melt(
        id_vars=['dataset', 'corruption_type', 'run', 'method', 'epoch'],
        var_name='metric',
        value_name='value'
    )
)

In [7]:
(
    accuracy_results_df
    .loc[lambda x: x['method'].str.contains("validation")]
    .loc[lambda x: x['metric'] == 'accuracy']
    .assign(value = lambda x: x['value']*100)
    .groupby(['corruption_type', 'dataset', 'method', 'run'])
    ['value']
    .max()
    .reset_index()
    .groupby(['corruption_type', 'dataset', 'method'])
    ['value']
    .agg(['mean', 'std'])
    .assign(
        mean_std = lambda x: 
            np.round(x['mean'].astype(float), 2).astype(str) 
            + " ± "
            + np.round(x['std'].astype(float), 2).astype(str),
    )
    ['mean_std']
    .unstack(0)
    
)

Unnamed: 0_level_0,corruption_type,original,random_label
dataset,method,Unnamed: 2_level_1,Unnamed: 3_level_1
mnist_and_cifar,lap_0.1_validation,95.8 ± 0.99,24.81 ± 0.86
mnist_and_cifar,lap_0.25_validation,95.84 ± 1.15,23.65 ± 0.76
mnist_and_cifar,lap_0.5_validation,97.05 ± 1.7,23.56 ± 0.66
mnist_and_cifar,lap_1.0_validation,98.09 ± 1.07,23.43 ± 0.6
mnist_and_cifar,lap_2.0_validation,98.45 ± 0.3,23.41 ± 0.55
mnist_and_cifar,lap_4.0_validation,98.48 ± 0.2,23.45 ± 0.61


best model for original data is `lap_4.0` and for the random label data is `lap_0.1`

In [8]:
best_results = {
    ("random_label", "lap_0.1_validation"): "lap_validation",
    ("random_label", "lap_0.1_test"): "lap",

    ("original", "lap_4.0_validation"): "lap_validation",
    ("original", "lap_4.0_test"): "lap"

}

best_accuracy_results_df = accuracy_results_df.assign(
    method=lambda df: df[['corruption_type', 'method']].apply(
        lambda x: best_results.get(tuple(x), x['method']),
        axis=1
    )
)

Model accuracy:

In [9]:
def bold_max_value_latex(x, model_names):
    x = x.copy()
    len_cols = x.shape[0]
    n_models = len(model_names)
    idx_bold = (
        x
        [-n_models:]
        .str.replace(" ", "")
        .str.split("±")
        .str[0]
        .astype(float)
        .argmax()
    )
    max_val = x.iloc[idx_bold+len_cols-n_models]
    max_val, max_std = map(float, max_val.split("±"))

    for i in range(len_cols-n_models, len_cols):
        val = x.iloc[i]
        val, std = map(float, val.split("±"))
        if val >= max_val - max_std and val <= max_val + max_std:
            x.iloc[i] = '\\textbf{' + x.iloc[i] + '}'
    
    return x


corruption_types = {
    "original": "Original Data",
    "random_label": "Random Label",
}

corruption_order = [
    "original",
    "random_label",
]


model_order = [
    'Standard',
    'LAP (Ours)',
]

best_accuracy_results_final_df = (
    best_accuracy_results_df
    .loc[lambda df: df.metric == 'accuracy']
    .groupby(["method", 'corruption_type', "run"])
    [['value']]
    .max()
    .assign(value = lambda x: x['value']*100)
    .reset_index()
    .groupby(['corruption_type', "method"])
    ['value']
    .agg(['mean', 'std'])
    .assign(
        mean_std = lambda x: 
            np.round(x['mean'].astype(float), 2).astype(str) 
            + " ± "
            + np.round(x['std'].astype(float), 2).astype(str),
    )
    ['mean_std']
    .to_frame()
    .reset_index()
    .rename(columns={
        "method": "LAP", 
        'corruption_type': "Noise Type", 
        "mean_std": "Accuracy"
    })
    .replace(
        {
            "LAP": {
                "standard": "Standard",
                "lap": "LAP (Ours)",
            }
        }
    )
    .pivot(
        index="Noise Type",
        columns="LAP",
        values="Accuracy"
    )
    .reset_index()
    [[
        'Noise Type',  'Standard', 'LAP (Ours)',  
    ]]
    .loc[
        lambda x: x['Noise Type'].isin(corruption_order)
    ]
    .sort_values(
        "Noise Type", key=lambda x: x.map(corruption_order.index)
    )
    .replace(
        {
            "Noise Type": corruption_types
        }
    )
    # makes bold with latex:
    .apply(
        bold_max_value_latex,
        model_names = model_order,
        axis=1
    )
)

best_accuracy_results_final_df

LAP,Noise Type,Standard,LAP (Ours)
0,Original Data,\textbf{98.38 ± 0.4},\textbf{98.36 ± 0.39}
1,Random Label,67.43 ± 2.83,\textbf{96.29 ± 0.83}


In [10]:
print(
    best_accuracy_results_final_df.to_latex(index=False)
)

\begin{tabular}{lll}
\toprule
Noise Type & Standard & LAP (Ours) \\
\midrule
Original Data & \textbf{98.38 ± 0.4} & \textbf{98.36 ± 0.39} \\
Random Label & 67.43 ± 2.83 & \textbf{96.29 ± 0.83} \\
\bottomrule
\end{tabular}



CIFAR source accuracy:

In [11]:
best_results = {
    ("random_label", "lap_0.1_validation"): "lap_validation",
    ("random_label", "lap_0.1_test"): "lap",

    ("original", "lap_1.0_validation"): "lap_validation",
    ("original", "lap_1.0_test"): "lap"

}

cifar_classes_accuracy_df = (
    results_df[[
        'dataset', 'corruption_type', 'run', 'method', 'epoch', 'class_accuracy'
    ]]
    .assign(
        method=lambda df: df[['corruption_type', 'method']].apply(
            lambda x: best_results.get(tuple(x), x['method']),
            axis=1
        )
    )
    .assign(class_label=lambda df: df['class_accuracy'].apply(lambda x: np.arange(len(x))))
    .explode(["class_accuracy", "class_label"])
    .loc[lambda x: x['class_label'].isin([11, 12])]
    .drop(columns=['epoch'])
    .groupby(["dataset", "corruption_type", "run", "method", "class_label"])
    .max()
    .reset_index()
    .groupby(["dataset", "corruption_type", "method"])
    ['class_accuracy']
    .agg(['mean', 'std'])
    .assign(
        mean_std = lambda x: 
            np.round(x['mean'].astype(float), 2).astype(str) 
            + " ± "
            + np.round(x['std'].astype(float), 2).astype(str),
    )
    ['mean_std']
    .to_frame()
    .reset_index()
    .rename(columns={
        "method": "LAP", 
        'corruption_type': "Noise Type", 
        "mean_std": "Accuracy"
    })
    .replace(
        {
            "LAP": {
                "standard": "Standard",
                "lap": "LAP (Ours)",
            }
        }
    )
    .pivot(
        index="Noise Type",
        columns="LAP",
        values="Accuracy"
    )
    .reset_index()
    [[
        'Noise Type',  'Standard', 'LAP (Ours)',  
    ]]
    .loc[
        lambda x: x['Noise Type'].isin(corruption_order)
    ]
    .sort_values(
        "Noise Type", key=lambda x: x.map(corruption_order.index)
    )
    .replace(
        {
            "Noise Type": corruption_types
        }
    )
    # makes bold with latex:
    .apply(
        bold_max_value_latex,
        model_names = model_order,
        axis=1
    )
)

In [12]:
print(
    cifar_classes_accuracy_df.to_latex(index=False)
)

\begin{tabular}{lll}
\toprule
Noise Type & Standard & LAP (Ours) \\
\midrule
Original Data & \textbf{0.97 ± 0.01} & 0.95 ± 0.06 \\
Random Label & \textbf{0.98 ± 0.01} & \textbf{0.97 ± 0.02} \\
\bottomrule
\end{tabular}



And CIFAR source unreliability:

In [13]:
source_weighting_correctness = (
    results_df
    .groupby(['dataset', 'corruption_type', 'run'])
    [['method', 'epoch', 'accuracy']]
    .apply(lambda x: x.loc[x['accuracy'].idxmax()])
    .reset_index()
    .merge(results_df)
    .loc[lambda x: x['method'].str.contains("test")]
    [[
        'dataset', 'corruption_type', 'method', 'run', 
        'epoch', 'source_weights', 'source_values', 'corrupt_sources'
    ]]
    .explode(['source_weights', 'source_values'])
    .astype(
        {
            "source_weights": "float",
            "source_values": "float"
        }
    )
    .astype(
        {
            "source_weights": "float",
            "source_values": "int"
        }
    )
    .assign(
        corrupt_source = lambda x: (
            x[['source_values', 'corrupt_sources']]
            .apply(
                lambda x: int(float(x['source_values'])) in x['corrupt_sources'],
                axis=1
            )
        )
    )
    .drop(columns=['corrupt_sources'])
)

In [14]:
(
    source_weighting_correctness
    .loc[lambda x: (x['corruption_type'] == "original") & (x['method'] == "lap_4.0_test")]  
    .loc[lambda x: x['source_values'] == 99]
)

Unnamed: 0,dataset,corruption_type,method,run,epoch,source_weights,source_values,corrupt_source
12,mnist_and_cifar,original,lap_4.0_test,0,12,0.0,99,False
25,mnist_and_cifar,original,lap_4.0_test,1,22,0.0,99,False
38,mnist_and_cifar,original,lap_4.0_test,2,21,0.0,99,False
51,mnist_and_cifar,original,lap_4.0_test,3,19,0.0,99,False
64,mnist_and_cifar,original,lap_4.0_test,4,24,0.0,99,False


In [15]:
(
    source_weighting_correctness
    .loc[lambda x: (x['corruption_type'] == "random_label") & (x['method'] == "lap_0.1_test")]  
    .loc[lambda x: x['source_values'] == 99]
)

Unnamed: 0,dataset,corruption_type,method,run,epoch,source_weights,source_values,corrupt_source
66,mnist_and_cifar,random_label,lap_0.1_test,0,10,2551.0,99,False
79,mnist_and_cifar,random_label,lap_0.1_test,1,25,0.0,99,False
92,mnist_and_cifar,random_label,lap_0.1_test,2,24,0.0,99,False
105,mnist_and_cifar,random_label,lap_0.1_test,3,18,0.0,99,False
118,mnist_and_cifar,random_label,lap_0.1_test,4,22,60.0,99,False


In [16]:
(
    accuracy_results_df
    .loc[lambda x: (x['corruption_type'] == "random_label") & (x['method'] == "lap_0.1_validation")]  
    .loc[lambda x: x['metric'] == 'accuracy']
    .assign(value = lambda x: x['value']*100)
    .groupby(['corruption_type', 'dataset', 'method', 'run'])
    ['value']
    .max()
    .reset_index()
    
)

Unnamed: 0,corruption_type,dataset,method,run,value
0,random_label,mnist_and_cifar,lap_0.1_validation,0,23.5
1,random_label,mnist_and_cifar,lap_0.1_validation,1,25.457143
2,random_label,mnist_and_cifar,lap_0.1_validation,2,25.185714
3,random_label,mnist_and_cifar,lap_0.1_validation,3,25.528571
4,random_label,mnist_and_cifar,lap_0.1_validation,4,24.385714
