# Synthetic Noise with PresNet

In [20]:
# Presumably the directory where figures are saved; it is not referenced in the cells shown here.
output_path = './outputs/graphs/'
# Root directory of the result JSON files. LAP results are read from this directory itself,
# baseline results from its "baseline/rrl" subdirectory.
results_path = './outputs/presnet_results/'

In [21]:
import os
import json
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

## Loading Baseline Results:

In [22]:
# Load every baseline (RRL) result file found in <results_path>/baseline/rrl.
baseline_dir = os.path.join(results_path, "baseline", "rrl")

results_dicts = []

# os.listdir returns entries in arbitrary order. Sorting keeps the order of
# results_dicts reproducible across machines and re-runs; that order decides
# which run id a run receives when ids collide during the merge.
for filename in sorted(os.listdir(baseline_dir)):
    if filename.endswith('.json'):
        with open(os.path.join(baseline_dir, filename)) as f:
            results_dicts.append(json.load(f))

In [23]:
# Accumulator for the merged baseline runs, filled as results[dataset][corruption_type][run].
results = {}

In [24]:
# Merge the per-file baseline results into one nested dict:
#     results[dataset][corruption_type][run] -> the record stored for that run
# The dict is rebuilt from scratch so that re-running this cell cannot append
# every run a second time under fresh run ids (which would leave the means
# unchanged but artificially shrink the reported standard deviations).
results = {}

for results_dict in results_dicts:
    for dataset, corruptions in results_dict.items():
        merged_dataset = results.setdefault(dataset, {})
        for corruption_type, runs in corruptions.items():
            merged_runs = merged_dataset.setdefault(corruption_type, {})
            for run, record in runs.items():
                run_id = int(run)
                # Different files may reuse the same run id; give a clashing
                # run the next unused id instead of overwriting the earlier one.
                if run_id in merged_runs:
                    run_id = max(merged_runs) + 1
                merged_runs[run_id] = record

In [25]:
# Flatten the merged baseline results into one long frame with one row per
# entry of each run's 'test_acc' list, tagged with dataset / corruption / run.
# NOTE: this reads `results` as built for the baseline; the LAP section later
# rebinds the same name, so this cell must run before that section.
baseline_frames = []

for dataset, by_corruption in results.items():
    for corruption_type, by_run in by_corruption.items():
        for run, run_record in by_run.items():
            epoch_frame = pd.json_normalize(run_record['test_acc'])
            epoch_frame = epoch_frame.assign(
                dataset=dataset,
                corruption_type=corruption_type,
                run=run,
            )
            # Stored epoch values are shifted by 2 for the baseline
            # (presumably to align with the LAP epoch numbering - TODO confirm).
            baseline_frames.append(
                epoch_frame.assign(epoch=epoch_frame['epoch'] + 2)
            )

results_baseline = pd.concat(baseline_frames)

## Getting LAP results

In [26]:
# Load every LAP result file stored directly in results_path.
# Sub-directories such as "baseline" are ignored by the '.json' suffix filter.
results_dicts = []

# os.listdir returns entries in arbitrary order. Sorting keeps the order of
# results_dicts reproducible across machines and re-runs; that order decides
# which run id a run receives when ids collide during the merge.
for filename in sorted(os.listdir(results_path)):
    if filename.endswith('.json'):
        with open(os.path.join(results_path, filename)) as f:
            results_dicts.append(json.load(f))

In [27]:
# Accumulator for the merged LAP runs, filled as results[dataset][corruption_type][run].
# This rebinds the name previously used for the baseline results.
results = {}

In [28]:
# Merge the per-file LAP results into one nested dict:
#     results[dataset][corruption_type][run] -> list of per-epoch metric dicts
# The dict is rebuilt from scratch so that re-running this cell cannot append
# every run a second time under fresh run ids (which would leave the means
# unchanged but artificially shrink the reported standard deviations).
results = {}

for results_dict in results_dicts:
    for dataset, corruptions in results_dict.items():
        merged_dataset = results.setdefault(dataset, {})
        for corruption_type, runs in corruptions.items():
            merged_runs = merged_dataset.setdefault(corruption_type, {})
            for run, epochs in runs.items():
                run_id = int(run)
                # Different files may reuse the same run id; give a clashing
                # run the next unused id instead of overwriting the earlier one.
                if run_id in merged_runs:
                    run_id = max(merged_runs) + 1
                # Each run maps epoch keys to metric dicts. The
                # 'corrupt_sources' key sits alongside the epoch keys and is
                # skipped; every other key is converted to an integer epoch.
                merged_runs[run_id] = [
                    dict(epoch=int(epoch), **metrics)
                    for epoch, metrics in epochs.items()
                    if epoch != 'corrupt_sources'
                ]

In [29]:
# Flatten the merged LAP results into one long frame with one row per
# (run, epoch) record, tagged with dataset / corruption / run.
lap_frames = []

for dataset, by_corruption in results.items():
    for corruption_type, by_run in by_corruption.items():
        for run, epoch_records in by_run.items():
            epoch_frame = pd.json_normalize(epoch_records)
            epoch_frame = epoch_frame.assign(
                dataset=dataset,
                corruption_type=corruption_type,
                run=run,
            )
            # Stored epoch values are shifted by 1 for LAP
            # (presumably converting 0-based to 1-based epochs - TODO confirm).
            lap_frames.append(
                epoch_frame.assign(epoch=epoch_frame['epoch'] + 1)
            )

results_lap = pd.concat(lap_frames)

In [30]:
# Stack the per-epoch LAP and baseline frames into one table with a 'method'
# column. The baseline accuracy column 'value.top1' is renamed to
# 'test_top1acc' so both methods share the same column name.
lap_epochs = results_lap.reset_index().assign(method='lap')

baseline_epochs = (
    results_baseline
    .reset_index()
    .rename(columns={'value.top1': 'test_top1acc'})
    .assign(method='baseline')
)

results_all_epochs = pd.concat([lap_epochs, baseline_epochs])

In [31]:
# One row per (dataset, corruption type, run, method), holding the highest
# test accuracy that run reached over all of its epochs.
run_keys = ['dataset', 'corruption_type', 'run']

best_lap = results_lap.groupby(run_keys).agg({'test_top1acc': 'max'})
best_lap = best_lap.sort_values('test_top1acc', ascending=False)
best_lap = best_lap.reset_index().assign(method='lap')

best_baseline = results_baseline.groupby(run_keys).agg({'value.top1': 'max'})
best_baseline = best_baseline.reset_index()
# Use the same accuracy column name as the LAP frame.
best_baseline = best_baseline.rename(columns={'value.top1': 'test_top1acc'})
best_baseline = best_baseline.assign(method='baseline')

results_all = pd.concat([best_lap, best_baseline])

In [32]:
def bold_max_value(x, model_names):
    r"""Wrap the best result(s) of one table row in LaTeX ``\textbf{}``.

    The last ``len(model_names)`` entries of ``x`` are expected to be strings
    of the form ``"mean ± std"``. The entry with the highest mean is bolded,
    together with every other entry whose mean lies within one standard
    deviation (the std of the best entry) of that highest mean.

    Parameters
    ----------
    x : pandas.Series
        One row of the results table; the model columns must come last.
    model_names : list
        Names of the model columns. Only its length is used, to locate the
        trailing model columns by position.

    Returns
    -------
    pandas.Series
        A copy of ``x`` with the selected entries wrapped in ``\textbf{...}``.
        The input row is not modified.

    Notes
    -----
    * Entries that are not strings (e.g. NaN for a combination with no
      results) are skipped rather than raising.
    * A std of ``nan`` (only one run available) is treated as ``0`` so the
      best entry is still bolded.
    * A string entry that is not of the form ``"a ± b"`` raises ``ValueError``.
    """
    import math  # local import: the notebook's import cell does not load math

    def _parse(cell):
        """Return (mean, std) for a 'mean ± std' string, or None if the cell is missing."""
        if not isinstance(cell, str):
            return None
        mean, std = map(float, cell.split("±"))
        if math.isnan(mean):
            return None
        return mean, (0.0 if math.isnan(std) else std)

    x = x.copy()
    len_cols = x.shape[0]
    first_model = len_cols - len(model_names)

    # Parse each trailing model column once, keyed by its position in the row.
    parsed = {i: _parse(x.iloc[i]) for i in range(first_model, len_cols)}
    scored = [stats for stats in parsed.values() if stats is not None]
    if not scored:
        # Nothing to compare in this row.
        return x

    # max() keeps the first of several equal means, so ties resolve to the
    # left-most column.
    max_val, max_std = max(scored, key=lambda stats: stats[0])

    for i, stats in parsed.items():
        if stats is None:
            continue
        val = stats[0]
        if max_val - max_std <= val <= max_val + max_std:
            x.iloc[i] = '\\textbf{' + x.iloc[i] + '}'

    return x

# 'corruption_type' codes -> row labels shown in the table.
experiment_dict = dict(
    no_c='Original Data',
    c_cs='Chunk Shuffle',
    c_rl='Random Label',
    c_lbs='Batch Label Shuffle',
    c_lbf='Batch Label Flip',
    c_ns='Added Noise',
    c_no='Replace With Noise',
)

# 'dataset' keys -> display names.
dataset_dict = dict(
    cifar10='CIFAR-10',
    cifar100='CIFAR-100',
    fmnist='F-MNIST',
)

# Model columns of the table, left to right.
model_names = ['RRL', 'RRL + LAP']

# Row order of the table: the display labels in the order they are listed in
# experiment_dict.
corruption_type_order = list(experiment_dict.values())

# Datasets to include in the table, in order.
dataset_order = ['CIFAR-10']


# Build the presentation table: one row per corruption type, one column per
# model, each cell "mean ± std" of the per-run best accuracy, with the best
# model(s) of every row wrapped in \textbf{}.
results_formatted = (
    results_all
    # Swap the dataset / corruption codes for their display names.
    .replace(
        {
            'dataset': dataset_dict,
            'corruption_type': experiment_dict
        }
    )
    # Map the internal method tags to the column names used in the table.
    .assign(
        method=lambda x: x['method'].map({'lap': 'RRL + LAP', 'baseline': 'RRL'})
    )
    # The run id is not a grouping key below and must not be aggregated.
    .drop(
        columns=['run']
    )
    .rename(
        columns={
            'dataset': 'Dataset',
            'corruption_type': 'Corruption Type',
            'test_top1acc': 'Accuracy (%)',
            'method': 'Model Name'
        }
    )
    # Scale by 100 (assumes accuracies are stored as fractions in [0, 1] - TODO confirm).
    .assign(
        **{
            'Accuracy (%)': lambda x: x['Accuracy (%)']*100
        }
    )
    .loc[lambda x: x["Model Name"].isin(model_names)]
    # Mean and standard deviation over runs for each table cell.
    .groupby(['Dataset', 'Corruption Type', 'Model Name'])
    .agg(['mean', 'std'])
    # Format each cell as "mean ± std" with two decimals.
    .assign(result = (
        lambda x: 
        x['Accuracy (%)']['mean'].map('{:.2f}'.format)
        + ' ± ' 
        + x['Accuracy (%)']['std'].map('{:.2f}'.format)
    ))
    # Keep only the formatted strings and pivot the models into columns.
    ['result']
    .unstack(level='Model Name')
    # Fix the row order; only datasets listed in dataset_order are kept, and a
    # (dataset, corruption) pair with no results becomes a row of NaN.
    .reindex(
        [
            (ds, ct)  for ds in dataset_order for ct in corruption_type_order
        ]
    )
    .reset_index()
    # Clear the axis names left over from the groupby / unstack.
    .rename_axis(index=None, columns=None)
    # Order the columns so the model columns come last, as bold_max_value expects.
    [
        ['Dataset', 'Corruption Type'] + model_names
    ]
    # Bold the best model(s) row by row.
    .apply(
        bold_max_value,
        model_names = model_names,
        axis=1
    )
    .drop(
        columns=['Dataset']
    )
)
# Last expression of the cell: display the finished table.
results_formatted

Unnamed: 0,Corruption Type,RRL,RRL + LAP
0,Original Data,\textbf{87.67 ± 0.37},\textbf{87.54 ± 0.22}
1,Chunk Shuffle,82.93 ± 0.29,\textbf{84.27 ± 0.31}
2,Random Label,76.04 ± 1.43,\textbf{80.31 ± 0.58}
3,Batch Label Shuffle,77.66 ± 0.71,\textbf{80.84 ± 0.51}
4,Batch Label Flip,78.81 ± 0.66,\textbf{82.02 ± 0.45}
5,Added Noise,78.51 ± 0.74,\textbf{81.70 ± 0.48}
6,Replace With Noise,\textbf{80.05 ± 0.65},79.00 ± 0.75


In [33]:
# Render the formatted table as LaTeX source (row index omitted) and print the
# raw text so it can be copied into a document.
latex_table = results_formatted.to_latex(index=False)
print(latex_table)

\begin{tabular}{lll}
\toprule
Corruption Type & RRL & RRL + LAP \\
\midrule
Original Data & \textbf{87.67 ± 0.37} & \textbf{87.54 ± 0.22} \\
Chunk Shuffle & 82.93 ± 0.29 & \textbf{84.27 ± 0.31} \\
Random Label & 76.04 ± 1.43 & \textbf{80.31 ± 0.58} \\
Batch Label Shuffle & 77.66 ± 0.71 & \textbf{80.84 ± 0.51} \\
Batch Label Flip & 78.81 ± 0.66 & \textbf{82.02 ± 0.45} \\
Added Noise & 78.51 ± 0.74 & \textbf{81.70 ± 0.48} \\
Replace With Noise & \textbf{80.05 ± 0.65} & 79.00 ± 0.75 \\
\bottomrule
\end{tabular}

