# CIFAR-10 with Different Noise Levels

In [1]:
# Directory containing the JSON result files that the loading cell reads.
results_path = "./outputs/cifar_different_noise_results/"
# Presumably the directory where generated graphs are saved; it is not
# referenced by the cells shown in this notebook -- TODO confirm it is needed.
output_path = "./outputs/graphs/"

In [2]:
import json
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os

## Getting results

In [3]:
# Load every JSON results file found in `results_path`.
#   results_list[i]      -> parsed content of the i-th file
#   corruption_levels[i] -> corruption level parsed from that file's name
results_list = []
corruption_levels = []

# os.listdir returns entries in arbitrary order; sorting makes the load order
# (and therefore which file wins if two files provide the same entry when the
# results are merged) reproducible across machines and runs.
# Matching on ".json" (with the dot) avoids picking up names that merely end
# in the letters "json", e.g. "*.ndjson".
for file_name in sorted(
    name for name in os.listdir(results_path) if name.endswith(".json")
):
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(os.path.join(results_path, file_name), encoding="utf-8") as results_file:
        results_list.append(json.load(results_file))

    # The corruption level is the second-to-last "_"-separated token of the
    # file name once the ".json" suffix has been stripped.
    corruption_level = float(file_name.split(".json")[0].split("_")[-2])
    corruption_levels.append(corruption_level)

In [4]:
# Merge all loaded files into a single nested mapping:
#   results[corruption_level][dataset][n_corrupt][run][depression]
#       -> list of per-epoch records (metrics dict plus an integer "epoch")
results = {}

for results_dict, c_level in zip(results_list, corruption_levels):
    level_entry = results.setdefault(c_level, {})

    for dataset, dataset_payload in results_dict.items():
        dataset_entry = level_entry.setdefault(dataset, {})

        for nc, nc_payload in dataset_payload.items():
            nc_entry = dataset_entry.setdefault(nc, {})

            for run, run_payload in nc_payload.items():
                run_entry = nc_entry.setdefault(run, {})

                for depression, per_epoch in run_payload.items():
                    # Each key is an epoch number stored as a string, except
                    # the special 'corrupt_sources' key, which is skipped.
                    # A later file with the same keys replaces this list.
                    run_entry[depression] = [
                        dict(epoch=int(epoch), **metrics)
                        for epoch, metrics in per_epoch.items()
                        if epoch != 'corrupt_sources'
                    ]

In [5]:
# Flatten the nested `results` mapping into one long DataFrame: one row per
# epoch record, tagged with the keys of every nesting level it came from.
run_frames = []

for c_level, level_entry in results.items():
    for dataset, dataset_entry in level_entry.items():
        for nc, nc_entry in dataset_entry.items():
            for run, run_entry in nc_entry.items():
                for depression, epoch_records in run_entry.items():
                    frame = pd.json_normalize(epoch_records)
                    frame = frame.assign(
                        dataset=dataset,
                        n_corrupt_sources=nc,
                        run=run,
                        corruption_level=c_level,
                        depression=depression,
                    )
                    # Shift the stored epoch number up by one (presumably
                    # zero-based -> one-based; confirm against the training code).
                    frame = frame.assign(epoch=frame['epoch'] + 1)
                    run_frames.append(frame)

# Row labels are kept as produced by each per-run frame (no re-indexing).
results_df = pd.concat(run_frames)

In [6]:
# Summary table: for every (number of corrupt sources, corruption level)
# setting, the mean and standard deviation across runs of each run's best
# test top-1 accuracy, with one column pair per depression value.
(
    results_df
    # Keep only settings with fewer than 8 corrupt sources; the column is
    # cast to int for the comparison.
    .loc[lambda frame: frame['n_corrupt_sources'].astype(int) < 8]
    # Best accuracy reached over all epochs of each individual run.
    .groupby(["corruption_level", "n_corrupt_sources", "depression", "run"])
    ['test_top1acc']
    .max()
    # One column per depression value.
    .unstack(level="depression")
    .groupby(['n_corrupt_sources', 'corruption_level'])
    .agg(['mean', 'std'])
)

Unnamed: 0_level_0,depression,false,false,true,true
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
n_corrupt_sources,corruption_level,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2,0.25,0.856983,0.003601,0.864517,0.003387
2,0.5,0.842067,0.004602,0.8556,0.00175
2,0.75,0.826233,0.003126,0.850583,0.002655
2,1.0,0.816833,0.004175,0.842633,0.001851
4,0.25,0.842467,0.006273,0.843217,0.004622
4,0.5,0.81665,0.004207,0.826867,0.002813
4,0.75,0.794517,0.004919,0.81045,0.005241
4,1.0,0.763783,0.007495,0.80375,0.003822
6,0.25,0.82695,0.002504,0.827117,0.003573
6,0.5,0.794583,0.002574,0.798217,0.005608


In [7]:
# Build the table of relative accuracy change from enabling depression.
#
# For every run, the best test top-1 accuracy with depression ('true') is
# compared with the best accuracy without it ('false') as a percentage of the
# latter. These per-run percentages are then summarised as "mean\% ± std" for
# each (number of corrupted sources, noise level) pair, with noise levels
# laid out as columns.
results_latex = (
    results_df
    # Best accuracy reached over all epochs of each individual run.
    .groupby(["corruption_level", "n_corrupt_sources", "depression", "run"])
    [['test_top1acc']]
    .max()
    .reset_index()
    # Keep only settings with fewer than 8 corrupt sources; the column is
    # cast to int for the comparison.
    .loc[
        lambda df: df['n_corrupt_sources'].astype(int) < 8
    ]
    .set_index(
        ["corruption_level", "n_corrupt_sources", "depression", "run"]
    )
    # One column per depression value so the two can be compared run by run.
    .unstack(level="depression")
    ['test_top1acc']
    .assign(
        percentage_difference=
            lambda x: (x['true'] - x['false'])
            /x['false']*100
    )
    .reset_index()
    .rename_axis(index=None, columns=None)
    .rename(
        columns={
            'true': 'Depression Enabled',
            'false': 'Depression Disabled',
            'corruption_level': 'Noise Level',
            'n_corrupt_sources': 'Number of Corrupted Sources',
        }
    )
    [
        [
            'Noise Level', 
            'Number of Corrupted Sources', 
            'percentage_difference', 
        ]
    ]
    .groupby(
        [
            'Number of Corrupted Sources', 
            'Noise Level', 
        ]
    )
    .agg(['mean', 'std'])
    .assign(result = (
        lambda x: 
        x['percentage_difference']['mean'].map('{:.2f}'.format)
        # Raw string: "\%" is not a valid escape sequence in a normal string
        # literal and triggers a warning on recent Python versions. The
        # resulting text (backslash + percent sign) is unchanged.
        + r'\%'
        + ' ± ' 
        + x['percentage_difference']['std'].map('{:.2f}'.format)
    ))
    ['result']
    # Cells are already formatted strings, so no numeric rounding follows.
    .unstack(level='Noise Level')
)

In [8]:
# Show the formatted table as the cell output before it is exported as LaTeX.
results_latex

Noise Level,0.25,0.50,0.75,1.00
Number of Corrupted Sources,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,0.88\% ± 0.19,1.61\% ± 0.60,2.95\% ± 0.64,3.16\% ± 0.51
4,0.09\% ± 0.71,1.25\% ± 0.58,2.01\% ± 0.96,5.24\% ± 1.24
6,0.02\% ± 0.43,0.46\% ± 0.50,3.74\% ± 0.80,11.61\% ± 1.66


In [9]:
# Print the table as LaTeX source with the noise levels shown as percentages.
#
# The column labels are raw strings: "\%" is not a valid escape sequence in a
# normal string literal and triggers a warning on recent Python versions. The
# emitted text (backslash + percent sign) is unchanged.
#
# NOTE(review): index=False omits the "Number of Corrupted Sources" row
# labels from the LaTeX output -- confirm that this is intended.
print(
    (
        results_latex
        .rename_axis(index=None, columns=None)
        .rename(
            columns={
                0.25: r'25\%',
                0.5: r'50\%',
                0.75: r'75\%',
                1: r'100\%',
            }
        )
    )
    .to_latex(index=False)
)

\begin{tabular}{llll}
\toprule
25\% & 50\% & 75\% & 100\% \\
\midrule
0.88\% ± 0.19 & 1.61\% ± 0.60 & 2.95\% ± 0.64 & 3.16\% ± 0.51 \\
0.09\% ± 0.71 & 1.25\% ± 0.58 & 2.01\% ± 0.96 & 5.24\% ± 1.24 \\
0.02\% ± 0.43 & 0.46\% ± 0.50 & 3.74\% ± 0.80 & 11.61\% ± 1.66 \\
\bottomrule
\end{tabular}

