# Synthetic results:

In [1]:
# Base directories for this notebook, given relative to the working directory.
# `output_path` is not read by the cells visible here; presumably figures are
# written there further down — TODO confirm.
output_path = './outputs/graphs/'
# Root folder that the per-experiment CSVs and the baseline results are read from.
results_path = './outputs/synthetic_results/'

In [2]:
import pandas as pd
import graph_code
import os
import functools
import json

## CIFAR-10:

In [3]:
# Display names for the LAP runs, keyed by their `lap_n_*` tag. For this
# dataset the un-suffixed name 'LAP Model (Ours)' belongs to `lap_n_25`.
lap_n_dict = {
    'lap_n_25': 'LAP Model (Ours)',
    'lap_n_20': 'LAP Model (Ours), LAPN 20',
    'lap_n_50': 'LAP Model (Ours), LAPN 50',
}

# Run-name -> model-name mapper with the labels above pre-bound.
run_model_map = functools.partial(
    graph_code.run_model_map,
    lap_n_dict=lap_n_dict,
)

# Corruption label (with the line breaks used for display) -> result CSVs,
# relative to `results_path`. Every corrupted setting has a plain file and an
# `_srb` companion file; the uncorrupted setting has a single file.
experiment_dict = {
    'Original\nData': ['Conv3Net-no_c-drstd-results.csv'],
    'Chunk\nShuffle': ['Conv3Net-c_cs-drstd-results.csv', 'Conv3Net-c_cs_srb-drstd-results.csv'],
    'Random\nLabel': ['Conv3Net-c_rl-drstd-results.csv', 'Conv3Net-c_rl_srb-drstd-results.csv'],
    'Batch\nLabel\nShuffle': ['Conv3Net-c_lbs-drstd-results.csv', 'Conv3Net-c_lbs_srb-drstd-results.csv'],
    'Batch\nLabel\nFlip': ['Conv3Net-c_lbf-drstd-results.csv', 'Conv3Net-c_lbf_srb-drstd-results.csv'],
    'Added\nNoise': ['Conv3Net-c_ns-drstd-results.csv', 'Conv3Net-c_ns_srb-drstd-results.csv'],
    'Replace\nWith\nNoise': ['Conv3Net-c_no-drstd-results.csv', 'Conv3Net-c_no_srb-drstd-results.csv'],
}

In [4]:
# Read every CIFAR-10 results CSV and tag its rows with the corruption label
# it belongs to. Frames are collected in a list and concatenated once, rather
# than growing `results` with `pd.concat` on every file (quadratic copying,
# and concatenating onto an empty DataFrame is deprecated in recent pandas).
results_frames = []
for experiment, files in experiment_dict.items():
    for file in files:
        results_temp = pd.read_csv(os.path.join(results_path, file))
        results_temp['Corruption Type'] = experiment
        results_frames.append(results_temp)

results = pd.concat(results_frames)

# Expand the run names and attach the model label once, over the complete
# frame, instead of re-running both on the partial frame after every
# corruption type.
# NOTE(review): assumes `graph_code.expand_run_names` derives its columns
# row-wise from the existing data, so one call over the full frame gives the
# same result as the repeated calls — confirm against graph_code.
results = graph_code.expand_run_names(results)
results['Model Name'] = results['Run'].map(run_model_map)

In [5]:
## federated learning results
# Point the project's TensorBoard loader at the ARFL baseline logs for CIFAR-10.
# NOTE(review): the meaning of `level=1` is defined in graph_code — confirm there.
fed_arfl_folder = os.path.join(results_path, 'baseline', 'arfl', 'cifar10')
tbl = graph_code.TensorboardLoad(fed_arfl_folder, level=1)

In [6]:
# Build the Fed ARFL baseline rows for CIFAR-10 as a single chain:
# take element [1] of the loader output for the accuracy tag, drop the
# unused path-level columns, rename to the column names used by `results`,
# expand the run names, keep only the rows logged at step 9024 (presumably
# the final evaluation step — TODO confirm) and label corruption type/model.
fed_arfl_results = (
    tbl.scalars(tags='weighted_average_test_results_accuracy')[1]
    .drop(columns=['run', 'level_0', 'level_1', 'level_2', 'level_3', 'level_4'])
    .rename(columns={'level_6': 'Run', 'value': 'Value', 'step': 'Step', 'tag': 'Metric'})
    .pipe(graph_code.expand_run_names)
    .query("Step == 9024")
    [['Run', 'Value', 'Seed']]
    .assign(
        **{
            'Corruption Type': lambda frame: frame['Run'].map(graph_code.run_corrupt_map),
            'Model Name': 'Fed ARFL',
        }
    )
)

Loading Files:   1%|▏▏▏▏▏▏▏▏▏▏| 1/71 [00:00<00:00, 316.96it/s]

Loading Files: 100%|▉▉▉▉▉▉▉▉▉▉| 71/71 [00:00<00:00, 81.52it/s] 


In [7]:
# Select the CIFAR-10 runs that go into the table (the expanded run fields are
# compared as strings), append the Fed ARFL baseline rows, and express the
# metric as a percentage.
data_plot_cifar10 = pd.concat(
    [
        results.loc[
            results['Metric'].isin(['Accuracy'])
            & (results['Number of Epochs'] == '25')
            & results['Number of Corrupt Sources'].isin(['0', '4'])
            & results['LAP N'].isin(['20', '25', '50'])
            & results['Depression Strength'].isin(['0.0', '1.0'])
            & results['Strictness'].isin(['0.8'])
        ],
        fed_arfl_results,
    ]
).assign(**{'Accuracy (%)': lambda frame: 100 * frame['Value']})

## CIFAR-100:

In [8]:
# Display names for the LAP runs, keyed by their `lap_n_*` tag. For this
# dataset the un-suffixed name 'LAP Model (Ours)' belongs to `lap_n_25`.
lap_n_dict = {
    'lap_n_25': 'LAP Model (Ours)',
    'lap_n_20': 'LAP Model (Ours), LAPN 20',
    'lap_n_50': 'LAP Model (Ours), LAPN 50',
}

# Run-name -> model-name mapper with the labels above pre-bound.
run_model_map = functools.partial(
    graph_code.run_model_map,
    lap_n_dict=lap_n_dict,
)

# Corruption label (with the line breaks used for display) -> result CSVs,
# relative to `results_path`. Every corrupted setting has a plain file and an
# `_srb` companion file; the uncorrupted setting has a single file.
experiment_dict = {
    'Original\nData': ['Conv3Net_100-no_c-drstd-results.csv'],
    'Chunk\nShuffle': ['Conv3Net_100-c_cs-drstd-results.csv', 'Conv3Net_100-c_cs_srb-drstd-results.csv'],
    'Random\nLabel': ['Conv3Net_100-c_rl-drstd-results.csv', 'Conv3Net_100-c_rl_srb-drstd-results.csv'],
    'Batch\nLabel\nShuffle': ['Conv3Net_100-c_lbs-drstd-results.csv', 'Conv3Net_100-c_lbs_srb-drstd-results.csv'],
    'Batch\nLabel\nFlip': ['Conv3Net_100-c_lbf-drstd-results.csv', 'Conv3Net_100-c_lbf_srb-drstd-results.csv'],
    'Added\nNoise': ['Conv3Net_100-c_ns-drstd-results.csv', 'Conv3Net_100-c_ns_srb-drstd-results.csv'],
    'Replace\nWith\nNoise': ['Conv3Net_100-c_no-drstd-results.csv', 'Conv3Net_100-c_no_srb-drstd-results.csv'],
}

In [9]:
# Read every CIFAR-100 results CSV and tag its rows with the corruption label
# it belongs to. Frames are collected in a list and concatenated once, rather
# than growing `results` with `pd.concat` on every file (quadratic copying,
# and concatenating onto an empty DataFrame is deprecated in recent pandas).
results_frames = []
for experiment, files in experiment_dict.items():
    for file in files:
        results_temp = pd.read_csv(os.path.join(results_path, file))
        results_temp['Corruption Type'] = experiment
        results_frames.append(results_temp)

results = pd.concat(results_frames)

# Expand the run names and attach the model label once, over the complete
# frame, instead of re-running both on the partial frame after every
# corruption type.
# NOTE(review): assumes `graph_code.expand_run_names` derives its columns
# row-wise from the existing data, so one call over the full frame gives the
# same result as the repeated calls — confirm against graph_code.
results = graph_code.expand_run_names(results)
results['Model Name'] = results['Run'].map(run_model_map)

In [10]:
## federated learning results
# Point the project's TensorBoard loader at the ARFL baseline logs for CIFAR-100.
# NOTE(review): the meaning of `level=1` is defined in graph_code — confirm there.
fed_arfl_folder = os.path.join(results_path, 'baseline', 'arfl', 'cifar100')
tbl = graph_code.TensorboardLoad(fed_arfl_folder, level=1)

In [11]:
# Build the Fed ARFL baseline rows for CIFAR-100 as a single chain:
# take element [1] of the loader output for the top-5 accuracy tag, drop the
# unused path-level columns, rename to the column names used by `results`,
# expand the run names, keep only the rows logged at step 14664 (presumably
# the final evaluation step — TODO confirm) and label corruption type/model.
fed_arfl_results = (
    tbl.scalars(tags='weighted_average_test_results_top_5_acc')[1]
    .drop(columns=['run', 'level_0', 'level_1', 'level_2', 'level_3', 'level_4'])
    .rename(columns={'level_6': 'Run', 'value': 'Value', 'step': 'Step', 'tag': 'Metric'})
    .pipe(graph_code.expand_run_names)
    .query("Step == 14664")
    [['Run', 'Value', 'Seed']]
    .assign(
        **{
            'Corruption Type': lambda frame: frame['Run'].map(graph_code.run_corrupt_map),
            'Model Name': 'Fed ARFL',
        }
    )
)

Loading Files: 100%|▉▉▉▉▉▉▉▉▉▉| 71/71 [00:00<00:00, 80.47it/s] 


In [12]:
# Select the CIFAR-100 runs that go into the table (the expanded run fields
# are compared as strings), append the Fed ARFL baseline rows, and express the
# top-5 metric as a percentage.
data_plot_cifar100 = pd.concat(
    [
        results.loc[
            results['Metric'].isin(['Top 5 Accuracy'])
            & (results['Number of Epochs'] == '40')
            & results['Number of Corrupt Sources'].isin(['2'])
            & results['LAP N'].isin(['25'])
            & results['Depression Strength'].isin(['0.0', '1.0'])
            & results['Strictness'].isin(['0.8'])
        ],
        fed_arfl_results,
    ]
).assign(**{'Top 5 Accuracy (%)': lambda frame: 100 * frame['Value']})

## F-MNIST:

In [13]:
# Display names for the LAP runs, keyed by their `lap_n_*` tag. Here the
# un-suffixed name 'LAP Model (Ours)' is attached to `lap_n_50`, and
# `lap_n_25` gets an explicit 'LAPN 25' suffix.
lap_n_dict = {
    'lap_n_25': 'LAP Model (Ours), LAPN 25',
    'lap_n_20': 'LAP Model (Ours), LAPN 20',
    'lap_n_50': 'LAP Model (Ours)',
}

# Run-name -> model-name mapper with the labels above pre-bound.
run_model_map = functools.partial(
    graph_code.run_model_map,
    lap_n_dict=lap_n_dict,
)

# Corruption label (with the line breaks used for display) -> result CSVs,
# relative to `results_path`. Every corrupted setting has a plain file and an
# `_srb` companion file; the uncorrupted setting has a single file.
experiment_dict = {
    'Original\nData': [
        'MLP-no_c-drstd-results.csv',
    ],
    'Chunk\nShuffle': [
        'MLP-c_cs-drstd-results.csv', 'MLP-c_cs_srb-drstd-results.csv',
    ],
    'Random\nLabel': [
        'MLP-c_rl-drstd-results.csv', 'MLP-c_rl_srb-drstd-results.csv',
    ],
    'Batch\nLabel\nShuffle': [
        'MLP-c_lbs-drstd-results.csv', 'MLP-c_lbs_srb-drstd-results.csv',
    ],
    'Batch\nLabel\nFlip': [
        'MLP-c_lbf-drstd-results.csv', 'MLP-c_lbf_srb-drstd-results.csv',
    ],
    'Added\nNoise': [
        'MLP-c_ns-drstd-results.csv', 'MLP-c_ns_srb-drstd-results.csv',
    ],
    'Replace\nWith\nNoise': [
        'MLP-c_no-drstd-results.csv', 'MLP-c_no_srb-drstd-results.csv',
    ],
}

# Read every F-MNIST results CSV and tag its rows with the corruption label
# it belongs to. Frames are collected in a list and concatenated once, rather
# than growing `results` with `pd.concat` on every file (quadratic copying,
# and concatenating onto an empty DataFrame is deprecated in recent pandas).
results_frames = []
for experiment, files in experiment_dict.items():
    for file in files:
        results_temp = pd.read_csv(os.path.join(results_path, file))
        results_temp['Corruption Type'] = experiment
        results_frames.append(results_temp)

results = pd.concat(results_frames)

# Expand the run names and attach the model label once, over the complete
# frame, instead of re-running both on the partial frame after every
# corruption type.
# NOTE(review): assumes `graph_code.expand_run_names` derives its columns
# row-wise from the existing data, so one call over the full frame gives the
# same result as the repeated calls — confirm against graph_code.
results = graph_code.expand_run_names(results)
results['Model Name'] = results['Run'].map(run_model_map)

In [14]:
## federated learning results
# Point the project's TensorBoard loader at the ARFL baseline logs for F-MNIST.
# NOTE(review): the meaning of `level=1` is defined in graph_code — confirm there.
fed_arfl_folder = os.path.join(results_path, 'baseline', 'arfl', 'fmnist')
tbl = graph_code.TensorboardLoad(fed_arfl_folder, level=1)

In [15]:
# Build the Fed ARFL baseline rows for F-MNIST as a single chain:
# take element [1] of the loader output for the accuracy tag, drop the
# unused path-level columns, rename to the column names used by `results`,
# expand the run names, keep only the rows logged at step 14664 and label
# corruption type/model.
# NOTE(review): 14664 is the same step used for the CIFAR-100 baseline —
# confirm it is also the intended final step for F-MNIST.
fed_arfl_results = (
    tbl.scalars(tags='weighted_average_test_results_accuracy')[1]
    .drop(columns=['run', 'level_0', 'level_1', 'level_2', 'level_3', 'level_4'])
    .rename(columns={'level_6': 'Run', 'value': 'Value', 'step': 'Step', 'tag': 'Metric'})
    .pipe(graph_code.expand_run_names)
    .query("Step == 14664")
    [['Run', 'Value', 'Seed']]
    .assign(
        **{
            'Corruption Type': lambda frame: frame['Run'].map(graph_code.run_corrupt_map),
            'Model Name': 'Fed ARFL',
        }
    )
)

Loading Files: 100%|▉▉▉▉▉▉▉▉▉▉| 71/71 [00:00<00:00, 166.14it/s]


In [16]:
# Select the F-MNIST runs that go into the table (the expanded run fields are
# compared as strings), append the Fed ARFL baseline rows, and express the
# metric as a percentage.
data_plot_fmnist = pd.concat(
    [
        results.loc[
            results['Metric'].isin(['Accuracy'])
            & (results['Number of Epochs'] == '40')
            & results['Number of Corrupt Sources'].isin(['0', '6'])
            & results['LAP N'].isin(['20', '25', '50'])
            & results['Depression Strength'].isin(['0.0', '1.0'])
            & results['Strictness'].isin(['0.8'])
        ],
        fed_arfl_results,
    ]
).assign(**{'Accuracy (%)': lambda frame: 100 * frame['Value']})

## All Together:

In [17]:
# Tag each per-dataset frame with its dataset name. CIFAR-100 is reported with
# top-5 accuracy, so that value is copied into the shared 'Accuracy (%)'
# column used by the combined table.
data_plot_cifar10 = data_plot_cifar10.assign(Dataset='CIFAR-10')
data_plot_cifar100 = data_plot_cifar100.assign(
    Dataset='CIFAR-100',
    **{'Accuracy (%)': lambda frame: frame['Top 5 Accuracy (%)']},
)
data_plot_fmnist = data_plot_fmnist.assign(Dataset='F-MNIST')

In [18]:
# Stack the three per-dataset frames row-wise into one frame.
data_plot = pd.concat(
    [
        data_plot_cifar10,
        data_plot_cifar100,
        data_plot_fmnist,
    ],
    axis=0,
)

In [19]:
# Keep only the columns needed for the summary table.
data_plot = data_plot.loc[
    :,
    ['Dataset', 'Corruption Type', 'Model Name', 'Accuracy (%)'],
]

In [20]:
def load_baseline_results(path):
    """Load a baseline's JSON results into a long-format accuracy table.

    The JSON is read as a two-level mapping: the outer keys become the
    'Dataset' column and the inner keys the 'Corruption Type' column. Every
    stored entry is indexed with ``[0]`` and ``[1]`` (presumably top-1 and
    top-5 accuracy — TODO confirm); element 1 is used for 'cifar100' rows and
    element 0 for all other datasets.

    Parameters
    ----------
    path : str
        Location of the JSON results file.

    Returns
    -------
    pandas.DataFrame
        One row per stored run with the columns 'Dataset' (display name),
        'Corruption Type' (display label) and 'Accuracy (%)' (value * 100).
    """
    # Corruption code used in the JSON -> label used in the plots/tables.
    corruption_labels = {
        'no_c': 'Original\nData',
        'c_cs': 'Chunk\nShuffle',
        'c_rl': 'Random\nLabel',
        'c_lbs': 'Batch\nLabel\nShuffle',
        'c_lbf': 'Batch\nLabel\nFlip',
        'c_ns': 'Added\nNoise',
        'c_no': 'Replace\nWith\nNoise',
    }
    # Dataset key used in the JSON -> display name.
    dataset_labels = {
        'cifar10': 'CIFAR-10',
        'cifar100': 'CIFAR-100',
        'fmnist': 'F-MNIST',
    }

    with open(path, 'r') as handle:
        raw_results = json.load(handle)

    # One frame per dataset, transposed so the corruption codes form the index.
    per_dataset = {
        dataset: pd.DataFrame(runs).T for dataset, runs in raw_results.items()
    }

    # Long format: one row per (dataset, corruption code, run).
    long_results = (
        pd.concat(per_dataset, axis=0)
        .melt(var_name='Run', value_name='Accuracy (%)', ignore_index=False)
        .rename_axis(['Dataset', 'Corruption Type'])
        .reset_index()
    )
    long_results['Corruption Type'] = (
        long_results['Corruption Type'].map(corruption_labels)
    )

    # Pick the reported metric per row while 'Dataset' still holds the raw keys.
    scores = long_results['Accuracy (%)']
    first_metric = scores.map(lambda pair: pair[0])
    second_metric = scores.map(lambda pair: pair[1])
    uses_second_metric = long_results['Dataset'].isin(['cifar100'])
    long_results['Accuracy (%)'] = (
        first_metric.where(~uses_second_metric, second_metric) * 100
    )

    return (
        long_results
        .drop(columns=['Run'])
        .replace({'Dataset': dataset_labels})
    )

In [21]:
# IDPA baseline: read its JSON results and label every row with the model name.
idpa_path = os.path.join(results_path, 'baseline', 'idpa', 'results.json')

idpa_results = load_baseline_results(idpa_path).assign(**{'Model Name': 'IDPA'})

In [22]:
# Co-teaching baseline: read its JSON results and label every row with the
# model name.
coteaching_path = os.path.join(results_path, 'baseline', 'co-teaching', 'results.json')

coteaching_results = load_baseline_results(coteaching_path).assign(
    **{'Model Name': 'Co-teaching'}
)

In [23]:
# Append the IDPA and Co-teaching baseline rows to the combined frame.
data_plot = pd.concat(
    [
        data_plot,
        idpa_results,
        coteaching_results,
    ],
    axis=0,
)

In [24]:
def bold_max_value(x, model_names):
    """Wrap the best model cell of one table row in ``\\textbf{...}``.

    The last ``len(model_names)`` entries of ``x`` are treated as the model
    cells, each formatted as ``"<mean> ± <std>"``. The cell with the largest
    mean is bolded; on ties the first such cell wins.

    The means are converted to numbers before comparing. Taking ``argmax`` of
    the raw strings compares them lexicographically (``"9.50"`` would beat
    ``"100.00"``) and may be rejected outright for object dtype, depending on
    the pandas version.

    Parameters
    ----------
    x : pandas.Series
        One row of the results table; it is copied, not modified.
    model_names : list
        Only its length is used, to locate the trailing model cells.

    Returns
    -------
    pandas.Series
        Copy of ``x`` with the best cell bolded. The copy is returned
        unchanged when ``model_names`` is empty or no mean can be parsed.
    """
    x = x.copy()
    n_models = len(model_names)
    if n_models == 0:
        return x

    first_model_pos = x.shape[0] - n_models
    means = pd.to_numeric(
        x.iloc[first_model_pos:]
        .astype(str)
        .str.replace(" ", "")
        .str.split("±")
        .str[0],
        errors="coerce",  # unparsable or missing cells become NaN and are skipped
    )
    if means.isna().all():
        return x

    # argmax is positional within the model cells; shift it to a position in x.
    idx_bold = first_model_pos + int(means.argmax())
    x.iloc[idx_bold] = '\\textbf{' + x.iloc[idx_bold] + '}'

    return x

# Models shown in the table, in column order. Rows of `data_plot` whose
# 'Model Name' is not listed here are filtered out below.
model_names = [
    'Standard Model',
    'Fed ARFL',
    'IDPA',
    'Co-teaching',
    'LAP Model (Ours)',
]

# Row order of the corruption types within each dataset (labels still carry
# the line breaks used for display).
corruption_type_order = [
    'Original\nData',
    'Chunk\nShuffle',
    'Random\nLabel',
    'Batch\nLabel\nShuffle',
    'Batch\nLabel\nFlip',
    'Added\nNoise',
    'Replace\nWith\nNoise',
]

# Order in which the datasets appear in the table.
dataset_order = [
    'CIFAR-10',
    'CIFAR-100',
    'F-MNIST',
]

# Build the summary table: one row per (dataset, corruption type), one column
# per model, each cell formatted as "mean ± std" with the best model per row
# wrapped in \textbf{}.
full_synthetic_results = (
    data_plot
    # keep only the models that appear in the table
    .loc[lambda x: x["Model Name"].isin(model_names)]
    .groupby(['Dataset', 'Corruption Type', 'Model Name'])
    .agg(['mean', 'std'])
    # format mean and std to two decimals and join them into one string
    .assign(result = (
        lambda x: 
        x['Accuracy (%)']['mean'].map('{:.2f}'.format)
        + ' ± ' 
        + x['Accuracy (%)']['std'].map('{:.2f}'.format)
    ))
    ['result']
    # move the models from the row index into columns
    .unstack(level='Model Name')
    # impose the dataset / corruption-type row order defined above
    .reindex(
        [
            (ds, ct)  for ds in dataset_order for ct in corruption_type_order
        ]
    )
    .reset_index()
    .rename_axis(index=None, columns=None)
    # swap the line-broken labels for single-line ones
    .replace(
        {
            "Corruption Type": {
                'Original\nData': 'Original Data',
                'Chunk\nShuffle': 'Chunk Shuffle',
                'Random\nLabel': 'Random Label',
                'Batch\nLabel\nShuffle': 'Batch Label Shuffle',
                'Batch\nLabel\nFlip': 'Batch Label Flip',
                'Added\nNoise': 'Added Noise',
                'Replace\nWith\nNoise': 'Replace With Noise',
            }
        }
    )
    # fix the column order; bold_max_value relies on the model columns being last
    [
        ['Dataset', 'Corruption Type'] + model_names
    ]
    # bold the best model in every row
    .apply(
        bold_max_value,
        model_names = model_names,
        axis=1
    )
)

# Last expression of the cell: display the finished table.
full_synthetic_results

Unnamed: 0,Dataset,Corruption Type,Standard Model,Fed ARFL,IDPA,Co-teaching,LAP Model (Ours)
0,CIFAR-10,Original Data,\textbf{67.89 ± 1.09},61.32 ± 1.13,64.17 ± 1.92,67.19 ± 0.86,67.82 ± 1.31
1,CIFAR-10,Chunk Shuffle,63.53 ± 1.55,58.01 ± 1.54,58.11 ± 1.30,62.39 ± 0.65,\textbf{64.22 ± 2.06}
2,CIFAR-10,Random Label,57.77 ± 1.52,57.71 ± 2.32,51.02 ± 2.10,54.35 ± 1.48,\textbf{62.74 ± 1.84}
3,CIFAR-10,Batch Label Shuffle,57.65 ± 1.74,59.38 ± 1.25,51.71 ± 1.59,58.66 ± 0.67,\textbf{63.06 ± 1.90}
4,CIFAR-10,Batch Label Flip,51.51 ± 2.43,59.21 ± 1.35,49.58 ± 1.67,51.82 ± 1.41,\textbf{63.21 ± 2.35}
5,CIFAR-10,Added Noise,57.30 ± 1.60,51.57 ± 2.21,53.43 ± 2.14,57.64 ± 0.71,\textbf{59.62 ± 1.91}
6,CIFAR-10,Replace With Noise,61.27 ± 2.66,55.19 ± 1.72,59.22 ± 1.66,\textbf{62.27 ± 1.02},61.52 ± 1.87
7,CIFAR-100,Original Data,\textbf{64.46 ± 0.93},58.25 ± 1.19,58.64 ± 0.96,61.42 ± 0.99,64.46 ± 0.93
8,CIFAR-100,Chunk Shuffle,61.87 ± 1.16,57.12 ± 0.90,53.49 ± 1.22,54.74 ± 2.15,\textbf{62.56 ± 0.61}
9,CIFAR-100,Random Label,55.90 ± 1.11,56.49 ± 1.83,44.03 ± 1.31,45.08 ± 1.61,\textbf{61.26 ± 1.02}


In [25]:
# Export the table as LaTeX source. `escape=False` is passed explicitly so the
# `\textbf{...}` markup added by `bold_max_value` is emitted verbatim; pandas
# releases whose `to_latex` escapes special characters by default would
# otherwise turn the backslash and braces into literal text.
print(full_synthetic_results.to_latex(index=False, escape=False))

\begin{tabular}{lllllll}
\toprule
Dataset & Corruption Type & Standard Model & Fed ARFL & IDPA & Co-teaching & LAP Model (Ours) \\
\midrule
CIFAR-10 & Original Data & \textbf{67.89 ± 1.09} & 61.32 ± 1.13 & 64.17 ± 1.92 & 67.19 ± 0.86 & 67.82 ± 1.31 \\
CIFAR-10 & Chunk Shuffle & 63.53 ± 1.55 & 58.01 ± 1.54 & 58.11 ± 1.30 & 62.39 ± 0.65 & \textbf{64.22 ± 2.06} \\
CIFAR-10 & Random Label & 57.77 ± 1.52 & 57.71 ± 2.32 & 51.02 ± 2.10 & 54.35 ± 1.48 & \textbf{62.74 ± 1.84} \\
CIFAR-10 & Batch Label Shuffle & 57.65 ± 1.74 & 59.38 ± 1.25 & 51.71 ± 1.59 & 58.66 ± 0.67 & \textbf{63.06 ± 1.90} \\
CIFAR-10 & Batch Label Flip & 51.51 ± 2.43 & 59.21 ± 1.35 & 49.58 ± 1.67 & 51.82 ± 1.41 & \textbf{63.21 ± 2.35} \\
CIFAR-10 & Added Noise & 57.30 ± 1.60 & 51.57 ± 2.21 & 53.43 ± 2.14 & 57.64 ± 0.71 & \textbf{59.62 ± 1.91} \\
CIFAR-10 & Replace With Noise & 61.27 ± 2.66 & 55.19 ± 1.72 & 59.22 ± 1.66 & \textbf{62.27 ± 1.02} & 61.52 ± 1.87 \\
CIFAR-100 & Original Data & \textbf{64.46 ± 0.93} & 58.25 ± 1.19 