In [1]:
import os
import shutil
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import os
import cv2
import shutil
from scipy.stats import mannwhitneyu

In [2]:
root = 'results/cifar/'
for run in os.listdir(root):
    if run == '0images':
        continue
    for folder in tqdm(os.listdir(root + run)):
        for file in os.listdir(root + run + '/' + folder):
            if file.endswith('.json'):
                number = file.replace('.json', '').replace('config-', '')
                try:
                    shutil.move(root + 'fed-job-{}.out'.format(number), root + folder)
                except:
                    pass
            if file.endswith('npy'):
                metric = file.replace('.npy', '')
                if os.path.exists(root + run + '/' + folder + '/' + metric + '.png'):
                    continue
                y = np.load(root + run + '/' + folder + '/' + file) * 100
                x = np.arange(1, len(y) + 1)
                mean = y.cumsum() / x
                label = ''
                if 'main' in metric:
                    label = 'Accuracy'
                else:
                    label = 'Backdoor accuracy'

                plt.rcParams.update({'font.size': 20})

                fig = plt.figure(figsize=(10, 6))
                plt.plot(x, y, label=label)
                if 'main' in metric:
                    label = 'Cumulative mean accuracy'
                else:
                    label = 'Cumulative mean of backdoor accuracy'

                plt.plot(x, mean, label=label)
                plt.xlabel('Number of rounds')
                plt.grid(True)
                plt.yticks(np.arange(0, 110, 10))
                plt.legend()
                plt.savefig(root + run + '/' + folder + '/' + metric + '.png', bbox_inches='tight')
                plt.close(fig)

100%|██████████| 192/192 [00:00<00:00, 24503.46it/s]
100%|██████████| 192/192 [00:00<00:00, 26185.42it/s]
100%|██████████| 192/192 [00:00<00:00, 23546.28it/s]
100%|██████████| 301/301 [00:00<00:00, 20517.21it/s]
100%|██████████| 192/192 [00:00<00:00, 18432.70it/s]
100%|██████████| 192/192 [00:00<00:00, 22331.78it/s]
100%|██████████| 192/192 [00:00<00:00, 25531.24it/s]
100%|██████████| 192/192 [00:00<00:00, 24907.41it/s]
100%|██████████| 192/192 [00:00<00:00, 21998.70it/s]
100%|██████████| 192/192 [00:00<00:00, 26264.84it/s]


In [3]:
krum_images = []


def load(root):
    result_map = defaultdict(list)
    for run in os.listdir(root):
        if run == '0images':
            continue
        for folder in os.listdir(root + run):
            if folder == '0images':
                continue
            accuracy_m = 0
            accuracy_s = 0
            for file in os.listdir(root + run + '/' + folder):
                if file.endswith('.npy'):
                    results = np.load(root + run + '/' + folder + '/' + file)
                    num = len(results) // 10
                    accuracy_m = np.median(results[-num:])
                    deg = '0.4'
                    if 'adni' in folder:
                        deg = '0'
                    if deg + '--krum--clean' in folder:
                        krum_images.append(root + run + '/' + folder + '/' + file.replace('.npy', '-no-legend.png'))
                    accuracy_s = results[-num:].std()
                    if file.startswith('backdoor'):
                        break
            result_map[folder].append((accuracy_m, accuracy_s))
    return result_map


result_map = load(root)

path = root + '0images/krum/'
os.makedirs(path, exist_ok=True)
for i, image in enumerate(krum_images):
    shutil.copy(image, path + '{}.png'.format(i))



In [4]:
data = []
for config in result_map:
    results = result_map[config]
    results = np.median(results, axis=0)
    a = list(filter(lambda x: x, config.split('-')))
    dataset = a[0]
    non_iid = a[1]
    aggregator = a[2]
    attack = a[3]
    fraction = 0
    if len(a) > 4:
        fraction = a[4]
    data.append((dataset, non_iid, aggregator, attack, fraction, *results))

df = pd.DataFrame(data, columns=['dataset', 'non_iid', 'aggregator', 'attack', 'fraction', 'accuracy_m', 'accuracy_s'])

df.to_csv(root + '0images/raw.csv')

In [5]:
def group_and_show(df, group_by, exclusions=None):
    if exclusions:
        df = df.drop(exclusions, axis=1)
    groups = df.groupby(group_by)
    frames = []
    for i, (_, frame) in enumerate(groups):

        if i != len(groups) - 1:
            frame = frame.append(pd.Series(dtype=object), ignore_index=True)
        frames.append(frame)

    frame = pd.concat(frames).fillna('').reset_index(drop=True)

    def fill(row):
        if row.dataset == '':
            return ['background-color: #e0e2e5'] * len(row)
        return [''] * len(row)

    display(frame.style.hide_index().set_properties(**{'font-size': '.8rem'}).apply(fill, axis=1))


### Comparison of aggregators with no attacks

In [6]:
clean_results = df[df['attack'] == 'clean']

group_and_show(clean_results, 'non_iid', ['attack', 'fraction'])
# group_and_show(clean_results, 'aggregator', ['attack', 'fraction'])

dataset,non_iid,aggregator,accuracy_m,accuracy_s
cifar,0.0,trimmedmean,0.78095,0.001522
cifar,0.0,fedavg,0.77495,0.001474
cifar,0.0,krum,0.697825,0.006362
cifar,0.0,median,0.776875,0.001717
,,,,
cifar,0.4,fedavg,0.7663,0.005551
cifar,0.4,median,0.7412,0.013033
cifar,0.4,trimmedmean,0.769325,0.005389
cifar,0.4,krum,0.6005,0.049205
,,,,


#
#
### Comparison of attacks on the baseline (fed averaging)


In [7]:
fed_avg_results = df[df['aggregator'] == 'fedavg']
fed_avg_results = fed_avg_results[fed_avg_results['attack'] != 'clean']

group_and_show(fed_avg_results, 'attack', ['aggregator'])

dataset,non_iid,attack,fraction,accuracy_m,accuracy_s
cifar,0.7,backdoor,0.1,0.941,0.007792
cifar,0.0,backdoor,0.1,0.918,0.006454
cifar,0.4,backdoor,0.3,0.956,0.004456
cifar,0.0,backdoor,0.3,0.952,0.003816
cifar,0.7,backdoor,0.3,0.964,0.00399
cifar,0.4,backdoor,0.5,0.968,0.003203
cifar,0.4,backdoor,0.1,0.928,0.008356
cifar,0.0,backdoor,0.5,0.964,0.002519
cifar,0.7,backdoor,0.5,0.973,0.003092
,,,,,


#
#
### Comparison of aggregators against attacks


In [8]:

defenses_results = df[df['attack'] != 'clean']

for non_iid, group in defenses_results.groupby('non_iid'):
    display(HTML('<br/><h3>{}</h3>'.format('non iid degree: {}'.format(non_iid))))
    group_and_show(group.sort_values('fraction'), 'attack', ['non_iid'])



dataset,aggregator,attack,fraction,accuracy_m,accuracy_s
cifar,median,backdoor,0.1,0.7905,0.011645
cifar,trimmedmean,backdoor,0.1,0.8595,0.00737
cifar,krum,backdoor,0.1,0.029,0.00757
cifar,fedavg,backdoor,0.1,0.918,0.006454
cifar,median,backdoor,0.3,0.923,0.00432
cifar,trimmedmean,backdoor,0.3,0.935,0.003526
cifar,krum,backdoor,0.3,0.027,0.007343
cifar,fedavg,backdoor,0.3,0.952,0.003816
cifar,median,backdoor,0.5,0.955,0.002333
cifar,krum,backdoor,0.5,0.03,0.008362


dataset,aggregator,attack,fraction,accuracy_m,accuracy_s
cifar,fedavg,backdoor,0.1,0.928,0.008356
cifar,krum,backdoor,0.1,0.0325,0.025203
cifar,trimmedmean,backdoor,0.1,0.89,0.007928
cifar,median,backdoor,0.1,0.802,0.02316
cifar,fedavg,backdoor,0.3,0.956,0.004456
cifar,median,backdoor,0.3,0.9375,0.0058
cifar,krum,backdoor,0.3,0.0335,0.026181
cifar,trimmedmean,backdoor,0.3,0.95,0.004747
cifar,trimmedmean,backdoor,0.5,0.967,0.0041
cifar,krum,backdoor,0.5,0.042,0.035267


dataset,aggregator,attack,fraction,accuracy_m,accuracy_s
cifar,krum,backdoor,0.1,0.0535,0.045838
cifar,median,backdoor,0.1,0.865,0.024648
cifar,fedavg,backdoor,0.1,0.941,0.007792
cifar,trimmedmean,backdoor,0.1,0.919,0.007611
cifar,krum,backdoor,0.3,0.0465,0.052489
cifar,median,backdoor,0.3,0.955,0.006023
cifar,fedavg,backdoor,0.3,0.964,0.00399
cifar,trimmedmean,backdoor,0.3,0.96,0.005472
cifar,krum,backdoor,0.5,0.0895,0.085991
cifar,trimmedmean,backdoor,0.5,0.976,0.005106


In [9]:

def autolabel(rects):
    """
    Attach a text label above each bar displaying its height
    """
    for rect in rects:
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2., 1.03 * height,
                 '%d' % int(np.round(height)),
                 ha='center', va='bottom')


In [10]:

fed_avg_results = df[df['aggregator'] == 'fedavg']
# fed_avg_results = fed_avg_results[fed_avg_results['attack']!='clean']
fed_avg_clean = fed_avg_results[fed_avg_results['attack'] == 'clean']

print('fed avg summary')
for attack, attack_group in fed_avg_results[fed_avg_results['attack'] != 'clean'].groupby('attack'):
    attack_group = attack_group[attack_group['fraction'] == '0.5']
    if attack == 'backdoor':
        print(attack, np.round(attack_group['accuracy_m'].mean() * 100, 2))
        continue
    print(attack, np.round((attack_group['accuracy_m'] - fed_avg_clean['accuracy_m'].to_numpy()).mean() * 100, 2),
          np.round((attack_group['accuracy_m'] - fed_avg_clean['accuracy_m'].to_numpy()).std() * 100, 2))

# plt.bar()
attack_data = {}
for attack, attack_group in fed_avg_results.groupby('attack'):
    y = {}
    for non_iid, non_iid_group in attack_group.groupby('non_iid'):
        non_iid_group = non_iid_group.sort_values('fraction')
        y[non_iid] = non_iid_group['accuracy_m'].to_numpy() * 100
    attack_data[attack] = y

clean = attack_data['clean']
for attack in attack_data:
    if attack == 'clean' or attack == 'backdoor':
        continue
    data = attack_data[attack]
    for non_iid in clean:
        data[non_iid] = np.concatenate((clean[non_iid], data[non_iid]))

del attack_data['clean']

width = 0.22

x_ticks = ['Clean', '0.1', '0.3', '0.5']
y_label = 'Accuracy'
x_label = 'Proportion of affected clients'
path = root + '0images/fedavg/'
os.makedirs(path, exist_ok=True)

plt.rcParams.update({'font.size': 30})

for attack in attack_data:
    fig = plt.figure(figsize=(15, 8))
    data = attack_data[attack]
    for i, non_iid in enumerate(data):
        accuracy_values = data[non_iid]
        x = np.arange(len(accuracy_values))
        autolabel(plt.bar(x + width * i, accuracy_values, width, label='non iid: ' + non_iid))
    x1, x2, y1, y2 = plt.axis()
    plt.axis((x1, x2, y1, y2 + 8))
    plt.xlabel(x_label)
    if 'backdoor' in attack:
        plt.ylabel('Backdoor accuracy')
    else:
        plt.ylabel(y_label)
    plt.xticks(x + width, x_ticks[-len(x):])
    plt.legend(loc="lower left")
    file = path + attack + '.png'
    plt.savefig(file, bbox_inches='tight')
    plt.close(fig)



fed avg summary
backdoor 96.83
deletedata -4.3 0.7
labelflip -39.02 7.81
noisedata -5.66 1.05
overlapdata -0.71 3.29
randomupdate -66.13 1.67
signflip -66.13 1.67
unbalancedata -0.64 1.01


In [11]:

clean_results = df[df['attack'] == 'clean']

# plt.bar()
clean_data = {}
for aggregator, aggregator_group in clean_results.groupby('aggregator'):
    aggregator_group = aggregator_group.sort_values('non_iid')
    clean_data[aggregator] = aggregator_group['accuracy_m'].to_numpy() * 100

width = 0.18

x_ticks = ['0', '0.4', '0.7']
y_label = 'Accuracy'
x_label = 'Non iid degree'

file = root + '0images/clean.png'

plt.rcParams.update({'font.size': 22})

fig = plt.figure(figsize=(10, 6))

if 'adni' in root:
    x = [aggregator for aggregator in clean_data]
    y = [clean_data[aggregator][0] for aggregator in clean_data]
    bars = plt.bar(x, y, 0.4)
    for i, bar in enumerate(bars):
        bar.set_color('C{}'.format(i))
    autolabel(bars)

else:
    for i, aggregator in enumerate(clean_data):
        accuracy_values = clean_data[aggregator]
        x = np.arange(len(accuracy_values))
        autolabel(plt.bar(x + width * i, accuracy_values, width, label=aggregator))
    plt.xticks(x + width * 1.5, x_ticks[-len(x):])
    plt.xlabel(x_label)
    plt.legend(loc="lower left")

x1, x2, y1, y2 = plt.axis()
plt.axis((x1, x2, y1, y2 + 8))
plt.ylabel(y_label)
plt.savefig(file, bbox_inches='tight')
plt.close(fig)


In [12]:
if 'adni' in root:
    defenses_results = df[df['attack'] != 'clean']
else:
    defenses_results = df[
        (df['attack'] == 'labelflip') |
        (df['attack'] == 'randomupdate') |
        (df['attack'] == 'signflip') |
        (df['attack'] == 'backdoor')
        ]

width = 0.2

x_ticks = ['0.1', '0.3', '0.5']
y_label = 'Accuracy'
x_label = 'Proportion of affected clients'

path = root + '0images/defences/'
plt.rcParams.update({'font.size': 30})

for non_iid, non_iid_group in defenses_results.groupby('non_iid'):
    p = path + non_iid + '/'
    os.makedirs(p, exist_ok=True)
    for attack, attack_group in non_iid_group.groupby('attack'):
        fig = plt.figure(figsize=(15, 8))
        for i, (aggregator, aggregator_group) in enumerate(attack_group.groupby('aggregator')):
            aggregator_group = aggregator_group.sort_values('fraction')
            accuracy_values = aggregator_group['accuracy_m'].to_numpy() * 100
            x = np.arange(len(accuracy_values))
            autolabel(plt.bar(x + width * i, accuracy_values, width, label=aggregator))
        x1, x2, y1, y2 = plt.axis()
        plt.axis((x1, x2, y1, y2 + 8))
        plt.xlabel(x_label)
        if 'backdoor' in attack:
            plt.ylabel('Backdoor accuracy')
        else:
            plt.ylabel(y_label)
        plt.xticks(x + width * 1.5, x_ticks[-len(x):])
        plt.legend(loc="lower left")
        file = p + attack + '.png'
        plt.savefig(file, bbox_inches='tight')
        plt.close(fig)


In [24]:
all_confs = defenses_results
if 'adni' in root:
    all_confs = df[
        (df['attack'] == 'labelflip') |
        (df['attack'] == 'randomupdate') |
        (df['attack'] == 'signflip') |
        (df['attack'] == 'overlapdata') |
        (df['attack'] == 'backdoor')
        ]

print('aggregator summary')


def summary(all_confs):
    rank_data = all_confs[all_confs['attack'] != 'backdoor']
    ranks = defaultdict(int)
    for non_iid, non_iid_group in rank_data.groupby('non_iid'):
        for attack, attack_group in non_iid_group.groupby('attack'):
            for fraction, fraction_group in attack_group.groupby('fraction'):
                ranks[fraction_group.loc[fraction_group['accuracy_m'].idxmax()]['aggregator']] += 1

    print(ranks)

    print('non targeted attacks\ntechnique\tmean\t\tmedian\t\tstd')
    for aggregator, aggregator_group in all_confs[all_confs['attack'] != 'backdoor'].groupby('aggregator'):
        print(aggregator, '\t\t',  np.round(aggregator_group['accuracy_m'].to_numpy().mean() * 100, 2), '\t\t',np.round(np.median(aggregator_group['accuracy_m'].to_numpy()) * 100, 2), '\t\t',
              np.round(aggregator_group['accuracy_m'].to_numpy().std() * 100, 2))

    print('\n')

    print('targeted\ntechnique\tmean\t\tmedian\t\tstd')
    for aggregator, aggregator_group in all_confs[all_confs['attack'] == 'backdoor'].groupby('aggregator'):
        print(aggregator, '\t\t',  np.round(aggregator_group['accuracy_m'].to_numpy().mean() * 100, 2), '\t\t',np.round(np.median(aggregator_group['accuracy_m'].to_numpy()) * 100, 2), '\t\t',
              np.round(aggregator_group['accuracy_m'].to_numpy().std() * 100, 2))

summary(all_confs)


aggregator summary
defaultdict(<class 'int'>, {'fedavg': 8, 'median': 6, 'krum': 12, 'trimmedmean': 1})
non targeted attacks
technique	mean		median		std
fedavg 		 26.31 		 10.0 		 24.88
krum 		 51.09 		 55.05 		 16.66
median 		 37.04 		 27.59 		 27.32
trimmedmean 		 24.61 		 10.0 		 23.2


targeted
technique	mean		median		std
fedavg 		 95.16 		 95.6 		 1.78
krum 		 4.26 		 3.35 		 1.86
median 		 90.72 		 93.75 		 6.65
trimmedmean 		 93.56 		 95.0 		 3.71


In [30]:

cifar_raw = pd.read_csv('results/cifar/0images/raw.csv')
cifar_raw = cifar_raw[
    (cifar_raw['non_iid'] == 0.4) & ((cifar_raw['attack'] == 'labelflip') | (cifar_raw['attack'] == 'signflip'))]
adni_raw = pd.read_csv('results/adni/0images/raw.csv')
adni_raw = adni_raw[(adni_raw['attack'] == 'labelflip') | (adni_raw['attack'] == 'signflip')]
comb_cifar_raw = pd.read_csv('results/ensemble/cifar/0images/raw.csv')
comb_adni_raw = pd.read_csv('results/ensemble/adni/0images/raw.csv')
comb_adni_raw['non_iid'] = comb_adni_raw['non_iid'].apply(lambda x: 0)
cifar_raw = pd.concat([cifar_raw, comb_cifar_raw])
adni_raw = pd.concat([adni_raw, comb_adni_raw])

table = []
for dataset, raw in [('CIFAR-10', cifar_raw), ('ADNI', adni_raw)]:
    columns = []
    for attack, attack_group in raw.groupby('attack'):
        attack_group = attack_group.sort_values('fraction')
        for fraction, fraction_group in attack_group.groupby('fraction'):
            fraction_group = fraction_group.sort_values('aggregator')
            columns.append(fraction_group['accuracy_m'].to_numpy() * 100)

    columns = np.column_stack(columns)
    print(dataset)
    print('Ensemble', 'FedAvg', 'Krum', 'Median', 'Tri-Mean')
    print('mean', columns.mean(axis=1))
    print('median', np.median(columns, axis=1))
    print('std', columns.std(axis=1))
    columns = np.column_stack([columns, columns.mean(axis=1)])

    columns = np.round(columns, 2)

    indices = columns.argmax(axis=0)

    columns = columns.astype(np.str)

    for i, index in enumerate(indices):
        columns[index, i] = '\\cellcolor{gray!25}' + columns[index, i]
    columns = np.column_stack([[''] * 5, ['Ensemble', 'FedAvg', 'Krum', 'Median', 'Tri-Mean'], columns])

    columns[0, 0] = '\\multirow{5}{*}{' + dataset + '}'

    table.append(columns)
table = np.row_stack(table)

print()
print('\\\\\n'.join(map(lambda row: ' & '.join(row), table)))


CIFAR-10
Ensemble FedAvg Krum Median Tri-Mean
mean [56.75499998 34.63166667 50.11083347 40.50499996 32.34083392]
median [62.68875077 23.20000045 54.30999994 40.86124972 20.92125006]
std [21.53931216 27.24774375 11.70264733 25.92115812 25.42230343]
ADNI
Ensemble FedAvg Krum Median Tri-Mean
mean [64.64009707 47.46497646 60.47222205 58.44685957 59.10024146]
median [65.75724706 44.2572467  62.26449236 64.38405663 64.84420374]
std [ 7.22663666 13.72830154  7.29617855 13.65128441 13.6188691 ]

\multirow{5}{*}{CIFAR-10} & Ensemble & 73.31 & 64.85 & \cellcolor{gray!25}60.53 & \cellcolor{gray!25}72.08 & 59.77 & 10.0 & \cellcolor{gray!25}56.75\\
 & FedAvg & \cellcolor{gray!25}74.58 & \cellcolor{gray!25}66.81 & 36.4 & 10.0 & 10.0 & 10.0 & 34.63\\
 & Krum & 49.49 & 41.02 & 29.41 & 61.0 & \cellcolor{gray!25}60.62 & \cellcolor{gray!25}59.13 & 50.11\\
 & Median & 70.34 & 54.13 & 27.59 & 70.96 & 10.0 & 10.0 & 40.5\\
 & Tri-Mean & 73.2 & 59.01 & 31.84 & 10.0 & 10.0 & 10.0 & 32.34\\
\multirow{5}{*}{ADNI

In [15]:
def calc_test(base_map, best_choice, second_choice, attacks):
    best = {}
    second = {}
    for key in base_map:
        # if 'fedavg' in key and '0.5' in key:
        #     for attack in attacks:
        #         if attack in key:
        #             best[key] = base_map[key]
        #             second[key] = base_map['--'.join(key.split('--')[:-1])+'--clean']
        for attack in attacks:
            if attack in key:
                if best_choice in key:
                    best[key.replace(best_choice, '')] = base_map[key]
                elif second_choice[attack] in key:
                    second[key.replace(second_choice[attack], '')] = base_map[key]
    results = {
        'Better Different': 0,
        'Better Same': 0,
        'Worse Same': 0,
        'Worse Different': 0,
    }
    for key in best:
        a = best[key]
        b = second[key]
        a = list(map(lambda x: x[0], a))
        b = list(map(lambda x: x[0], b))
        better = np.median(a) > np.median(b)
        if 'backdoor' in key:
            better = !better
        better_text = 'Better' if better else 'Worse'
        # print('')
        # print(key)
        stat, p = mannwhitneyu(a, b)
        # print('Statistics=%.3f, p=%.3f' % (stat, p))
        # interpret
        alpha = 0.05
        if p > alpha:
            # print(better_text, 'Same distribution (fail to reject H0)')
            results[better_text + ' Same'] += 1
        else:
            # print(better_text, 'Different distribution (reject H0)')
            results[better_text + ' Different'] += 1
    print(results)

cifar_map = load('results/cifar/')
mnist_map = load('results/fashion-mnist/')
adni_map = load('results/adni/')


# second_choice = {'backdoor': 'krum',
#                  'overlapdata': 'trimmedmean',
#                  'labelflip-0.1': 'trimmedmean',
#                  'labelflip-0.3': 'trimmedmean',
#                  'labelflip-0.5': 'trimmedmean',
#                  'randomupdate-0.1': 'trimmedmean',
#                  'randomupdate-0.3': 'trimmedmean',
#                  'randomupdate-0.5': 'krum',
#                  'signflip-0.1': 'trimmedmean',
#                  'signflip-0.3': 'trimmedmean',
#                  'signflip-0.5': 'krum'}
#
# cifar_comb_map = load('results/ensemble/cifar/')
# adni_comb_map = load('results/ensemble/adni/')
# tmp = {}
# for key in adni_map:
#     tmp[key.replace('adni-0', 'adni-0.4')] = adni_map[key]
# adni_map = tmp
# cifar_map = dict(cifar_map,  **cifar_comb_map)
# adni_map = dict(adni_map,  **adni_comb_map)
#