In [86]:
import os
import shutil
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import os
import cv2
import shutil
from scipy.stats import mannwhitneyu

In [87]:
# Walk every run/config folder under `root`, file each job log next to its
# config, and render one PNG per saved metric array (skipping existing PNGs).
root = 'results/adni/'
for run in os.listdir(root):
    # '0images' is the folder this notebook writes its own figures/tables to.
    if run == '0images':
        continue
    run_dir = root + run
    for folder in tqdm(os.listdir(run_dir)):
        folder_dir = run_dir + '/' + folder
        for file in os.listdir(folder_dir):
            if file.endswith('.json'):
                # 'config-<N>.json' -> '<N>', which names the log 'fed-job-<N>.out'.
                number = file.replace('.json', '').replace('config-', '')
                try:
                    # Move the log into the folder being scanned
                    # (root + run + '/' + folder), not into `root + folder`.
                    shutil.move(root + 'fed-job-{}.out'.format(number), folder_dir)
                except OSError:
                    # Best effort: the log may be missing or already moved.
                    # shutil.Error is an OSError subclass, so it is covered too;
                    # unrelated errors (e.g. KeyboardInterrupt) now propagate.
                    pass
            if file.endswith('npy'):
                metric = file.replace('.npy', '')
                png_path = folder_dir + '/' + metric + '.png'
                if os.path.exists(png_path):
                    continue
                y = np.load(folder_dir + '/' + file)
                x = np.arange(1, len(y) + 1)
                # Running mean of the metric up to each round.
                mean = y.cumsum() / x
                fig = plt.figure()
                legend_name = metric.replace('_', ' ')
                plt.plot(x, y, label=legend_name)
                plt.plot(x, mean, label=legend_name + ' mean')
                plt.xlabel('num of round')
                plt.grid(True)
                plt.yticks(np.arange(0, 1, 0.1))
                plt.legend()
                plt.savefig(png_path, bbox_inches='tight')
                # Close explicitly so figures do not accumulate across the loop.
                plt.close(fig)

100%|██████████| 101/101 [00:00<00:00, 2589.06it/s]
100%|██████████| 100/100 [00:00<00:00, 2859.45it/s]
100%|██████████| 101/101 [00:00<00:00, 3255.50it/s]
100%|██████████| 101/101 [00:00<00:00, 3258.38it/s]
100%|██████████| 100/100 [00:00<00:00, 3122.53it/s]
100%|██████████| 100/100 [00:00<00:00, 3330.69it/s]
100%|██████████| 100/100 [00:00<00:00, 3330.32it/s]
100%|██████████| 100/100 [00:00<00:00, 3333.89it/s]
100%|██████████| 100/100 [00:00<00:00, 3225.74it/s]
100%|██████████| 100/100 [00:00<00:00, 3223.31it/s]


In [88]:
# Paths of the '.png' plots belonging to clean Krum runs. load() appends to this
# list as a side effect, so it is reset here before load() is called.
krum_images = []

def load(root):
    """Collect a (median, std) accuracy pair per run for every config folder.

    Expects the layout ``root/<run>/<config folder>/<metric>.npy``; ``root``
    must end with a path separator because paths are built by concatenation.
    For each config folder the statistics are taken over the last
    ``max(20, len(results) // 10)`` entries of a metric array. When a folder
    holds several ``.npy`` files, the last one listed wins, except that a file
    whose name starts with ``backdoor`` ends the scan of that folder.
    Folders without any ``.npy`` file contribute ``(0, 0)``.

    Side effect: for folders whose name contains ``<deg>--krum--clean``
    (``deg`` is ``'0'`` for names containing ``adni``, else ``'0.4'``), the path
    of the matching ``.png`` is appended to the module-level ``krum_images``.

    Entries named ``0images`` and anything that is not a directory (for
    example stray log files) are skipped instead of raising.

    Returns:
        defaultdict(list) mapping config folder name -> list of
        ``(accuracy_median, accuracy_std)`` tuples, one per run.
    """
    result_map = defaultdict(list)
    for run in os.listdir(root):
        run_dir = root + run
        if run == '0images' or not os.path.isdir(run_dir):
            continue
        for folder in os.listdir(run_dir):
            folder_dir = run_dir + '/' + folder
            if folder == '0images' or not os.path.isdir(folder_dir):
                continue
            accuracy_m = 0
            accuracy_s = 0
            deg = '0' if 'adni' in folder else '0.4'
            for file in os.listdir(folder_dir):
                if not file.endswith('.npy'):
                    continue
                results = np.load(folder_dir + '/' + file)
                # Summarise only the tail of the series: at least 20 entries,
                # or the last tenth for longer series.
                num = max(20, len(results) // 10)
                tail = results[-num:]
                accuracy_m = np.median(tail)
                accuracy_s = tail.std()
                if deg + '--krum--clean' in folder:
                    krum_images.append(folder_dir + '/' + file.replace('.npy', '.png'))
                if file.startswith('backdoor'):
                    # Keep the backdoor metric as this folder's result.
                    break
            result_map[folder].append((accuracy_m, accuracy_s))
    return result_map
result_map = load(root)

# Gather the clean-Krum plots collected by load() into one folder,
# numbered in the order they were found.
path = root + '0images/krum/'
os.makedirs(path, exist_ok=True)
for index, source_png in enumerate(krum_images):
    target_png = path + '{}.png'.format(index)
    shutil.copy(source_png, target_png)



In [89]:
# One row per config folder: the folder name is split on '-' into its fields
# and the per-run (median, std) pairs are reduced to their column-wise median.
COLUMN_NAMES = ['dataset', 'non_iid', 'aggregator', 'attack', 'fraction', 'accuracy_m', 'accuracy_s']

data = []
for config, per_run_stats in result_map.items():
    summary_stats = np.median(per_run_stats, axis=0)
    # Empty tokens come from the '--' separators and are dropped.
    parts = [token for token in config.split('-') if token]
    # Configs without a fifth field get fraction 0.
    fraction = parts[4] if len(parts) > 4 else 0
    data.append((parts[0], parts[1], parts[2], parts[3], fraction, *summary_stats))

df = pd.DataFrame(data, columns=COLUMN_NAMES)

df.to_csv(root + '0images/raw.csv')

In [90]:
def group_and_show(df, group_by, exclusions=None):
    """Display ``df`` grouped by ``group_by`` with a grey blank row between groups.

    Args:
        df: frame to show; it must still contain a ``dataset`` column after
            ``exclusions`` are dropped, because blank separator rows are
            recognised by an empty ``dataset`` cell.
        group_by: column name (or list of names) passed to ``DataFrame.groupby``.
        exclusions: optional column labels to drop before display.

    Returns:
        None. The styled table is rendered through ``display``.
    """
    if exclusions:
        df = df.drop(exclusions, axis=1)
    groups = df.groupby(group_by)
    last = len(groups) - 1
    frames = []
    for i, (_, frame) in enumerate(groups):
        if i != last:
            # DataFrame.append was removed in pandas 2.0. Reindexing one
            # position past the end adds the same all-NaN separator row.
            frame = frame.reset_index(drop=True)
            frame = frame.reindex(range(len(frame) + 1))
        frames.append(frame)

    if frames:
        frame = pd.concat(frames).fillna('').reset_index(drop=True)
    else:
        # pd.concat([]) raises; an empty input is shown as an empty table.
        frame = df.reset_index(drop=True)

    def fill(row):
        # Separator rows are the ones whose cells were filled with ''.
        if row.dataset == '':
            return ['background-color: #e0e2e5'] * len(row)
        return [''] * len(row)

    styler = frame.style
    # Styler.hide_index() was removed in pandas 2.0 in favour of
    # Styler.hide(axis='index'); fall back for older pandas versions.
    if hasattr(styler, 'hide'):
        styler = styler.hide(axis='index')
    else:
        styler = styler.hide_index()
    display(styler.set_properties(**{'font-size': '.8rem'}).apply(fill, axis=1))


### Comparison of aggregators with no attacks

In [91]:
# Attack-free runs only, listed per non-iid degree.
is_clean = df['attack'] == 'clean'
clean_results = df[is_clean]

group_and_show(clean_results, group_by='non_iid', exclusions=['attack', 'fraction'])
# Alternative view, grouped per aggregator:
# group_and_show(clean_results, 'aggregator', ['attack', 'fraction'])

dataset,non_iid,aggregator,accuracy_m,accuracy_s
adni,0,fedavg,0.747754,0.001314
adni,0,krum,0.634565,0.001112
adni,0,median,0.75413,0.001623
adni,0,trimmedmean,0.754203,0.001174


#
#
### Comparison of attacks on the baseline (federated averaging)


In [92]:
# FedAvg rows that were run under some attack, shown one table block per attack.
uses_fedavg = df['aggregator'] == 'fedavg'
under_attack = df['attack'] != 'clean'
fed_avg_results = df[uses_fedavg & under_attack]

group_and_show(fed_avg_results, group_by='attack', exclusions=['aggregator'])

dataset,non_iid,attack,fraction,accuracy_m,accuracy_s
adni,0.0,backdoor,0.1,0.985465,0.085722
adni,0.0,backdoor,0.3,1.0,0.0
adni,0.0,backdoor,0.5,1.0,0.0
,,,,,
adni,0.0,deletedata,0.1,0.751884,0.001447
adni,0.0,deletedata,0.3,0.749565,0.001288
adni,0.0,deletedata,0.5,0.741739,0.001475
,,,,,
adni,0.0,labelflip,0.1,0.690217,0.001519
adni,0.0,labelflip,0.3,0.581377,0.002093


#
#
### Comparison of aggregators against attacks


In [93]:

# Every attacked run, one HTML heading and table per non-iid degree.
defenses_results = df[df['attack'] != 'clean']

for non_iid, group in defenses_results.groupby('non_iid'):
    heading = 'non iid degree: {}'.format(non_iid)
    display(HTML('<br/><h3>{}</h3>'.format(heading)))
    by_fraction = group.sort_values('fraction')
    group_and_show(by_fraction, 'attack', ['non_iid'])



dataset,aggregator,attack,fraction,accuracy_m,accuracy_s
adni,fedavg,backdoor,0.1,0.985465,0.085722
adni,krum,backdoor,0.1,0.505814,0.004243
adni,median,backdoor,0.1,0.921512,0.003466
adni,trimmedmean,backdoor,0.1,0.952035,0.005169
adni,median,backdoor,0.3,0.985465,0.001506
adni,trimmedmean,backdoor,0.3,0.985465,0.002318
adni,fedavg,backdoor,0.3,1.0,0.0
adni,krum,backdoor,0.3,0.5,0.018605
adni,trimmedmean,backdoor,0.5,1.0,0.0
adni,fedavg,backdoor,0.5,1.0,0.0


In [94]:

def autolabel(rects):
    """
    Write each bar's height, rounded to the nearest integer, just above the bar.

    The label is centred horizontally on the bar and anchored at 1.03 times
    the bar height.
    """
    for bar in rects:
        bar_height = bar.get_height()
        center_x = bar.get_x() + bar.get_width()/2.
        caption = '%d' % int(np.round(bar_height))
        plt.text(center_x, 1.03 * bar_height, caption,
                 ha='center', va='bottom')


In [95]:

# FedAvg under every attack: print a one-line summary per attack, then save
# one bar chart per attack with one bar series per non-iid degree.
fed_avg_results = df[df['aggregator'] == 'fedavg']
fed_avg_clean = fed_avg_results[fed_avg_results['attack'] == 'clean']

print('fed avg summary')
for attack, attack_group in fed_avg_results[fed_avg_results['attack'] != 'clean'].groupby('attack'):
    attack_group = attack_group[attack_group['fraction'] == '0.5']
    if attack == 'backdoor':
        # For backdoor the stored metric is printed as-is rather than as a
        # drop relative to the clean run.
        print(attack, np.round(attack_group['accuracy_m'].mean()*100, 2))
        continue
    # Mean drop (in percentage points) from the clean runs at fraction 0.5.
    print(attack, np.round((fed_avg_clean['accuracy_m'].to_numpy() - attack_group['accuracy_m']).mean()*100, 2))

# attack -> {non_iid degree -> accuracies in percent, ordered by fraction}
attack_data = {}
for attack, attack_group in fed_avg_results.groupby('attack'):
    y = {}
    for non_iid, non_iid_group in attack_group.groupby('non_iid'):
        # Row order follows os.listdir order, which is arbitrary. Sort by
        # fraction so each bar lines up with its tick label.
        non_iid_group = non_iid_group.sort_values('fraction')
        y[non_iid] = non_iid_group['accuracy_m'].to_numpy() * 100
    attack_data[attack] = y

# Prepend the clean accuracy to every attack except backdoor, so those charts
# start with a 'Clean' bar.
clean = attack_data['clean']
for attack in attack_data:
    if attack == 'clean' or attack == 'backdoor':
        continue
    data = attack_data[attack]
    for non_iid in clean:
        data[non_iid] = np.concatenate((clean[non_iid], data[non_iid]))

del attack_data['clean']

width = 0.22

x_ticks = ['Clean', '0.1', '0.3', '0.5']
y_label = 'Accuracy'
x_label = 'Fraction of malicious devices'
path = root + '0images/fedavg/'
os.makedirs(path, exist_ok=True)

plt.rcParams.update({'font.size': 13})

for attack in attack_data:
    fig = plt.figure()
    data = attack_data[attack]
    for i, non_iid in enumerate(data):
        accuracy_values = data[non_iid]
        x = np.arange(len(accuracy_values))
        autolabel(plt.bar(x + width * i, accuracy_values, width, label='non iid: ' + non_iid))
    # Leave headroom above the tallest bar for the value labels.
    x1, x2, y1, y2 = plt.axis()
    plt.axis((x1, x2, y1, y2 + 5))
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    # Series without the prepended clean bar use only the trailing tick labels.
    plt.xticks(x + width, x_ticks[-len(x):])
    plt.legend(loc="lower left")
    file = path + attack + '.png'
    plt.savefig(file, bbox_inches='tight')
    plt.close(fig)



fed avg summary
backdoor 100.0
deletedata 0.6
labelflip 20.75
noisedata 3.62
overlapdata 28.83
randomupdate 10.75
signflip 40.28
unbalancedata 1.16


In [96]:

# Bar chart of attack-free accuracy (in percent) for every aggregator.
clean_results = df[df['attack'] == 'clean']
clean_data = {
    name: rows['accuracy_m'].to_numpy() * 100
    for name, rows in clean_results.groupby('aggregator')
}

width = 0.15
x_ticks = ['0', '0.4', '0.7']
y_label = 'Accuracy'
x_label = 'Non iid degree'
file = root + '0images/clean.png'

plt.rcParams.update({'font.size': 15})
fig = plt.figure(figsize=(10, 6))

if 'adni' in root:
    # One bar per aggregator, taken from the first accuracy value of each.
    x = list(clean_data)
    y = [values[0] for values in clean_data.values()]
    bars = plt.bar(x, y, 0.4)
    for i, bar in enumerate(bars):
        bar.set_color('C{}'.format(i))
    autolabel(bars)
else:
    # Grouped bars: one series per aggregator, one tick per accuracy value.
    for i, (aggregator, accuracy_values) in enumerate(clean_data.items()):
        x = np.arange(len(accuracy_values))
        autolabel(plt.bar(x + width * i, accuracy_values, width, label=aggregator))
    plt.xticks(x + width, x_ticks[-len(x):])
    plt.xlabel(x_label)
    plt.legend(loc="lower left")

# Extra headroom so the value labels fit above the tallest bar.
x1, x2, y1, y2 = plt.axis()
plt.axis((x1, x2, y1, y2 + 5))
plt.ylabel(y_label)
plt.savefig(file, bbox_inches='tight')
plt.close(fig)


In [97]:
# One bar chart per (non-iid degree, attack): a bar series per aggregator,
# one bar per fraction of malicious devices.
if 'adni' in root:
    defenses_results = df[df['attack'] != 'clean']
else:
    defenses_results = df[df['attack'].isin(['labelflip', 'randomupdate', 'signflip', 'backdoor'])]

width = 0.2

x_ticks = ['0.1', '0.3', '0.5']
y_label = 'Accuracy'
x_label = 'Fraction of malicious devices'

path = root + '0images/defences/'
plt.rcParams.update({'font.size': 13})

for non_iid, non_iid_group in defenses_results.groupby('non_iid'):
    p = path + non_iid + '/'
    os.makedirs(p, exist_ok=True)
    for attack, attack_group in non_iid_group.groupby('attack'):
        fig = plt.figure()
        for i, (aggregator, aggregator_group) in enumerate(attack_group.groupby('aggregator')):
            # Row order follows os.listdir order, which is arbitrary. Sort by
            # fraction so each bar lines up with its tick label.
            aggregator_group = aggregator_group.sort_values('fraction')
            accuracy_values = aggregator_group['accuracy_m'].to_numpy()*100
            x = np.arange(len(accuracy_values))
            autolabel(plt.bar(x + width * i, accuracy_values, width, label=aggregator))
        # Leave headroom above the tallest bar for the value labels.
        x1, x2, y1, y2 = plt.axis()
        plt.axis((x1, x2, y1, y2 + 5))
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.xticks(x + width, x_ticks[-len(x):])
        plt.legend(loc="lower left")
        file = p + attack + '.png'
        plt.savefig(file, bbox_inches='tight')
        plt.close(fig)


In [98]:
# Rows fed to the aggregator summary: for ADNI a fixed list of attacks is
# selected from df, otherwise the selection made for the defence plots is reused.
SUMMARY_ATTACKS_ADNI = ['labelflip', 'randomupdate', 'signflip', 'overlapdata', 'backdoor']

if 'adni' in root:
    all_confs = df[df['attack'].isin(SUMMARY_ATTACKS_ADNI)]
else:
    all_confs = defenses_results


print('aggregator summary')

def summary(all_confs):
    """Print how the aggregators compare over the rows of ``all_confs``.

    Three things are printed, in this order:
      1. a win count per aggregator: for every (non_iid, attack, fraction)
         combination among the non-backdoor rows, the aggregator with the
         highest ``accuracy_m`` scores one point;
      2. under 'non targeted attacks', the mean ``accuracy_m`` (in percent,
         two decimals) per aggregator over the non-backdoor rows;
      3. under 'targeted', the same mean over the backdoor rows.
    """
    is_backdoor = all_confs['attack'] == 'backdoor'
    untargeted = all_confs[~is_backdoor]
    targeted = all_confs[is_backdoor]

    ranks = defaultdict(int)
    for _, per_degree in untargeted.groupby('non_iid'):
        for _, per_attack in per_degree.groupby('attack'):
            for _, contenders in per_attack.groupby('fraction'):
                best_row = contenders.loc[contenders['accuracy_m'].idxmax()]
                ranks[best_row['aggregator']] += 1

    print(ranks)

    def print_means(rows):
        # One line per aggregator: name, two tabs, mean accuracy in percent.
        for name, rows_of_aggregator in rows.groupby('aggregator'):
            mean_percent = rows_of_aggregator['accuracy_m'].to_numpy().mean()*100
            print(name, '\t\t', np.round(mean_percent, 2))

    print('non targeted attacks')
    print_means(untargeted)

    print('\n')

    print('targeted')
    print_means(targeted)


# Print the win counts and the mean accuracies for the selected configurations.
summary(all_confs)


aggregator summary
defaultdict(<class 'int'>, {'median': 4, 'fedavg': 1, 'trimmedmean': 5, 'krum': 2})
non targeted attacks
fedavg 		 52.92
krum 		 63.24
median 		 64.91
trimmedmean 		 65.08


targeted
fedavg 		 99.52
krum 		 53.88
median 		 96.9
trimmedmean 		 97.92


In [99]:

# Build a LaTeX table body comparing single aggregators with the combined
# results: one row block per dataset, one column per (attack, fraction) plus a
# row-mean column, with the best value of every column shaded.
cifar_raw = pd.read_csv('results/cifar/0images/raw.csv')
cifar_raw = cifar_raw[(cifar_raw['non_iid'] == 0.4) & cifar_raw['attack'].isin(['labelflip', 'signflip'])]
adni_raw = pd.read_csv('results/adni/0images/raw.csv')
adni_raw = adni_raw[adni_raw['attack'].isin(['labelflip', 'signflip'])]
comb_cifar_raw = pd.read_csv('results/combination/cifar/0images/raw.csv')
comb_adni_raw = pd.read_csv('results/combination/adni/0images/raw.csv')
# Force the non-iid degree of the combined ADNI rows to 0.
comb_adni_raw['non_iid'] = 0
cifar_raw = pd.concat([cifar_raw, comb_cifar_raw])
adni_raw = pd.concat([adni_raw, comb_adni_raw])

# Row labels; assumes each (attack, fraction) group holds exactly these five
# rows in this order -- TODO confirm against the raw.csv files.
ROW_LABELS = ['FedAvg', 'Krum', 'Median', 'Tri-Mean', 'Ensemble']

table = []
for dataset, raw in [('CIFAR-10', cifar_raw), ('ADNI', adni_raw)]:
    columns = []
    for attack, attack_group in raw.groupby('attack'):
        for fraction, fraction_group in attack_group.groupby('fraction'):
            columns.append(fraction_group['accuracy_m'].to_numpy()*100)

    columns = np.column_stack(columns)

    # Last column: mean over all (attack, fraction) columns per row.
    columns = np.column_stack([columns, columns.mean(axis=1)])

    columns = np.round(columns, 2)

    # Row index of the best value in every column.
    indices = columns.argmax(axis=0)

    # np.str was removed in NumPy 1.24; the builtin str is the same type.
    # Converting to object afterwards keeps later assignments of longer strings
    # from being truncated to the fixed unicode width.
    columns = columns.astype(str).astype(object)

    for i, index in enumerate(indices):
        columns[index, i] = '\\cellcolor{gray!25}' + columns[index, i]
    columns = np.column_stack([[''] * len(ROW_LABELS), ROW_LABELS, columns])

    columns[0, 0] = '\\multirow{5}{*}{' + dataset + '}'

    table.append(columns)
# np.row_stack is a deprecated alias of np.vstack.
table = np.vstack(table)

print('\\\\\n'.join(' & '.join(row) for row in table))


\multirow{5}{*}{CIFAR-10} & FedAvg & \cellcolor{gray!25}74.58 & \cellcolor{gray!25}66.81 & 36.4 & 10.0 & 10.0 & 10.0 & 34.63\\
 & Krum & 49.49 & 41.02 & 29.41 & 61.0 & \cellcolor{gray!25}60.62 & \cellcolor{gray!25}59.13 & 50.11\\
 & Median & 70.34 & 54.13 & 27.59 & 70.96 & 10.0 & 10.0 & 40.5\\
 & Tri-Mean & 73.2 & 59.01 & 31.84 & 10.0 & 10.0 & 10.0 & 32.34\\
 & Ensemble & 73.31 & 64.85 & \cellcolor{gray!25}60.53 & \cellcolor{gray!25}72.08 & 59.77 & 10.0 & \cellcolor{gray!25}56.75\\
\multirow{5}{*}{ADNI} & FedAvg & 69.02 & 58.14 & 54.03 & 34.49 & 34.49 & 34.49 & 47.44\\
 & Krum & 61.06 & 59.04 & 46.67 & 63.4 & 65.93 & \cellcolor{gray!25}67.43 & 60.59\\
 & Median & 69.59 & 60.98 & 46.34 & 71.61 & 68.11 & 34.49 & 58.52\\
 & Tri-Mean & 68.97 & 60.49 & 48.43 & \cellcolor{gray!25}72.78 & \cellcolor{gray!25}68.91 & 34.49 & 59.01\\
 & Ensemble & \cellcolor{gray!25}70.02 & \cellcolor{gray!25}65.44 & \cellcolor{gray!25}66.16 & 71.48 & 65.64 & 49.7 & \cellcolor{gray!25}64.74


In [122]:

def _keyed_by_attack(result_map, wanted=None):
    """Re-key ``result_map`` by the part of each key after the last '--'.

    When ``wanted`` is given, only keys contained in it are kept.
    """
    return {
        key.split('--')[-1]: runs
        for key, runs in result_map.items()
        if wanted is None or key in wanted
    }


# Single-aggregator configs compared against the combined runs,
# one per attack/fraction pair.
ADNI_SINGLE_CONFIGS = (
    'adni-0--median--labelflip-0.1',
    'adni-0--median--labelflip-0.3',
    'adni-0--fedavg--labelflip-0.5',
    'adni-0--trimmedmean--signflip-0.3',
    'adni-0--krum--signflip-0.5',
    'adni-0--trimmedmean--signflip-0.1',
)
CIFAR_SINGLE_CONFIGS = (
    'cifar-0.4--fedavg--labelflip-0.1',
    'cifar-0.4--fedavg--labelflip-0.3',
    'cifar-0.4--fedavg--labelflip-0.5',
    'cifar-0.4--krum--signflip-0.3',
    'cifar-0.4--krum--signflip-0.5',
    'cifar-0.4--median--signflip-0.1',
)

cifar_map = load('results/cifar/')
adni_map = load('results/adni/')
cifar_comb_map = load('results/combination/cifar/')
adni_comb_map = load('results/combination/adni/')

adni_map = _keyed_by_attack(adni_map, ADNI_SINGLE_CONFIGS)
adni_comb_map = _keyed_by_attack(adni_comb_map)

cifar_map = _keyed_by_attack(cifar_map, CIFAR_SINGLE_CONFIGS)
cifar_comb_map = _keyed_by_attack(cifar_comb_map)




In [125]:
def _report_mann_whitney(title, single_map, comb_map, alpha=0.05):
    """Print a Mann-Whitney U comparison for every key of ``comb_map``.

    For each key, the first element of every per-run tuple is taken from both
    maps and the two samples are compared with a two-sided test. ``alpha`` is
    the significance level used for the printed verdict.
    """
    print(title)
    for key in comb_map:
        single = [run[0] for run in single_map[key]]
        comb = [run[0] for run in comb_map[key]]
        print('')
        print(key)
        # Request the two-sided test explicitly: the verdict below is about
        # "same vs different distribution", and the default `alternative`
        # differs between SciPy versions.
        stat, p = mannwhitneyu(single, comb, alternative='two-sided')
        print('Statistics=%.3f, p=%.3f' % (stat, p))
        # interpret
        if p > alpha:
            print('Same distribution (fail to reject H0)')
        else:
            print('Different distribution (reject H0)')


_report_mann_whitney('cifar', cifar_map, cifar_comb_map)

print('\n\n')
_report_mann_whitney('adni', adni_map, adni_comb_map)

cifar

labelflip-0.1
Statistics=92.000, p=0.002
Different distribution (reject H0)

labelflip-0.3
Statistics=79.000, p=0.031
Different distribution (reject H0)

labelflip-0.5
Statistics=0.000, p=0.000
Different distribution (reject H0)

signflip-0.1
Statistics=12.000, p=0.005
Different distribution (reject H0)

signflip-0.3
Statistics=65.000, p=0.271
Same distribution (fail to reject H0)

signflip-0.5
Statistics=80.000, p=0.006
Different distribution (reject H0)



adni

labelflip-0.1
Statistics=54.500, p=0.762
Same distribution (fail to reject H0)

labelflip-0.3
Statistics=31.000, p=0.162
Same distribution (fail to reject H0)

labelflip-0.5
Statistics=4.000, p=0.001
Different distribution (reject H0)

signflip-0.1
Statistics=67.000, p=0.211
Same distribution (fail to reject H0)

signflip-0.3
Statistics=57.000, p=0.623
Same distribution (fail to reject H0)

signflip-0.5
Statistics=79.000, p=0.030
Different distribution (reject H0)
