In [None]:
import numpy as np
import pandas as pd
import random

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from torch import nn,optim
import torch

import algo
import attack
import scripts

import os
import matplotlib.pyplot as plt
import json

# Default seed of the notebook. Every random number generator that is seeded
# elsewhere in this notebook (numpy, torch and the stdlib `random` module) is
# seeded here as well, so code that runs before the per-seed setup is repeatable.
rand_seed=24
np.random.seed(rand_seed)
torch.manual_seed(rand_seed)
random.seed(rand_seed)

In [8]:
# purchase
# load data with different seeds
# data_seed maps each seed to a dict holding the four arrays read from
# 'data/rs<seed>_*.npy' plus a few derived sizes.
data_seed = {}
# A dedicated loop variable is used so that the module-level `rand_seed`
# is not silently overwritten by the last seed of this list.
for split_seed in [1,3,13,24,42]:
    prefix = 'data/rs'+str(split_seed)
    split = {}
    data_seed[split_seed] = split

    split['x_target_train'] = np.load(prefix+'_x_target_train.npy')
    split['y_target_train'] = np.load(prefix+'_y_target_train.npy')
    split['x_target_test'] = np.load(prefix+'_x_target_test.npy')
    split['y_target_test'] = np.load(prefix+'_y_target_test.npy')
    # number of distinct label values found in the training labels
    split['n_classes'] = len(np.unique(split['y_target_train']))
    split['X_train_size'] = split['x_target_train'].shape[0]
    split['X_test_size'] = split['x_target_test'].shape[0]
    # The original key was spelled with a trailing space ('X_test_size ');
    # it is kept as an alias so existing look-ups keep working.
    split['X_test_size '] = split['X_test_size']

def set_the_seed_and_data(seed):
    """Seed numpy, torch and `random` with `seed` and return the data loaded for it.

    Parameters
    ----------
    seed : int
        Seed for the random number generators; also the key used to look up
        the data in the module-level `data_seed` dictionary.

    Returns
    -------
    tuple
        (x_target_train, y_target_train, x_target_test, y_target_test) exactly
        as stored in ``data_seed[seed]``.

    Raises
    ------
    KeyError
        If `data_seed` has no entry for `seed`.
    """
    # Use the argument everywhere. Reading the module-level `rand_seed` here
    # would seed the generators with, and return the labels/test data of,
    # whatever value that global happens to hold at call time.
    np.random.seed(seed)
    torch.manual_seed(seed)
    random.seed(seed)

    split = data_seed[seed]
    return split['x_target_train'], split['y_target_train'], split['x_target_test'], split['y_target_test']

In [9]:
# attack models
from torch import nn

class Net_attack(nn.Module):
    """Small fully connected network with one hidden layer and a 2-way softmax output.

    Parameters
    ----------
    h_neurons : int
        Width of the hidden layer.
    do : float
        Dropout probability handed to ``nn.Dropout``.
    input_size : int
        Number of features per sample; inputs are reshaped to ``(-1, input_size)``.
    """

    def __init__(self, h_neurons, do, input_size):
        super().__init__()
        # Remember the constructor arguments; forward() needs input_size.
        self.input_size, self.h_neurons, self.do = input_size, h_neurons, do
        # Layers are created in the same order as before (fc1, fc2, ...).
        self.fc1 = nn.Linear(input_size, h_neurons)
        self.fc2 = nn.Linear(h_neurons, 2)
        self.relu = nn.ReLU()
        self.drop = nn.Dropout(do)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax over the two outputs for every row of `x`."""
        flat = x.view(-1, self.input_size)
        hidden = self.relu(self.fc1(flat))
        # Dropout is applied to the output of fc2, i.e. right before the softmax.
        scores = self.drop(self.fc2(hidden))
        return self.softmax(scores)

# Collect the stored attack models found anywhere below `path`.
# ams maps the file name to the object returned by torch.load.
path = 'mia'
ams = {}
for r, d, f in os.walk(path):
    for file in f:
        if "best_mi_model" not in file:
            continue
        # NOTE(review): this freshly built network is replaced by the loaded
        # object on the next line.
        ams[file] = Net_attack(h_neurons=64, do=0, input_size=200)
        # NOTE(review): torch.load unpickles the file - only load files from a trusted source.
        ams[file] = torch.load(r+'/'+file)

# Membership inference (MI) on centralized target models

In [None]:
# Load every centralized target model and its parameter file found below `path`.
path = 'tm'
tms_params = {}   # model name -> dict read from '<name>_params.json'
tms = {}          # model name -> array read from '<name>.npy'
for r,d,f in os.walk(path):
    for file in f:
        # Build the location from the directory currently being walked (`r`),
        # not from the walk root, so files inside sub-directories can be opened too.
        file_path = os.path.join(r, file)
        if "target_model_params.json" in file:
            with open(file_path) as json_file:
                tms_params[file.replace('_params.json', '')] = json.load(json_file)
        if "target_model.npy" in file:
            tms[file.replace('.npy', '')] = np.load(file_path)

# One row per model, one column per parameter; the shape is shown as cell output.
df = pd.DataFrame.from_dict(tms_params, orient='index')
df.shape

In [None]:
# attack every centralized target model
for file in tms_params:

    # Skip models whose parameters already contain an 'attack_acc_mean' entry.
    if 'attack_acc_mean' in tms_params[file]:
        continue
    # set random seed and load the data
    # The seed is the number between 'rs' and '_lr' in the model name.
    rand_seed=int(file[file.find('rs')+2:file.find('_lr')])
    x_target_train, y_target_train, x_target_test, y_target_test = set_the_seed_and_data(rand_seed)

    #attack
    target_model = algo.LogisticRegression_DPSGD()
    target_model.theta = tms[file]
    params = tms_params[file]
    scripts.set_model_params(target_model, params)
    attack_dict = attack.test_mi_attack(ams, target_model, x_target_train, y_target_train, x_target_test, y_target_test)

    # `params` is the dict stored in tms_params, so the update is visible there too.
    params.update(attack_dict)
    # Drop these three entries before saving (presumably the raw per-attack-model
    # values - TODO confirm). A missing key is ignored instead of raising KeyError.
    for key in ('attack_acc', 'attack_pre', 'attack_rec'):
        params.pop(key, None)
    #write a new parameters file with attack results
    # The handle gets its own name so the loop variable `file` is not rebound.
    with open('tm/'+file+'_params.json', 'w') as json_file:
        json.dump(params, json_file)
        

# Membership inference on federated learning models


In [161]:
# Walk the federated-learning output tree and collect, per directory:
#   params[dir]  - content of the params.json file
#   results[dir] - results.csv as a DataFrame
#   models[dir]  - {file name: array} for every .npy file
path = 'fl/'
params = {}
results = {}
models = {}
for r, d, f in os.walk(path):
    for file in f:
        location = r+'/'+file
        if "params.json" in file:
            with open(location) as json_file:
                params[r] = json.load(json_file)
            continue
        if "results.csv" in file:
            frame = pd.read_csv(location)
            results[r] = frame
            # A results file with an 'Unnamed: 0' column carries labels such as
            # 'i0_c1' there; split them into iteration, client and file name.
            if 'Unnamed: 0' in frame.keys():
                labels = list(frame['Unnamed: 0'])
                frame['it'] = [label[label.find('i')+1:label.find('_')] for label in labels]
                frame['client'] = [label[label.find('_')+1:] for label in labels]
                frame['file_name'] = [label+'.npy' for label in labels]
                frame.pop('Unnamed: 0')
            continue
        if '.npy' in file:
            per_dir = models.setdefault(r, {})
            per_dir[file] = np.load(location)



In [162]:
# The keys of `results` are the directories for which a results.csv was loaded.
# Note that .keys() is a live view of the dict, not a copy.
selected_files = results.keys()
# Number of result folders (shown as cell output).
len(selected_files)

101

In [163]:
# attack every federated local/global model
# Folders that already carry attack columns are processed again; their old
# attack columns are replaced further down instead of being duplicated.
for file in selected_files:

    # Only folders whose path contains 'outDPlocalTrue' are attacked.
    if 'outDPlocalTrue' not in file:
        continue

    # set random seed and load the data
    # The seed is the number between 'rs' and '_ncl' in the folder name.
    rand_seed=int(file[file.find('rs')+2:file.find('_ncl')])
    x_target_train, y_target_train, x_target_test, y_target_test = set_the_seed_and_data(rand_seed)

    # After an earlier run of this cell the frame is already indexed by
    # 'file_name'; otherwise move that column into the index now.
    result_df = results[file]
    if result_df.index.name != 'file_name':
        result_df = result_df.set_index('file_name')

    #set number of client for following split of the training data between clients
    # One of the distinct 'client' values is subtracted (the 'g' rows).
    number_of_clients = len(result_df['client'].unique())-1
    # Guard against folders without client rows, which would divide by zero.
    if number_of_clients > 0:
        data_per_client = int(x_target_train.shape[0]/number_of_clients)
    else:
        data_per_client = 0

    #attack
    tm_params = params[file]
    attack_results = {}
    for tm in result_df.index:
        target_model = algo.LogisticRegression_DPSGD()
        target_model.theta = models[file][tm]
        scripts.set_model_params(target_model, tm_params)
        if 'g' in tm:
            # File names containing 'g' are evaluated against the full training set.
            target_model.x = x_target_train
            target_model.y = y_target_train
        else:
            # The client index is the number between '_c' and '.npy'; each
            # client is assigned a contiguous slice of the training data.
            i = int(tm[tm.find('_c')+2:tm.find('.npy')])
            target_model.x = x_target_train[i*data_per_client:(i+1)*data_per_client]
            target_model.y = y_target_train[i*data_per_client:(i+1)*data_per_client]
        attack_dict = attack.test_mi_attack(ams, target_model, target_model.x, target_model.y, x_target_test, y_target_test)
        attack_results[tm] = attack_dict
    attack_df = pd.DataFrame.from_dict(attack_results, orient='index')
    # Remove attack columns left over from a previous run so the merge does not
    # produce '<name>_x' / '<name>_y' duplicates.
    stale_columns = [column for column in attack_df.columns if column in result_df.columns]
    new_df = pd.merge(result_df.drop(columns=stale_columns), attack_df, left_index=True, right_index=True)
    # Make sure the index is written to the csv under the header 'file_name'.
    new_df = new_df.rename_axis('file_name')
    #save attack results in old results file
    new_df.to_csv(file+'/results.csv')
    results[file] = new_df

In [164]:
# Display the frame stored under `file`. `file` is not set in this cell: it is
# whatever value an earlier loop left behind (presumably the last folder
# visited by the attack loop - confirm when running cells out of order).
results[file]

Unnamed: 0_level_0,train_acc,test_acc,train_acc_outDP,test_acc_out_DP,it,client,attack_acc_mean,attack_acc_std,attack_pre_mean,attack_pre_std,attack_rec_mean,attack_rec_std
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
i0_c0.npy,0.797,0.4198,0.0108,0.0131,0,c0,0.4973,0.002858,0.490948,0.011228,0.267833,0.171188
i0_c1.npy,0.8118,0.4442,0.0174,0.0151,0,c1,0.49995,0.003832,0.49995,0.004535,0.4289,0.117826
i0_g.npy,0.012,0.011,,,0,g,0.499858,0.002752,0.500996,0.003839,0.342,0.112854
i1_c0.npy,0.0154,0.0156,0.0194,0.0177,1,c0,0.495833,0.004116,0.493737,0.004515,0.293133,0.085385
i1_c1.npy,0.0164,0.0153,0.0086,0.0095,1,c1,0.504633,0.003502,0.511222,0.009277,0.298833,0.151414
i1_g.npy,0.0147,0.0163,,,1,g,0.49525,0.002002,0.491798,0.003898,0.307183,0.095606
i2_c0.npy,0.0156,0.0139,0.0066,0.0082,2,c0,0.500233,0.001862,0.501276,0.003984,0.273633,0.125626
i2_c1.npy,0.0166,0.0134,0.0128,0.0154,2,c1,0.496217,0.002274,0.491085,0.006266,0.252633,0.103787
i2_g.npy,0.0122,0.0136,,,2,g,0.499108,0.001671,0.497522,0.004291,0.264733,0.119598
i3_c0.npy,0.015,0.0145,0.0098,0.0138,3,c0,0.499167,0.002775,0.498862,0.00519,0.260967,0.084899


In [160]:
# NOTE(review): maintenance cell with side effects - it renames files on disk
# and resets `params`, `results` and `models` to empty dicts. Its execution
# count (160) is lower than that of the cells above, so it was run before them.
path = 'fl/'
params = {}
results = {}
models = {}
for r, d, f in os.walk(path):
    # Only directories whose path contains 'outDPlocalTrue' are touched.
    if 'outDPlocalTrue' in r:
        # An earlier, disabled step appended '_before_DP' to the name of every
        # .npy file that was neither '*_outDP.npy' nor already '*before_DP.npy'.
        for file in f:
            if '2.npy' in file:
                source_name = r+'/'+file
                # Strip the '_outDP' suffix; names without it stay unchanged.
                target_name = r+'/'+file.replace('_outDP.npy', '.npy')
                os.rename(source_name, target_name)
                # A disabled variant loaded the array into models[r][file] here instead.

