In [1]:
import numpy as np
import pandas as pd
import random

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from torch import nn,optim
import torch

import algo
import attack
import scripts

import os
import matplotlib.pyplot as plt
import json

# Seed every RNG this notebook relies on so a fresh "Restart & Run All" is repeatable.
# Python's built-in `random` is seeded as well, matching what set_the_seed_and_data()
# does for each run; torch.manual_seed stays last so the cell output is unchanged.
rand_seed=24
random.seed(rand_seed)
np.random.seed(rand_seed)
torch.manual_seed(rand_seed)

2023-03-06 20:51:56.595400: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


<torch._C.Generator at 0x14db67f10>

In [19]:
# purchase
# load data with different seeds
# data_seed maps each random seed to the arrays stored for it under data/
# plus a few sizes derived from those arrays.
data_seed = {}
# The loop variable is deliberately NOT called `rand_seed`, so that iterating
# here does not overwrite the notebook-wide seed chosen in the setup cell.
for seed in [1,3,13,24,42]:
    prefix = 'data/rs'+str(seed)
    x_train = np.load(prefix+'_x_target_train.npy')
    y_train = np.load(prefix+'_y_target_train.npy')
    x_test = np.load(prefix+'_x_target_test.npy')
    y_test = np.load(prefix+'_y_target_test.npy')

    data_seed[seed] = {
        'x_target_train': x_train,
        'y_target_train': y_train,
        'x_target_test': x_test,
        'y_target_test': y_test,
        # number of distinct labels seen in the training targets
        'n_classes': len(np.unique(y_train)),
        'X_train_size': x_train.shape[0],
        'X_test_size': x_test.shape[0],
    }

def set_the_seed_and_data(seed):
    """Seed numpy, torch and ``random`` with ``seed`` and return its data split.

    Parameters
    ----------
    seed : int
        Random seed; must be a key of the module-level ``data_seed`` dict.

    Returns
    -------
    tuple
        ``(x_target_train, y_target_train, x_target_test, y_target_test)``
        as stored in ``data_seed[seed]``.

    Raises
    ------
    KeyError
        If no data was loaded for ``seed``.
    """
    # Use the argument rather than the notebook-global `rand_seed`: the result
    # must depend only on what the caller passes in, not on hidden kernel state.
    np.random.seed(seed)
    torch.manual_seed(seed)
    random.seed(seed)

    split = data_seed[seed]
    return split['x_target_train'], split['y_target_train'], split['x_target_test'], split['y_target_test']

In [20]:
# attack models
from torch import nn

class Net_attack(nn.Module):
    """Attack classifier: one hidden ReLU layer followed by a 2-way softmax output."""

    def __init__(self, h_neurons, do, input_size):
        """Build the layers.

        h_neurons:  width of the hidden layer
        do:         dropout probability handed to nn.Dropout
        input_size: number of features every sample is reshaped to
        """
        super().__init__()
        self.input_size, self.h_neurons, self.do = input_size, h_neurons, do
        self.fc1 = nn.Linear(input_size, h_neurons)
        self.fc2 = nn.Linear(h_neurons, 2)
        self.relu = nn.ReLU()
        self.drop = nn.Dropout(do)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Reshape ``x`` to (-1, input_size) and return the softmax scores, one row of 2 per sample."""
        flat = x.view(-1, self.input_size)
        hidden = self.relu(self.fc1(flat))
        # dropout acts on the output layer's activations, right before the softmax
        return self.softmax(self.drop(self.fc2(hidden)))

path = 'mia'
# ams: attack models found anywhere under `path`, keyed by file name.
ams = {}
for r,d,f in os.walk(path):
    for file in f:
        if "best_mi_model" in file:
            # torch.load returns the deserialized object itself, so it is stored
            # directly; building a throw-away Net_attack first would only be
            # overwritten by this assignment.
            # NOTE(review): torch.load unpickles arbitrary objects -- only point
            # this at files you trust.
            ams[file] = torch.load(os.path.join(r, file))

# Membership inference (MI) on centralized target models

In [21]:
path = 'tm'
# tms_params: parameter dicts read from *_params.json, keyed by model name.
# tms:        arrays loaded from the matching *target_model.npy files, keyed by model name.
tms_params = {}
tms = {}
for r,d,f in os.walk(path):
    for file in f:
        # Build the path from the directory currently being walked (`r`), not
        # from the walk root, so files inside sub-folders resolve correctly.
        full_path = os.path.join(r, file)
        if "target_model_params.json" in file:
            with open(full_path) as json_file:
                tms_params[file.replace('_params.json', '')] = json.load(json_file)
        if "target_model.npy" in file:
            tms[file.replace('.npy', '')] = np.load(full_path)

# one row per target model, one column per stored parameter
df = pd.DataFrame.from_dict(tms_params, orient='index')
df.shape

(683, 18)

In [None]:
# attack every centralized target model
for file in tms_params:

    # models whose parameter file already holds attack results are skipped
    if 'attack_acc_mean' in tms_params[file]:
        continue
    # set random seed and load the data
    # (the seed is encoded in the model name between 'rs' and '_lr')
    rand_seed=int(file[file.find('rs')+2:file.find('_lr')])
    x_target_train, y_target_train, x_target_test, y_target_test = set_the_seed_and_data(rand_seed)

    #attack
    target_model = algo.LogisticRegression_DPSGD()
    target_model.theta = tms[file]
    params = tms_params[file]
    scripts.set_model_params(target_model, params)
    attack_dict = attack.test_mi_attack(ams, target_model, x_target_train, y_target_train, x_target_test, y_target_test)

    params.update(attack_dict)
    # Drop the plain attack_acc/attack_pre/attack_rec entries when present
    # (presumably superseded by the *_mean/*_std values -- TODO confirm).
    # pop(..., None) tolerates a file that carries only some of the three keys.
    if 'attack_acc' in params:
        for stale_key in ('attack_acc', 'attack_pre', 'attack_rec'):
            params.pop(stale_key, None)
    #write a new parameters file with attack results
    # The handle gets its own name so the loop variable `file` is not rebound.
    with open('tm/'+file+'_params.json', 'w') as json_file:
        json.dump(params, json_file)
        

# Membership inference Federated Learning


In [22]:
path = 'fl/'
# All three dicts are keyed by the run directory the files were found in.
params = {}    # parsed params.json
results = {}   # results.csv as a DataFrame
models = {}    # {file name: array} for every .npy file
for run_dir, _subdirs, filenames in os.walk(path):
    for fname in filenames:
        full_path = run_dir + '/' + fname
        if "params.json" in fname:
            with open(full_path) as json_file:
                params[run_dir] = json.load(json_file)
        elif "results.csv" in fname:
            table = pd.read_csv(full_path)
            results[run_dir] = table
            # Only tables that still have the unnamed label column need splitting.
            if 'Unnamed: 0' not in table.keys():
                continue
            labels = list(table['Unnamed: 0'])
            # text between the first 'i' and the first '_' of the label
            table['it'] = [label[label.find('i')+1:label.find('_')] for label in labels]
            # everything after the first '_'
            table['client'] = [label[label.find('_')+1:] for label in labels]
            table['file_name'] = [label+'.npy' for label in labels]
            table.pop('Unnamed: 0')
        elif '.npy' in fname:
            models.setdefault(run_dir, {})[fname] = np.load(full_path)



In [23]:
# Keys of `results`, i.e. the run directories for which a results.csv was read.
# Note: dict.keys() is a live view of `results`, not a copy.
selected_files = results.keys()
len(selected_files)

279

In [24]:
# attack every federated local/global model
for file in selected_files:
    run_results = results[file]

    # runs whose table already carries attack metrics are left untouched
    if 'attack_acc_mean' in run_results.keys():
        continue

    # the seed is encoded in the directory name between 'rs' and '_ncl'
    rand_seed = int(file[file.find('rs')+2:file.find('_ncl')])
    x_target_train, y_target_train, x_target_test, y_target_test = set_the_seed_and_data(rand_seed)
    # One of the distinct 'client' labels is not counted as a client
    # (presumably the global-model entry -- TODO confirm).
    number_of_clients = len(run_results['client'].unique()) - 1
    # the training data is cut into equally sized consecutive chunks, one per client
    data_per_client = x_target_train.shape[0] // number_of_clients

    print(file)
    attack_results = {}
    for tm in run_results['file_name']:
        target_model = algo.LogisticRegression_DPSGD()
        target_model.theta = models[file][tm]
        tm_params = params[file]
        scripts.set_model_params(target_model, tm_params)
        if 'g' in tm:
            # file names containing 'g' are evaluated against the full training set
            member_x, member_y = x_target_train, y_target_train
        else:
            # client index = digits between '_c' and '.npy'
            i = int(tm[tm.find('_c')+2:tm.find('.npy')])
            lo, hi = i*data_per_client, (i+1)*data_per_client
            member_x, member_y = x_target_train[lo:hi], y_target_train[lo:hi]
        target_model.x = member_x
        target_model.y = member_y
        attack_results[tm] = attack.test_mi_attack(ams, target_model, target_model.x, target_model.y, x_target_test, y_target_test)

    # join the attack metrics onto the existing per-model rows via the model file name
    attack_df = pd.DataFrame.from_dict(attack_results, orient='index')
    result_df = run_results.set_index('file_name')
    new_df = pd.merge(result_df, attack_df, left_index=True, right_index=True)
    # the run's results.csv is overwritten with the merged table
    new_df.to_csv(file+'/results.csv')
    results[file] = new_df

fl/rs42_ncl32_fiter10_lr0.01_iter100_reg0.0001_outDPlocalTrue_eps0.5
fl/rs42_ncl32_fiter10_lr0.01_iter100_reg0.0001_outDPlocalTrue_eps1
fl/rs42_ncl4_fiter10_lr0.01_iter200_reg0.0001_sgdDPTrue_eps500_L20_C2
fl/rs42_ncl32_fiter10_lr0.01_iter100_reg0.0001_sgdDPTrue_eps0.5_L20_C2


In [25]:
# Show the merged results/attack table of the last run handled by the attack loop.
# NOTE(review): as far as this notebook shows, `new_df` is only assigned inside that
# loop, so this cell fails on a fresh kernel if every run was skipped.
new_df

Unnamed: 0,train_acc,test_acc,it,client,attack_acc_mean,attack_acc_std,attack_pre_mean,attack_pre_std,attack_rec_mean,attack_rec_std
i0_c0.npy,0.019231,0.0127,0,c0,0.501335,0.005954,0.504275,0.014829,0.263355,0.130133
i0_c1.npy,0.012821,0.0138,0,c1,0.510417,0.012542,0.525921,0.032549,0.293269,0.084491
i0_c2.npy,0.032051,0.0129,0,c2,0.500801,0.011840,0.501221,0.028359,0.256410,0.119223
i0_c3.npy,0.016026,0.0151,0,c3,0.514957,0.009191,0.530723,0.023144,0.309295,0.106892
i0_c4.npy,0.019231,0.0118,0,c4,0.512553,0.012901,0.532344,0.029894,0.238782,0.053496
...,...,...,...,...,...,...,...,...,...,...
i9_c28.npy,0.012821,0.0190,9,c28,0.509615,0.011921,0.514778,0.019638,0.365385,0.108754
i9_c29.npy,0.019231,0.0170,9,c29,0.505609,0.019091,0.510315,0.027369,0.344551,0.091159
i9_c30.npy,0.012821,0.0128,9,c30,0.506143,0.009225,0.532261,0.043521,0.221154,0.144776
i9_c31.npy,0.022436,0.0188,9,c31,0.521368,0.010121,0.539622,0.017579,0.292201,0.091359


# Membership inference (MI): Loan dataset

In [26]:
path = 'mia/loan/best_ams/'
aparams = {}   # hyper-parameters per attack model
ams = {}       # loaded attack models

# Pass 1: read every JSON file; its key is folder + file name without '_params.json'.
for folder, _subdirs, names in os.walk(path):
    for name in names:
        if ".json" not in name:
            continue
        with open(folder+'/'+name) as json_file:
            aparams[folder+name.replace('_params.json', '')] = json.load(json_file)

# Pass 2: load the models themselves (every file that is neither JSON nor .DS_Store).
for folder, _subdirs, names in os.walk(path):
    for name in names:
        if ".json" in name or '.DS_Store' in name:
            continue
        key = folder+name
        hyper = aparams[key]
        # NOTE(review): this freshly built network is replaced by the torch.load
        # result on the next line; only the loaded object ends up in `ams`.
        ams[key] = Net_attack(h_neurons=hyper['h_neurons'], do=hyper['do'], input_size=14)
        ams[key] = torch.load(folder+'/'+name)
        

In [27]:
path = 'loan/centr/'
# tms_params: parameter dicts from the JSON files, keyed by model name.
# tms:        arrays from the *target_model.npy files, keyed by model name.
tms_params = {}
tms = {}
for r,d,f in os.walk(path):
    for file in f:
        # Both file kinds are resolved against the directory being walked (`r`),
        # so JSON and .npy files are always read from the same folder.
        full_path = os.path.join(r, file)
        if ".json" in file:
            with open(full_path) as json_file:
                tms_params[file.replace('_params.json', '')] = json.load(json_file)
        if "target_model.npy" in file:
            tms[file.replace('.npy', '')] = np.load(full_path)
                    

In [28]:
# attack every centralized target model
for file in tms_params:

    # models whose parameter file already holds attack results are skipped
    if 'attack_acc_mean' in tms_params[file]:
        continue
    # set random seed and load the data
    # (the seed is encoded in the model name between 'rs' and '_lr')
    rand_seed=int(file[file.find('rs')+2:file.find('_lr')])
    x_target_train, y_target_train, x_target_test, y_target_test = scripts.load_loan(rand_seed, tr_size=10000)

    #attack
    target_model = algo.LogisticRegression_DPSGD()
    target_model.theta = tms[file]
    params = tms_params[file]
    scripts.set_model_params(target_model, params)
    attack_dict = attack.test_mi_attack(ams, target_model, x_target_train, y_target_train, x_target_test, y_target_test)

    params.update(attack_dict)
    # Drop the plain attack_acc/attack_pre/attack_rec entries when present
    # (presumably superseded by the *_mean/*_std values -- TODO confirm).
    # pop(..., None) tolerates a file that carries only some of the three keys.
    if 'attack_acc' in params:
        for stale_key in ('attack_acc', 'attack_pre', 'attack_rec'):
            params.pop(stale_key, None)
    #write a new parameters file with attack results
    # The handle gets its own name so the loop variable `file` is not rebound.
    with open('loan/centr/'+file+'_params.json', 'w') as json_file:
        json.dump(params, json_file)
        

# Loan FL

In [29]:
path = 'loan/fl'
# All three dicts are keyed by the run directory the files were found in.
params = {}    # parsed params.json
results = {}   # results.csv as a DataFrame
models = {}    # {file name: array} for every .npy file
for run_dir, _subdirs, filenames in os.walk(path):
    for fname in filenames:
        full_path = run_dir + '/' + fname
        if "params.json" in fname:
            with open(full_path) as json_file:
                params[run_dir] = json.load(json_file)
        elif "results.csv" in fname:
            table = pd.read_csv(full_path)
            results[run_dir] = table
            # Only tables that still have the unnamed label column need splitting.
            if 'Unnamed: 0' not in table.keys():
                continue
            labels = list(table['Unnamed: 0'])
            # text between the first 'i' and the first '_' of the label
            table['it'] = [label[label.find('i')+1:label.find('_')] for label in labels]
            # everything after the first '_'
            table['client'] = [label[label.find('_')+1:] for label in labels]
            table['file_name'] = [label+'.npy' for label in labels]
            table.pop('Unnamed: 0')
        elif '.npy' in fname:
            models.setdefault(run_dir, {})[fname] = np.load(full_path)



In [30]:
path = 'mia/loan/best_ams/'
aparams = {}   # hyper-parameters per attack model
ams = {}       # loaded attack models

# Pass 1: read every JSON file; its key is folder + file name without '_params.json'.
for folder, _subdirs, names in os.walk(path):
    for name in names:
        if ".json" not in name:
            continue
        with open(folder+'/'+name) as json_file:
            aparams[folder+name.replace('_params.json', '')] = json.load(json_file)

# Pass 2: load the models themselves (every file that is neither JSON nor .DS_Store).
for folder, _subdirs, names in os.walk(path):
    for name in names:
        if ".json" in name or '.DS_Store' in name:
            continue
        key = folder+name
        hyper = aparams[key]
        # NOTE(review): this freshly built network is replaced by the torch.load
        # result on the next line; only the loaded object ends up in `ams`.
        ams[key] = Net_attack(h_neurons=hyper['h_neurons'], do=hyper['do'], input_size=14)
        ams[key] = torch.load(folder+'/'+name)
        

In [31]:
# Keys of `results`, i.e. the run directories for which a results.csv was read.
# Note: dict.keys() is a live view of `results`, not a copy.
selected_files = results.keys()
len(selected_files)

587

In [32]:
# attack every federated local/global model
for file in selected_files:
    run_results = results[file]

    # runs whose table already carries attack metrics are left untouched
    if 'attack_acc_mean' in run_results.keys():
        continue

    # the seed is encoded in the directory name between 'rs' and '_ncl'
    rand_seed = int(file[file.find('rs')+2:file.find('_ncl')])
    x_target_train, y_target_train, x_target_test, y_target_test = scripts.load_loan(rand_seed, tr_size=10000)
    # One of the distinct 'client' labels is not counted as a client
    # (presumably the global-model entry -- TODO confirm).
    number_of_clients = len(run_results['client'].unique()) - 1
    # the training data is cut into equally sized consecutive chunks, one per client
    data_per_client = x_target_train.shape[0] // number_of_clients

    print(file)
    attack_results = {}
    for tm in run_results['file_name']:
        target_model = algo.LogisticRegression_DPSGD()
        target_model.theta = models[file][tm]
        tm_params = params[file]
        scripts.set_model_params(target_model, tm_params)
        if 'g' in tm:
            # file names containing 'g' are evaluated against the full training set
            member_x, member_y = x_target_train, y_target_train
        else:
            # client index = digits between '_c' and '.npy'
            i = int(tm[tm.find('_c')+2:tm.find('.npy')])
            lo, hi = i*data_per_client, (i+1)*data_per_client
            member_x, member_y = x_target_train[lo:hi], y_target_train[lo:hi]
        target_model.x = member_x
        target_model.y = member_y
        attack_results[tm] = attack.test_mi_attack(ams, target_model, target_model.x, target_model.y, x_target_test, y_target_test)

    # join the attack metrics onto the existing per-model rows via the model file name
    attack_df = pd.DataFrame.from_dict(attack_results, orient='index')
    result_df = run_results.set_index('file_name')
    new_df = pd.merge(result_df, attack_df, left_index=True, right_index=True)
    # the run's results.csv is overwritten with the merged table
    new_df.to_csv(file+'/results.csv')
    results[file] = new_df

loan/fl/rs42_ncl128_fiter10_lr0.01_iter200_reg1e-06_outDPlocalTrue_eps100
loan/fl/rs42_ncl128_fiter10_lr0.01_iter200_reg1e-06_outDPlocalTrue_eps5
loan/fl/rs42_ncl128_fiter10_lr0.01_iter200_reg1e-06_outDPlocalTrue_eps0.1
loan/fl/rs42_ncl128_fiter10_lr0.01_iter200_reg1e-06_outDPlocalTrue_eps10
loan/fl/rs42_ncl64_fiter10_lr0.01_iter200_reg1e-06_outDPlocalTrue_eps1000
loan/fl/rs42_ncl64_fiter10_lr0.01_iter200_reg1e-06_outDPlocalTrue_eps10000
loan/fl/rs42_ncl64_fiter10_lr0.01_iter200_reg1e-06_outDPlocalTrue_eps5000
loan/fl/rs42_ncl128_fiter10_lr0.01_iter200_reg1e-06_outDPlocalTrue_eps1
loan/fl/rs42_ncl128_fiter10_lr0.01_iter200_reg1e-06_outDPlocalTrue_eps0.5
loan/fl/rs42_ncl128_fiter10_lr0.01_iter200_reg1e-06_outDPlocalTrue_eps50


# Texas

In [33]:
import os
path = 'mia/texas/best_mia/'
aparams = {}   # hyper-parameters per attack model
ams = {}       # loaded attack models

# Pass 1: read every JSON file; its key is folder + file name without '.json'.
for folder, _subdirs, names in os.walk(path):
    for name in names:
        if ".json" not in name:
            continue
        with open(folder+'/'+name) as json_file:
            aparams[folder+name.replace('.json', '')] = json.load(json_file)

# Pass 2: load the models themselves (every file that is neither JSON nor .DS_Store).
for folder, _subdirs, names in os.walk(path):
    for name in names:
        if ".json" in name or '.DS_Store' in name:
            continue
        key = folder+name
        hyper = aparams[key]
        # NOTE(review): this freshly built network is replaced by the torch.load
        # result on the next line; only the loaded object ends up in `ams`.
        ams[key] = Net_attack(h_neurons=hyper['h_neurons'], do=hyper['do'], input_size=100)
        ams[key] = torch.load(folder+'/'+name)
        

In [34]:
path = 'texas/centr/'
# tms_params: parameter dicts from the JSON files, keyed by model name.
# tms:        arrays from the *target_model.npy files, keyed by model name.
tms_params = {}
tms = {}
for r,d,f in os.walk(path):
    for file in f:
        # Both file kinds are resolved against the directory being walked (`r`),
        # so JSON and .npy files are always read from the same folder.
        full_path = os.path.join(r, file)
        if ".json" in file:
            with open(full_path) as json_file:
                tms_params[file.replace('_params.json', '')] = json.load(json_file)
        if "target_model.npy" in file:
            tms[file.replace('.npy', '')] = np.load(full_path)
                    

In [35]:
# attack every centralized target model
for file in tms_params:

    # models whose parameter file already holds attack results are skipped
    if 'attack_acc_mean' in tms_params[file]:
        continue
    # set random seed and load the data
    print(file)
    # NOTE(review): the seed is parsed from the model name but load_texas() is
    # called without it -- confirm the Texas split does not depend on the seed.
    rand_seed=int(file[file.find('rs')+2:file.find('_lr')])
    x_target_train, y_target_train, x_target_test, y_target_test = scripts.load_texas()

    #attack
    target_model = algo.LogisticRegression_DPSGD()
    target_model.theta = tms[file]
    params = tms_params[file]
    scripts.set_model_params(target_model, params)
    attack_dict = attack.test_mi_attack(ams, target_model, x_target_train, y_target_train, x_target_test, y_target_test)

    params.update(attack_dict)
    # Drop the plain attack_acc/attack_pre/attack_rec entries when present
    # (presumably superseded by the *_mean/*_std values -- TODO confirm).
    # pop(..., None) tolerates a file that carries only some of the three keys.
    if 'attack_acc' in params:
        for stale_key in ('attack_acc', 'attack_pre', 'attack_rec'):
            params.pop(stale_key, None)
    #write a new parameters file with attack results
    # The handle gets its own name so the loop variable `file` is not rebound.
    with open('texas/centr/'+file+'_params.json', 'w') as json_file:
        json.dump(params, json_file)
        

rs42_lr0.01_iter50_reg0.0001_sgdDPTrue_eps10000_L5_C5_target_model
rs42_lr0.01_iter50_reg0.0001_sgdDPTrue_eps10000_L5_C4_target_model
rs42_lr0.01_iter100_reg0.0001_sgdDPTrue_eps10000_L20_C5_target_model
rs42_lr0.01_iter50_reg0.0001_sgdDPTrue_eps10000_L5_C3_target_model
