In [4]:
import numpy as np
import pandas as pd

import algo
import federated
import scripts

import os
import json
import random

# Federated-learning experiment driver.
# For every random seed and every swept value of L, build the clients, train
# them for a number of FL rounds, aggregate after each round and persist the
# per-round parameters and scores under a folder whose name encodes the setup.
for rand_seed in [42]:

    # Seed both NumPy's and Python's global RNGs for this run.
    np.random.seed(rand_seed)
    random.seed(rand_seed)

    x_target_train = np.load('data/rs'+str(rand_seed)+'_x_target_train.npy')
    y_target_train = np.load('data/rs'+str(rand_seed)+'_y_target_train.npy')
    x_target_test = np.load('data/rs'+str(rand_seed)+'_x_target_test.npy')
    y_target_test = np.load('data/rs'+str(rand_seed)+'_y_target_test.npy')
    n_classes = len(np.unique(y_target_train))

    #for epsilon in [0.1, 0.5, 1, 10, 100, 1000, 10000]:
    for L in [1]:

        number_of_clients = 2
        fl_iterations = 2
        # Equal-sized contiguous shards; leftover samples (if any) are unused.
        data_per_client = x_target_train.shape[0] // number_of_clients

        # Create clients with the chosen training parameters and data shards.
        clients = {}
        for i in range(number_of_clients):
            clients[i] = algo.LogisticRegression_DPSGD()

            clients[i].n_classes      = n_classes
            clients[i].alpha          = 0.01
            clients[i].max_iter       = 1
            clients[i].lambda_        = 0.0001
            clients[i].tolerance      = 1e-5
            clients[i].sgdDP          = True
            # Take the swept value instead of a hard-coded constant so that
            # extending the `for L in [...]` list really changes the setup.
            clients[i].L              = L #should be 1 if DP == False
            clients[i].epsilon        = 1000
            clients[i].C              = 1
            clients[i].outDP_local          = False
            clients[i].outDP_local_epsilon  = 1
            # outDP_global / outDP_global_epsilon are not supported yet.

            start = i * data_per_client
            stop = (i + 1) * data_per_client
            clients[i].x = x_target_train[start:stop]
            clients[i].y = y_target_train[start:stop]

        # Snapshot the hyper-parameters once all clients exist. The data shards
        # are filtered out explicitly, which also works with a single client
        # (the old in-loop snapshot followed by pop('x') raised KeyError there).
        params = {key: value for key, value in clients[0].__dict__.items()
                  if key not in ('x', 'y')}

        # All clients share one configuration, so client 0 names the folder.
        fl_path = f'fl/rs{rand_seed}_ncl{number_of_clients}_fiter{fl_iterations}_lr{clients[0].alpha}_iter{clients[0].max_iter}_reg{clients[0].lambda_}'
        if clients[0].sgdDP:
            fl_path += f'_sgdDP{clients[0].sgdDP}_eps{clients[0].epsilon}_L{clients[0].L}_C{clients[0].C}'
        if clients[0].outDP_local:
            fl_path += f'_outDPlocal{clients[0].outDP_local}_eps{clients[0].outDP_local_epsilon}'

        print(params)
        if os.path.exists(fl_path):
            # Never overwrite the artefacts of a finished experiment.
            print('Experiment already exists:\n', fl_path)
            continue

        print('Creating new folder:\n', fl_path)
        # makedirs also creates the parent 'fl' directory on a fresh checkout.
        os.makedirs(fl_path)
        with open(fl_path+'/params.json', 'w') as file:
            json.dump(params, file)

        results = {}
        for iteration in range(fl_iterations):

            print(iteration, ' FL iteration')
            for i in clients:
                print("Start training client: ", i)
                # train_client is expected to leave train_acc / test_acc (and
                # theta) on the client; they are read back below.
                federated.train_client(iteration, clients[i], x_target_test, y_target_test)
                if clients[i].outDP_local:
                    print('Adding local output DP')
                    federated.output_DP_federated(clients[i],  clients[i].x.shape[0], clients[i].outDP_local_epsilon)
                    clients[i].train_acc_outDP_local = clients[i].evaluate(clients[i].x, clients[i].y, acc=True)
                    clients[i].test_acc_outDP_local = clients[i].evaluate(x_target_test, y_target_test, acc=True)
                    # Keep both the clean and the noised parameters on disk.
                    np.save(fl_path + f'/i{iteration}_c{i}', clients[i].theta_before_noise)
                    np.save(fl_path + f'/i{iteration}_c{i}_outDP', clients[i].theta)
                    results[f'i{iteration}_c{i}'] = (clients[i].train_acc,  clients[i].test_acc, clients[i].train_acc_outDP_local, clients[i].test_acc_outDP_local)
                else:
                    np.save(fl_path + f'/i{iteration}_c{i}', clients[i].theta)
                    results[f'i{iteration}_c{i}'] = (clients[i].train_acc,  clients[i].test_acc)

            global_model = federated.aggregate(clients)
            # Global output DP (outDP_global) is not supported yet.
            np.save(fl_path + f'/i{iteration}_g', global_model)
            federated.update_clients(clients, global_model)

            # Global model evaluation: after update_clients every client holds
            # the aggregated parameters, so client 0 stands in for the global model.
            print('Global model evaluataion:')
            gtrain_acc = clients[0].evaluate(x_target_train, y_target_train, acc=True)
            gtest_acc = clients[0].evaluate(x_target_test, y_target_test, acc=True)
            results[f'i{iteration}_g'] = (gtrain_acc,  gtest_acc)
            # Early stop once the global model reaches the target test score.
            if clients[0].evaluate(x_target_test, y_target_test)>=0.56:
                break

        # Client rows carry two extra columns when local output DP is enabled.
        if clients[0].outDP_local:
            res = pd.DataFrame.from_dict(results, orient='index', columns=['train_acc', 'test_acc', 'train_acc_outDP', 'test_acc_out_DP'])
        else:
            res = pd.DataFrame.from_dict(results, orient='index', columns=['train_acc', 'test_acc'])
        res.to_csv(fl_path + '/results.csv')


1674644457.107546
{'n_classes': 100, 'alpha': 0.01, 'max_iter': 1, 'lambda_': 0.0001, 'tolerance': 1e-05, 'sgdDP': True, 'L': 1, 'C': 1, 'epsilon': 1000, 'delta': 1e-05, 'sigma': 0, 'outDP_local': False, 'outDP_local_epsilon': 1}
Creating new folder:
 fl/rs42_ncl2_fiter2_lr0.01_iter1_reg0.0001_sgdDPTrue_eps1000_L1_C1
0  FL iteration
Start training client:  0
DP-SGD with sampling rate = 0.02% and noise_multiplier = 0.14305325786478998 iterated over 5000 steps satisfies differential privacy with eps = 1e+03 and delta = 1e-05.
The accuracy of the model : 4.3 %
The accuracy of the model : 3.6999999999999997 %
Start training client:  1
DP-SGD with sampling rate = 0.02% and noise_multiplier = 0.14305325786478998 iterated over 5000 steps satisfies differential privacy with eps = 1e+03 and delta = 1e-05.
The accuracy of the model : 3.3000000000000003 %
The accuracy of the model : 2.6 %
Global model evaluataion:
The accuracy of the model : 4.3999999999999995 %
The accuracy of the model : 3.5999

In [8]:
# Elapsed wall-clock seconds between two hard-coded epoch timestamps. The
# subtrahend is the timestamp shown in the saved output of the experiment cell
# above; the first operand is presumably the matching end timestamp (it is not
# visible in the saved output) -- TODO confirm.
1674644507.172802 - 1674644457.107546

50.065255880355835

In [10]:
import numpy as np
import pandas as pd

import algo
import federated
import scripts

import time
import os
import json
import random

# Timing variant of the federated-learning experiment: same pipeline as the
# experiment cell, with progress printing removed and an epoch timestamp
# printed before and after so the wall-clock duration can be derived.
print(time.time())
for rand_seed in [42]:

    # Seed both NumPy's and Python's global RNGs for this run.
    np.random.seed(rand_seed)
    random.seed(rand_seed)

    x_target_train = np.load('data/rs'+str(rand_seed)+'_x_target_train.npy')
    y_target_train = np.load('data/rs'+str(rand_seed)+'_y_target_train.npy')
    x_target_test = np.load('data/rs'+str(rand_seed)+'_x_target_test.npy')
    y_target_test = np.load('data/rs'+str(rand_seed)+'_y_target_test.npy')
    n_classes = len(np.unique(y_target_train))

    #for epsilon in [0.1, 0.5, 1, 10, 100, 1000, 10000]:
    for L in [1]:

        number_of_clients = 2
        fl_iterations = 2
        # Equal-sized contiguous shards; leftover samples (if any) are unused.
        data_per_client = x_target_train.shape[0] // number_of_clients

        # Create clients with the chosen training parameters and data shards.
        clients = {}
        for i in range(number_of_clients):
            clients[i] = algo.LogisticRegression_DPSGD()

            clients[i].n_classes      = n_classes
            clients[i].alpha          = 0.01
            clients[i].max_iter       = 1
            clients[i].lambda_        = 0.0001
            clients[i].tolerance      = 1e-5
            clients[i].sgdDP          = True
            # Take the swept value instead of a hard-coded constant so that
            # extending the `for L in [...]` list really changes the setup.
            clients[i].L              = L #should be 1 if DP == False
            clients[i].epsilon        = 1000
            clients[i].C              = 1
            clients[i].outDP_local          = False
            clients[i].outDP_local_epsilon  = 1
            # outDP_global / outDP_global_epsilon are not supported yet.

            start = i * data_per_client
            stop = (i + 1) * data_per_client
            clients[i].x = x_target_train[start:stop]
            clients[i].y = y_target_train[start:stop]

        # Snapshot the hyper-parameters once all clients exist. The data shards
        # are filtered out explicitly, which also works with a single client
        # (the old in-loop snapshot followed by pop('x') raised KeyError there).
        params = {key: value for key, value in clients[0].__dict__.items()
                  if key not in ('x', 'y')}

        # All clients share one configuration, so client 0 names the folder.
        fl_path = f'fl/rs{rand_seed}_ncl{number_of_clients}_fiter{fl_iterations}_lr{clients[0].alpha}_iter{clients[0].max_iter}_reg{clients[0].lambda_}'
        if clients[0].sgdDP:
            fl_path += f'_sgdDP{clients[0].sgdDP}_eps{clients[0].epsilon}_L{clients[0].L}_C{clients[0].C}'
        if clients[0].outDP_local:
            fl_path += f'_outDPlocal{clients[0].outDP_local}_eps{clients[0].outDP_local_epsilon}'

        # This cell deliberately re-runs (and overwrites) an existing
        # experiment so it can be timed. Creating the folder here, instead of
        # relying on an earlier cell having done so, keeps the cell runnable on
        # a fresh kernel / clean disk.
        os.makedirs(fl_path, exist_ok=True)
        with open(fl_path+'/params.json', 'w') as file:
            json.dump(params, file)

        results = {}
        for iteration in range(fl_iterations):

            for i in clients:
                # train_client is expected to leave train_acc / test_acc (and
                # theta) on the client; they are read back below.
                federated.train_client(iteration, clients[i], x_target_test, y_target_test)
                if clients[i].outDP_local:
                    federated.output_DP_federated(clients[i],  clients[i].x.shape[0], clients[i].outDP_local_epsilon)
                    clients[i].train_acc_outDP_local = clients[i].evaluate(clients[i].x, clients[i].y)
                    clients[i].test_acc_outDP_local = clients[i].evaluate(x_target_test, y_target_test)
                    # Keep both the clean and the noised parameters on disk.
                    np.save(fl_path + f'/i{iteration}_c{i}', clients[i].theta_before_noise)
                    np.save(fl_path + f'/i{iteration}_c{i}_outDP', clients[i].theta)
                    results[f'i{iteration}_c{i}'] = (clients[i].train_acc,  clients[i].test_acc, clients[i].train_acc_outDP_local, clients[i].test_acc_outDP_local)
                else:
                    np.save(fl_path + f'/i{iteration}_c{i}', clients[i].theta)
                    results[f'i{iteration}_c{i}'] = (clients[i].train_acc,  clients[i].test_acc)

            global_model = federated.aggregate(clients)
            # Global output DP (outDP_global) is not supported yet.
            np.save(fl_path + f'/i{iteration}_g', global_model)
            federated.update_clients(clients, global_model)

            # Global model evaluation: after update_clients every client holds
            # the aggregated parameters, so client 0 stands in for the global model.
            gtrain_acc = clients[0].evaluate(x_target_train, y_target_train)
            gtest_acc = clients[0].evaluate(x_target_test, y_target_test)
            results[f'i{iteration}_g'] = (gtrain_acc,  gtest_acc)
            # Early stop once the global model reaches the target test score.
            if clients[0].evaluate(x_target_test, y_target_test)>=0.56:
                break

        # Client rows carry two extra columns when local output DP is enabled.
        if clients[0].outDP_local:
            res = pd.DataFrame.from_dict(results, orient='index', columns=['train_acc', 'test_acc', 'train_acc_outDP', 'test_acc_out_DP'])
        else:
            res = pd.DataFrame.from_dict(results, orient='index', columns=['train_acc', 'test_acc'])
        res.to_csv(fl_path + '/results.csv')
print(time.time())

1674644676.7500732
DP-SGD with sampling rate = 0.02% and noise_multiplier = 0.14305325786478998 iterated over 5000 steps satisfies differential privacy with eps = 1e+03 and delta = 1e-05.
The accuracy of the model : 4.3 %
The accuracy of the model : 3.6999999999999997 %
DP-SGD with sampling rate = 0.02% and noise_multiplier = 0.14305325786478998 iterated over 5000 steps satisfies differential privacy with eps = 1e+03 and delta = 1e-05.
The accuracy of the model : 3.3000000000000003 %
The accuracy of the model : 2.6 %
The accuracy of the model : 5.7 %
The accuracy of the model : 4.2 %
The accuracy of the model : 5.7 %
The accuracy of the model : 3.8 %
1674644723.874408


In [9]:
# Difference, in seconds, between two hard-coded epoch timestamps. Neither
# value appears in the saved cell outputs; presumably they come from a timed
# run whose output was not kept -- TODO confirm.
1674644608.564843 - 1674644562.5075898

46.05725312232971

In [11]:
# Elapsed wall-clock seconds of the timed experiment cell: the end timestamp
# minus the start timestamp, both copied by hand from that cell's printed output.
1674644723.874408 - 1674644676.7500732

47.12433481216431

In [None]:
def output_DP_federated(model, X_train_size, epsilon_outDP, delta_outDP=1e-5):
    """Perturb ``model.theta`` with Gaussian noise (output perturbation).

    The noise scale follows the Gaussian mechanism:
    ``sqrt(2 * ln(1.25 / delta)) * sensitivity / epsilon`` with
    ``sensitivity = 2 / (X_train_size * model.lambda_)``.

    Parameters
    ----------
    model : object exposing ``theta`` (array with a ``shape``) and ``lambda_``.
    X_train_size : number of training samples used in the sensitivity.
    epsilon_outDP : privacy budget epsilon of the mechanism.
    delta_outDP : privacy parameter delta; stored on the model as ``delta_outDP``.

    Side effects
    ------------
    Sets ``model.delta_outDP``, keeps the un-noised parameters in
    ``model.theta_before_noise`` and rebinds ``model.theta`` to a new, noised
    array. Noise is drawn from NumPy's global RNG. Returns ``None``.
    """
    model.delta_outDP = delta_outDP

    l2_sensitivity = 2 / (X_train_size * model.lambda_)
    delta_factor = np.sqrt(2 * np.log(1.25 / model.delta_outDP))
    noise_scale = delta_factor * (l2_sensitivity / epsilon_outDP)

    clean_theta = model.theta
    model.theta_before_noise = clean_theta
    gaussian_noise = np.random.normal(loc=0.0, scale=noise_scale, size=clean_theta.shape)
    # Out-of-place addition: the saved clean parameters stay untouched.
    model.theta = clean_theta + gaussian_noise


In [None]:
# Walk the experiment tree and gather, per experiment folder:
#   params[folder]  - the parsed params.json
#   results[folder] - results.csv as a DataFrame with parsed 'it'/'client' columns
#   models[folder]  - {file name: array} for every saved .npy file
path = 'fl'
params = {}
results = {}
models = {}
for root, _subdirs, filenames in os.walk(path):
    for fname in filenames:
        full_path = root+'/'+fname
        if "params.json" in fname:
            with open(full_path) as json_file:
                params[root] = json.load(json_file)
        elif "results.csv" in fname:
            frame = pd.read_csv(full_path)
            results[root] = frame
            # The unnamed first column holds the row keys written by the
            # experiment cells (e.g. 'i0_c1', 'i0_g'): the text between the
            # first 'i' and the first '_' becomes 'it', the rest 'client'.
            row_keys = frame['Unnamed: 0']
            frame['it'] = [key[key.find('i')+1:key.find('_')] for key in row_keys]
            frame['client'] = [key[key.find('_')+1:] for key in row_keys]
        elif '.npy' in fname:
            per_folder = models.setdefault(root, {})
            per_folder[fname] = np.load(full_path)



In [None]:
# Reload the target train/test split for the current seed.
# NOTE(review): `rand_seed` is not defined in this cell; it is whatever value
# an earlier cell's `for rand_seed in [...]` loop left in the kernel.
data_prefix = 'data/rs' + str(rand_seed)
x_target_train = np.load(data_prefix + '_x_target_train.npy')
y_target_train = np.load(data_prefix + '_y_target_train.npy')
x_target_test = np.load(data_prefix + '_x_target_test.npy')
y_target_test = np.load(data_prefix + '_y_target_test.npy')
# Number of distinct labels present in the training split.
n_classes = len(np.unique(y_target_train))


In [None]:
def aggregate(clients):
    """Return the unweighted average of all clients' ``theta`` parameters.

    Parameters
    ----------
    clients : dict
        Maps a client id to an object with a ``theta`` attribute. All thetas
        must support ``+`` with each other and ``/`` by an int.

    Returns
    -------
    The element-wise mean of the clients' thetas, as a new object.

    Raises
    ------
    ValueError
        If ``clients`` is empty.

    Notes
    -----
    The sum is built out of place, so no client's ``theta`` is modified, and
    the divisor is the actual number of clients rather than a fixed constant.
    """
    thetas = [client.theta for client in clients.values()]
    if not thetas:
        raise ValueError("aggregate() needs at least one client")

    total = thetas[0]
    for theta in thetas[1:]:
        # `total + theta` (not `+=`) so clients[...].theta is never mutated.
        total = total + theta

    return total / len(thetas)

In [None]:
def update_clients(clients, global_model):
    """Overwrite every client's ``theta`` with the aggregated global model.

    ``clients`` is a dict of client objects; each one gets its ``theta``
    attribute rebound to ``global_model``. No copy is made, so all clients
    reference the very same object afterwards. Returns ``None``.
    """
    for client in clients.values():
        client.theta = global_model

In [None]:
def train_client(client, x_test=None, y_test=None):
    """Train one client on its own data and record its train/test scores.

    Parameters
    ----------
    client : object exposing ``x``, ``y``, ``init_theta(x, y)``,
        ``train(X, y)`` and ``evaluate(x, y, acc=True)``.
    x_test, y_test : optional held-out data used for ``client.test_acc``.
        When omitted, the notebook-level globals ``x_target_test`` /
        ``y_target_test`` are used, which is what the original one-argument
        version always did.

    Side effects
    ------------
    Trains the client and sets ``client.train_acc`` and ``client.test_acc``
    to the values returned by ``client.evaluate``. Returns ``None``.
    """
    # Resolve the globals lazily so callers passing explicit test data do not
    # depend on hidden notebook state.
    if x_test is None:
        x_test = x_target_test
    if y_test is None:
        y_test = y_target_test

    # init_theta returns the (X, y) pair that is then fed to train().
    features, labels = client.init_theta(client.x, client.y)
    client.train(features, labels)
    client.train_acc = client.evaluate(client.x, client.y, acc=True)
    client.test_acc = client.evaluate(x_test, y_test, acc=True)


In [1]:
from time import gmtime, strftime
# Show the current UTC time formatted as 'YYYY-MM-DD HH:MM:SS'.
strftime("%Y-%m-%d %H:%M:%S", gmtime())


'2023-01-25 11:00:24'