In [None]:
%matplotlib inline

import torch
import torchvision
from torch.utils.data.dataset import Dataset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt
import random
from collections import defaultdict

from data_utils import CustomImageDataset, split_image_data
from data_utils import get_default_data_transforms
from models import ConvNet
from fl_devices import Server, Client
from helper import ExperimentLogger, display_train_stats

from sklearn.cluster import AgglomerativeClustering, DBSCAN
from sklearn.metrics import pairwise_distances
from sklearn.metrics import f1_score
from sklearn.decomposition import PCA


# Seed the three random number generators used by this notebook (PyTorch,
# Python's `random`, NumPy) with a fixed value.
torch.manual_seed(0)
random.seed(0)
np.random.seed(0)

# Pick the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Helper functions
#
# feature_matrix:
# each row holds the flattened dW tensors of one client
#
# detect_adv_idx: adversary indices detected by the server
# gt_adv_idx: ground-truth adversary indices
def check_detect(detect_adv_idx, gt_adv_idx):
    """Report whether the server caught at least one real adversary.

    Args:
        detect_adv_idx: collection of indices flagged by the server.
        gt_adv_idx: iterable of ground-truth adversary indices.

    Returns:
        True if any ground-truth index also appears in ``detect_adv_idx``,
        otherwise False.
    """
    return any(true_idx in detect_adv_idx for true_idx in gt_adv_idx)
    
# feature_matrix:
# each row is the flattened dWs of one client
def generate_feature_matrix(dW_dicts):
    """Build a 2-D feature matrix with one row per client.

    Each row is the concatenation, in dict iteration order, of every tensor in
    that client's dict after flattening it to 1-D.

    Args:
        dW_dicts: iterable of mappings whose values are torch tensors. Every
            dict must flatten to the same total length so the rows can be
            stacked.

    Returns:
        numpy.ndarray of shape (number of dicts, total flattened length).

    Raises:
        RuntimeError: propagated from ``torch.stack`` when ``dW_dicts`` is
            empty or the rows differ in length.
    """
    with torch.no_grad():
        rows = []

        for dW_dict in dW_dicts:
            tensors = list(dW_dict.values())
            # Start from a zero-length float32 tensor, as the original did, so
            # dtype promotion is unchanged and an empty dict gives an empty row.
            # The seed lives on the tensors' own device rather than on a
            # notebook-global `device`.
            seed_device = tensors[0].device if tensors else "cpu"
            pieces = [torch.empty(0, device=seed_device)]
            pieces.extend(tensor.flatten() for tensor in tensors)
            rows.append(torch.cat(pieces, 0))

        matrix = torch.stack(rows, 0)
        # `.cpu()` is a no-op for CPU tensors, so no device check is needed
        # (the previous `device is "cpu"` compared identity with a literal).
        return matrix.cpu().numpy()
        
def print_labels(labels):
    """Print every label next to its position on one tab-separated line.

    Args:
        labels: iterable of labels; each entry is rendered as
            ``"<index>: <label>"`` using ``str()``.
    """
    entries = ['{!s}: {!s}'.format(position, label)
               for position, label in enumerate(labels)]
    print('\t'.join(entries))
    
def print_outliers(labels):
    """Print the positions whose label equals -1.

    Args:
        labels: array-like compared element-wise against -1 (a NumPy array
            in the visible callers' style).
    """
    is_outlier = labels == -1
    positions = np.argwhere(is_outlier)
    print(positions.reshape(-1))
    
def print_distance(feature_matrix, metric):
    """Compute the pairwise distance matrix between the rows of a feature matrix.

    Despite its name, this function returns the matrix and prints nothing.

    Args:
        feature_matrix: 2-D array-like with one sample per row.
        metric: metric name (or callable) forwarded to
            ``sklearn.metrics.pairwise_distances``.

    Returns:
        The array returned by ``pairwise_distances(feature_matrix, metric=metric)``.
    """
    # The keyword previously referenced the misspelled name `metirc`,
    # which raised NameError on every call.
    distance = pairwise_distances(feature_matrix, metric=metric)
    return distance

def handle_adversary(adv_idx, handle, weights, reg_factor):
    """Adjust per-client aggregation weights for the detected adversaries.

    Args:
        adv_idx: index (or array of indices) into ``weights`` of the clients
            flagged as adversaries.
        handle: handling strategy. ``None`` leaves the weights untouched,
            ``'remove'`` sets the flagged weights to 0 and ``'reg'``
            multiplies them by ``reg_factor``.
        weights: indexable array of per-client weights. It is modified in
            place and also returned.
        reg_factor: multiplier applied to flagged weights when ``handle`` is
            ``'reg'``; ignored otherwise.

    Returns:
        The same ``weights`` object that was passed in.

    Raises:
        ValueError: if ``handle`` is not one of ``None``, ``'remove'`` or
            ``'reg'``. Previously such a value was silently treated like
            ``None``, which could hide a misspelled strategy.
    """
    if handle is None:
        return weights

    if handle == 'remove':
        weights[adv_idx] = 0
    elif handle == 'reg':
        weights[adv_idx] = weights[adv_idx] * reg_factor
    else:
        raise ValueError(
            "unknown adversary handling strategy: {!r}".format(handle))
    return weights

In [None]:
# hyperparameters: client population
N_CLIENT = 25        # total number of clients
N_ADV_RANDOM = 3     # clients switched to client_mode 'random'
N_ADV_OPP = 0        # clients switched to client_mode 'opposite'
N_ADV_SWAP = 0       # clients switched to client_mode 'swap'

# hyperparameters: experiment schedule
TOTAL_TRIAL = 5      # independent trials per handling strategy
TOTAL_ROUND = 40     # communication rounds per trial
DETECT_ROUND = 10    # 1-based round in which adversary detection runs

# Adversary handling strategies compared below (see handle_adversary).
ADV_HANDLE = [None, 'remove', 'reg']

# Arguments forwarded to server.detect_adversary.
esp = 0.8
min_samples = 2
metric = 'cosine'
# Weight multiplier used by the 'reg' strategy.
reg_factor = 0.1

cfl_stats = ExperimentLogger()
# handle -> list with one mean-accuracy entry per trial
model_performance = defaultdict(lambda: [None] * TOTAL_TRIAL)

# The dataset does not change between trials, so load it once.
data = datasets.MNIST(root='./', download=True)
train_frac = 0.5
test_frac = 0.2
train_num = int(train_frac * len(data))
test_num = int(test_frac * len(data))
# `data` / `targets` replace the deprecated train_data / train_labels /
# test_data accessors, which all referred to this same split.
train_labels = data.targets.numpy()

for handle in ADV_HANDLE:
    for trial in range(TOTAL_TRIAL):
        # Draw disjoint random train / test index sets for this trial.
        idcs = np.random.permutation(len(data))
        train_idcs = idcs[:train_num]
        test_idcs = idcs[train_num:train_num + test_num]

        clients_split = split_image_data(
            data.data[train_idcs], train_labels[train_idcs],
            n_clients=N_CLIENT, classes_per_client=5, balancedness=1)
        # NOTE(review): transforms are requested for "EMNIST" although the
        # data is MNIST; confirm this is intended.
        train_trans, val_trans = get_default_data_transforms("EMNIST")
        client_data = [
            CustomImageDataset(images.to(torch.float32), labels,
                               transforms=train_trans)
            for images, labels in clients_split
        ]

        # Use the held-out indices. The previous positional slice
        # [train_num:train_num + test_num] ignored `test_idcs` and therefore
        # overlapped with the randomly chosen training samples.
        test_data = CustomImageDataset(
            data.data[test_idcs].to(torch.float32),
            train_labels[test_idcs],
            transforms=val_trans)

        # Create the clients and assign adversarial modes to a random subset.
        clients = [
            Client(ConvNet,
                   lambda x: torch.optim.SGD(x, lr=0.1, momentum=0.9),
                   dat, idnum=i)
            for i, dat in enumerate(client_data)
        ]
        client_indx = np.random.permutation(len(clients))
        offset = 0
        adv_random = client_indx[offset:offset + N_ADV_RANDOM]
        offset += N_ADV_RANDOM
        adv_opp = client_indx[offset:offset + N_ADV_OPP]
        offset += N_ADV_OPP
        adv_swap = client_indx[offset:offset + N_ADV_SWAP]
        offset += N_ADV_SWAP
        # Ground-truth adversary indices, used to exclude them from evaluation.
        adv_idx = np.concatenate((adv_random, adv_opp, adv_swap)).tolist()

        for i in adv_random:
            clients[i].client_mode = 'random'
        for i in adv_opp:
            clients[i].client_mode = 'opposite'
        for i in adv_swap:
            clients[i].client_mode = 'swap'

        # print out each client and its mode
        for idx, client in enumerate(clients):
            print('{}: {}'.format(idx, client.client_mode))

        server = Server(ConvNet, test_data)
        # Per-client aggregation weights; every client starts at 1.
        weights = np.ones(len(clients))

        # `round_idx` avoids shadowing the builtin `round`.
        for round_idx in range(TOTAL_ROUND):
            if round_idx == 0:
                for client in clients:
                    client.synchronize_with_server(server)

            participating_clients = server.select_clients(clients, frac=1.0)

            for client in participating_clients:
                train_stats = client.compute_weight_update(epochs=1)
                client.reset()

            if round_idx + 1 == DETECT_ROUND:
                # generate feature matrix for clustering
                client_dW_dicts = [client.dW for client in clients]
                feature_matrix = generate_feature_matrix(client_dW_dicts)

                # detect adversaries using clustering; label -1 marks a
                # client as an adversary
                clustering_label = server.detect_adversary(
                    feature_matrix, esp, min_samples, metric)
                adv_idx_label = np.argwhere(clustering_label == -1).flatten()

                # Apply the strategy of the current outer-loop iteration to
                # the aggregation weights. The previous call used a misspelled
                # function name, passed the whole ADV_HANDLE list and
                # overwrote `clients` with the result.
                weights = handle_adversary(
                    adv_idx_label, handle, weights, reg_factor)

            # aggregate weight updates; copy new weights to clients
            server.aggregate_weight_updates_weights(clients, weights)
            server.copy_weights(clients)

        # evaluate model performance after all the rounds, averaging over the
        # clients that are not ground-truth adversaries
        acc_clients = [client.evaluate() for client in clients]
        acc_s = [acc for i, acc in enumerate(acc_clients) if i not in adv_idx]
        model_performance[handle][trial] = np.mean(np.array(acc_s))

## Experiment A: Model Performance
Compare a model that handles detected adversaries with a model that does NOT handle them.

Same number of rounds (TOTAL_ROUND) -> compare the resulting accuracy

In [None]:
# hyperparameters for Experiment A
TOTAL_TRIAL = 30    # number of trials run per handling strategy
TOTAL_ROUND = 20    # number of rounds per trial
DETECT_ROUND = 5    # 1-based round in which adversary detection is triggered

# NOTE(review): presumably the clustering settings for
# server.detect_adversary in this experiment; confirm.
ESP = 0.5
MIN_SAMPLES = 2
METRIC = 'l2'

# Handling strategies that the experiment loop iterates over.
ADV_HANDLE = [None, 'remove', 'reg']

# handle -> list with one result slot per trial, pre-filled with None
model_performance = defaultdict(lambda: [None] * TOTAL_TRIAL)

In [None]:
# Experiment A: run a fixed number of rounds per trial and record the mean
# accuracy of the non-adversarial clients for every handling strategy.
#
# NOTE(review): `clients`, `server`, `adv_idx` and `reg_factor` are the
# objects left behind by the earlier training cell. They are not re-created
# per trial here, so trials are not independent; confirm whether each trial
# should rebuild the server and clients.
for handle in ADV_HANDLE:
    for trial in range(TOTAL_TRIAL):
        # Per-client aggregation weights; every client starts at 1.
        weights = np.ones(len(clients))

        # `round_idx` avoids shadowing the builtin `round`.
        for round_idx in range(TOTAL_ROUND):
            if round_idx == 0:
                for client in clients:
                    client.synchronize_with_server(server)

            # Training, detection and aggregation run in every round. They
            # were previously nested under the `round == 0` check, so only
            # the first round did any work and detection never fired.
            participating_clients = server.select_clients(clients, frac=1.0)

            for client in participating_clients:
                train_stats = client.compute_weight_update(epochs=1)
                client.reset()

            if round_idx + 1 == DETECT_ROUND:
                # generate feature matrix for clustering
                client_dW_dicts = [client.dW for client in clients]
                feature_matrix = generate_feature_matrix(client_dW_dicts)

                # detect adversaries using this experiment's clustering
                # settings; label -1 marks a client as an adversary
                # (same convention as the earlier training cell)
                clustering_label = server.detect_adversary(
                    feature_matrix, ESP, MIN_SAMPLES, METRIC)
                adv_idx_label = np.argwhere(clustering_label == -1).flatten()

                # adjust the aggregation weights of the detected adversaries
                weights = handle_adversary(
                    adv_idx_label, handle, weights, reg_factor)

            # aggregate weight updates; copy new weights to clients
            server.aggregate_weight_updates_weights(clients, weights)
            server.copy_weights(clients)

        # evaluate model performance after all the rounds, averaging over the
        # clients that are not ground-truth adversaries
        acc_clients = [client.evaluate() for client in clients]
        acc_s = [acc for i, acc in enumerate(acc_clients) if i not in adv_idx]
        model_performance[handle][trial] = np.mean(np.array(acc_s))

In [None]:
# plots
# TODO


## Experiment B: Convergence Rate
Compare a model that handles detected adversaries with a model that does NOT handle them.

Same target accuracy -> compare the number of rounds needed to reach it

In [None]:
# hyperparameters for Experiment B
TOTAL_TRIAL = 30    # number of trials run per handling strategy
MAX_ROUND = 50      # upper bound on the number of rounds per trial
DETECT_ROUND = 5    # 1-based round in which adversary detection is triggered

# Accuracy threshold; a trial stops at the first round that reaches it.
TARGET_ACC = 0.66

# NOTE(review): presumably the clustering settings for
# server.detect_adversary in this experiment; confirm.
ESP = 0.5
MIN_SAMPLES = 2
METRIC = 'l2'

# Handling strategies that the experiment loop iterates over.
ADV_HANDLE = [None, 'remove', 'reg']

# handle -> list with one slot per trial, pre-filled with None; a slot is set
# to the 1-based round in which TARGET_ACC was reached
model_round = defaultdict(lambda: [None] * TOTAL_TRIAL)

In [None]:
# Experiment B: for every handling strategy, record how many rounds a trial
# needs before the mean accuracy of the non-adversarial clients reaches
# TARGET_ACC. Trials that never reach it keep None in `model_round`.
#
# NOTE(review): `clients`, `server`, `adv_idx` and `reg_factor` are the
# objects left behind by the earlier training cell. They are not re-created
# per trial here, so trials are not independent; confirm whether each trial
# should rebuild the server and clients.
for handle in ADV_HANDLE:
    for trial in range(TOTAL_TRIAL):
        # Per-client aggregation weights; every client starts at 1.
        weights = np.ones(len(clients))

        # `round_idx` avoids shadowing the builtin `round`.
        for round_idx in range(MAX_ROUND):
            if round_idx == 0:
                for client in clients:
                    client.synchronize_with_server(server)

            # Training, detection and aggregation run in every round. They
            # were previously nested under the `round == 0` check, so only
            # the first round did any work and detection never fired.
            participating_clients = server.select_clients(clients, frac=1.0)

            for client in participating_clients:
                train_stats = client.compute_weight_update(epochs=1)
                client.reset()

            if round_idx + 1 == DETECT_ROUND:
                # generate feature matrix for clustering
                client_dW_dicts = [client.dW for client in clients]
                feature_matrix = generate_feature_matrix(client_dW_dicts)

                # detect adversaries using this experiment's clustering
                # settings; label -1 marks a client as an adversary
                # (same convention as the earlier training cell)
                clustering_label = server.detect_adversary(
                    feature_matrix, ESP, MIN_SAMPLES, METRIC)
                adv_idx_label = np.argwhere(clustering_label == -1).flatten()

                # adjust the aggregation weights of the detected adversaries
                weights = handle_adversary(
                    adv_idx_label, handle, weights, reg_factor)

            # aggregate weight updates; copy new weights to clients
            server.aggregate_weight_updates_weights(clients, weights)
            server.copy_weights(clients)

            # evaluate model performance after each round. A separate name is
            # used so the `model_performance` results dict is not overwritten.
            acc_clients = [client.evaluate() for client in clients]
            acc_s = [acc for i, acc in enumerate(acc_clients)
                     if i not in adv_idx]
            round_acc = np.mean(np.array(acc_s))
            if round_acc >= TARGET_ACC:
                model_round[handle][trial] = round_idx + 1
                break

In [None]:
# plots
# TODO
