# Configuration

Import all required modules, hook PySyft, declare functions for simulating FL environment (Star Architecture)

In [8]:
%load_ext autoreload
%load_ext tensorboard
%autoreload 2

####################
# Required Modules #
####################

# Generic
import copy
import math
import os
import random
import sys
import time
from collections import OrderedDict
from pathlib import Path
from collections import defaultdict
import json

# Libs
import sklearn as skl
from sklearn import preprocessing
import sklearn.datasets as skld
from sklearn.metrics import mean_squared_error, accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
import numpy as np
import pandas as pd
import missingno as msno
import matplotlib.pyplot as plt
import seaborn as sbn
import syft as sy
import torch as th
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm, tnrange, tqdm_notebook
from tqdm.notebook import trange
from IPython.display import display

##################
# Configurations #
##################

# Integrating PyTorch with PySyft
pt_hook = sy.TorchHook(th)

########################
# Simulation Functions #
########################

def connect_to_workers(n_workers):
    """ Simulates the existence of N workers
    
    Args:
        n_workers (int): No. of virtual workers to simulate
    Returns:
        N virtual workers (list(sy.VirtualWorker))
    """
    return [
        sy.VirtualWorker(
            pt_hook, id=f"worker{i+1}"
        ).clear_objects(
        ) for i in range(n_workers)
    ]

def connect_to_crypto_provider():
    """ Simulates the existence of an arbitor to facilitate
        model generation & client-side utilisation
        
    Returns:
        Arbiter (i.e. TTP) (sy.VirtualWorker)
    """
    return sy.VirtualWorker(
        pt_hook, 
        id="crypto_provider"
    ).clear_objects()




The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


# PyTorch Model Class Declaration

Declare the model classes used for simulation, along with requisite arguments. 

In [9]:
class Model(nn.Module):
    """
    The Model class declares a PySyft neural network based on the specifications contained
    inside the structural_definition dictionary. 

    Args:
        structural_definition (Dict): For each layer of the network, specify name, input
                                      and output size, activation function, layer_type. 

    Attributes:
        layers (list): This is a list of tuples, each containing the layer name and 
                       activation function. 
    """
    def __init__(self, structural_definition):
        super(Model, self).__init__()
        
        self.layers = []
        
        for layer in structural_definition:
            layer_params = structural_definition[layer]
            layer_type = self.__parse_layer_type(layer_params['layer_type'])
            layer_size_mapping = layer_params['layer_size_mapping']
            activation = self.__parse_activation_type(layer_params['activation'])
            
            setattr(self, 
                    layer, 
                    layer_type(**layer_size_mapping))
            
            self.layers.append((layer, activation))
            
    ###########
    # Helpers #
    ###########

    @staticmethod
    def __parse_layer_type(layer_type):
        """ Detects layer type of a specified layer from configuration

        Args:
            layer_type (str): Layer type to initialise
        Returns:
            Layer definition (Function)
        """
        if layer_type == "linear":
            return nn.Linear
        elif layer_type == 'conv2d':
            return nn.Conv2d
        else:
            raise ValueError("Specified layer type is currently not supported!")


    @staticmethod
    def __parse_activation_type(activation_type):
        """ Detects activation function specified from configuration

        Args:
            activation_type (str): Activation function to use
        Returns:
            Activation definition (Function)
        """
        if activation_type == "sigmoid":
            return th.sigmoid
        elif activation_type == "relu":
            return th.relu
        elif activation_type == "nil":
            return None
        else:
            raise ValueError("Specified activation is currently not supported!")

    ##################
    # Core Functions #
    ##################

    def forward(self, x):
        for layer_activation_tuple in self.layers:
            current_layer =  getattr(self, layer_activation_tuple[0])
            if layer_activation_tuple[1] is None:
                x = current_layer(x)
            else:
                x = layer_activation_tuple[1](current_layer(x))

        return x


# FL Specific Set up

Set up FL environments, where workers and a ttp are initialized, and datasets are deployed to individual workers. Computation over these datasets should be done using pointers, for truer-to-life FL simulation.

In [10]:
def secret_share(tensor, workers, crypto_provider, precision_fractional):
    """ Transform to fixed precision and secret share a tensor 
    
    Args:
        tensor             (PointerTensor): Pointer to be shared
        workers   (list(sy.VirtualWorker)): Involved workers of the grid
        crypto_provider (sy.VirtualWorker): Arbiter (i.e. TTP) of the grid
        precision_fractional (int): Precision for casting integer ring arithimetic
    """
    return (
        tensor
        .fix_precision(precision_fractional=precision_fractional)
        .share(
            *workers, 
            crypto_provider=crypto_provider, 
            requires_grad=True
        )
    )

def setup_FL_env(training_datasets, validation_datasets, 
                 testing_dataset, is_shared=False):
    """ Sets up a basic federated learning environment using virtual workers,
        with a allocated arbiter (i.e. TTP) to faciliate in model development
        & utilisation, and deploys datasets to their respective workers
        
    Args:

        training_datasets   (dict(tuple(th.Tensor))): Datasets to be used for training
        validation_datasets (dict(tuple(th.Tensor))): Datasets to be used for validation
        testing_dataset           (tuple(th.Tensor)): Datasets to be used for testing
        is_shared (bool): Toggles if SMPC encryption protocols are active
    Returns:
        training_pointers  (dict(sy.BaseDataset))
        validation_pointer (dict(sy.BaseDataset))
        testing_pointer    (sy.BaseDataset)
        workers            (list(sy.VirtualWorker))
        crypto_provider    (sy.VirtualWorker)
    """
    # Simulate FL computation amongst K worker nodes, 
    # where K is the no. of datasets to be federated
    workers = connect_to_workers(n_workers=len(training_datasets))
    
    # Allow for 1 exchanger/Arbiter (i.e. TTP)
    crypto_provider = connect_to_crypto_provider()
    crypto_provider.clear_objects()
    
    assert (len(crypto_provider._objects) == 0)
    
    # Send training & validation datasets to their respective workers
    training_pointers = {}
    validation_pointers = {}
    for w_idx in range(len(workers)):

        # Retrieve & prepare worker for receiving dataset
        curr_worker = workers[w_idx]
        curr_worker.clear_objects()

        assert (len(curr_worker._objects) == 0)

        train_data = training_datasets[w_idx]
        validation_data = validation_datasets[w_idx]
        
        # Cast dataset into a Tensor & send it to the relevant worker
        train_pointer = sy.BaseDataset(*train_data).send(curr_worker)
        validation_pointer = sy.BaseDataset(*validation_data).send(curr_worker)
        
        # Store data pointers for subsequent reference
        training_pointers[curr_worker] = train_pointer
        validation_pointers[curr_worker] = validation_pointer
    
    # 'Me' serves as the client -> test pointer stays with me, but is shared via SMPC
    testing_pointer = sy.BaseDataset(*testing_dataset).send(crypto_provider)
    
    return training_pointers, validation_pointers, testing_pointer, workers, crypto_provider

In [11]:
def convert_to_FL_batches(model_hyperparams, train_pointers, validation_pointers, test_pointer): 
    """ Supplementary function to convert initialised datasets into their
        SGD compatible dataloaders in the context of PySyft's federated learning
        (NOTE: This is based on the assumption that querying database size does
               not break FL abstraction (i.e. not willing to share quantity))
    Args:
        model_hyperparams                      (model_hyperparams): Parameters defining current experiment
        train_pointers      (dict(sy.BaseDataset)): Distributed datasets for training
        validation_pointers (dict(sy.BaseDataset)): Distributed datasets for model calibration
        test_pointer              (sy.BaseDataset): Distributed dataset for verifying performance
    Returns:
        train_loaders     (sy.FederatedDataLoader)
        validation_loader (sy.FederatedDataLoader)
        test_loader       (sy.FederatedDataLoader)
    """
    
    def construct_FL_loader(data_pointer, **kwargs):
        """ Cast paired data & labels into configured tensor dataloaders
        Args:
            dataset (list(sy.BaseDataset)): A tuple of X features & y labels
            kwargs: Additional parameters to configure PyTorch's Dataloader
        Returns:
            Configured dataloader (th.utils.data.DataLoader)
        """
        federated_dataset = sy.FederatedDataset(data_pointer)
        
#         print(federated_dataset)
        
        federated_data_loader = sy.FederatedDataLoader(
            federated_dataset, 
            batch_size=(
                model_hyperparams['batch_size']
                if model_hyperparams['batch_size'] 
                else len(federated_dataset)
            ), 
            shuffle=True,
            iter_per_worker=True, # for subsequent parallelization
            **kwargs
        )
        
        return federated_data_loader
        
        
    # Load training pointers into a configured federated dataloader
    train_loader = construct_FL_loader(train_pointers.values())
    
    # Load validation pointer into a configured federated dataloader
    validation_loader = construct_FL_loader(validation_pointers.values())
    
    # Load testing dataset into a configured federated dataloader
    test_loader = construct_FL_loader([test_pointer])
    
    return train_loader, validation_loader, test_loader



# Training Function

Perform training in the FL style.

In [12]:
def perform_FL_training(model_hyperparams, 
                        model_structure,
                        datasets, 
                        workers, 
                        crypto_provider,
                        optimizer=th.optim.SGD):
    """ 
    Simulates a PySyft federated learning cycle using PyTorch, in order
    to prove that it can be done conceptually using the PyTorch interface
        
    Args:
        model_hyperparams (model_hyperparams): 
                                            Parameters defining current experiment
        datasets  (sy.FederatedDataLoader): 
                                        Distributed training datasets
        workers   (list(sy.VirtualWorker)): 
                                        Workers involved in training
        crypto_provider (sy.VirtualWorker): 
                                        Arbiter supervising training
        model     (nn.Module): 
                            Current PyTorch model to train
        optimizer (th.optim): 
                            Optimizer to use
    Returns:
        global_model (nn.Module) : The trained model 
        global_states (dict)
                        {timestep: nn.Module}
                        : The record of trained global models at each timestep.
        client_states (dict)
                        {timestep {worker_id: nn.Module}}
                        : The record of trained models for each worker at each
                          timestep. 
        scale_coeffs (dict)
                        {worker_id: float}
                        : A dictionary of the update weightings for each worker
                          based on individual dataset size. 
    """
    
    criterion = model_hyperparams['criterion']

    def perform_parallel_training(datasets, 
                                  models, 
                                  optimizers, 
                                  criterions, 
                                  epochs):
        """ 
        Parallelizes training across each distributed dataset (i.e. simulated worker)
        Parallelization here refers to the training of all distributed models per
        epoch.
        NOTE: Current approach does not have early stopping implemented
            
        Args:
            datasets   (dict(th.utils.data.DataLoader)): 
                                                       Distributed training datasets
            models     (list(nn.Module)): 
                                        Simulated local models (after distribution)
            optimizers (list(th.optim)): 
                                       Simulated local optimizers (after distribution)
            criterions (list(th.nn)):  
                                    Simulated local objective function (after distribution)
            epochs (int): 
                        No. of epochs to train each local model
        Returns:
            trained local models
        """
        for e in range(epochs):
            for worker in datasets:
#                 print("========================")
#                 print(worker)
                for batch_idx, batch in enumerate(datasets[worker]):
#                     print(batch_idx)
                    data = batch[0]
                    labels = batch[1]
                    '''
                    ========================
                    Each worker trains its own model individually.
                    ========================
                    '''
                    curr_model = models[worker]
                    curr_optimizer = optimizers[worker]
                    curr_criterion = criterions[worker]

                    # Zero gradients to prevent accumulation                    
                    curr_model.train()
                    curr_optimizer.zero_grad()

                    # Forward Propagation
                    predictions = curr_model(data.float())
#                     print(predictions.shape)
#                     print(labels.shape)

                    if model_hyperparams['is_condensed']:
                        loss = curr_criterion(predictions, labels.float())
                    else:
                        loss = curr_criterion(predictions, labels.long())

                    # Backward propagation
                    loss.backward()
                    curr_optimizer.step()

                    # Update models, optimisers & losses
                    models[worker] = curr_model
                    optimizers[worker] = curr_optimizer
                    criterions[worker] = curr_criterion

                    assert (models[worker] == curr_model and 
                            optimizers[worker] == curr_optimizer and 
                            criterions[worker] == curr_criterion)

        trained_models = {w: m.send(crypto_provider) for w,m in models.items()}

        return trained_models
    
    def calculate_global_params(global_model, models, datasets):
        """ Aggregates weights from trained locally trained models after a round.
        
        Args:
            global_model   (nn.Module): Global model to be trained federatedly
            models   (dict(nn.Module)): Simulated local models (after distribution)
            datasets (dict(th.utils.data.DataLoader)): Distributed training datasets
        Returns:
            Aggregated parameters (OrderedDict)
        """
        param_types = global_model.state_dict().keys()
        model_states = {w: m.state_dict() for w,m in models.items()}
        
        # Calculate scaling factors for each worker
        scale_coeffs = {w: 1/len(list(datasets.keys())) for w in list(datasets.keys())}

        # PyTorch models can only swap weights of the same structure
        # Hence, aggregate weights while maintaining original layering structure
        aggregated_params = OrderedDict()
        
        '''
        ======================
        Grab the param_states
        ======================
        '''
        params = {}
        
        for p_type in param_types:
            #param_states = [th.mul(ms[p_type], sc) 
            #                for ms,sc in zip(model_states, scale_coeffs)]
            param_states = [
                th.mul(
                    model_states[w][p_type],
                    scale_coeffs[w]
                ).get().get() for w in workers
            ]
            
            '''
            ======================
            Grab the param_states
            ======================
            '''   
            params.update({p_type : param_states})
            
            layer_shape = tuple(global_model.state_dict()[p_type].shape)
            
            '''
            ======================
            Modification made here to allow multiple layers.
            ======================
            '''  
            aggregated_params[p_type] = th.zeros(param_states[0].shape, dtype=th.float64)
            for param_state in param_states:
                aggregated_params[p_type] += param_state
            aggregated_params[p_type] = aggregated_params[p_type].view(*layer_shape)

        return aggregated_params, params, scale_coeffs

    # Generate a global model & send it to the TTP
    
    template_model = Model(model_structure)
    
    global_model = copy.deepcopy(template_model).send(crypto_provider)
    
    print("Global model parameters:\n", [p.location for p in list(global_model.parameters())],
          "\nID:\n", [p.id_at_location for p in list(global_model.parameters())],
          "\n Cloning effect on global model:\n", [p.clone() for p in list(global_model.parameters())])
    
    rounds = 0
    pbar = tqdm(total=model_hyperparams['rounds'], desc='Rounds', leave=True)
    
    '''
    * Dicts for model and client states
    
    '''
    global_states = {}
    client_states = {}
    global_model_state_dicts = {}

    client_template = copy.deepcopy(template_model)
    
    while rounds < model_hyperparams['rounds']:

        local_models = {w: copy.deepcopy(client_template).send(w) for w in workers}

        optimizers = {
            w: optimizer(
                params=model.parameters(), 
                lr=model_hyperparams['lr'], 
                weight_decay=model_hyperparams['decay']
            ) for w, model in local_models.items()
        }
        
        criterions = {w: criterion(reduction='mean') 
                      for w,m in local_models.items()}

        trained_models = perform_parallel_training(
            datasets, 
            local_models, 
            optimizers, 
            criterions, 
            model_hyperparams['epochs']
        )
        
        aggregated_params, params, scale_coeffs = calculate_global_params(
            global_model, 
            trained_models, 
            datasets
        )

        '''
        ============================
        * Save states to dictionary

        '''
        global_model_transfer_out = global_model.get()
        global_states.update({rounds : copy.deepcopy(global_model_transfer_out)})
        global_model_state_dicts.update({rounds : global_model_transfer_out.state_dict()})
            
        client_states.update({rounds + 1 : params})

        # Update weights with aggregated parameters 
        global_model_transfer_out.load_state_dict(aggregated_params)
#         model = copy.deepcopy(global_model_transfer_out)
        client_template = copy.deepcopy(global_model_transfer_out)
        global_model = global_model_transfer_out.send(crypto_provider)
        
        rounds += 1
        pbar.update(1)
        
    '''
    ============================
    * Save final global state

    '''
    global_model_transfer_out = global_model.get()
    global_states.update({rounds : copy.deepcopy(global_model_transfer_out)})
    global_model = global_model_transfer_out.send(crypto_provider)
    global_model_state_dicts.update({rounds : global_model_transfer_out.state_dict()})
    pbar.close()

    return global_model, global_states, client_states, scale_coeffs, global_model_state_dicts

# Load Synth Data

In [14]:
with open('./data/all_data/data.json') as json_file:
		data = json.load(json_file)

In [55]:
def get_split_indices(length, proportions):
    train_end = round(length * proportions[0])
    val_end = train_end + round(length * proportions[1])
    test_end = val_end + round(length * proportions[2])
    return train_end, val_end, test_end

def prep_synth_data(path, split_proportions):
    training_datasets = {}
    validation_datasets = {}
    testing_datasets = {}
    with open(path) as json_file:
        data = json.load(json_file)
        for client_idx in data:
            num_data = len(data[client_idx]['x'])
            
            train_idx, val_idx, test_idx = get_split_indices(num_data, split_proportions)
            
            x_and_y = list(zip(data[client_idx]['x'], 
                               data[client_idx]['y']))
            random.shuffle(x_and_y)
            x, y = zip(*x_and_y)
            training_datasets.update({int(client_idx) : (th.tensor(x[0:train_idx]), 
                                                         th.tensor(y[0:train_idx]).view(-1, 1))})
            validation_datasets.update({int(client_idx) : (th.tensor(x[train_idx:val_idx]), 
                                                         th.tensor(y[train_idx:val_idx]).view(-1, 1))})
            testing_datasets.update({int(client_idx) : (th.tensor(x[val_idx:test_idx]), 
                                                         th.tensor(y[val_idx:test_idx]).view(-1, 1))})
    return training_datasets, validation_datasets, testing_datasets

In [56]:
def aggregate_testing_datasets(testing_datasets):
    output = None
    for client in testing_datasets:
        if output is None:
            output = (testing_datasets[client][0], 
                      testing_datasets[client][1])
        else:
            output = (th.cat((output[0], 
                             testing_datasets[client][0]), 
                             0), 
                      th.cat((output[1], 
                             testing_datasets[client][1]), 
                             0))
    
    return (output[0], output[1].view(-1, 1))

In [57]:
training_datasets, validation_datasets, testing_datasets = prep_synth_data('./data/all_data/data.json', [0.8, 0.1, 0.1])
testing_dataset = aggregate_testing_datasets(testing_datasets)

FL functions in this notebook convert individual CSVs for each client into a dictionary of tuples, one tuple for each client, where first entry is a torch tensor of data, and second entry is a torch tensor of labels. in set_up_fl_env, indexing data dictionaries is how datasets are disseminated to workers. 

In [13]:
def synth_A(datagen_config,
            is_binary,
            num_workers, 
             val_proportion, 
             test_proportion,
             garbage_proportion,
             garbage_severity):
    
    """ Takes in a synthetic dataset generated via Skl.datasets, 
        splits it into num_workers even segments, converts them into 
        torch tensors, formats into a dictionary, for use in FL 
        training. 
        
    Args:
        num_workers (int): The number of workers to split datasets among. 
        synth_data (tuple(numpy.ndarray, numpy.ndarray)): A synthetic classification dataset - First entry is data, second is labels. 
        val_proportion (float): Proportion of datapoints to be used for validation. Float between 0 and 1.
        test_proportion (float): Proportion of datapoints to be used for testing. Float between 0 and 1. 
    Returns:
        training_datasets (defaultdict{
                                    int:tuple(torch.Tensor, torch.Tensor)
                                    }) : A dictionary where data is organized as tuple of torch tensors and indexed by the worker
                                        it belongs to. Follows same ordering as synth_data.
                                        
        validation_datasets (defaultdict{
                                    int:tuple(torch.Tensor, torch.Tensor)
                                    }) : A dictionary where data is organized as tuple of torch tensors and indexed by the worker
                                        it belongs to. Follows same ordering as synth_data.
                                        
        testing_dataset (tuple(torch.Tensor, torch.Tensor)): A tuple containing test data.
    """
    
#     print(datagen_config)
    
    synth_data = skld.make_classification(**datagen_config)
    
    if is_binary:
        synth_data = (synth_data[0], np.reshape(synth_data[1], (-1, 1)))
    
    #============
    # Number of data points in synthetic data set
    synth_data_length = synth_data[0].shape[0]
    
    #============
    # Number of data points per worker
    segment_size = math.floor(synth_data_length / num_workers)
    
    #============
    # Initialize dictionaries for holding datasets.
    # defaultdict() used to simplify dict modification.
    training_datasets = defaultdict()
    validation_datasets = defaultdict()
    testing_dataset = None
    
    #============
    # Select the workers whose data will be corrupted. 
    garbage_indices = np.random.choice(num_workers, int(np.floor(num_workers * garbage_proportion)), replace=False)
    
    for worker_idx in range(num_workers):
        
        #============
        # Number of data points for test, val, train.
        test_size = math.floor(test_proportion * segment_size)
        val_size = math.floor(val_proportion * segment_size)
        train_size = math.floor((1 - val_proportion - test_proportion) * segment_size)
        
#         print(test_size, val_size, train_size)
        
        #============
        # Start and end index of worker's datasegment out of entire dataset. 
        start_idx = worker_idx * segment_size
        end_idx = (worker_idx + 1) * segment_size
        
#         print(start_idx, end_idx)
        
        #============
        # Within each segment, the indexes of test, val, train subsegments.
        test_idx = start_idx + test_size
        val_idx = test_idx + val_size
        train_idx = val_idx + train_size
        
#         print(test_idx, val_idx, train_idx)

        #============
        # Slice dataset by above segments. Convert to torch tensors. 
        validation_datasets[worker_idx] = (th.from_numpy(synth_data[0][test_idx:val_idx, :]), 
                                           th.from_numpy(synth_data[1][test_idx:val_idx]))
        
        training_datasets[worker_idx] = (th.from_numpy(synth_data[0][val_idx:train_idx, :]), 
                                         th.from_numpy(synth_data[1][val_idx:train_idx]))
        
        #============
        # Scatter random noise over data matrices of selected workers.
        if worker_idx in garbage_indices:
            
            val_shape = validation_datasets[worker_idx][0].shape
            train_shape = training_datasets[worker_idx][0].shape
            
            val_noise = np.random.rand(val_shape[0], val_shape[1]) * garbage_severity
            train_noise = np.random.rand(train_shape[0], train_shape[1]) * garbage_severity
            
            validation_datasets[worker_idx] = (validation_datasets[worker_idx][0] + th.from_numpy(val_noise), 
                                               validation_datasets[worker_idx][1])  
            training_datasets[worker_idx] = (training_datasets[worker_idx][0] + th.from_numpy(train_noise), 
                                             training_datasets[worker_idx][1])

        
        #============
        # Testing dataset is in tuple form since it is used by ttp.
        # Each segment contributes a portion of the total, hence the
        # concatenation. 
        if testing_dataset is None:
            testing_dataset = (synth_data[0][start_idx:test_idx, :], 
                                synth_data[1][start_idx:test_idx])
        else:
            testing_dataset = (np.concatenate((testing_dataset[0], 
                                                synth_data[0][start_idx:test_idx, :]), 
                                               axis = 0), 
                                np.concatenate((testing_dataset[1], 
                                                synth_data[1][start_idx:test_idx]), 
                                               axis = 0))
    
    #============
    # Convert testing dataset to torch tensors. 
    testing_dataset = (th.from_numpy(testing_dataset[0]), 
                        th.from_numpy(testing_dataset[1]))
        
    returned_info_dict = {'garbage_indices': garbage_indices, 'worker_proportions': []}
    return training_datasets, validation_datasets, testing_dataset, returned_info_dict
        

In [29]:
def synth_B(datagen_config,
            is_binary,
            num_workers, 
            val_proportion, 
            test_proportion,
            garbage_proportion,
            garbage_severity):
    
    """ Takes in a synthetic dataset generated via Skl.datasets, 
        splits it into num_workers even segments, converts them into 
        torch tensors, formats into a dictionary, for use in FL 
        training. 
        
    Args:
        num_workers (int): The number of workers to split datasets among. 
        synth_data (tuple(numpy.ndarray, numpy.ndarray)): A synthetic classification dataset - First entry is data, second is labels. 
        val_proportion (float): Proportion of datapoints to be used for validation. Float between 0 and 1.
        test_proportion (float): Proportion of datapoints to be used for testing. Float between 0 and 1. 
    Returns:
        training_datasets (defaultdict{
                                    int:tuple(torch.Tensor, torch.Tensor)
                                    }) : A dictionary where data is organized as tuple of torch tensors and indexed by the worker
                                        it belongs to. Follows same ordering as synth_data.
                                        
        validation_datasets (defaultdict{
                                    int:tuple(torch.Tensor, torch.Tensor)
                                    }) : A dictionary where data is organized as tuple of torch tensors and indexed by the worker
                                        it belongs to. Follows same ordering as synth_data.
                                        
        testing_dataset (tuple(torch.Tensor, torch.Tensor)): A tuple containing test data.
    """
    returned_info_dict = {}
    
    synth_data = skld.make_classification(**datagen_config)
    
    if is_binary:
        synth_data = (synth_data[0], np.reshape(synth_data[1], (-1, 1)))
    
    worker_proportions = [np.random.random() for i in range(num_workers)]
    worker_proportions /= np.sum(worker_proportions)
    
#     print(worker_proportions)
    
    #============
    # Number of data points in synthetic data set
    synth_data_length = synth_data[0].shape[0]
    
    #============
    # Initialize dictionaries for holding datasets.
    # defaultdict() used to simplify dict modification.
    training_datasets = defaultdict()
    validation_datasets = defaultdict()
    testing_dataset = None

    previous_end_idx = 0
    for worker_idx in range(num_workers):
        
        #============
        # Number of data points for test, val, train.
        test_size = math.floor((test_proportion * worker_proportions[worker_idx]) * synth_data_length)
        val_size = math.floor((val_proportion * worker_proportions[worker_idx]) * synth_data_length)
        train_size = math.floor(((1 - val_proportion - test_proportion) * worker_proportions[worker_idx]) * synth_data_length)
         
#         print(test_size, val_size, train_size)
        
        #============
        # Start and end index of worker's datasegment out of entire dataset. 
        
#         print(worker_proportions[worker_idx] * synth_data_length)
        start_idx = previous_end_idx
        end_idx = math.floor(previous_end_idx + (worker_proportions[worker_idx] * synth_data_length))
        previous_end_idx = end_idx
#         print(start_idx, end_idx)
        
        #============
        # Within each segment, the indexes of test, val, train subsegments.
        test_idx = start_idx + test_size
        val_idx = test_idx + val_size
        train_idx = val_idx + train_size
        
#         print(test_idx, val_idx, train_idx)

        #============
        # Slice dataset by above segments. Convert to torch tensors. 
        validation_datasets[worker_idx] = (th.from_numpy(synth_data[0][test_idx:val_idx, :]), 
                                           th.from_numpy(synth_data[1][test_idx:val_idx]))
        
        training_datasets[worker_idx] = (th.from_numpy(synth_data[0][val_idx:train_idx, :]), 
                                         th.from_numpy(synth_data[1][val_idx:train_idx]))
        
        #============
        # Testing dataset is in tuple form since it is used by ttp.
        # Each segment contributes a portion of the total, hence the
        # concatenation. 
        if testing_dataset is None:
            testing_dataset = (synth_data[0][start_idx:test_idx, :], 
                                synth_data[1][start_idx:test_idx])
        else:
            testing_dataset = (np.concatenate((testing_dataset[0], 
                                                synth_data[0][start_idx:test_idx, :]), 
                                               axis = 0), 
                                np.concatenate((testing_dataset[1], 
                                                synth_data[1][start_idx:test_idx]), 
                                               axis = 0))
    
    #============
    # Convert testing dataset to torch tensors. 
    testing_dataset = (th.from_numpy(testing_dataset[0]), 
                        th.from_numpy(testing_dataset[1]))
        
    returned_info_dict = {'garbage_indices': [], 'worker_proportions': worker_proportions}
    return training_datasets, validation_datasets, testing_dataset, returned_info_dict
    

# Set up and Train
Set up FL environment, distribute data to participants, perform FL training to produce trained local and global models.

In [58]:
# ============
# Hyperparams for FL model. 

#============
# Binary Class hyperparams 
binary_model_hyperparams = {
    "batch_size": 45,
    "lr": 0.1,
    "decay": 0.01,
    "rounds":20,
    "epochs": 20,
    "criterion": nn.BCELoss,
    "is_condensed": True
}

#============
# Multi Class hyperparams 
multiclass_model_hyperparams = {
    "batch_size": 45,
    "lr": 0.01,
    "decay": 0.1,
    "rounds":1,
    "epochs": 1,
    "criterion": nn.CrossEntropyLoss,
    "is_condensed": False
}

binary_model_structure =  {
    '0':{'layer_size_mapping': {"in_features": 5,
                                "out_features": 64},
        "layer_type": 'linear',
        "activation": 'sigmoid'}, 
    '1':{'layer_size_mapping': {"in_features": 64,
                                "out_features": 1},
        "layer_type": 'linear',
        "activation": 'sigmoid'},   
}

# multiclass_model_structure =  {
#     '0':{"in_features": 20,
#         "out_features": 200,
#         "layer_type": 'linear',
#         "activation": 'sigmoid'}, 
#     '1':{"in_features": 200,
#         "out_features": 4,
#         "layer_type": 'linear',
#         "activation": 'nil'},   
# }

# {'in_channels': 10, 
#      'out_channels': 10, 
#      'kernel_size': 10,
# }

model_hyperparams = binary_model_hyperparams

#============
# Set up Federated Learning environment.
# Produce points to the datasets stored on workers 
# and ttp. Also produce pointers to workers and ttp.

(training_pointers, 
 validation_pointers, 
 testing_pointer, 
 workers, 
 crypto_provider) = setup_FL_env(
    training_datasets,
    validation_datasets, 
    testing_dataset
)

#=============
# Produce individual trainlaoders for each client
# which make use of the full data available to them. 
trainloaders = {}
for worker_id in list(training_pointers.keys()):
    train = {worker_id:training_pointers[worker_id]}
    val = {worker_id:validation_pointers[worker_id]}
    #============
    # Convert training datasets into syft dataloaders. 
    train_loader, validation_loader, test_loader = convert_to_FL_batches(
        binary_model_hyperparams,
        train, 
        val, 
        testing_pointer
    )
    trainloaders.update({worker_id : train_loader})
    
# for key in trainloaders: 
#     print(key)
#     for batch_idx, batch in enumerate(trainloaders[key]):
#         print(batch[1].shape)
        
# #============
# # Commence FL training and return trained global model, as well as 
# global and local model states over time. 

trained_model, global_states, client_states, scale_coeffs, global_model_state_dicts = perform_FL_training(
    model_hyperparams,
    binary_model_structure,
    trainloaders,
    workers,
    crypto_provider
)

# # trainloaders
# print(scale_coeffs)


Rounds:   0%|                                                                                   | 0/20 [00:00<?, ?it/s][A

Global model parameters:
 [<VirtualWorker id:crypto_provider #objects:5>, <VirtualWorker id:crypto_provider #objects:5>, <VirtualWorker id:crypto_provider #objects:5>, <VirtualWorker id:crypto_provider #objects:5>] 
ID:
 [63952250482, 1219761808, 72919224206, 97425525780] 
 Cloning effect on global model:
 [(Wrapper)>[PointerTensor | me:71609300738 -> crypto_provider:63952250482], (Wrapper)>[PointerTensor | me:39724604074 -> crypto_provider:1219761808], (Wrapper)>[PointerTensor | me:50042905899 -> crypto_provider:72919224206], (Wrapper)>[PointerTensor | me:88230722944 -> crypto_provider:97425525780]]



Rounds:   5%|███▊                                                                       | 1/20 [00:18<05:45, 18.18s/it][A
Rounds:  10%|███████▌                                                                   | 2/20 [00:35<05:20, 17.82s/it][A
Rounds:  15%|███████████▎                                                               | 3/20 [00:53<05:05, 17.96s/it][A
Rounds:  20%|███████████████                                                            | 4/20 [01:11<04:47, 17.98s/it][A
Rounds:  25%|██████████████████▊                                                        | 5/20 [01:29<04:28, 17.88s/it][A
Rounds:  30%|██████████████████████▌                                                    | 6/20 [01:47<04:11, 17.99s/it][A
Rounds:  35%|██████████████████████████▎                                                | 7/20 [02:05<03:53, 17.98s/it][A
Rounds:  40%|██████████████████████████████                                             | 8/20 [02:22<03:34, 17.84s/it][A
Rounds:  45%|██

# Test Evaluation function for CC
This testing function is temporary;
        It is used in contribution calculations with the assumption 
        that the TTP has unfettered access to a testing dataset. This
        may not be in the case in non-simulation environment. 

In [77]:
# Generic
import copy
import math
import os
import random
import sys
import time
from collections import OrderedDict
from pathlib import Path
from collections import defaultdict
import json

# Libs
import sklearn as skl
from sklearn import preprocessing
import sklearn.datasets as skld
from sklearn.metrics import mean_squared_error, accuracy_score, roc_auc_score
import numpy as np
import pandas as pd
import missingno as msno
import matplotlib.pyplot as plt
import seaborn as sbn
import syft as sy
import torch as th
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from scipy.special import softmax

class Contribution_Calculation:
    def __init__(self, global_states, model_hyperparams, client_state_dict, testing_dataset, scale_coeffs):
        self.global_states = global_states
        self.model_hyperparams = model_hyperparams
        self.client_state_dict = client_state_dict
        self.testing_dataset = testing_dataset
        self.scale_coeffs = scale_coeffs

    def perform_FL_testing(self, model):
        """ Obtains predictions given a validation/test dataset upon
            a given PyTorch model.

        Args:
            model   (nn.Module): A PyTorch model
        Returns:
            accuracy score (float)
            roc_auc score  (float)
        """
        X_test = self.testing_dataset[0]
        y_test = self.testing_dataset[1]
        model.eval()
        with th.no_grad():
            predicted_labels = model(X_test.float())
            if self.model_hyperparams['is_condensed']:
                accuracy = accuracy_score(y_test.numpy(), predicted_labels.round().numpy())
                roc = roc_auc_score(y_test.numpy(), predicted_labels.numpy())
            else:
                accuracy = accuracy_score(y_test.numpy(), np.array([np.argmax(i) for i in predicted_labels.numpy()]))
                roc = roc_auc_score(y_test.numpy(), np.array([softmax(i) for i in predicted_labels.numpy()]), multi_class='ovr')
        return accuracy, roc

    def prep_client_state_dict(self):
        """ Takes in the client states dictionary returned by training function.
            Returns dictionary initialized with empty dictionaries for each client
            at each timestep.

        Args:
            client_states (dict): Client states at each communication round.
        Returns:
            client_dict (dict): Dictionary initialized to be filled with actual model state
                                tensors.
        """
        client_dict = {}
        for timestep in self.client_states.keys():
            client_dict.update({timestep : {}})
            for param_type in self.client_states[timestep].keys():
                for client_idx in range(len(self.client_states[timestep][param_type])):
                    client_dict[timestep].update({client_idx : {}})
                break
        self.client_state_dict = client_dict

    def fill_client_state_dict(self):
        """ Populates prepared client state dictionary with model states
            for each client at each timestep.

        Args:
            client_states (dict): Client states at each communication round.
        Returns:
            client_state_dict (dict): Complete state dictionary
        """
        self.prep_client_state_dict(self.client_states)
        for timestep in self.client_states.keys():
            for param_type in self.client_states[timestep].keys():
                for client_idx in range(len(self.client_states[timestep][param_type])):
                    self.client_dict[timestep][client_idx].update({param_type : self.client_states[timestep][param_type][client_idx]})

    def index_scale_coeffs_by_integer(self):
        """ scale_coeffs are indexed by FL worker id. Since CC functions
            below rely on integer indexing of clients, need to reindex
            keys while keeping values constant.

        Args:
            scale_coeffs (OrderedDict): Scaling factors for each client based
                                        on number of datapoints, indexed by
                                        FL worker ID.
        Returns:
            scale_dictionary (dict): Identical to scale_coeffs, except indexed
                                     by integers 0, 1, 2, ... etc.
        """
        scale_dictionary = {}
        for i, key in enumerate(self.scale_coeffs.keys()):
            scale_dictionary.update({i : self.scale_coeffs[key]})
        return scale_dictionary

    def produce_excluded_scaling_factor(self, idx):
        """ In aggregation of client subsets, scale_coeffs need to be recalculated.
            For example. Three clients. Each contributes 0.33 based on their dataset
            size. I exclude one. When I update global model, I should weight the
            remaining updates by 0.5 instead of 0.33. This function produces a scale
            factor which will divide the coefficients prior to aggregation.

        Args:
            indexed_scale_coeffs (OrderedDict): Output of above function.
        Returns:
            coeffs_scale_factor (float): The sum of scale_coeffs for non-excluded clients.
        """
        coeffs_scale_factor = 0
        for i in indexed_scale_coeffs:
            if i != idx:
                coeffs_scale_factor += indexed_scale_coeffs[i]
        return coeffs_scale_factor

    def prep_dl_dw(self):
        dl_dw = {}
        for layer_name in self.client_state_dict[1][0].keys():
            dl_dw.update({layer_name : th.zeros(self.client_state_dict[1][0][layer_name].shape, dtype=th.float64)})
        return dl_dw

    def calculate_dl_dw(self, global_state, rnd, idx):
        dl_dw = self.prep_dl_dw()
        for layer_name in global_state.state_dict().keys():
            dl_dw[layer_name] = self.client_state_dict[rnd][idx][layer_name] - global_state.state_dict()[layer_name]
        return dl_dw

    def calculate_GRV(self, final_state, current_state):
        diff = final_state
        for layer_name in final_state.state_dict().keys():
            diff.state_dict()[layer_name] -= current_state.state_dict()[layer_name]
        return diff

    def calculate_alignment(self, dl_dw, GRV):
        alignment = 0
        num_layers = 0
        for layer_name in dl_dw:
            alignment += th.dot(th.flatten(GRV.state_dict()[layer_name]), th.flatten(dl_dw[layer_name]))
            num_layers += 1
        return alignment/num_layers

    def singular(self,
                 reference_eval,
                 rnd,
                 client_idx):
        """
        This function evaluates the client's contribution to performance metrics
        in isolation from those of other clients.

        Args:
            global_states (dict): Dictionary of global model states.
            client_state_dict (dict): Dictionary of client model states at each timestep.
            testing_dataset (th.tensor): The testing dataset
            reference_eval (float): The accuracy and ROC-AUC score of the baseline model
                                    at timestep rnd.
            rnd (int): The current round
            client_idx (int): The index of the client being evaluated
            scale_coeffs (OrderedDict): Scaling factors for each client based
                                        on number of datapoints, indexed by
                                        FL worker ID.
        Returns:
            Difference between performance of client model and global model.
        """
        single_client_model = copy.deepcopy(self.global_states[rnd - 1])
        single_client_model.load_state_dict(self.client_state_dict[rnd][client_idx])

        client_eval = self.perform_FL_testing(single_client_model)
        print('---------')
        print(f"Improvement of {client_idx}'s update in isolation when applied to global model at round {rnd - 1}")
        print((client_eval[0] - reference_eval[0], client_eval[1] - reference_eval[1]))
        return (client_eval[0] - reference_eval[0], client_eval[1] - reference_eval[1])

    def aggregate(self,
                  reference_eval,
                  rnd,
                  client_idx,
                  client_subset_size = 1.0):
        """
        This function evaluates the client's contribution to performance metrics
        in isolation from those of other clients.

        Args:
            global_states (dict): Dictionary of global model states.
            client_state_dict (dict): Dictionary of client model states at each timestep.
            testing_dataset (th.tensor): The testing dataset
            reference_eval (float): The accuracy and ROC-AUC score of the baseline model
                                    at timestep rnd.
            rnd (int): The current round
            client_idx (int): The index of the client being evaluated
            scale_coeffs (OrderedDict): Scaling factors for each client based
                                        on number of datapoints, indexed by
                                        FL worker ID.
        Returns:
            Difference between performance of client model and global model.
        """
        aggregate_exclusion_model = copy.deepcopy(self.global_states[rnd - 1])

        dl_dw = self.prep_dl_dw()

        indexed_scale_coeffs = self.index_scale_coeffs_by_integer()
        scaling_soeff_factor = self.produce_excluded_scaling_factor(client_idx)

        num_clients = len(self.client_state_dict[1].keys())
        client_indices = np.random.choice(num_clients, int(np.floor(num_clients * client_subset_size)), replace=False)

        for client in client_indices:
            if client != client_idx:
                for layer_name in self.client_state_dict[rnd][client]:
                    dl_dw[layer_name] += (indexed_scale_coeffs[client]/scaling_soeff_factor) * self.client_state_dict[rnd][client][layer_name]

        aggregate_states = aggregate_exclusion_model.state_dict()
        for param_type in dl_dw:
            aggregate_states[param_type] += dl_dw[param_type]
        aggregate_exclusion_model.load_state_dict(aggregate_states)

        client_eval = self.perform_FL_testing(aggregate_exclusion_model)
        
        print('---------')
        print(f"Difference in performance of global model at round {rnd - 1} when client {client_idx} is excluded.")
        print((client_eval[0] - reference_eval[0], client_eval[1] - reference_eval[1]))

        return (client_eval[0] - reference_eval[0], client_eval[1] - reference_eval[1])

    def contribution_calculation(self, del_method):
        """
        This function calculates the contribution of each client to model
        training using the deletion and alignment methods.

        Args:
            model_hyperparams (dict): Hyperparams used in FL model training.
            global_states (dict): Dictionary of global model states.
            client_state_dict (dict): Dictionary of client model states at each timestep.
            testing_dataset (th.tensor): The testing dataset.
            del_method (string): Choice of singular or aggregate deletion method.
            scale_coeffs (OrderedDict): Scaling factors for each client based
                                        on number of datapoints, indexed by
                                        FL worker ID.
        Returns:
            client_alignment_matrix (np.array): Client alignments at each timestep.
            client_deletion_matrix (np.array): Differences in client performance at each timestep.
        """
        total_num_rounds = self.model_hyperparams['rounds']

        final_global_state = selff.global_states[total_num_rounds]
        client_alignment_matrix = np.zeros([len(self.client_state_dict[1].keys()), total_num_rounds], dtype=np.float64)
        client_deletion_matrix = np.zeros([len(self.client_state_dict[1].keys()), total_num_rounds], dtype=np.float64)

        del_dict = {'Singular' : self.singular,
                   'Aggregate' : self.aggregate}

        for rnd in range(1, total_num_rounds + 1):
            current_global_state = self.global_states[rnd - 1]

            GRV = self.calculate_GRV(final_global_state, current_global_state)

            reference_eval = self.perform_FL_testing(current_global_state)

            print('----------')
            print(f'Performance of global model at timestep {rnd-1}')
            print(reference_eval)

            for client_idx in self.client_state_dict[rnd].keys():

    #             ============
    #             Calculate alignment of dl/dw with GRV
    #             print(client_state_dict[rnd][client_idx])
                dl_dw = self.calculate_dl_dw(current_global_state, rnd, client_idx)
                alignment = self.calculate_alignment(dl_dw, GRV)
                print('---------')
                print(f'Alignment of client {client_idx} with GRV at round {rnd}')
                print(alignment.numpy())
                client_alignment_matrix[client_idx][rnd - 1] = alignment.numpy()

                #============
                # Calculate change in model performance metrics when client contribution is
                # selectively deleted.
                client_eval = del_dict[del_method](global_states,
                                                   client_state_dict,
                                                   testing_dataset,
                                                   reference_eval,
                                                   rnd,
                                                   client_idx,
                                                   scale_coeffs,
                                                   model_hyperparams)
                client_deletion_matrix[client_idx][rnd - 1] = (client_eval[0] + client_eval[1])

        print('===============')
        print('Client alignment matrix')
        print(client_alignment_matrix)
        print('===============')
        print('Client deletion matrix')
        print(client_deletion_matrix)

        return client_alignment_matrix, client_deletion_matrix

    def normalize_contribution_matrix(mat):
        return (mat - np.mean(mat)) / np.std(mat)

    def aggregate_contribution_matrices(arguments, align_mat, del_mat):
    #     align_mat = normalize_contribution_matrix(align_mat)
    #     del_mat = normalize_contribution_matrix(del_mat)

        contributions = defaultdict()

        for i in range(align_mat.shape[0]):
            contributions[i] = np.sum(align_mat[i]) + np.sum(del_mat[i]) / arguments['rounds']
    #         print(np.sum(align_mat[i]) + np.sum(del_mat[i]) / arguments['rounds'])
            print(np.sum(align_mat[i]) + np.sum(del_mat[i]) / arguments['rounds'])
        return contributions


In [78]:
cc = Contribution_Calculation(global_states, model_hyperparams, client_states, testing_dataset, scale_coeffs)

In [79]:
cc.fill_client_state_dict()

AttributeError: 'Contribution_Calculation' object has no attribute 'client_states'

In [None]:
client_alignment_matrix, client_deletion_matrix 

In [59]:
from scipy.special import softmax
def perform_FL_testing(dataset, model, model_hyperparams): 
    """ Obtains predictions given a validation/test dataset upon 
        a given PyTorch model. 
        
    Args:
        model   (nn.Module): A PyTorch model
    Returns:
        accuracy score (float)
        roc_auc score  (float)
    """
    X_test = dataset[0]
    y_test = dataset[1]
#     print(X_test)
    model.eval()
    with th.no_grad():
        predicted_labels = model(X_test.float())
#         print(predicted_labels.shape)
#         print(y_test.shape)
        if model_hyperparams['is_condensed']:
            accuracy = accuracy_score(y_test.numpy(), predicted_labels.round().numpy())
            roc = roc_auc_score(y_test.numpy(), predicted_labels.numpy())
        else:
            accuracy = accuracy_score(y_test.numpy(), np.array([np.argmax(i) for i in predicted_labels.numpy()]))
            roc = roc_auc_score(y_test.numpy(), np.array([softmax(i) for i in predicted_labels.numpy()]), multi_class='ovr')
    return accuracy, roc

# CC Prep Functions

Need to reorganize client states to index first by communication round, then by client, then by param type.

In [60]:
def prep_client_state_dict(client_states):
    """ Takes in the client states dictionary returned by training function.
        Returns dictionary initialized with empty dictionaries for each client
        at each timestep. 
        
    Args:
        client_states (dict): Client states at each communication round. 
    Returns:
        client_dict (dict): Dictionary initialized to be filled with actual model state
                            tensors. 
    """
    client_dict = {}
    for timestep in client_states.keys():
        client_dict.update({timestep : {}})
        for param_type in client_states[timestep].keys():
            for client_idx in range(len(client_states[timestep][param_type])):
                client_dict[timestep].update({client_idx : {}})
            break
    return client_dict

def fill_client_state_dict(client_states):
    """ Populates prepared client state dictionary with model states
        for each client at each timestep. 
        
    Args:
        client_states (dict): Client states at each communication round. 
    Returns:
        client_state_dict (dict): Complete state dictionary
    """
    client_state_dict = prep_client_state_dict(client_states)
    for timestep in client_states.keys():
        for param_type in client_states[timestep].keys():
            for client_idx in range(len(client_states[timestep][param_type])):
                client_state_dict[timestep][client_idx].update({param_type : client_states[timestep][param_type][client_idx]})
    return client_state_dict


In [61]:
def index_scale_coeffs_by_integer(scale_coeffs):
    """ scale_coeffs are indexed by FL worker id. Since CC functions
        below rely on integer indexing of clients, need to reindex
        keys while keeping values constant.
        
    Args:
        scale_coeffs (OrderedDict): Scaling factors for each client based
                                    on number of datapoints, indexed by
                                    FL worker ID. 
    Returns:
        scale_dictionary (dict): Identical to scale_coeffs, except indexed
                                 by integers 0, 1, 2, ... etc.
    """
    scale_dictionary = {}
    for i, key in enumerate(scale_coeffs.keys()):
        scale_dictionary.update({i : scale_coeffs[key]})
    return scale_dictionary

def produce_excluded_scaling_factor(indexed_scale_coeffs, idx):
    """ In aggregation of client subsets, scale_coeffs need to be recalculated.
        For example. Three clients. Each contributes 0.33 based on their dataset 
        size. I exclude one. When I update global model, I should weight the
        remaining updates by 0.5 instead of 0.33. This function produces a scale
        factor which will divide the coefficients prior to aggregation. 
        
    Args:
        indexed_scale_coeffs (OrderedDict): Output of above function.
    Returns:
        coeffs_scale_factor (float): The sum of scale_coeffs for non-excluded clients.
    """
    coeffs_scale_factor = 0
    for i in indexed_scale_coeffs:
        if i != idx:
            coeffs_scale_factor += indexed_scale_coeffs[i]
    return coeffs_scale_factor

# CC Calculation Functions

In [62]:
def prep_dl_dw(client_state_dict):
    dl_dw = {}
    for layer_name in client_state_dict[1][0].keys():
        dl_dw.update({layer_name : th.zeros(client_state_dict[1][0][layer_name].shape, dtype=th.float64)})
    return dl_dw

def calculate_dl_dw(client_state_dict, global_state, rnd, idx):
    dl_dw = prep_dl_dw(client_state_dict)
    for layer_name in global_state.state_dict().keys():
        dl_dw[layer_name] = client_state_dict[rnd][idx][layer_name] - global_state.state_dict()[layer_name]
    return dl_dw

def calculate_GRV(final_state, current_state):
    diff = final_state
    for layer_name in final_state.state_dict().keys():
        diff.state_dict()[layer_name] -= current_state.state_dict()[layer_name]
    return diff

def calculate_alignment(dl_dw, GRV):
    alignment = 0
    num_layers = 0
    for layer_name in dl_dw:
        alignment += th.dot(th.flatten(GRV.state_dict()[layer_name]), th.flatten(dl_dw[layer_name]))
        num_layers += 1
    return alignment/num_layers

In [63]:
def singular(global_states, 
             client_state_dict, 
             testing_dataset, 
             reference_eval, 
             rnd, 
             client_idx, 
             scale_coeffs,
             model_hyperparams,
             client_subset_size = 1.0):
    """ 
    This function evaluates the client's contribution to performance metrics
    in isolation from those of other clients. 
        
    Args:
        global_states (dict): Dictionary of global model states. 
        client_state_dict (dict): Dictionary of client model states at each timestep. 
        testing_dataset (th.tensor): The testing dataset
        reference_eval (float): The accuracy and ROC-AUC score of the baseline model
                                at timestep rnd. 
        rnd (int): The current round
        client_idx (int): The index of the client being evaluated
        scale_coeffs (OrderedDict): Scaling factors for each client based
                                    on number of datapoints, indexed by
                                    FL worker ID. 
    Returns:
        Difference between performance of client model and global model. 
    """
    single_client_model = copy.deepcopy(global_states[rnd - 1])
    single_client_model.load_state_dict(client_state_dict[rnd][client_idx])
    
    client_eval = perform_FL_testing(testing_dataset, 
                                     single_client_model, 
                                     model_hyperparams)
    print('---------')
    print(f"Improvement of {client_idx}'s update in isolation when applied to global model at round {rnd - 1}")
    print((client_eval[0] - reference_eval[0], client_eval[1] - reference_eval[1]))
    return (client_eval[0] - reference_eval[0], client_eval[1] - reference_eval[1])

def aggregate(global_states, 
              client_state_dict, 
              testing_dataset, 
              reference_eval, 
              rnd,
              client_idx, 
              scale_coeffs, 
              model_hyperparams,
              client_subset_size = 1.0):
    """ 
    This function evaluates the client's contribution to performance metrics
    in isolation from those of other clients. 
        
    Args:
        global_states (dict): Dictionary of global model states. 
        client_state_dict (dict): Dictionary of client model states at each timestep. 
        testing_dataset (th.tensor): The testing dataset
        reference_eval (float): The accuracy and ROC-AUC score of the baseline model
                                at timestep rnd. 
        rnd (int): The current round
        client_idx (int): The index of the client being evaluated
        scale_coeffs (OrderedDict): Scaling factors for each client based
                                    on number of datapoints, indexed by
                                    FL worker ID. 
    Returns:
        Difference between performance of client model and global model. 
    """
    aggregate_exclusion_model = copy.deepcopy(global_states[rnd - 1])
    
    dl_dw = prep_dl_dw(client_state_dict)

    indexed_scale_coeffs = index_scale_coeffs_by_integer(scale_coeffs)
    scaling_soeff_factor = produce_excluded_scaling_factor(indexed_scale_coeffs, client_idx)
    
    num_clients = len(client_state_dict[1].keys())
    client_indices = np.random.choice(num_clients, int(np.floor(num_clients * client_subset_size)), replace=False)
    
    for client in client_indices:
        
        if client != client_idx:
            for layer_name in client_state_dict[rnd][client]:
                dl_dw[layer_name] += (indexed_scale_coeffs[client]/scaling_soeff_factor) * client_state_dict[rnd][client][layer_name]
                
    aggregate_states = aggregate_exclusion_model.state_dict()
    for param_type in dl_dw:
        aggregate_states[param_type] += dl_dw[param_type]
    aggregate_exclusion_model.load_state_dict(aggregate_states)
    
    client_eval = perform_FL_testing(testing_dataset, 
                                     aggregate_exclusion_model, 
                                     model_hyperparams)
    print('---------')
    print(f"Difference in performance of global model at round {rnd - 1} when client {client_idx} is excluded.")
    print((client_eval[0] - reference_eval[0], client_eval[1] - reference_eval[1]))
    
    return (client_eval[0] - reference_eval[0], client_eval[1] - reference_eval[1])
    
def contribution_calculation(model_hyperparams, global_states, client_state_dict, testing_dataset, del_method, scale_coeffs):
    """ 
    This function calculates the contribution of each client to model 
    training using the deletion and alignment methods. 
        
    Args:
        model_hyperparams (dict): Hyperparams used in FL model training. 
        global_states (dict): Dictionary of global model states. 
        client_state_dict (dict): Dictionary of client model states at each timestep. 
        testing_dataset (th.tensor): The testing dataset.
        del_method (string): Choice of singular or aggregate deletion method. 
        scale_coeffs (OrderedDict): Scaling factors for each client based
                                    on number of datapoints, indexed by
                                    FL worker ID. 
    Returns:
        client_alignment_matrix (np.array): Client alignments at each timestep. 
        client_deletion_matrix (np.array): Differences in client performance at each timestep. 
    """
    total_num_rounds = model_hyperparams['rounds']
    
    final_global_state = global_states[total_num_rounds]
    client_alignment_matrix = np.zeros([len(client_state_dict[1].keys()), total_num_rounds], dtype=np.float64)
    client_deletion_matrix = np.zeros([len(client_state_dict[1].keys()), total_num_rounds], dtype=np.float64)
    
    del_dict = {'Singular' : singular,
               'Aggregate' : aggregate}
    
    for rnd in range(1, total_num_rounds + 1):
        current_global_state = global_states[rnd - 1]
        
        GRV = calculate_GRV(final_global_state, current_global_state)
        
        reference_eval = perform_FL_testing(testing_dataset, current_global_state, model_hyperparams)
        
        print('----------')
        print(f'Performance of global model at timestep {rnd-1}')
        print(reference_eval)
        
        for client_idx in client_state_dict[rnd].keys():
            
#             ============
#             Calculate alignment of dl/dw with GRV
#             print(client_state_dict[rnd][client_idx])
            dl_dw = calculate_dl_dw(client_state_dict, current_global_state, rnd, client_idx)
            alignment = calculate_alignment(dl_dw, GRV)
            print('---------')
            print(f'Alignment of client {client_idx} with GRV at round {rnd}')
            print(alignment.numpy())
            client_alignment_matrix[client_idx][rnd - 1] = alignment.numpy()
            
            #============
            # Calculate change in model performance metrics when client contribution is 
            # selectively deleted.
            client_eval = del_dict[del_method](global_states, 
                                               client_state_dict, 
                                               testing_dataset, 
                                               reference_eval, 
                                               rnd, 
                                               client_idx,
                                               scale_coeffs, 
                                               model_hyperparams)
            client_deletion_matrix[client_idx][rnd - 1] = (client_eval[0] + client_eval[1])

    print('===============')
    print('Client alignment matrix')
    print(client_alignment_matrix)
    print('===============')
    print('Client deletion matrix')
    print(client_deletion_matrix)
    
    return client_alignment_matrix, client_deletion_matrix
            
        
        

In [64]:
training_datasets[0][0].shape

torch.Size([318, 5])

In [65]:
training_datasets[1][0].shape

torch.Size([478, 5])

In [66]:
multiclass_model_hyperparams['is_condensed']

False

In [67]:
client_state_dict = fill_client_state_dict(client_states)

In [68]:
client_alignment_matrix, client_deletion_matrix = contribution_calculation(model_hyperparams, global_states, client_state_dict, testing_dataset, 'Singular', scale_coeffs)

----------
Performance of global model at timestep 0
(0.63, 0.019305019305019298)
---------
Alignment of client 0 with GRV at round 1
3.81867
---------
Improvement of 0's update in isolation when applied to global model at round 0
(0.16000000000000003, 0.9776919776919777)
---------
Alignment of client 1 with GRV at round 1
4.3231726
---------
Improvement of 1's update in isolation when applied to global model at round 0
(0.31999999999999995, 0.9772629772629773)
----------
Performance of global model at timestep 1
(0.97, 0.996996996996997)
---------
Alignment of client 0 with GRV at round 2
5.0260267
---------
Improvement of 0's update in isolation when applied to global model at round 1
(-0.15000000000000002, 0.00042900042900040347)
---------
Alignment of client 1 with GRV at round 2
5.2369576
---------
Improvement of 1's update in isolation when applied to global model at round 1
(-0.020000000000000018, 0.00042900042900040347)
----------
Performance of global model at timestep 2
(0.96

# CC Metric Aggregation

In [44]:
scale_coeffs

{<VirtualWorker id:worker1 #objects:2>: 0.25405786873676783,
 <VirtualWorker id:worker2 #objects:2>: 0.23782639378969656,
 <VirtualWorker id:worker3 #objects:2>: 0.25405786873676783,
 <VirtualWorker id:worker4 #objects:2>: 0.25405786873676783}

In [123]:
( np.sum(client_deletion_matrix[0])) ** 3

0.24981869263317313

In [124]:
(np.sum(client_deletion_matrix[1])) ** 3

0.2075893178065091

In [60]:
np.mean(client_deletion_matrix)

0.036435247342763544

In [61]:
np.mean(client_deletion_matrix[0])

0.02973279789019421

In [62]:
np.mean(client_deletion_matrix[1])

0.04313769679533288

In [92]:
np.min(client_alignment_matrix)

0.2257797122001648

In [66]:
((np.sum(client_deletion_matrix[1])) ** 3) 

-0.25909007418311314

In [65]:
((np.sum(client_deletion_matrix[0])) ** 3)

-13.450413662422447

In [69]:
np.sum(((client_alignment_matrix - np.min(client_alignment_matrix)) / np.max(client_alignment_matrix))[0]) / len(client_alignment_matrix[0]) - ((np.sum(client_deletion_matrix[0])) ** 3)

3.1700837621578986

In [70]:
np.sum(((client_alignment_matrix - np.min(client_alignment_matrix)) / np.max(client_alignment_matrix))[1]) / len(client_alignment_matrix[0]) - ((np.sum(client_deletion_matrix[1])) ** 3) 

-1.514166713553851

In [71]:
def normalize_contribution_matrix(mat):
    return (mat - np.mean(mat)) / np.std(mat)

def aggregate_contribution_matrices(arguments, align_mat, del_mat):
#     align_mat = normalize_contribution_matrix(align_mat)
#     del_mat = normalize_contribution_matrix(del_mat)
    
    contributions = defaultdict()
    
    for i in range(align_mat.shape[0]):
        contributions[i] = np.sum(align_mat[i]) + np.sum(del_mat[i]) / arguments['rounds']
#         print(np.sum(align_mat[i]) + np.sum(del_mat[i]) / arguments['rounds'])
        print(np.sum(align_mat[i]) + np.sum(del_mat[i]) / arguments['rounds'])
    return contributions

In [72]:
normalize_contribution_matrix(client_deletion_matrix)

array([[ 3.94851156, -0.50448041, -0.60974259, -0.57514987, -0.4021863 ,
        -0.43677902, -0.02018242, -0.47137173, -0.43381096, -0.43677902,
        -0.29692413, -0.43677902, -0.47137173, -0.43529499, -0.4698877 ,
        -0.4698877 , -0.40070227, -0.67744398, -0.4698877 , -0.50448041],
       [ 4.50051095, -0.05477514, -0.02166645, -0.02166645, -0.05625916,
         0.04751898,  0.04751898, -0.02166645,  0.01292626,  0.01292626,
         0.01292626,  0.04751898,  0.01292626,  0.01292626,  0.01292626,
         0.01292626,  0.01292626,  0.01292626, -0.02166645,  0.01292626]])

In [73]:
normalize_contribution_matrix(client_alignment_matrix)

array([[-1.60798579, -1.51056888, -1.29833536, -1.10273667, -0.92147225,
        -0.75369194, -0.59704173, -0.42574195, -0.25407591, -0.09501922,
         0.08051425,  0.25752897,  0.44289997,  0.62389322,  0.79624242,
         0.99563139,  1.19162417,  1.36176603,  1.5787506 ,  1.77656398],
       [-1.56727945, -1.49354969, -1.29004869, -1.10096063, -0.92882358,
        -0.7706648 , -0.61713661, -0.45628257, -0.29829407, -0.14652993,
         0.02163005,  0.19003026,  0.36458094,  0.53288497,  0.70787903,
         0.89217229,  1.07801467,  1.25217753,  1.4529211 ,  1.63853387]])

In [74]:
contributions = aggregate_contribution_matrices(model_hyperparams, client_alignment_matrix, client_deletion_matrix)

481.55869753541214
468.33683680835725


In [89]:
training_datasets[0][0].shape

torch.Size([54, 20])

In [90]:
training_datasets[1][0].shape

torch.Size([2045, 20])

In [255]:
contributions

defaultdict(None,
            {0: 184.6675689593634,
             1: 205.90674948397876,
             2: 201.27492577070555,
             3: 216.01619803535587,
             4: 213.85393442505773})

In [77]:
training_datasets[1][0].shape

torch.Size([479, 20])

In [None]:
training_datasets[1]

In [69]:
global_states

{0: LogisticRegression(
   (linear): Linear(in_features=20, out_features=1, bias=True)
 ),
 1: LogisticRegression(
   (linear): Linear(in_features=20, out_features=1, bias=True)
 ),
 2: LogisticRegression(
   (linear): Linear(in_features=20, out_features=1, bias=True)
 ),
 3: LogisticRegression(
   (linear): Linear(in_features=20, out_features=1, bias=True)
 )}

In [90]:
client_states

{1: {'linear.weight': [tensor([[ 0.0675,  0.0123, -0.0878, -0.0320, -0.0404,  0.0702, -0.2106, -0.0237,
             0.2144, -0.0160,  0.1440, -0.0417,  0.1804,  0.1131,  0.0626,  0.1555,
             0.1011,  0.0177, -0.0217,  0.2229]]),
   tensor([[ 0.0710,  0.0338, -0.0854, -0.0220, -0.0225,  0.0676, -0.1689, -0.0601,
             0.2198, -0.0261,  0.1218, -0.0033,  0.1829,  0.0892,  0.0941,  0.1658,
             0.0940,  0.0277,  0.0075,  0.2114]])],
  'linear.bias': [tensor([-0.1748]), tensor([-0.1602])]},
 2: {'linear.weight': [tensor([[ 0.0673,  0.0135, -0.0880, -0.0298, -0.0405,  0.0727, -0.2098, -0.0222,
             0.2144, -0.0149,  0.1440, -0.0409,  0.1790,  0.1131,  0.0621,  0.1557,
             0.1002,  0.0174, -0.0221,  0.2253]]),
   tensor([[ 0.0726,  0.0356, -0.0852, -0.0209, -0.0219,  0.0653, -0.1697, -0.0592,
             0.2204, -0.0238,  0.1242, -0.0053,  0.1822,  0.0901,  0.0939,  0.1654,
             0.0924,  0.0275,  0.0074,  0.2126]])],
  'linear.bias': [tensor

In [63]:
global_states[0].state_dict()['linear.weight']

tensor([[ 0.1710,  0.1856, -0.0524,  0.2054, -0.0490,  0.0451, -0.1089,  0.1313,
          0.1971, -0.1640,  0.1944,  0.0419,  0.1652,  0.0303,  0.1078, -0.0316,
          0.1724,  0.0331, -0.1044,  0.0570]])

In [64]:
global_states[3].state_dict()['linear.weight']

tensor([[ 0.1389,  0.0466, -0.1736, -0.0526, -0.0635,  0.1349, -0.3821, -0.0818,
          0.4338, -0.0406,  0.2675, -0.0452,  0.3614,  0.2028,  0.1563,  0.3203,
          0.1941,  0.0454, -0.0149,  0.4339]])

In [167]:
client_states

{1: {'linear.weight': [tensor([[ 0.2639, -0.1727,  0.0589,  0.0170, -0.1685, -0.0229,  0.0598,  0.4092,
            -0.0129,  0.0305,  0.0777,  0.0037,  0.0768, -0.0218,  0.0458, -0.0179,
             0.2290, -0.1439,  0.0872, -0.0690]]),
   tensor([[ 0.2665, -0.1948, -0.0414, -0.0178, -0.1609,  0.0041,  0.0192,  0.3789,
             0.0055,  0.0171,  0.0241,  0.0216,  0.0726, -0.0494,  0.0140, -0.0183,
             0.2087, -0.1459, -0.0615, -0.0935]])],
  'linear.bias': [tensor([-0.0484]), tensor([0.0329])]},
 2: {'linear.weight': [tensor([[ 0.2614, -0.1653,  0.0704,  0.0209, -0.1635, -0.0239,  0.0569,  0.4063,
            -0.0140,  0.0340,  0.0716, -0.0045,  0.0659, -0.0201,  0.0451, -0.0138,
             0.2262, -0.1293,  0.0772, -0.0756]]),
   tensor([[ 0.2630, -0.1939, -0.0406, -0.0179, -0.1616,  0.0090,  0.0101,  0.3768,
             0.0100, -0.0005,  0.0196,  0.0136,  0.0787, -0.0457,  0.0158, -0.0216,
             0.2061, -0.1513, -0.0692, -0.0842]])],
  'linear.bias': [tensor(

In [60]:
for worker, dict in client_states[1].items():
    print(worker)
    print(dict.state_dict()['linear.weight'])

<VirtualWorker id:worker1 #objects:8>
(Wrapper)>[PointerTensor | me:76483481482 -> crypto_provider:71511273569]::data
<VirtualWorker id:worker2 #objects:8>
(Wrapper)>[PointerTensor | me:54640959924 -> crypto_provider:5913270987]::data


# Testing Function

In [8]:
def perform_FL_testing(arguments, dataset, model): 
    """ Obtains predictions given a validation/test dataset upon 
        a specified trained global model.
        
    Args:
        arguments (Arguments): Parameters defining current experiment
        dataset (tuple(th.Tensor)): A validation/test dataset
        model   (nn.Module): Trained global model
    Returns:
        accuracy score (float)
        roc_auc score  (float)
    """
    # Archive model's location (Default: TTP)
    model_origin = model.location

    print("Before getting, model parameters:\n", list(model.parameters()))
    print("Before getting, model location:\n", [p.location for p in list(model.parameters())])
    print("Before getting, objects at TTP:\n", list(crypto_provider._objects.keys()))
    
    # Trace the location of the dataset to be evaluated
    curr_worker = dataset.location
    print("Current worker:", curr_worker)
    
    # Retrieve model from TTP
    model = model.get()
    
    print("Before sending, model parameters:\n", list(model.parameters()))
    print("Before sending, model location:\n", [p.location for p in list(model.parameters())])
    print("Before sending, objects at TTP:\n", list(crypto_provider._objects.keys()))
    print("Before sending, worker objects:\n", curr_worker._objects.keys())
    print("Before sending, are model parameters in worker?", 
          all([p.location in curr_worker._objects.keys()
           for p in list(model.parameters())]))
    
    model = model.send(curr_worker)
    
    print("-"*100)
    print("After sending, model parameters:\n", list(model.parameters()))
    print("After sending, model location:\n", [p.location for p in list(model.parameters())])
    print("After sending, objects at TTP:\n", list(crypto_provider._objects.keys()))
    print("After sending, worker objects:\n", curr_worker._objects.keys())
    print("After sending, are model parameters in worker?", 
          all([p.location in curr_worker._objects.keys()
           for p in list(model.parameters())]))
    
    X_test = dataset.data.float()
    y_test = dataset.targets.float()
    print(X_test, y_test)
    
    model.eval()
    with th.no_grad():
        
        if arguments.is_condensed:
            predictions = (model(X_test) > 0.5).float()
            
        else:
            # Find best predicted class label representative of sample
            _, predicted_labels = model(X_test).max(axis=1)
            
            # One-hot encode predicted labels
            predictions = th.FloatTensor(y_test.shape)
            predictions.zero_()
            predictions.scatter_(1, predicted_labels.view(-1,1), 1)


        # Retrieve predictions
        predictions = predictions.get()
        
        # Retrieve truth labels
        y_test = y_test.get()
        
        # Calculate accuracy of predictions
        accuracy = accuracy_score(y_test.numpy(), predictions.numpy())
        
        # Calculate ROC-AUC for each label
        roc = roc_auc_score(y_test.numpy(), predictions.numpy())

        #############
        # Quick Fix #
        #############
        # y_test is located at some remote worker. Accuracy & ROC-AUC computation via
        # numpy raises errors because Numpy is currently not supported. Manual retrieval
        # and sending first, manual implementation to be done later.
        y_test = y_test.send(curr_worker)
        
        # Retrieve & send model back to whence it came!
        model.get()
        model = model.send(model_origin)
 
    return accuracy, roc


test_model = copy.deepcopy(binary_sgd_model)

print("="*100)
for worker, dataset in validation_pointers.items():

    print("Validation scores:", perform_FL_testing(best_binary_sgd_args, dataset, test_model))

Before getting, model parameters:
 [Parameter containing:
Parameter>[PointerTensor | me:36712146433 -> crypto_provider:38161928650], Parameter containing:
Parameter>[PointerTensor | me:53766344238 -> crypto_provider:52911434509]]
Before getting, model location:
 [<VirtualWorker id:crypto_provider #objects:84>, <VirtualWorker id:crypto_provider #objects:84>]
Before getting, objects at TTP:
 [21932075915, 9090941217, 79944881906, 89279046959, 89850986629, 57346283611, 73689102421, 82730040201, 79385862149, 73258156428, 44969379986, 4382029053, 36007568665, 46463523158, 32503582200, 27214527800, 44473247553, 39417226305, 18137575301, 33877527535, 20308527827, 20833382737, 47173841097, 67935573675, 31014787181, 57698471772, 31036289465, 53025945477, 74383302444, 79078125948, 71985433180, 29270494094, 94407237437, 43010196674, 15604950872, 73754126399, 34795441738, 30330599691, 37350792097, 48319690060, 91752207350, 59816065840, 7312995878, 25309879949, 69841944925, 26533179963, 53047552083

In [9]:
print("Accuracy scores:", perform_FL_testing(best_binary_sgd_args, testing_pointer, test_model))

Before getting, model parameters:
 [Parameter containing:
(Wrapper)>[PointerTensor | me:32719714031 -> crypto_provider:53438888134], Parameter containing:
(Wrapper)>[PointerTensor | me:70939440670 -> crypto_provider:81508625698]]
Before getting, model location:
 [<VirtualWorker id:crypto_provider #objects:84>, <VirtualWorker id:crypto_provider #objects:84>]
Before getting, objects at TTP:
 [21932075915, 9090941217, 79944881906, 89279046959, 89850986629, 57346283611, 73689102421, 82730040201, 79385862149, 73258156428, 44969379986, 4382029053, 36007568665, 46463523158, 32503582200, 27214527800, 44473247553, 39417226305, 18137575301, 33877527535, 20308527827, 20833382737, 47173841097, 67935573675, 31014787181, 57698471772, 31036289465, 53025945477, 74383302444, 79078125948, 71985433180, 29270494094, 94407237437, 43010196674, 15604950872, 73754126399, 34795441738, 30330599691, 37350792097, 48319690060, 91752207350, 59816065840, 7312995878, 25309879949, 69841944925, 26533179963, 53047552083

---

In [13]:
# Check that model parameters are hidden
list(binary_sgd_model.parameters())

[Parameter containing:
 (Wrapper)>[PointerTensor | me:36712146433 -> crypto_provider:38161928650],
 Parameter containing:
 (Wrapper)>[PointerTensor | me:53766344238 -> crypto_provider:52911434509]]

In [14]:
# Check that training datasets are still hidden from Me (i.e. client)
training_datasets

{0: ((Wrapper)>[PointerTensor | me:98952335063 -> worker1:63999975985],
  (Wrapper)>[PointerTensor | me:68461425098 -> worker1:84285192018]),
 1: ((Wrapper)>[PointerTensor | me:87636762647 -> worker2:31467788079],
  (Wrapper)>[PointerTensor | me:39563190106 -> worker2:91454758974])}

In [16]:
# Check that validation datasets are still hidden from Me (i.e. client)
validation_datasets

{0: ((Wrapper)>[PointerTensor | me:22904807196 -> worker1:3210024390],
  (Wrapper)>[PointerTensor | me:26774087597 -> worker1:21098284358]),
 1: ((Wrapper)>[PointerTensor | me:26488016649 -> worker2:24662750769],
  (Wrapper)>[PointerTensor | me:26274160852 -> worker2:58458599022])}

In [18]:
# Check that test_dataset is hidden from me (i.e. client)
testing_dataset

((Wrapper)>[PointerTensor | me:39940050514 -> crypto_provider:21932075915],
 (Wrapper)>[PointerTensor | me:50952629367 -> crypto_provider:9090941217])

<font color='red'>Model privacy is preserved!</font>

---

<font color='red'>**Conclusion**
    
The manual computation & aggregation of model weights per round seems to give rise to an interesting phenomenom; IID validation performance seem to perform poorly, but the generated model appears to be better able to generalise to testing data, as seen from the 4% increase in accuracy (i.e. ~76% to ~80%) and roc-auc (i.e. ~73% to 78%).</font>

In [None]:
# Things to do
# 1) test data from client must be SMPC-ed
# 2) Model weights & biases must be SMPC-ed (?)

In [None]:
##############
# Deprecated #
##############

"""
from pprint import pprint
for data, labels in sy.FederatedDataLoader(sy.FederatedDataset(training_pointers.values()), 
                                           batch_size=100,
                                           iter_per_worker=False):
    pprint(data)
    
    
from pprint import pprint
for a in sy.FederatedDataLoader(sy.FederatedDataset(training_pointers.values()), 
                                           batch_size=100,
                                           iter_per_worker=True):
    pprint(a)

len(sy.FederatedDataLoader(sy.FederatedDataset(training_pointers.values()), 
                                           batch_size=100,
                                           iter_per_worker=True))
"""