# Fedbiomed Researcher POC with Local DP-FL

Implementing the scheme of https://arxiv.org/pdf/1710.06963.pdf

In [24]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Start the network
Before running this notebook, start the network with `./scripts/fedbiomed_run network`

## Setting the nodes up
A node must be configured before running this notebook:
1. `./scripts/fedbiomed_run node add`
  * Select option 2 (default) to add MNIST to the node
  * Confirm default tags by hitting "y" and ENTER
  * Pick the folder where MNIST is downloaded (this is needed because of torch issue https://github.com/pytorch/vision/issues/3549)
  * Data must have been added (if you get a warning saying that data must be unique, it is because the data has already been added)
  
2. Check that your data has been added by executing `./scripts/fedbiomed_run node list`
3. Run the node using `./scripts/fedbiomed_run node start`. Wait until you get `Starting task manager`; this means the node is online.
4. Following the same procedure, you can create a second node for client 2.

Check available clients:

In [25]:
from fedbiomed.researcher.requests import Requests
# Create the researcher-side request handler and list the datasets available on the nodes.
req = Requests()
req.list(verbose=True)

2022-03-24 13:13:30,282 fedbiomed INFO - Listing available datasets in all nodes... 


{}

## Define an experiment model and parameters

Declare a torch.nn MyTrainingPlan class to send for training on the node

Note : write **only** the code to export in the following cell

In [3]:
import torch
import torch.nn as nn
from fedbiomed.common.training_plans import TorchTrainingPlan
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from math import sqrt, log

# Here we define the model to be used. 
# You can use any class name (here 'MyTrainingPlan')
class MyTrainingPlan(TorchTrainingPlan):
    """CNN training plan for MNIST that perturbs the local update before returning it.

    The network is two convolutions followed by two fully connected layers.
    After local training, `postprocess` takes the difference between the trained
    parameters and `self.init_params`, clips it and adds Gaussian noise
    (see `clip_and_perturb`).

    NOTE(review): `self.init_params` and `self.dataset_path` are not set in this
    class; presumably they are provided by `TorchTrainingPlan` or the node-side
    training routine -- confirm.
    """

    def __init__(self, model_args: dict = {}):
        """Build the layers, read the DP settings and declare node-side dependencies.

        Args:
            model_args: must contain a 'DP' dict with 'clip_threshold' and 'sigma'.
                'clip_strategy' ('flat' or 'per_layer') is optional and defaults
                to 'flat'. With 'per_layer', 'clip_threshold' must be a sequence
                holding one threshold per parameter tensor.

        Raises:
            KeyError: if 'DP', 'clip_threshold' or 'sigma' is missing.
        """
        super(MyTrainingPlan, self).__init__(model_args)
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        # 9216 = 64 channels * 12 * 12 after two 3x3 convolutions and a 2x2
        # max-pool applied to 28x28 inputs.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

        # DP parameters: the noise multiplier `sigma` is supplied directly by the
        # researcher rather than derived on the node.
        dp_args = model_args['DP']
        self.C = dp_args['clip_threshold']
        self.sigma = dp_args['sigma']
        self.clipping = dp_args.get('clip_strategy', 'flat')

        # Here we define the custom dependencies that will be needed by our custom Dataloader
        # In this case, we need the torch DataLoader classes
        # Since we will train on MNIST, we need datasets and transform from torchvision
        deps = ["from torchvision import datasets, transforms",
               "from torch.utils.data import DataLoader",
               "import torch",
               "from math import sqrt, log"]
        self.add_dependency(deps)

    def forward(self, x):
        """Return the log-probabilities (log-softmax over dim 1) for the batch `x`."""
        # `torch.nn.functional` is used explicitly: no `F` alias is imported by
        # this notebook or listed in the dependencies above.
        functional = torch.nn.functional
        x = self.conv1(x)
        x = functional.relu(x)
        x = self.conv2(x)
        x = functional.relu(x)
        x = functional.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = functional.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)

        output = functional.log_softmax(x, dim=1)
        return output

    def training_data(self, batch_size = 48):
        """Return a shuffled DataLoader over the MNIST training split stored at `self.dataset_path`."""
        # Custom torch Dataloader for MNIST data
        transform = transforms.Compose([transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))])
        dataset1 = datasets.MNIST(self.dataset_path, train=True, download=False, transform=transform)
        train_kwargs = {'batch_size': batch_size, 'shuffle': True}
        data_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
        return data_loader

    def training_step(self, data, target):
        """Return the negative log-likelihood loss of the model output on one batch."""
        output = self.forward(data)
        loss   = torch.nn.functional.nll_loss(output, target)
        return loss

    def postprocess(self, params):
        """Replace the trained parameters with their clipped and noised version.

        Args:
            params: dict mapping parameter names to trained tensors. It is
                updated in place.

        Returns:
            The same `params` dict, where each entry is
            `init_params + clipped update + Gaussian noise`.
        """
        # Extract the local update with respect to the initial parameters.
        delta_params = {name: param - self.init_params[name]
                        for name, param in params.items()}

        perturbed_params = self.clip_and_perturb(delta_params)
        params.update(perturbed_params)

        # TODO: decide whether the DP parameters should also be passed to the central server.
        return params

    def clip_and_perturb(self, delta_params):
        """Clip the parameter update and add Gaussian noise to it.

        With the 'flat' strategy, a single factor `min(1, C / ||update||_2)` is
        computed from the L2 norm of the whole update and applied to every
        tensor. With 'per_layer', each tensor is clipped with its own threshold
        taken from the sequence `self.C`. In both cases Gaussian noise with
        standard deviation `sqrt(2) * sigma * threshold` is added.

        Args:
            delta_params: dict mapping parameter names to update tensors.

        Returns:
            dict mapping each name to `self.init_params[name]` plus the clipped,
            noised update.

        Raises:
            ValueError: if the clipping strategy is unknown, or if 'per_layer'
                is used with a number of thresholds that differs from the
                number of tensors.
        """
        # Fail loudly on a bad strategy: returning nothing here would let the
        # unperturbed parameters leave the node.
        if self.clipping not in ('flat', 'per_layer'):
            raise ValueError(
                f"unknown clipping strategy {self.clipping!r}: expected 'flat' or 'per_layer'")

        perturbed_params = {}
        if not delta_params:
            return perturbed_params

        # Plain float, so it combines with tensors of any dtype or device.
        noise_factor = sqrt(2) * self.sigma

        if self.clipping == 'flat':
            # L2 norm of the whole update, all tensors flattened together.
            flat_update = torch.cat([delta.reshape(-1) for delta in delta_params.values()])
            update_norm = flat_update.norm(2)
            # The small constant avoids a division by zero for a null update.
            clip_factor = (self.C / (update_norm + 1e-6)).clamp(max=1.0)

            for key, delta in delta_params.items():
                noise = noise_factor * self.C * torch.randn_like(delta)
                perturbed_params[key] = self.init_params[key] + delta.mul(clip_factor) + noise

        else:
            # NB: if the 'per_layer' strategy is chosen the researcher should provide as C a list
            # containing all layer-specific clipping parameters.
            if len(self.C) != len(delta_params):
                raise ValueError(
                    f"'per_layer' clipping needs one threshold per tensor: "
                    f"got {len(self.C)} thresholds for {len(delta_params)} tensors")

            for (key, delta), c in zip(delta_params.items(), self.C):
                layer_norm = delta.reshape(-1).norm(2)
                clip_factor = (c / (layer_norm + 1e-6)).clamp(max=1.0)
                noise = noise_factor * c * torch.randn_like(delta)
                perturbed_params[key] = self.init_params[key] + delta.mul(clip_factor) + noise

        return perturbed_params
    
#     def compute_sigma(self):
        
#         size_local_data = len(self.data.dataset)
        
#         if self.delta is None:
#             self.delta = 1.0/size_local_data*1e-1
            
#         local_sensititvity = 2*self.C/size_local_data
#         c = sqrt(2*log(1.25/self.delta))+1e-6
#         sigma = c*self.rounds*local_sensititvity/self.epsilon
#         return sigma

These arguments correspond, respectively, to:
* `model_args`: a dictionary with the arguments related to the model (e.g. number of layers, features, etc.). This will be passed to the model class on the node side. 
* `training_args`: a dictionary containing the arguments for the training routine (e.g. batch size, learning rate, epochs, etc.). This will be passed to the routine on the node side.
If FedProx optimisation is requested, the `fedprox_mu` parameter must be defined here. It must also be a float between XX and YY.

In [48]:
from fedbiomed.researcher.privacy.rdp_accountant import compute_rdp, get_privacy_spent
import matplotlib.pyplot as plt
import numpy as np

#req = Requests()
#data_info = req.list()
#max_data_size = float(np.max([data_info[i][0]['shape'][0] for i in data_info.keys()]))

# Dataset size used for the privacy accounting; hard-coded here instead of
# being queried from the nodes.
max_data_size = 1000

# Target delta is set to 0.1 / max_data_size.
target_delta = .1/max_data_size
# Upper bound on epsilon; used further below to decide how many rounds to run.
max_eps = 50
# Noise multiplier: passed to compute_rdp below and, through model_args, to the training plan.
sigma = 1
# Clipping threshold (15.0) applied to the local update by the training plan.
clip_threshold = 15e-0


# RDP orders: 1.1, 1.2, ..., 10.9 followed by the integers 12..63.
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
# NOTE(review): q is presumably the sampling ratio and steps the number of
# noisy steps accounted for -- confirm against rdp_accountant.compute_rdp.
rdp = compute_rdp(q=0.1,
                  noise_multiplier = sigma,
                  steps=100,
                  orders=orders)

In [49]:
# Candidate numbers of federated rounds: 1 through 9.
rounds = range(1, 10)

# First value returned by get_privacy_spent (the epsilon spent, going by the
# variable name) when `rdp` is scaled by each candidate number of rounds.
epsilon_range = np.array([
    get_privacy_spent(orders, n_rounds * rdp, target_delta=target_delta)[0]
    for n_rounds in rounds
])

# Number of candidates whose epsilon stays strictly below the budget.
FL_rounds = int(np.count_nonzero(epsilon_range < max_eps))

In [50]:
# Show the value computed for each candidate number of rounds.
print(epsilon_range)

[ 6.82162896  9.6700392  12.04277341 14.17266347 16.14141108 17.98574106
 19.74985028 21.4535366  23.08023498]


In [51]:
# DP settings read by MyTrainingPlan.__init__: clipping threshold and noise multiplier.
model_args = {'DP': {'clip_threshold' : clip_threshold, 'sigma': sigma}}

# Arguments for the training routine on the node side.
training_args = dict(
    batch_size=48,
    lr=1e-3,
    fedprox_mu=0.0,
    epochs=1,
    dry_run=False,
    # Fast pass for development : only use ( batch_maxnum * batch_size ) samples
    batch_maxnum=1000,
)

2022-03-25 07:30:01,895 fedbiomed ERROR - Messaging researcher_cc7c7db6-7b8b-4eef-b951-6ff12b238da3 disconnected with error code rc = 7 - Hint: check for another instance of the same component running or for communication error


## Declare and run the experiment

- search nodes serving data for these `tags`, optionally filter on a list of node ID with `nodes`
- run a round of local training on nodes with model defined in `model_path` + federation with `aggregator`
- run for `round_limit` rounds, applying the `node_selection_strategy` between the rounds

In [11]:
from fedbiomed.researcher.experiment import Experiment
from fedbiomed.researcher.aggregators.fedavg import FedAverage

# Tags used to search the nodes for matching datasets.
tags =  ['#MNIST', '#dataset']

# Build the experiment: MyTrainingPlan is trained on the matching nodes for
# FL_rounds rounds (computed in the accounting cell above) and aggregated with FedAverage.
exp = Experiment(tags=tags,
                 model_args=model_args,
                 model_class=MyTrainingPlan,
                 training_args=training_args,
                 round_limit=FL_rounds,
                 aggregator=FedAverage(),
                 node_selection_strategy=None)

2022-03-15 09:41:18,643 fedbiomed INFO - Searching dataset with data tags: ['#MNIST', '#dataset'] for all nodes
2022-03-15 09:41:18,648 fedbiomed INFO - log from: node_edb16ff9-3a16-48a0-9f25-ffc473e623ed / DEBUG - Message received: {'researcher_id': 'researcher_cc7c7db6-7b8b-4eef-b951-6ff12b238da3', 'tags': ['#MNIST', '#dataset'], 'command': 'search'}
2022-03-15 09:41:18,650 fedbiomed INFO - log from: node_30f10bb5-4751-4306-8ee2-e0586d6e926b / DEBUG - Message received: {'researcher_id': 'researcher_cc7c7db6-7b8b-4eef-b951-6ff12b238da3', 'tags': ['#MNIST', '#dataset'], 'command': 'search'}
2022-03-15 09:41:28,654 fedbiomed INFO - Node selected for training -> node_edb16ff9-3a16-48a0-9f25-ffc473e623ed
2022-03-15 09:41:28,655 fedbiomed INFO - Node selected for training -> node_30f10bb5-4751-4306-8ee2-e0586d6e926b
2022-03-15 09:41:28,657 fedbiomed INFO - Checking data quality of federated datasets...
2022-03-15 09:41:28,670 fedbiomed DEBUG - Model file has been saved: /Users/mlorenzi/wor

Let's start the experiment.

By default, this function doesn't stop until all the `round_limit` rounds are done for all the nodes

In [12]:
# Run the experiment; by default this does not return until all `round_limit` rounds are done.
exp.run()

2022-03-15 09:41:29,275 fedbiomed INFO - Sampled nodes in round 0 ['node_edb16ff9-3a16-48a0-9f25-ffc473e623ed', 'node_30f10bb5-4751-4306-8ee2-e0586d6e926b']
2022-03-15 09:41:29,276 fedbiomed INFO - Send message to node node_edb16ff9-3a16-48a0-9f25-ffc473e623ed - {'researcher_id': 'researcher_cc7c7db6-7b8b-4eef-b951-6ff12b238da3', 'job_id': '33529e7b-1b42-457a-93ab-4270ee15a9d1', 'training_args': {'batch_size': 48, 'lr': 0.001, 'fedprox_mu': 0.0, 'epochs': 1, 'dry_run': False, 'batch_maxnum': 1000}, 'model_args': {'DP': {'clip_threshold': 15.0, 'sigma': 5.0}}, 'command': 'train', 'model_url': 'http://localhost:8844/media/uploads/2022/03/15/my_model_84f25685-7918-4865-aeab-dcb625c976d3.py', 'params_url': 'http://localhost:8844/media/uploads/2022/03/15/aggregated_params_init_bd5be25a-a029-49f8-97ed-8e9572ec9867.pt', 'model_class': 'MyTrainingPlan', 'training_data': {'node_edb16ff9-3a16-48a0-9f25-ffc473e623ed': ['dataset_2e82719a-83a4-4de4-972f-7637c45b6d7d']}}
2022-03-15 09:41:29,277 fedb







2022-03-15 09:43:13,283 fedbiomed INFO - log from: node_edb16ff9-3a16-48a0-9f25-ffc473e623ed / DEBUG - Reached 1000 batches for this epoch, ignore remaining data
2022-03-15 09:43:13,284 fedbiomed INFO - log from: node_edb16ff9-3a16-48a0-9f25-ffc473e623ed / DEBUG - running model.postprocess() method
2022-03-15 09:43:13,581 fedbiomed INFO - log from: node_30f10bb5-4751-4306-8ee2-e0586d6e926b / DEBUG - Reached 1000 batches for this epoch, ignore remaining data
2022-03-15 09:43:13,582 fedbiomed INFO - log from: node_30f10bb5-4751-4306-8ee2-e0586d6e926b / DEBUG - running model.postprocess() method
2022-03-15 09:43:13,906 fedbiomed INFO - log from: node_edb16ff9-3a16-48a0-9f25-ffc473e623ed / DEBUG - upload (HTTP POST request) of file /Users/mlorenzi/works/temp/fedbiomed/var/tmp/node_params_0e5541b1-8696-48df-90f7-4312a71dab66.pt successful, with status code 201
2022-03-15 09:43:13,908 fedbiomed INFO - log from: node_edb16ff9-3a16-48a0-9f25-ffc473e623ed / INFO - results uploaded successfully 

2022-03-15 09:43:20,140 fedbiomed DEBUG - researcher_cc7c7db6-7b8b-4eef-b951-6ff12b238da3
2022-03-15 09:43:20,141 fedbiomed INFO - Send message to node node_30f10bb5-4751-4306-8ee2-e0586d6e926b - {'researcher_id': 'researcher_cc7c7db6-7b8b-4eef-b951-6ff12b238da3', 'job_id': '33529e7b-1b42-457a-93ab-4270ee15a9d1', 'training_args': {'batch_size': 48, 'lr': 0.001, 'fedprox_mu': 0.0, 'epochs': 1, 'dry_run': False, 'batch_maxnum': 1000}, 'model_args': {'DP': {'clip_threshold': 15.0, 'sigma': 5.0}}, 'command': 'train', 'model_url': 'http://localhost:8844/media/uploads/2022/03/15/my_model_84f25685-7918-4865-aeab-dcb625c976d3.py', 'params_url': 'http://localhost:8844/media/uploads/2022/03/15/aggregated_params_682d4c31-272e-4290-910f-1cdca4012aad.pt', 'model_class': 'MyTrainingPlan', 'training_data': {'node_30f10bb5-4751-4306-8ee2-e0586d6e926b': ['dataset_aeeba6e7-ffb4-4c01-8da5-1d549cedbf01']}}
2022-03-15 09:43:20,142 fedbiomed DEBUG - researcher_cc7c7db6-7b8b-4eef-b951-6ff12b238da3
2022-03-15



2022-03-15 09:43:52,410 fedbiomed CRITICAL - Fed-BioMed researcher stopped due to keyboard interrupt



--------------------
Fed-BioMed researcher stopped due to keyboard interrupt
--------------------


## Model trained with Central DP-FL 

In [None]:
import torch
import torch.nn as nn
from fedbiomed.common.training_plans import TorchTrainingPlan
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from math import sqrt, log

# Here we define the model to be used. 
# You can use any class name (here 'MyTrainingPlan_noRDP')
class MyTrainingPlan_noRDP(TorchTrainingPlan):
    """CNN training plan for MNIST without any node-side perturbation.

    The network is two convolutions followed by two fully connected layers.
    This class defines no `postprocess` step, so the trained parameters are not
    modified here.

    NOTE(review): `self.dataset_path` is not set in this class; presumably it is
    provided by `TorchTrainingPlan` or the node-side training routine -- confirm.
    """

    def __init__(self, model_args: dict = {}):
        """Build the layers and declare the node-side dependencies.

        Args:
            model_args: forwarded unchanged to `TorchTrainingPlan.__init__`.
        """
        super(MyTrainingPlan_noRDP, self).__init__(model_args)
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        # 9216 = 64 channels * 12 * 12 after two 3x3 convolutions and a 2x2
        # max-pool applied to 28x28 inputs.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

        # Here we define the custom dependencies that will be needed by our custom Dataloader
        # In this case, we need the torch DataLoader classes
        # Since we will train on MNIST, we need datasets and transform from torchvision
        deps = ["from torchvision import datasets, transforms",
               "from torch.utils.data import DataLoader",
               "import torch",
               "from math import sqrt, log"]
        self.add_dependency(deps)

    def forward(self, x):
        """Return the log-probabilities (log-softmax over dim 1) for the batch `x`."""
        # `torch.nn.functional` is used explicitly: no `F` alias is imported by
        # this notebook or listed in the dependencies above.
        functional = torch.nn.functional
        x = self.conv1(x)
        x = functional.relu(x)
        x = self.conv2(x)
        x = functional.relu(x)
        x = functional.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = functional.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)

        output = functional.log_softmax(x, dim=1)
        return output

    def training_data(self, batch_size = 48):
        """Return a shuffled DataLoader over the MNIST training split stored at `self.dataset_path`."""
        # Custom torch Dataloader for MNIST data
        transform = transforms.Compose([transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))])
        dataset1 = datasets.MNIST(self.dataset_path, train=True, download=False, transform=transform)
        train_kwargs = {'batch_size': batch_size, 'shuffle': True}
        data_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
        return data_loader

    def training_step(self, data, target):
        """Return the negative log-likelihood loss of the model output on one batch."""
        output = self.forward(data)
        loss   = torch.nn.functional.nll_loss(output, target)
        return loss

In [None]:
# Arguments for the training routine on the node side.
training_args = dict(
    batch_size=48,
    lr=1e-3,
    fedprox_mu=0.01,
    epochs=1,
    dry_run=False,
    # Fast pass for development : only use ( batch_maxnum * batch_size ) samples
    batch_maxnum=100,
)

In [None]:
from fedbiomed.researcher.experiment import Experiment
from fedbiomed.researcher.aggregators.fedavg import FedAverage

# Tags used to search the nodes for matching datasets.
tags =  ['#MNIST', '#dataset']

# Clipping threshold and noise level handed to the aggregator.
# NOTE(review): assumes FedAverage accepts a `Central_DP_params` keyword in this
# code base -- confirm.
central_DP_params = {'clip_threshold' : 1., 'sigma': .1}

# Build the experiment with the plain training plan; no model_args are passed.
# FL_rounds is the value computed in the accounting cell earlier in the notebook.
exp = Experiment(tags=tags,
                 #nodes=None,
#                 model_args=model_args,
                 model_class=MyTrainingPlan_noRDP,
                 training_args=training_args,
                 round_limit=FL_rounds,
                 aggregator=FedAverage(Central_DP_params = central_DP_params),
                 node_selection_strategy=None)


In [None]:
# Run the experiment defined in the previous cell.
exp.run()