# Data Preprocessing : Download Used Cars Dataset 
https://www.kaggle.com/adityadesai13/used-car-dataset-ford-and-mercedes

In [None]:
# Directory that holds the CSV files downloaded from Kaggle;
# edit this path to match the location of your download.
data_dir = "./data"

In [None]:
import pandas as pd

# Dataset Details
The data consists of about 100,000 used car listings, which have been separated into files corresponding to each car manufacturer. Each file simulates the data held by one node.

# Goal

The goal of this tutorial is to build a federated regression model on a non-IID dataset and to select the best model by validating on a hold-out dataset and tuning hyperparameters. The metric used to select the best model is RMSE.

In [None]:
import os


def _raw_csv_path(brand):
    """Return the path of the raw `<brand>.csv` file inside `data_dir`."""
    return os.path.join(data_dir, brand + ".csv")


# Training data: audi and bmw, one manufacturer per node (2 nodes)
audi = pd.read_csv(_raw_csv_path("audi"))
bmw = pd.read_csv(_raw_csv_path("bmw"))

# Test data: Ford is kept for the final validation at the central researcher
ford = pd.read_csv(_raw_csv_path("ford"))

# To run more than 2 nodes, uncomment the manufacturers you need below and the
# corresponding lines in the following cells.
# cclass = pd.read_csv(_raw_csv_path("cclass"))
# focus = pd.read_csv(_raw_csv_path("focus"))
# NOTE(review): check the spelling of the hyundai file name against your download
# (the Kaggle archive appears to name it `hyundi.csv` — TODO confirm).
# hyundai = pd.read_csv(_raw_csv_path("huyndai"))
# merc = pd.read_csv(_raw_csv_path("merc"))
# skoda = pd.read_csv(_raw_csv_path("skoda"))
# toyota = pd.read_csv(_raw_csv_path("toyota"))
# vauxhall = pd.read_csv(_raw_csv_path("vauxhall"))
# vw = pd.read_csv(_raw_csv_path("vw"))

Drop columns model & fuelType as labels are not consistent across files. A better solution could be vertical federated learning

In [None]:
# Columns whose labels are not consistent across the manufacturer files
COLUMNS_TO_DROP = ['model', 'fuelType']

# Reassign instead of dropping in place, and ignore columns that are already gone,
# so that re-running this cell does not raise a KeyError.
audi = audi.drop(columns=COLUMNS_TO_DROP, errors='ignore')
bmw = bmw.drop(columns=COLUMNS_TO_DROP, errors='ignore')
ford = ford.drop(columns=COLUMNS_TO_DROP, errors='ignore')

# cclass = cclass.drop(columns=COLUMNS_TO_DROP, errors='ignore')
# focus = focus.drop(columns=COLUMNS_TO_DROP, errors='ignore')
# hyundai = hyundai.drop(columns=COLUMNS_TO_DROP, errors='ignore')
# merc = merc.drop(columns=COLUMNS_TO_DROP, errors='ignore')
# skoda = skoda.drop(columns=COLUMNS_TO_DROP, errors='ignore')
# toyota = toyota.drop(columns=COLUMNS_TO_DROP, errors='ignore')
# vauxhall = vauxhall.drop(columns=COLUMNS_TO_DROP, errors='ignore')
# vw = vw.drop(columns=COLUMNS_TO_DROP, errors='ignore')

Label encode transmission column

In [None]:
# Integer code assigned to each transmission label
TRANSMISSION_CODES = {'Automatic': 0, 'Manual': 1, 'Semi-Auto': 2, 'Other': 3}


def encode_transmission(column):
    """Label-encode a `transmission` column using `TRANSMISSION_CODES`.

    A column that already contains only encoded values is returned unchanged, so
    this cell can be re-run safely (applying a plain `.map` twice would turn
    every value into NaN).

    Args:
        column: pandas Series holding the transmission labels.

    Returns:
        A Series with each label replaced by its integer code.

    Raises:
        ValueError: if the column holds a label that is missing from
            `TRANSMISSION_CODES` (a plain `.map` would silently yield NaN).
    """
    if column.isin(list(TRANSMISSION_CODES.values())).all():
        # Already encoded (e.g. the cell was executed before): nothing to do
        return column
    unknown_labels = set(column.unique()) - set(TRANSMISSION_CODES)
    if unknown_labels:
        raise ValueError(
            "Unknown transmission label(s): "
            + ", ".join(sorted(str(label) for label in unknown_labels))
        )
    return column.map(TRANSMISSION_CODES)


audi['transmission'] = encode_transmission(audi['transmission'])
bmw['transmission'] = encode_transmission(bmw['transmission'])
ford['transmission'] = encode_transmission(ford['transmission'])

# cclass['transmission'] = encode_transmission(cclass['transmission'])
# focus['transmission'] = encode_transmission(focus['transmission'])
# hyundai['transmission'] = encode_transmission(hyundai['transmission'])
# merc['transmission'] = encode_transmission(merc['transmission'])
# skoda['transmission'] = encode_transmission(skoda['transmission'])
# toyota['transmission'] = encode_transmission(toyota['transmission'])
# vauxhall['transmission'] = encode_transmission(vauxhall['transmission'])
# vw['transmission'] = encode_transmission(vw['transmission'])

In [None]:
# Tables to export, keyed by the prefix of the output file name.
# Uncomment an entry once the matching manufacturer has been loaded and
# transformed in the cells above.
transformed_tables = {
    'audi': audi,
    'bmw': bmw,
    'ford': ford,
    # 'cclass': cclass,
    # 'focus': focus,
    # 'hyundai': hyundai,
    # 'merc': merc,
    # 'skoda': skoda,
    # 'toyota': toyota,
    # 'vauxhall': vauxhall,
    # 'vw': vw,
}

# Write every table to `<data_dir>/<brand>_transformed.csv`, with a header row
# and without the index column.
for brand, table in transformed_tables.items():
    table.to_csv(os.path.join(data_dir, brand + '_transformed.csv'), header=True, index=False)

# Fedbiomed Researcher to train a model on a Used Cars dataset

Use for developing (autoreloads changes made across packages)

In [None]:
# Development helper: enable the autoreload extension so that changes made to
# the imported packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Start the network
Before running this notebook, start the network with `./scripts/fedbiomed_run network`

## Setting the nodes up
It is necessary to previously configure 2 nodes:
1. `./scripts/fedbiomed_run node config config1.ini add` (node1) and `./scripts/fedbiomed_run node config config2.ini add` (node2)
  * Select option 1 to add a csv file to the node
    * use the `audi_transformed.csv` file (node 1) and `bmw_transformed.csv` file (node 2)
  * Choose the name, tags and description of the dataset
    * choose tag `UsedCars` (or modify the used tag in this notebook)
  * Spin up as many nodes as you want (at most one node per csv file; the used cars dataset has 11 csv files). Hold out one file for testing.
  * Load the .csv file generated using above mentioned notebook to individual nodes
2. Check that your data has been added by executing `./scripts/fedbiomed_run node config config1.ini list` (node1) and `./scripts/fedbiomed_run node config config2.ini list` (node2)
3. Run the node using `./scripts/fedbiomed_run node config config1.ini start` (node1) and `./scripts/fedbiomed_run node config config2.ini start` (node2). Wait until you get `Starting task manager`, which means the node is online.

## Create an experiment to train a model on the data found

Declare a torch.nn MyTrainingPlan class to send for training on the node

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset
from fedbiomed.common.training_plans import TorchTrainingPlan
from fedbiomed.common.data import DataManager 


# Here we define the model to be used. 
# You can use any class name (here 'MyTrainingPlan')
class MyTrainingPlan(TorchTrainingPlan):
    """Training plan for the used-cars price regression.

    Bundles a two-layer network (`fc1` -> ReLU -> `fc2`), an RMSE training loss
    and the dataset definition used to read the local .csv file at the node side.
    """

    def __init__(self, model_args: dict = {}):
        """Build the layers and declare the imports needed by the custom dataset.

        Args:
            model_args: must provide `in_features` and `out_features`
                (a missing key raises `KeyError`). `model_args` is only read here,
                never modified.
        """
        super(MyTrainingPlan, self).__init__(model_args)
        # should match the model arguments passed below to the experiment class
        self.in_features = model_args['in_features']
        self.out_features = model_args['out_features']
        self.fc1 = nn.Linear(self.in_features, 5)
        self.fc2 = nn.Linear(5, self.out_features)

        # Here we define the custom dependencies that will be needed by our custom Dataset
        # In this case, we need the torch Dataset class
        # We need pandas to read the local .csv file at the node side
        deps = ["from torch.utils.data import Dataset",
                "import pandas as pd"]
        self.add_dependency(deps)

    def forward(self, x):
        """Apply `fc1`, a ReLU, then `fc2` to the input batch `x`."""
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x

    def training_step(self, data, target):
        """Return the RMSE between the model output for `data` and `target`.

        `target` is unsqueezed on dimension 1 before being compared with the output.
        """
        output = self.forward(data).float()
        criterion = torch.nn.MSELoss()
        # square root of the mean squared error -> RMSE
        loss = torch.sqrt(criterion(output, target.unsqueeze(1)))
        return loss

    class csv_Dataset(Dataset):
        """Torch Dataset built from a .csv file of used-car listings.

        Reads the file at `dataset_path` and exposes the feature columns as
        `X_train` and the `price` column as `Y_train`, both as float tensors.
        """

        # Columns used as model inputs; their count must match `in_features`
        FEATURE_COLUMNS = ['year', 'transmission', 'mileage', 'tax', 'mpg', 'engineSize']

        def __init__(self, dataset_path, x_dim):
            """Load the .csv file and convert features and target to tensors.

            Args:
                dataset_path: path of the .csv file to read.
                x_dim: accepted for interface compatibility but currently unused;
                    the feature columns are fixed by `FEATURE_COLUMNS`.
            """
            self.input_file = pd.read_csv(dataset_path, sep=',', index_col=False)
            # A list of labels is the unambiguous way to select several columns with .loc
            # (a tuple is the form pandas uses for MultiIndex keys).
            x_train = self.input_file.loc[:, self.FEATURE_COLUMNS].values
            y_train = self.input_file.loc[:, 'price'].values
            self.X_train = torch.from_numpy(x_train).float()
            self.Y_train = torch.from_numpy(y_train).float()

        def __len__(self):
            """Return the number of rows (listings) in the dataset."""
            return len(self.Y_train)

        def __getitem__(self, idx):
            """Return the `(features, price)` pair at position `idx`."""
            return (self.X_train[idx], self.Y_train[idx])

    def training_data(self, batch_size = 48):
        """Create the data manager used for training.

        Builds a `csv_Dataset` from `self.dataset_path` and wraps it in a
        `DataManager` together with the `batch_size` and `shuffle=True` options.
        """
        dataset = self.csv_Dataset(self.dataset_path, self.in_features)
        train_kwargs = {'batch_size': batch_size, 'shuffle': True}
        return DataManager(dataset=dataset, **train_kwargs)
    
    

In [11]:
# model parameters: input and output sizes of the network
model_args = dict(in_features=6, out_features=1)

# training parameters
training_args = dict(
    batch_size=40,
    lr=1e-3,
    epochs=2,
    dry_run=False,
    # batch_maxnum=100,  # Fast pass for development : only use ( batch_maxnum * batch_size ) samples
)

Define an experiment
- search nodes serving data for these `tags`, optionally filter on a list of node ID with `nodes`
- run a round of local training on nodes with the model defined in `model_class`, followed by federation with `aggregator`
- run for `round_limit` rounds, applying the `node_selection_strategy` between the rounds

In [12]:
from fedbiomed.researcher.aggregators.fedavg import FedAverage
from fedbiomed.researcher.experiment import Experiment

# Number of federated rounds to run
rounds = 3
# Datasets are looked up on the nodes by tag. Change the following tag accordingly
tags = ['UsedCars']

# Collect the experiment settings first, then build the experiment from them
experiment_settings = dict(
    tags=tags,
    model_class=MyTrainingPlan,
    model_args=model_args,
    training_args=training_args,
    round_limit=rounds,
    aggregator=FedAverage(),
    node_selection_strategy=None,
)
exp = Experiment(**experiment_settings)

2022-03-08 15:26:07,170 fedbiomed INFO - Searching dataset with data tags: ['UsedCars'] for all nodes
2022-03-08 15:26:07,175 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Message received: {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'tags': ['UsedCars'], 'command': 'search'}
2022-03-08 15:26:17,213 fedbiomed INFO - Node selected for training -> node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0
2022-03-08 15:26:17,214 fedbiomed INFO - Node selected for training -> node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0
2022-03-08 15:26:17,245 fedbiomed DEBUG - Model file has been saved: /home/scansiz/Desktop/Inria/development/fedbiomed/var/experiments/Experiment_0082/my_model_8181e794-f0bb-4402-8830-3b687aa03bf0.py
2022-03-08 15:26:17,286 fedbiomed DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/experiments/Experiment_0082/my_model_8181e794-f0bb-4402-8830-3b687aa03bf0.py successful, with status code 20

Let's start the experiment.

By default, this function doesn't stop until all the `round_limit` rounds are done for all the nodes

In [13]:
# Launch the training; this call runs the experiment rounds on the nodes
exp.run()

2022-03-08 15:26:20,079 fedbiomed INFO - Sampled nodes in round 0 ['node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0']
2022-03-08 15:26:20,080 fedbiomed INFO - Send message to node node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 - {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'job_id': '09302378-1890-4ee6-a256-76027c15053e', 'training_args': {'batch_size': 40, 'lr': 0.001, 'epochs': 2, 'dry_run': False}, 'model_args': {'in_features': 6, 'out_features': 1}, 'command': 'train', 'model_url': 'http://localhost:8844/media/uploads/2022/03/08/my_model_8181e794-f0bb-4402-8830-3b687aa03bf0.py', 'params_url': 'http://localhost:8844/media/uploads/2022/03/08/aggregated_params_init_42bbfbff-04a9-4cb3-9f55-70627b36eb09.pt', 'model_class': 'MyTrainingPlan', 'training_data': {'node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0': ['dataset_5ed4224a-ae70-4661-b0ef-588f589522a3']}}
2022-03-08 15:26:20,082 fedbiomed DEBUG - researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab
2022-03-08 15:26:20,087 fedbiomed

2022-03-08 15:26:21,375 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - running model.postprocess() method
2022-03-08 15:26:21,378 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - model.postprocess() method not provided
2022-03-08 15:26:21,429 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/tmp/node_params_da677b74-44ff-40c4-925f-c208327ec5c2.pt successful, with status code 201
2022-03-08 15:26:21,431 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - results uploaded successfully 
2022-03-08 15:26:30,095 fedbiomed INFO - Downloading model params after training on node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 - from http://localhost:8844/media/uploads/2022/03/08/node_params_da677b74-44ff-40c4-925f-c208327ec5c2.pt
2022-03-08 15:26:30,104 fedbiomed DEBUG - upload (HTTP GET request) of fi

2022-03-08 15:26:30,187 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP GET request) of file my_model_8c84562a-485c-4d22-bca3-4eb5ee45dcaa.pt successful, with status code 200
2022-03-08 15:26:30,195 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Dataset path has been set as/home/scansiz/Desktop/Inria/development/data/used-cars/processed/audi_transformed.csv
2022-03-08 15:26:30,224 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - training with arguments {'monitor': <fedbiomed.node.history_monitor.HistoryMonitor object at 0x7f14ab3b1a60>, 'node_args': {'gpu': False, 'gpu_num': None, 'gpu_only': False}, 'training_data': <torch.utils.data.dataloader.DataLoader object at 0x7f14ab387df0>, 'batch_size': 40, 'lr': 0.001, 'epochs': 2, 'dry_run': False}
2022-03-08 15:26:30,225 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Using device cpu for training (cuda_av

2022-03-08 15:26:31,553 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - running model.postprocess() method
2022-03-08 15:26:31,557 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - model.postprocess() method not provided
2022-03-08 15:26:31,618 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/tmp/node_params_37c527f3-b038-49dd-8785-450b9024f1d8.pt successful, with status code 201
2022-03-08 15:26:31,619 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - results uploaded successfully 
2022-03-08 15:26:40,174 fedbiomed INFO - Downloading model params after training on node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 - from http://localhost:8844/media/uploads/2022/03/08/node_params_37c527f3-b038-49dd-8785-450b9024f1d8.pt
2022-03-08 15:26:40,180 fedbiomed DEBUG - upload (HTTP GET request) of fi

2022-03-08 15:26:41,791 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - running model.postprocess() method
2022-03-08 15:26:41,797 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - model.postprocess() method not provided
2022-03-08 15:26:41,858 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/tmp/node_params_bd7ae0c1-3bf3-426d-acf5-993123305a0c.pt successful, with status code 201
2022-03-08 15:26:41,859 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - results uploaded successfully 
2022-03-08 15:26:50,230 fedbiomed INFO - Downloading model params after training on node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 - from http://localhost:8844/media/uploads/2022/03/08/node_params_bd7ae0c1-3bf3-426d-acf5-993123305a0c.pt
2022-03-08 15:26:50,237 fedbiomed DEBUG - upload (HTTP GET request) of fi

3

2022-03-08 15:43:49,977 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / CRITICAL - Node stopped in signal_handler, probably by user decision (Ctrl C)
2022-03-08 15:57:25,295 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Starting task manager
2022-03-08 15:58:44,871 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Message received: {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'tags': ['perp'], 'command': 'search'}
2022-03-08 15:59:10,060 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Message received: {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'job_id': 'dd451bbe-c9b6-476c-8b94-90ddd571b9a3', 'training_args': {'epochs': 5}, 'model_args': {'model': 'Perceptron', 'max_iter': 1000, 'tol': 0.001, 'n_features': 20, 'n_classes': 2, 'verbose': 1}, 'command': 'train', 'model_url': 'http://localhost:8844/media/uploads/2022/03/08/my

2022-03-08 16:08:34,233 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / ERROR - Cannot train model in round: training_routine() got an unexpected keyword argument 'training_data'
2022-03-08 16:08:48,720 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Message received: {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'tags': ['perp'], 'command': 'search'}
2022-03-08 16:08:58,903 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Message received: {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'job_id': 'f5a55e30-dc28-4661-b0d1-cd4cb9a1ceb1', 'training_args': {'epochs': 5}, 'model_args': {'model': 'Perceptron', 'max_iter': 1000, 'tol': 0.001, 'n_features': 20, 'n_classes': 2, 'verbose': 1}, 'command': 'train', 'model_url': 'http://localhost:8844/media/uploads/2022/03/08/my_model_05700ab6-74e4-4034-8edb-8e8de2f3e42b.py', 'params_url': 'http://localhost:8844/media/upl

2022-03-08 16:11:48,932 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 0 [Batch All Samples]	Loss: 1.516319
2022-03-08 16:11:48,934 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 1 [Batch All Samples]	Loss: 1.656794
2022-03-08 16:11:48,937 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 2 [Batch All Samples]	Loss: 1.464042
2022-03-08 16:11:48,939 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 3 [Batch All Samples]	Loss: 1.858264
2022-03-08 16:11:48,941 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 4 [Batch All Samples]	Loss: 1.630331
2022-03-08 16:11:48,965 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/tmp/node_params_229064b3-c0f3-4c47-b354-55997bf3

2022-03-08 16:12:08,877 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - training with arguments {'monitor': <fedbiomed.node.history_monitor.HistoryMonitor object at 0x7f05f2dea100>, 'node_args': {'gpu': False, 'gpu_num': None, 'gpu_only': False}, 'training_data': (array([[-1.38508708,  0.14601715,  0.58820641, ...,  0.08618543,
        -1.86175975,  1.64960973],
       [ 0.27933924,  1.49126993,  1.63888944, ...,  0.24927403,
         0.22697878, -1.41378304],
       [-2.24533583, -0.09639113,  0.97855908, ...,  0.16005979,
        -1.94213217,  0.2856502 ],
       ...,
       [-1.85766476,  0.32846248, -0.53351387, ..., -0.64263353,
         1.03823021,  1.03080809],
       [-0.87844727, -0.62743658,  0.6044344 , ..., -0.6277502 ,
         0.3308814 , -1.60877914],
       [ 0.2392055 , -0.81827537, -0.22661307, ...,  0.78690322,
        -0.65464318, -0.06810045]]), array([1., 1., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 1., 0., 0., 1.,
       1., 1., 0

2022-03-08 16:12:29,072 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - [TASKS QUEUE] Item:{'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'job_id': '291b6772-97cf-40f8-a2bb-7794e4c53a2f', 'params_url': 'http://localhost:8844/media/uploads/2022/03/08/aggregated_params_2c3fecfe-1e24-48a9-9d08-d0ee3681f272.pt', 'training_args': {'epochs': 5}, 'training_data': {'node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0': ['dataset_1e77184c-4217-4c38-a0be-14d43df40c21']}, 'model_args': {'model': 'Perceptron', 'max_iter': 1000, 'tol': 0.001, 'n_features': 20, 'n_classes': 2, 'verbose': 1}, 'model_url': 'http://localhost:8844/media/uploads/2022/03/08/my_model_fa8ad60a-5182-4c99-8042-42e63cb5d356.py', 'model_class': 'SkLearnTrainingPlan', 'command': 'train'}
2022-03-08 16:12:29,082 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP GET request) of file my_model_c02b4a4de5f34229bfea280b7dd0cf2a.py successful, with sta

2022-03-08 16:12:39,146 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 2 [Batch All Samples]	Loss: 1.717696
2022-03-08 16:12:39,149 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 3 [Batch All Samples]	Loss: 1.589296
2022-03-08 16:12:39,152 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 4 [Batch All Samples]	Loss: 1.667463
2022-03-08 16:12:39,177 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/tmp/node_params_55fd212a-ee9b-4c07-b550-43e4d9edd794.pt successful, with status code 201
2022-03-08 16:12:39,178 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - results uploaded successfully 
2022-03-08 16:12:49,206 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Message received: {'researc

2022-03-08 16:12:59,366 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 0 [Batch All Samples]	Loss: 1.480221
2022-03-08 16:12:59,367 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 1 [Batch All Samples]	Loss: 1.950884
2022-03-08 16:12:59,370 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 2 [Batch All Samples]	Loss: 1.750854
2022-03-08 16:12:59,372 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 3 [Batch All Samples]	Loss: 1.536525
2022-03-08 16:12:59,374 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 4 [Batch All Samples]	Loss: 1.752948
2022-03-08 16:12:59,396 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/tmp/node_params_a64fe66f-a686-4037-9e28-7275abef

2022-03-08 16:14:46,882 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 1 [Batch All Samples]	Loss: 1.455672
2022-03-08 16:14:46,885 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 2 [Batch All Samples]	Loss: 1.946618
2022-03-08 16:14:46,889 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 3 [Batch All Samples]	Loss: 1.429265
2022-03-08 16:14:46,892 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Train Epoch: 4 [Batch All Samples]	Loss: 1.820139
2022-03-08 16:14:46,922 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/tmp/node_params_2aadc1cc-4d85-4197-bbf1-a2a2deb4f2ac.pt successful, with status code 201
2022-03-08 16:14:46,923 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - results u

2022-03-08 16:15:07,143 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - results uploaded successfully 
2022-03-08 16:15:17,098 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Message received: {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'job_id': '31c8661b-48f8-4db6-a232-acb26c20652e', 'training_args': {'epochs': 5}, 'model_args': {'model': 'Perceptron', 'max_iter': 1000, 'tol': 0.001, 'n_features': 20, 'n_classes': 2, 'verbose': 1}, 'command': 'train', 'model_url': 'http://localhost:8844/media/uploads/2022/03/08/my_model_b508dd3f-46ac-4b62-8f99-11566b0a5aa8.py', 'params_url': 'http://localhost:8844/media/uploads/2022/03/08/aggregated_params_cf679e8d-a3ea-4221-9621-d2a7049a8f63.pt', 'model_class': 'SkLearnTrainingPlan', 'training_data': {'node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0': ['dataset_1e77184c-4217-4c38-a0be-14d43df40c21']}}
2022-03-08 16:15:17,099 fedbiomed INFO - log from: node_bd90cc6f-67c4-

2022-03-08 16:15:37,348 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - [TASKS QUEUE] Item:{'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'job_id': '31c8661b-48f8-4db6-a232-acb26c20652e', 'params_url': 'http://localhost:8844/media/uploads/2022/03/08/aggregated_params_b9416a3b-294c-4dc4-9f6f-b241579e70ed.pt', 'training_args': {'epochs': 5}, 'training_data': {'node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0': ['dataset_1e77184c-4217-4c38-a0be-14d43df40c21']}, 'model_args': {'model': 'Perceptron', 'max_iter': 1000, 'tol': 0.001, 'n_features': 20, 'n_classes': 2, 'verbose': 1}, 'model_url': 'http://localhost:8844/media/uploads/2022/03/08/my_model_b508dd3f-46ac-4b62-8f99-11566b0a5aa8.py', 'model_class': 'SkLearnTrainingPlan', 'command': 'train'}
2022-03-08 16:15:37,358 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP GET request) of file my_model_77e25db7adbd4dc982dba571e153c527.py successful, with sta

2022-03-08 16:18:53,444 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - [TASKS QUEUE] Item:{'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'job_id': '099b3d60-decf-4523-81ba-96ffad80abfd', 'params_url': 'http://localhost:8844/media/uploads/2022/03/08/aggregated_params_init_4f42e690-b476-4d91-9288-8ef8b632f01d.pt', 'training_args': {'epochs': 5}, 'training_data': {'node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0': ['dataset_cc5be93f-bfbf-4343-be4e-85252407b135']}, 'model_args': {'max_iter': 2000, 'tol': 1e-05, 'eta0': 0.05, 'model': 'SGDRegressor', 'n_features': 8, 'random_state': 1234, 'verbose': 1}, 'model_url': 'http://localhost:8844/media/uploads/2022/03/08/my_model_16f8ec71-2bbf-4cd5-8187-80a6ff65b97e.py', 'model_class': 'SGDRegressorTrainingPlan', 'command': 'train'}
2022-03-08 16:18:53,453 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP GET request) of file my_model_c5b0295afacd4225bdce75382

2022-03-08 16:26:19,962 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Message received: {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'tags': ['adni'], 'command': 'search'}
2022-03-08 16:26:30,123 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Message received: {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'job_id': '407669c9-93a1-4a1a-ab00-4468a338f5a1', 'training_args': {'epochs': 5}, 'model_args': {'max_iter': 2000, 'tol': 1e-05, 'eta0': 0.05, 'model': 'SGDRegressor', 'n_features': 8, 'random_state': 1234, 'verbose': 1}, 'command': 'train', 'model_url': 'http://localhost:8844/media/uploads/2022/03/08/my_model_463f0b23-03b9-4b64-bc1e-6f56a4d97f26.py', 'params_url': 'http://localhost:8844/media/uploads/2022/03/08/aggregated_params_init_76e2735a-f45e-443f-8cb1-81fd4b3d32ef.pt', 'model_class': 'SGDRegressorTrainingPlan', 'training_data': {'node_bd90cc6f-67c4-4f81-aaa5-588c9c26c

2022-03-08 16:33:10,600 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / ERROR - Did not found proper data in local datasets on node=node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0
2022-03-08 16:36:39,866 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / CRITICAL - Node stopped in signal_handler, probably by user decision (Ctrl C)
2022-03-08 16:36:40,054 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / CRITICAL - Node stopped in signal_handler, probably by user decision (Ctrl C)
2022-03-08 16:37:28,607 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - Starting task manager
2022-03-08 16:37:33,803 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Message received: {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'tags': ['#test_data'], 'command': 'search'}
2022-03-08 16:38:10,830 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEB

2022-03-08 16:38:21,874 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - running model.postprocess() method
2022-03-08 16:38:21,876 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - model.postprocess() method not provided
2022-03-08 16:38:21,924 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/tmp/node_params_0d73cd00-2a9c-421f-b1fb-9b57390c18a3.pt successful, with status code 201
2022-03-08 16:38:21,925 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - results uploaded successfully 


Local training results for each round and each node are available via `exp.training_replies()` (index 0 to (`rounds` - 1) ).

For example you can view the training results for the last round below.

Different timings (in seconds) are reported for each dataset of a node participating in a round :
- `rtime_training` real time (clock time) spent in the training function on the node
- `ptime_training` process time (user and system CPU) spent in the training function on the node
- `rtime_total` real time (clock time) spent in the researcher between sending the request and handling the response, at the `Job()` layer

In [None]:
# Index of the last training round
last_round = rounds - 1

print("\nList the training rounds : ", exp.training_replies().keys())

print("\nList the nodes for the last training round and their timings : ")
round_data = exp.training_replies()[last_round].data()

# One entry per node reply: node id followed by its three timings (in seconds)
timing_template = (
    "\t- {id} :    "
    "\n\t\trtime_training={rtraining:.2f} seconds    "
    "\n\t\tptime_training={ptraining:.2f} seconds    "
    "\n\t\trtime_total={rtotal:.2f} seconds"
)
for node_reply in round_data:
    timing = node_reply['timing']
    print(timing_template.format(id=node_reply['node_id'],
                                 rtraining=timing['rtime_training'],
                                 ptraining=timing['ptime_training'],
                                 rtotal=timing['rtime_total']))
print('\n')

# Last expression of the cell: shown as the cell output
exp.training_replies()[last_round].dataframe()

Federated parameters for each round are available via `exp.aggregated_params()` (index 0 to (`rounds` - 1) ).

For example you can view the federated parameters for the last round of the experiment :

In [None]:
print("\nList the training rounds : ", exp.aggregated_params().keys())

print("\nAccess the federated params for the last training round :")
# Entry of the last round: holds the path of the saved parameters and the parameters themselves
last_round_params = exp.aggregated_params()[rounds - 1]
print("\t- params_path: ", last_round_params['params_path'])
print("\t- parameter data: ", last_round_params['params'].keys())


# Test Function

In [None]:
# Get a model instance from the experiment and load into it the parameters
# aggregated in the last round
fed_model = exp.model_instance()
final_round_params = exp.aggregated_params()[rounds - 1]['params']
fed_model.load_state_dict(final_round_params)

In [None]:
 # Display the federated model
 fed_model

In [None]:
# Hold one file for testing the fed model
# NOTE: relies on `os` and `data_dir` defined in the preprocessing cells at the top
# of this notebook; they must have been executed in the same kernel.
test_dataset_path = os.path.join(data_dir, "ford_transformed.csv")

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd

def cal_rmse(actual, prediction):
    return ((actual- prediction)**2).mean()**0.5

def testing_rmse(model, data_loader):
    model.eval()
    test_loss = 0
    correct = 0
    device = 'cpu'
    preds = []
    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            preds.append(output.numpy().flatten())
    rmse = cal_rmse(data_loader.dataset.Y_train.numpy(),np.hstack(preds))
    return rmse

In [None]:
class csv_Dataset(Dataset):
    """Torch Dataset built from a transformed used-cars .csv file.

    Exposes the feature columns as `X_train` and the `price` column as
    `Y_train`, both as float tensors.
    """

    # Columns used as model inputs
    FEATURE_COLUMNS = ['year', 'transmission', 'mileage', 'tax', 'mpg', 'engineSize']

    def __init__(self, dataset_path):
        """Read the .csv file at `dataset_path` and convert it to tensors."""
        self.input_file = pd.read_csv(dataset_path, sep=',', index_col=False)
        # A list of labels is the unambiguous way to select several columns with .loc
        # (a tuple is the form pandas uses for MultiIndex keys).
        x_train = self.input_file.loc[:, self.FEATURE_COLUMNS].values
        y_train = self.input_file.loc[:, 'price'].values
        self.X_train = torch.from_numpy(x_train).float()
        self.Y_train = torch.from_numpy(y_train).float()

    def __len__(self):
        """Return the number of rows (listings) in the dataset."""
        return len(self.Y_train)

    def __getitem__(self, idx):
        """Return the `(features, price)` pair at position `idx`."""
        return (self.X_train[idx], self.Y_train[idx])

In [None]:
# Build the test dataset and its loader from the hold-out file
dataset = csv_Dataset(test_dataset_path)
# No shuffling for evaluation: predictions then come out in the same order as
# the targets stored in the dataset, which the RMSE computation compares them with.
train_kwargs = {'batch_size': 64, 'shuffle': False}
data_loader = DataLoader(dataset, **train_kwargs)

In [None]:
# Evaluate the federated model on the hold-out test data
rmse = testing_rmse(fed_model, data_loader)

In [None]:
# Display the RMSE obtained on the test data
rmse