# Fed-BioMed Researcher base example

This example uses MNIST dataset. Please check `README.md` file in `notebooks` directory for the instructions to load MNIST dataset and configure nodes.


In [1]:
from fedbiomed.researcher.environ import environ
environ["EXPERIMENTS_DIR"]

  from .autonotebook import tqdm as notebook_tqdm


'/Users/scansiz/development/fedbiomed/fbm-researcher/var/experiments'

## Define an experiment model and parameters

Declare a torch training plan MyTrainingPlan class to send for training on the node

In [1]:
import torch
import torch.nn as nn
from fedbiomed.common.training_plans import TorchTrainingPlan
from fedbiomed.common.data import DataManager
from torchvision import datasets, transforms


# Here we define the model to be used. 
# You can use any class name (here 'Net')
class MyTrainingPlan(TorchTrainingPlan):
    
    # Defines and return model 
    def init_model(self, model_args):
        return self.Net(model_args = model_args)
    
    # Defines and return optimizer
    def init_optimizer(self, optimizer_args):
        return torch.optim.Adam(self.model().parameters(), lr = optimizer_args["lr"])
    
    # Declares and return dependencies
    def init_dependencies(self):
        deps = ["from torchvision import datasets, transforms"]
        return deps
    
    class Net(nn.Module):
        def __init__(self, model_args):
            super().__init__()
            self.conv1 = nn.Conv2d(1, 32, 3, 1)
            self.conv2 = nn.Conv2d(32, 64, 3, 1)
            self.dropout1 = nn.Dropout(0.25)
            self.dropout2 = nn.Dropout(0.5)
            self.fc1 = nn.Linear(9216, 128)
            self.fc2 = nn.Linear(128, 10)

        def forward(self, x):
            x = self.conv1(x)
            x = F.relu(x)
            x = self.conv2(x)
            x = F.relu(x)
            x = F.max_pool2d(x, 2)
            x = self.dropout1(x)
            x = torch.flatten(x, 1)
            x = self.fc1(x)
            x = F.relu(x)
            x = self.dropout2(x)
            x = self.fc2(x)


            output = F.log_softmax(x, dim=1)
            return output

    def training_data(self):
        # Custom torch Dataloader for MNIST data
        transform = transforms.Compose([transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))])
        dataset1 = datasets.MNIST(self.dataset_path, train=True, download=False, transform=transform)
        train_kwargs = { 'shuffle': True}
        return DataManager(dataset=dataset1, **train_kwargs)
    
    def training_step(self, data, target):
        output = self.model().forward(data)
        loss   = torch.nn.functional.nll_loss(output, target)
        return loss


### Model arguments and training arguments

In [2]:
from fedbiomed.common.metrics import MetricTypes
model_args = {}

training_args = {
    'loader_args': { 'batch_size': 48, }, 
    'optimizer_args': {
        "lr" : 1e-3
    },
    #'test_ratio' : 0.25,
    #'test_batch_size': 64,
    #'test_metric': MetricTypes.F1_SCORE,
    #'test_on_global_updates': True,
    #'test_on_local_updates': True,
    #'test_metric_args': {'average': 'marco'},
    # 'use_gpu': True,  # automatically falls back to cpu on nodes that don't support gpu
    'epochs': 1, 
    'dry_run': False,  
    'batch_maxnum': 100 # Fast pass for development : only use ( batch_maxnum * batch_size ) samples
}

### Create and run the experiment

In [3]:
from fedbiomed.researcher.federated_workflows import Experiment
from fedbiomed.researcher.aggregators.fedavg import FedAverage

tags =  ['#MNIST', '#dataset']
rounds = 2

exp = Experiment(tags=tags,
                 model_args=model_args,
                 training_plan_class=MyTrainingPlan,
                 training_args=training_args,
                 round_limit=rounds,
                 aggregator=FedAverage(),
                 node_selection_strategy=None,
                save_breakpoints=True,
                )

2024-11-08 09:45:09,288 fedbiomed INFO - Starting researcher service...

2024-11-08 09:45:09,290 fedbiomed INFO - Waiting 3s for nodes to connect...

2024-11-08 09:45:10,244 fedbiomed DEBUG - Node: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 polling for the tasks

2024-11-08 09:45:10,422 fedbiomed DEBUG - Node: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b polling for the tasks

2024-11-08 09:45:12,294 fedbiomed INFO - Updating training data. This action will update FederatedDataset, and the nodes that will participate to the experiment.

2024-11-08 09:45:12,303 fedbiomed DEBUG - Node: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b polling for the tasks

2024-11-08 09:45:12,304 fedbiomed DEBUG - Node: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 polling for the tasks

2024-11-08 09:45:12,311 fedbiomed INFO - Node selected for training -> NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89

2024-11-08 09:45:12,312 fedbiomed INFO - Node selected for training -> NODE_46be388b-3c9b-4ede-9608-f99c519ae60b

<function extract_symbols at 0x1050a4f70>


2024-11-08 09:45:12,324 fedbiomed DEBUG - Model file has been saved: /Users/scansiz/development/workdir/fbm-researcher/var/experiments/Experiment_0000/model_940e5556-5180-4433-bb06-54ca0a00c7ba.py

Secure RNG turned off. This is perfectly fine for experimentation as it allows for much faster training performance, but remember to turn it on and retrain one last time before production with ``secure_mode`` turned on.


In [4]:
exp.run()

2024-11-08 09:45:15,502 fedbiomed INFO - Sampled nodes in round 0 ['NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89', 'NODE_46be388b-3c9b-4ede-9608-f99c519ae60b']

<function extract_symbols at 0x1050a4f70>


2024-11-08 09:45:15,507 fedbiomed INFO - [1mSending request[0m 
					[1m To[0m: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					[1m Request: [0m: TRAIN
 -----------------------------------------------------------------

2024-11-08 09:45:15,509 fedbiomed INFO - [1mSending request[0m 
					[1m To[0m: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					[1m Request: [0m: TRAIN
 -----------------------------------------------------------------

2024-11-08 09:45:15,548 fedbiomed DEBUG - Node: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 polling for the tasks

2024-11-08 09:45:15,550 fedbiomed DEBUG - Node: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b polling for the tasks

2024-11-08 09:45:15,725 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 1 Epoch: 1 | Iteration: 1/100 (1%) | Samples: 48/4800
 					 Loss: [1m2.303103[0m 
					 ---------

2024-11-08 09:45:15,726 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 1 Epoch: 1 | Iteration: 1/100 (1%) | Samples: 48/4800
 					 Loss: [1m2.315710[0m 
					 ---------

2024-11-08 09:45:16,116 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 1 Epoch: 1 | Iteration: 10/100 (10%) | Samples: 480/4800
 					 Loss: [1m1.536834[0m 
					 ---------

2024-11-08 09:45:16,120 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 1 Epoch: 1 | Iteration: 10/100 (10%) | Samples: 480/4800
 					 Loss: [1m1.330327[0m 
					 ---------

2024-11-08 09:45:16,587 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 1 Epoch: 1 | Iteration: 20/100 (20%) | Samples: 960/4800
 					 Loss: [1m0.743463[0m 
					 ---------

2024-11-08 09:45:16,588 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 1 Epoch: 1 | Iteration: 20/100 (20%) | Samples: 960/4800
 					 Loss: [1m0.800446[0m 
					 ---------

2024-11-08 09:45:16,953 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 1 Epoch: 1 | Iteration: 30/100 (30%) | Samples: 1440/4800
 					 Loss: [1m0.739626[0m 
					 ---------

2024-11-08 09:45:16,956 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 1 Epoch: 1 | Iteration: 30/100 (30%) | Samples: 1440/4800
 					 Loss: [1m0.546479[0m 
					 ---------

2024-11-08 09:45:17,350 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 1 Epoch: 1 | Iteration: 40/100 (40%) | Samples: 1920/4800
 					 Loss: [1m0.679128[0m 
					 ---------

2024-11-08 09:45:17,355 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 1 Epoch: 1 | Iteration: 40/100 (40%) | Samples: 1920/4800
 					 Loss: [1m0.904091[0m 
					 ---------

2024-11-08 09:45:17,732 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 1 Epoch: 1 | Iteration: 50/100 (50%) | Samples: 2400/4800
 					 Loss: [1m0.294478[0m 
					 ---------

2024-11-08 09:45:17,738 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 1 Epoch: 1 | Iteration: 50/100 (50%) | Samples: 2400/4800
 					 Loss: [1m0.527399[0m 
					 ---------

2024-11-08 09:45:18,173 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 1 Epoch: 1 | Iteration: 60/100 (60%) | Samples: 2880/4800
 					 Loss: [1m0.454452[0m 
					 ---------

2024-11-08 09:45:18,175 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 1 Epoch: 1 | Iteration: 60/100 (60%) | Samples: 2880/4800
 					 Loss: [1m0.366642[0m 
					 ---------

2024-11-08 09:45:18,557 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 1 Epoch: 1 | Iteration: 70/100 (70%) | Samples: 3360/4800
 					 Loss: [1m0.366709[0m 
					 ---------

2024-11-08 09:45:18,559 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 1 Epoch: 1 | Iteration: 70/100 (70%) | Samples: 3360/4800
 					 Loss: [1m0.334487[0m 
					 ---------

2024-11-08 09:45:18,933 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 1 Epoch: 1 | Iteration: 80/100 (80%) | Samples: 3840/4800
 					 Loss: [1m0.307973[0m 
					 ---------

2024-11-08 09:45:18,936 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 1 Epoch: 1 | Iteration: 80/100 (80%) | Samples: 3840/4800
 					 Loss: [1m0.572789[0m 
					 ---------

2024-11-08 09:45:19,292 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 1 Epoch: 1 | Iteration: 90/100 (90%) | Samples: 4320/4800
 					 Loss: [1m0.165697[0m 
					 ---------

2024-11-08 09:45:19,297 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 1 Epoch: 1 | Iteration: 90/100 (90%) | Samples: 4320/4800
 					 Loss: [1m0.323365[0m 
					 ---------

2024-11-08 09:45:19,664 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 1 Epoch: 1 | Iteration: 100/100 (100%) | Samples: 4800/4800
 					 Loss: [1m0.264104[0m 
					 ---------

2024-11-08 09:45:19,665 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 1 Epoch: 1 | Iteration: 100/100 (100%) | Samples: 4800/4800
 					 Loss: [1m0.271520[0m 
					 ---------

2024-11-08 09:45:19,702 fedbiomed INFO - Nodes that successfully reply in round 0 ['NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89', 'NODE_46be388b-3c9b-4ede-9608-f99c519ae60b']

<function extract_symbols at 0x1050a4f70>


2024-11-08 09:45:19,716 fedbiomed DEBUG - Model file has been saved: /Users/scansiz/development/workdir/fbm-researcher/var/experiments/Experiment_0000/model_14dea328-787c-4afb-8fc8-a1cb722d897c.py

2024-11-08 09:45:19,719 fedbiomed INFO - breakpoint number 0 saved at /Users/scansiz/development/workdir/fbm-researcher/var/experiments/Experiment_0000/breakpoint_0000

2024-11-08 09:45:19,720 fedbiomed INFO - Sampled nodes in round 1 ['NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89', 'NODE_46be388b-3c9b-4ede-9608-f99c519ae60b']

<function extract_symbols at 0x1050a4f70>


2024-11-08 09:45:19,721 fedbiomed INFO - [1mSending request[0m 
					[1m To[0m: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					[1m Request: [0m: TRAIN
 -----------------------------------------------------------------

2024-11-08 09:45:19,721 fedbiomed INFO - [1mSending request[0m 
					[1m To[0m: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					[1m Request: [0m: TRAIN
 -----------------------------------------------------------------

2024-11-08 09:45:19,739 fedbiomed DEBUG - Node: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 polling for the tasks

2024-11-08 09:45:19,744 fedbiomed DEBUG - Node: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b polling for the tasks

2024-11-08 09:45:19,890 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 2 Epoch: 1 | Iteration: 1/100 (1%) | Samples: 48/4800
 					 Loss: [1m0.145330[0m 
					 ---------

2024-11-08 09:45:19,904 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 2 Epoch: 1 | Iteration: 1/100 (1%) | Samples: 48/4800
 					 Loss: [1m0.277376[0m 
					 ---------

2024-11-08 09:45:20,257 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 2 Epoch: 1 | Iteration: 10/100 (10%) | Samples: 480/4800
 					 Loss: [1m0.153346[0m 
					 ---------

2024-11-08 09:45:20,277 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 2 Epoch: 1 | Iteration: 10/100 (10%) | Samples: 480/4800
 					 Loss: [1m0.594119[0m 
					 ---------

2024-11-08 09:45:20,648 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 2 Epoch: 1 | Iteration: 20/100 (20%) | Samples: 960/4800
 					 Loss: [1m0.139673[0m 
					 ---------

2024-11-08 09:45:20,681 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 2 Epoch: 1 | Iteration: 20/100 (20%) | Samples: 960/4800
 					 Loss: [1m0.196160[0m 
					 ---------

2024-11-08 09:45:21,086 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 2 Epoch: 1 | Iteration: 30/100 (30%) | Samples: 1440/4800
 					 Loss: [1m0.410729[0m 
					 ---------

2024-11-08 09:45:21,133 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 2 Epoch: 1 | Iteration: 30/100 (30%) | Samples: 1440/4800
 					 Loss: [1m0.148228[0m 
					 ---------

2024-11-08 09:45:21,478 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 2 Epoch: 1 | Iteration: 40/100 (40%) | Samples: 1920/4800
 					 Loss: [1m0.278409[0m 
					 ---------

2024-11-08 09:45:21,525 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 2 Epoch: 1 | Iteration: 40/100 (40%) | Samples: 1920/4800
 					 Loss: [1m0.190168[0m 
					 ---------

2024-11-08 09:45:21,856 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 2 Epoch: 1 | Iteration: 50/100 (50%) | Samples: 2400/4800
 					 Loss: [1m0.193891[0m 
					 ---------

2024-11-08 09:45:21,904 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 2 Epoch: 1 | Iteration: 50/100 (50%) | Samples: 2400/4800
 					 Loss: [1m0.174528[0m 
					 ---------

2024-11-08 09:45:22,213 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 2 Epoch: 1 | Iteration: 60/100 (60%) | Samples: 2880/4800
 					 Loss: [1m0.384290[0m 
					 ---------

2024-11-08 09:45:22,259 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 2 Epoch: 1 | Iteration: 60/100 (60%) | Samples: 2880/4800
 					 Loss: [1m0.161665[0m 
					 ---------

2024-11-08 09:45:22,591 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 2 Epoch: 1 | Iteration: 70/100 (70%) | Samples: 3360/4800
 					 Loss: [1m0.108583[0m 
					 ---------

2024-11-08 09:45:22,635 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 2 Epoch: 1 | Iteration: 70/100 (70%) | Samples: 3360/4800
 					 Loss: [1m0.199949[0m 
					 ---------

2024-11-08 09:45:22,980 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 2 Epoch: 1 | Iteration: 80/100 (80%) | Samples: 3840/4800
 					 Loss: [1m0.088527[0m 
					 ---------

2024-11-08 09:45:23,004 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 2 Epoch: 1 | Iteration: 80/100 (80%) | Samples: 3840/4800
 					 Loss: [1m0.310526[0m 
					 ---------

2024-11-08 09:45:23,355 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 2 Epoch: 1 | Iteration: 90/100 (90%) | Samples: 4320/4800
 					 Loss: [1m0.123607[0m 
					 ---------

2024-11-08 09:45:23,373 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 2 Epoch: 1 | Iteration: 90/100 (90%) | Samples: 4320/4800
 					 Loss: [1m0.128585[0m 
					 ---------

2024-11-08 09:45:23,731 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89 
					 Round 2 Epoch: 1 | Iteration: 100/100 (100%) | Samples: 4800/4800
 					 Loss: [1m0.236755[0m 
					 ---------

2024-11-08 09:45:23,745 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_46be388b-3c9b-4ede-9608-f99c519ae60b 
					 Round 2 Epoch: 1 | Iteration: 100/100 (100%) | Samples: 4800/4800
 					 Loss: [1m0.125582[0m 
					 ---------

2024-11-08 09:45:23,760 fedbiomed INFO - Nodes that successfully reply in round 1 ['NODE_933f1657-1ddc-41b3-ba75-c20aad4aef89', 'NODE_46be388b-3c9b-4ede-9608-f99c519ae60b']

<function extract_symbols at 0x1050a4f70>


2024-11-08 09:45:23,774 fedbiomed DEBUG - Model file has been saved: /Users/scansiz/development/workdir/fbm-researcher/var/experiments/Experiment_0000/model_94c360a5-e1d1-44ae-a589-07653aa471f5.py

2024-11-08 09:45:23,777 fedbiomed INFO - breakpoint number 1 saved at /Users/scansiz/development/workdir/fbm-researcher/var/experiments/Experiment_0000/breakpoint_0001

2

In [16]:
loaded_exp = Experiment.load_breakpoint()

2024-11-04 17:25:10,383 fedbiomed DEBUG - found json file containing states at                breakpoint_0001.json

<function extract_symbols at 0x104f10550>


2024-11-04 17:25:10,393 fedbiomed DEBUG - Model file has been saved: /Users/scansiz/development/test/my-researcher/var/experiments/Experiment_0003/model_d9a490d2-46db-477c-8e10-15ca5425b3c2.py

2024-11-04 17:25:10,416 fedbiomed INFO - Experimentation reload from None successful!

In [None]:
loaded_exp.run_once(increase=True)

2024-11-04 17:25:54,612 fedbiomed INFO - Sampled nodes in round 4 ['NODE_d59683b3-c090-46b8-8d84-625ec29eed8d', 'NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6']

<function extract_symbols at 0x104f10550>


2024-11-04 17:25:54,617 fedbiomed INFO - [1mSending request[0m 
					[1m To[0m: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					[1m Request: [0m: TRAIN
 -----------------------------------------------------------------

2024-11-04 17:25:54,618 fedbiomed INFO - [1mSending request[0m 
					[1m To[0m: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					[1m Request: [0m: TRAIN
 -----------------------------------------------------------------

2024-11-04 17:25:54,659 fedbiomed DEBUG - Node: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d polling for the tasks

2024-11-04 17:25:54,663 fedbiomed DEBUG - Node: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 polling for the tasks

2024-11-04 17:25:54,723 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					 Round 5 Epoch: 1 | Iteration: 1/100 (1%) | Samples: 48/4800
 					 Loss: [1m0.070887[0m 
					 ---------

2024-11-04 17:25:54,733 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					 Round 5 Epoch: 1 | Iteration: 1/100 (1%) | Samples: 48/4800
 					 Loss: [1m0.130460[0m 
					 ---------

2024-11-04 17:25:55,064 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					 Round 5 Epoch: 1 | Iteration: 10/100 (10%) | Samples: 480/4800
 					 Loss: [1m0.035985[0m 
					 ---------

2024-11-04 17:25:55,072 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					 Round 5 Epoch: 1 | Iteration: 10/100 (10%) | Samples: 480/4800
 					 Loss: [1m0.046282[0m 
					 ---------

2024-11-04 17:25:55,461 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					 Round 5 Epoch: 1 | Iteration: 20/100 (20%) | Samples: 960/4800
 					 Loss: [1m0.043231[0m 
					 ---------

2024-11-04 17:25:55,462 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					 Round 5 Epoch: 1 | Iteration: 20/100 (20%) | Samples: 960/4800
 					 Loss: [1m0.082631[0m 
					 ---------

2024-11-04 17:25:55,855 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					 Round 5 Epoch: 1 | Iteration: 30/100 (30%) | Samples: 1440/4800
 					 Loss: [1m0.097936[0m 
					 ---------

2024-11-04 17:25:55,856 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					 Round 5 Epoch: 1 | Iteration: 30/100 (30%) | Samples: 1440/4800
 					 Loss: [1m0.176758[0m 
					 ---------

2024-11-04 17:25:56,246 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					 Round 5 Epoch: 1 | Iteration: 40/100 (40%) | Samples: 1920/4800
 					 Loss: [1m0.119879[0m 
					 ---------

2024-11-04 17:25:56,251 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					 Round 5 Epoch: 1 | Iteration: 40/100 (40%) | Samples: 1920/4800
 					 Loss: [1m0.030287[0m 
					 ---------

2024-11-04 17:25:56,606 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					 Round 5 Epoch: 1 | Iteration: 50/100 (50%) | Samples: 2400/4800
 					 Loss: [1m0.143194[0m 
					 ---------

2024-11-04 17:25:56,635 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					 Round 5 Epoch: 1 | Iteration: 50/100 (50%) | Samples: 2400/4800
 					 Loss: [1m0.137385[0m 
					 ---------

2024-11-04 17:25:56,964 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					 Round 5 Epoch: 1 | Iteration: 60/100 (60%) | Samples: 2880/4800
 					 Loss: [1m0.023109[0m 
					 ---------

2024-11-04 17:25:57,021 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					 Round 5 Epoch: 1 | Iteration: 60/100 (60%) | Samples: 2880/4800
 					 Loss: [1m0.068903[0m 
					 ---------

2024-11-04 17:25:57,357 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					 Round 5 Epoch: 1 | Iteration: 70/100 (70%) | Samples: 3360/4800
 					 Loss: [1m0.090915[0m 
					 ---------

2024-11-04 17:25:57,418 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					 Round 5 Epoch: 1 | Iteration: 70/100 (70%) | Samples: 3360/4800
 					 Loss: [1m0.131431[0m 
					 ---------

2024-11-04 17:25:57,731 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					 Round 5 Epoch: 1 | Iteration: 80/100 (80%) | Samples: 3840/4800
 					 Loss: [1m0.378437[0m 
					 ---------

2024-11-04 17:25:57,799 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					 Round 5 Epoch: 1 | Iteration: 80/100 (80%) | Samples: 3840/4800
 					 Loss: [1m0.039465[0m 
					 ---------

2024-11-04 17:25:58,119 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					 Round 5 Epoch: 1 | Iteration: 90/100 (90%) | Samples: 4320/4800
 					 Loss: [1m0.251992[0m 
					 ---------

2024-11-04 17:25:58,172 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					 Round 5 Epoch: 1 | Iteration: 90/100 (90%) | Samples: 4320/4800
 					 Loss: [1m0.124467[0m 
					 ---------

2024-11-04 17:25:58,522 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_d59683b3-c090-46b8-8d84-625ec29eed8d 
					 Round 5 Epoch: 1 | Iteration: 100/100 (100%) | Samples: 4800/4800
 					 Loss: [1m0.128850[0m 
					 ---------

2024-11-04 17:25:58,549 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6 
					 Round 5 Epoch: 1 | Iteration: 100/100 (100%) | Samples: 4800/4800
 					 Loss: [1m0.092966[0m 
					 ---------

2024-11-04 17:25:58,564 fedbiomed INFO - Nodes that successfully reply in round 4 ['NODE_d59683b3-c090-46b8-8d84-625ec29eed8d', 'NODE_4238258b-586d-4ed7-9a7e-47c0b35dd9d6']

In [22]:
import os
os.environ["FEDBIOMED_RESEARCHER_COMPONENT_ROOT"]

KeyError: 'FEDBIOMED_RESEARCHER_COMPONENT_ROOT'

In [None]:
exp.run_once(increase=True)

Save trained model to file

In [None]:
exp.training_plan().export_model('./trained_model')

#### Display results

In [None]:
print("\nList the training rounds : ", exp.training_replies().keys())

print("\nList the nodes for the last training round and their timings : ")
round_data = exp.training_replies()[rounds - 1]
for r in round_data.values():
    print("\t- {id} :\
    \n\t\trtime_training={rtraining:.2f} seconds\
    \n\t\tptime_training={ptraining:.2f} seconds\
    \n\t\trtime_total={rtotal:.2f} seconds".format(id = r['node_id'],
        rtraining = r['timing']['rtime_training'],
        ptraining = r['timing']['ptime_training'],
        rtotal = r['timing']['rtime_total']))
print('\n')

Federated parameters for each round are available via `exp.aggregated_params()` (index 0 to (`rounds` - 1) ).

For example you can view the federated parameters for the last round of the experiment :

In [None]:
print("\nList the training rounds : ", exp.aggregated_params().keys())

print("\nAccess the federated params for the last training round :")
print("\t- parameter data: ", exp.aggregated_params()[rounds - 1]['params'].keys())
