# Data Preprocessing : Download Used Cars Dataset 
https://www.kaggle.com/adityadesai13/used-car-dataset-ford-and-mercedes

In [None]:
# Folder that holds the CSV files downloaded from Kaggle.
# Point this at your own download location if it differs.
data_dir = "./data"

In [None]:
import pandas as pd

# Dataset Details
The data consists of about 100,000 used-car listings, which have been separated into files corresponding to each car manufacturer. Each file will simulate the data held by one node.

# Goal

The goal of this tutorial is to build a federated regression model on a non-IID dataset and to select the best model by validating on a hold-out dataset and tuning hyperparameters. The metric used to select the best model is RMSE.

In [None]:
import os

# Use audi and bmw for training on 2 nodes
audi = pd.read_csv(os.path.join(data_dir, "audi.csv"))
bmw = pd.read_csv(os.path.join(data_dir, "bmw.csv"))

# Use Ford for final validation at central researcher (test dataset)
ford = pd.read_csv(os.path.join(data_dir, "ford.csv"))

# Use the following csvs if you want to run more than 2 nodes.
# Uncomment the corresponding lines here and in the following cells.
# NOTE(review): the Kaggle archive appears to ship the Hyundai listings as
# "hyundi.csv" (sic) -- check the file name in your download before uncommenting.
# cclass = pd.read_csv(os.path.join(data_dir, "cclass.csv"))
# focus = pd.read_csv(os.path.join(data_dir, "focus.csv"))
# hyundai = pd.read_csv(os.path.join(data_dir, "hyundi.csv"))
# merc = pd.read_csv(os.path.join(data_dir, "merc.csv"))
# skoda = pd.read_csv(os.path.join(data_dir, "skoda.csv"))
# toyota = pd.read_csv(os.path.join(data_dir, "toyota.csv"))
# vauxhall = pd.read_csv(os.path.join(data_dir, "vauxhall.csv"))
# vw = pd.read_csv(os.path.join(data_dir, "vw.csv"))

Drop columns model & fuelType as labels are not consistent across files. A better solution could be vertical federated learning

In [None]:
# Columns removed from every file because their labels are not consistent
# across the per-manufacturer files.
columns_to_drop = ['model', 'fuelType']

# errors='ignore' keeps this cell re-runnable: once the columns are gone, a
# second execution is a no-op instead of raising KeyError.
audi.drop(columns=columns_to_drop, inplace=True, errors='ignore')
bmw.drop(columns=columns_to_drop, inplace=True, errors='ignore')
ford.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# Uncomment for the extra frames loaded in the previous cell.
# cclass.drop(columns=columns_to_drop, inplace=True, errors='ignore')
# focus.drop(columns=columns_to_drop, inplace=True, errors='ignore')
# hyundai.drop(columns=columns_to_drop, inplace=True, errors='ignore')
# merc.drop(columns=columns_to_drop, inplace=True, errors='ignore')
# skoda.drop(columns=columns_to_drop, inplace=True, errors='ignore')
# toyota.drop(columns=columns_to_drop, inplace=True, errors='ignore')
# vauxhall.drop(columns=columns_to_drop, inplace=True, errors='ignore')
# vw.drop(columns=columns_to_drop, inplace=True, errors='ignore')

Label encode transmission column

In [None]:
# Integer code assigned to each transmission label.
transmission_codes = {'Automatic': 0, 'Manual': 1, 'Semi-Auto': 2, 'Other': 3}


def encode_transmission(frame):
    """Label-encode the 'transmission' column of `frame` in place.

    Labels missing from `transmission_codes` become NaN, as with a plain
    `Series.map`. A column that is already numeric is left untouched, so
    re-running the cell does not map the integer codes a second time (which
    would turn every value into NaN).

    Args:
        frame: DataFrame holding a 'transmission' column.
    """
    if not pd.api.types.is_numeric_dtype(frame['transmission']):
        frame['transmission'] = frame['transmission'].map(transmission_codes)


encode_transmission(audi)
encode_transmission(bmw)
encode_transmission(ford)

# Uncomment for the extra frames loaded earlier.
# encode_transmission(cclass)
# encode_transmission(focus)
# encode_transmission(hyundai)
# encode_transmission(merc)
# encode_transmission(skoda)
# encode_transmission(toyota)
# encode_transmission(vauxhall)
# encode_transmission(vw)

In [None]:
# Write the preprocessed frames next to the raw files; the *_transformed.csv
# files are the ones to register on the nodes (and to use as hold-out set).
audi.to_csv(os.path.join(data_dir, 'audi_transformed.csv'), header=True, index=False)
bmw.to_csv(os.path.join(data_dir, 'bmw_transformed.csv'), header=True, index=False)
ford.to_csv(os.path.join(data_dir, 'ford_transformed.csv'), header=True, index=False)

# Uncomment for the extra frames loaded earlier (written to data_dir as well).
# cclass.to_csv(os.path.join(data_dir, 'cclass_transformed.csv'), header=True, index=False)
# focus.to_csv(os.path.join(data_dir, 'focus_transformed.csv'), header=True, index=False)
# hyundai.to_csv(os.path.join(data_dir, 'hyundai_transformed.csv'), header=True, index=False)
# merc.to_csv(os.path.join(data_dir, 'merc_transformed.csv'), header=True, index=False)
# skoda.to_csv(os.path.join(data_dir, 'skoda_transformed.csv'), header=True, index=False)
# toyota.to_csv(os.path.join(data_dir, 'toyota_transformed.csv'), header=True, index=False)
# vauxhall.to_csv(os.path.join(data_dir, 'vauxhall_transformed.csv'), header=True, index=False)
# vw.to_csv(os.path.join(data_dir, 'vw_transformed.csv'), header=True, index=False)

# Fedbiomed Researcher to train a model on a Used Cars dataset

Use for developing (autoreloads changes made across packages)

In [None]:
# Development convenience: load IPython's autoreload extension and enable
# mode 2 so that changes made in imported packages are picked up automatically.
%load_ext autoreload
%autoreload 2

## Start the network
Before running this notebook, start the network with `./scripts/fedbiomed_run network`

## Setting the nodes up
It is necessary to previously configure 2 nodes:
1. `./scripts/fedbiomed_run node config config1.ini add` (node1) and `./scripts/fedbiomed_run node config config2.ini add` (node2)
  * Select option 1 to add a csv file to the node
    * use the `audi_transformed.csv` file (node 1) and `bmw_transformed.csv` file (node 2)
  * Choose the name, tags and description of the dataset
    * choose tag `UsedCars` (or modify the used tag in this notebook)
  * Spin up as many nodes as you want (max 11 nodes for the 11 csv files in the used cars dataset). Hold out one file for testing.
  * Load the .csv files generated by the preprocessing section above into the individual nodes
2. Check that your data has been added by executing `./scripts/fedbiomed_run node config config1.ini list` (node1) and `./scripts/fedbiomed_run node config config2.ini list` (node2)
3. Run the node using `./scripts/fedbiomed_run node config config1.ini start` (node1) and `./scripts/fedbiomed_run node config config2.ini start` (node2). Wait until you get `Starting task manager`, which means the node is online.

## Create an experiment to train a model on the data found

Declare a torch.nn MyTrainingPlan class to send for training on the node

In [6]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset
from fedbiomed.common.training_plans import TorchTrainingPlan
from fedbiomed.common.data import DataManager 


# Here we define the model to be used. 
# You can use any class name (here 'MyTrainingPlan')
class MyTrainingPlan(TorchTrainingPlan):
    """Training plan for used-car price regression with a two-layer network.

    The network maps `in_features` inputs to 5 hidden units (ReLU) and then to
    `out_features` outputs. The plan also bundles the Dataset used to read the
    node-local .csv file and the loss computed at each training step.
    """

    def __init__(self, model_args: dict = None):
        """Build the layers and declare the imports needed by the custom Dataset.

        Args:
            model_args: model arguments; must provide 'in_features' and
                'out_features'. Should match the model arguments passed to the
                experiment class.

        Raises:
            KeyError: if 'in_features' or 'out_features' is missing.
        """
        # `None` rather than `{}` as default: a dict literal in the signature is
        # created once and would be shared by every call that omits the argument.
        if model_args is None:
            model_args = {}
        super(MyTrainingPlan, self).__init__(model_args)
        # should match the model arguments passed below to the experiment class
        self.in_features = model_args['in_features']
        self.out_features = model_args['out_features']
        self.fc1 = nn.Linear(self.in_features, 5)
        self.fc2 = nn.Linear(5, self.out_features)

        # Here we define the custom dependencies that will be needed by our custom Dataloader
        # In this case, we need the torch Dataset and DataLoader classes
        # We need pandas to read the local .csv file at the node side
        deps = ["from torch.utils.data import Dataset, DataLoader",
                "import pandas as pd"]
        self.add_dependency(deps)

    def forward(self, x):
        """Return the network output for the input batch `x` (fc1 -> ReLU -> fc2)."""
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x

    def training_step(self, data, target):
        """Return the RMSE loss of the model on one batch.

        Args:
            data: batch of input features.
            target: batch of prices; a trailing dimension is added with
                `unsqueeze(1)` before it is compared with the model output.

        Returns:
            Square root of the MSE between the model output and the target.
        """
        output = self.forward(data).float()
        criterion = torch.nn.MSELoss()
        loss = torch.sqrt(criterion(output, target.unsqueeze(1)))
        return loss

    class csv_Dataset(Dataset):
        """Torch Dataset built from a used-cars .csv file.

        Features are the columns year, transmission, mileage, tax, mpg and
        engineSize; the target is the price column. Both are stored as float
        tensors (`X_train`, `Y_train`).
        """

        def __init__(self, dataset_path, x_dim):
            """Read the .csv file and convert features and target to tensors.

            Args:
                dataset_path: path of the .csv file to read.
                x_dim: number of input features; accepted for interface
                    compatibility but not used by this implementation.
            """
            self.input_file = pd.read_csv(dataset_path, sep=',', index_col=False)
            # A list (not a tuple) is the conventional way to select several columns.
            feature_columns = ['year', 'transmission', 'mileage', 'tax', 'mpg', 'engineSize']
            x_train = self.input_file.loc[:, feature_columns].values
            y_train = self.input_file.loc[:, 'price'].values
            self.X_train = torch.from_numpy(x_train).float()
            self.Y_train = torch.from_numpy(y_train).float()

        def __len__(self):
            """Return the number of samples (rows of the .csv file)."""
            return len(self.Y_train)

        def __getitem__(self, idx):
            """Return the (features, price) pair stored at position `idx`."""
            return (self.X_train[idx], self.Y_train[idx])

    def training_data(self, batch_size=48):
        """Create the data manager used for training by TorchTrainingPlan.

        Args:
            batch_size: number of samples per batch.

        Returns:
            A DataManager wrapping the csv_Dataset, configured with the
            requested batch size and shuffling enabled.
        """
        # NOTE(review): self.dataset_path is not set in this class; presumably the
        # framework sets it on the node before this method is called -- confirm.
        dataset = self.csv_Dataset(self.dataset_path, self.in_features)
        train_kwargs = {'batch_size': batch_size, 'shuffle': True}
        data_manager = DataManager(dataset=dataset, **train_kwargs)
        return data_manager
    
    

In [7]:
# Network dimensions handed to the training plan through the experiment.
model_args = dict(in_features=6, out_features=1)

# Settings for the local training performed on each node.
training_args = dict(
    batch_size=40,
    lr=1e-3,
    epochs=2,
    dry_run=False,
    # batch_maxnum=100,  # Fast pass for development : only use ( batch_maxnum * batch_size ) samples
)

Define an experiment
- search nodes serving data for these `tags`, optionally filter on a list of node ID with `nodes`
- run a round of local training on nodes with the model defined in `model_class` + federation with `aggregator`
- run for `round_limit` rounds, applying the `node_selection_strategy` between the rounds

In [8]:
from fedbiomed.researcher.experiment import Experiment
from fedbiomed.researcher.aggregators.fedavg import FedAverage

# Tag used to look up the training data on the nodes; change it if you
# registered the dataset under a different tag.
tags = ['UsedCars']
# Number of rounds given to the experiment as `round_limit`.
rounds = 3

exp = Experiment(
    tags=tags,
    model_class=MyTrainingPlan,
    model_args=model_args,
    training_args=training_args,
    round_limit=rounds,
    aggregator=FedAverage(),
    node_selection_strategy=None,
)

2022-03-08 13:31:23,067 fedbiomed INFO - Searching dataset with data tags: ['UsedCars'] for all nodes
2022-03-08 13:31:23,073 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Message received: {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'tags': ['UsedCars'], 'command': 'search'}
2022-03-08 13:31:33,109 fedbiomed INFO - Node selected for training -> node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0
2022-03-08 13:31:33,144 fedbiomed DEBUG - Model file has been saved: /home/scansiz/Desktop/Inria/development/fedbiomed/var/experiments/Experiment_0080/my_model_b29b3ca2-e574-489b-9723-2c36546be344.py
2022-03-08 13:31:33,188 fedbiomed DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/experiments/Experiment_0080/my_model_b29b3ca2-e574-489b-9723-2c36546be344.py successful, with status code 201
2022-03-08 13:31:33,221 fedbiomed DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/develo

Let's start the experiment.

By default, this function doesn't stop until all the `round_limit` rounds are done for all the nodes

In [9]:
# Start the experiment; by default this does not return until all the
# `round_limit` rounds are done for all the nodes.
exp.run()

2022-03-08 13:31:33,228 fedbiomed INFO - Sampled nodes in round 0 ['node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0']
2022-03-08 13:31:33,230 fedbiomed INFO - Send message to node node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 - {'researcher_id': 'researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab', 'job_id': 'eea9cb40-2008-4dea-95cf-8b3fc8058361', 'training_args': {'batch_size': 40, 'lr': 0.001, 'epochs': 2, 'dry_run': False}, 'model_args': {'in_features': 6, 'out_features': 1}, 'command': 'train', 'model_url': 'http://localhost:8844/media/uploads/2022/03/08/my_model_b29b3ca2-e574-489b-9723-2c36546be344.py', 'params_url': 'http://localhost:8844/media/uploads/2022/03/08/aggregated_params_init_9335614a-d2e5-4feb-9710-2f0c5ea861ce.pt', 'model_class': 'MyTrainingPlan', 'training_data': {'node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0': ['dataset_5ed4224a-ae70-4661-b0ef-588f589522a3']}}
2022-03-08 13:31:33,231 fedbiomed DEBUG - researcher_420cfc13-37cb-447c-af20-f7ac5cb2b6ab
2022-03-08 13:31:33,237 fedbiomed

2022-03-08 13:31:34,088 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - running model.postprocess() method
2022-03-08 13:31:34,090 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - model.postprocess() method not provided
2022-03-08 13:31:34,141 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/tmp/node_params_14bbc5ad-da2a-405b-98f0-01e4c016211a.pt successful, with status code 201
2022-03-08 13:31:34,143 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - results uploaded successfully 
2022-03-08 13:31:43,244 fedbiomed INFO - Downloading model params after training on node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 - from http://localhost:8844/media/uploads/2022/03/08/node_params_14bbc5ad-da2a-405b-98f0-01e4c016211a.pt
2022-03-08 13:31:43,261 fedbiomed DEBUG - upload (HTTP GET request) of fi

2022-03-08 13:31:43,367 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP GET request) of file my_model_7ed61878-5be4-47e4-900b-c84ea3fa5812.pt successful, with status code 200
2022-03-08 13:31:43,372 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Dataset path has been set as/home/scansiz/Desktop/Inria/development/data/used-cars/processed/audi_transformed.csv
2022-03-08 13:31:43,389 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - training with arguments {'monitor': <fedbiomed.node.history_monitor.HistoryMonitor object at 0x7f62e22d9490>, 'node_args': {'gpu': False, 'gpu_num': None, 'gpu_only': False}, 'training_data': <torch.utils.data.dataloader.DataLoader object at 0x7f62e141bb20>, 'batch_size': 40, 'lr': 0.001, 'epochs': 2, 'dry_run': False}
2022-03-08 13:31:43,390 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - Using device cpu for training (cuda_av

2022-03-08 13:31:44,134 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - running model.postprocess() method
2022-03-08 13:31:44,136 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - model.postprocess() method not provided
2022-03-08 13:31:44,185 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/tmp/node_params_3786ed6d-a094-4efc-b80f-f4b9df0ba50d.pt successful, with status code 201
2022-03-08 13:31:44,186 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - results uploaded successfully 
2022-03-08 13:31:53,354 fedbiomed INFO - Downloading model params after training on node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 - from http://localhost:8844/media/uploads/2022/03/08/node_params_3786ed6d-a094-4efc-b80f-f4b9df0ba50d.pt
2022-03-08 13:31:53,365 fedbiomed DEBUG - upload (HTTP GET request) of fi

2022-03-08 13:31:54,333 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - running model.postprocess() method
2022-03-08 13:31:54,334 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - model.postprocess() method not provided
2022-03-08 13:31:54,390 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / DEBUG - upload (HTTP POST request) of file /home/scansiz/Desktop/Inria/development/fedbiomed/var/tmp/node_params_179ba7e0-0426-4d4e-9363-e376dc5a8621.pt successful, with status code 201
2022-03-08 13:31:54,393 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / INFO - results uploaded successfully 
2022-03-08 13:32:03,449 fedbiomed INFO - Downloading model params after training on node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 - from http://localhost:8844/media/uploads/2022/03/08/node_params_179ba7e0-0426-4d4e-9363-e376dc5a8621.pt
2022-03-08 13:32:03,468 fedbiomed DEBUG - upload (HTTP GET request) of fi

3

2022-03-08 13:38:15,806 fedbiomed INFO - log from: node_bd90cc6f-67c4-4f81-aaa5-588c9c26c6e0 / CRITICAL - Node stopped in signal_handler, probably by user decision (Ctrl C)
2022-03-08 13:38:39,535 fedbiomed INFO - log from: test_logger_node_cc4aafec-a371-4265-8fdb-bbabe6ef094f / ERROR - mqtt+console ERROR message
2022-03-08 13:38:48,687 fedbiomed INFO - log from: node_1234 / INFO - Messaging mock_researcher_XXX successfully connected to the message broker, object = <fedbiomed.common.messaging.Messaging object at 0x7fe639cb6ac0>
2022-03-08 13:38:50,733 fedbiomed INFO - log from: node_1234 / INFO - Controlling Models Dir
2022-03-08 13:38:50,735 fedbiomed INFO - log from: node_1234 / INFO - /tmp/_nod_/default_models
2022-03-08 13:38:51,370 fedbiomed INFO - log from: node_1234 / INFO - {'name': 'variational-autoencoder.txt', 'description': 'Default model', 'hash': '2677f2ee0c098d6b3417f66a314e95df13632d504492550745eb8ca52e51fad2', 'model_path': '/tmp/_nod_/default_models/variational-autoen

2022-03-08 13:38:51,724 fedbiomed INFO - log from: node_1234 / INFO - Recreating hashing for : sklearn-perceptron.txt 	 model_c20a3130-4534-4262-a7ec-488e417547db
2022-03-08 13:38:51,736 fedbiomed INFO - log from: node_1234 / INFO - Recreating hashing for : pytorch-csv.txt 	 model_76b917c0-da54-49eb-a90c-43ccaa6b99e2
2022-03-08 13:38:51,748 fedbiomed INFO - log from: node_1234 / INFO - Recreating hashing for : sklearn-sgdregressor.txt 	 model_b3b5e5b3-0c3d-4dcd-9424-f797dff7122f
2022-03-08 13:38:51,755 fedbiomed INFO - log from: node_1234 / INFO - Recreating hashing for : pytorch-mnist.txt 	 model_951063d1-14e2-4998-8f16-aeda885e13da
2022-03-08 13:38:51,775 fedbiomed INFO - log from: node_1234 / INFO - Recreating hashing for : pytorch-celaba.txt 	 model_56eaa8c1-fb07-474e-8fe6-dc54d2136c61
2022-03-08 13:38:51,797 fedbiomed INFO - log from: node_1234 / INFO - Recreating hashing for : pytorch-usedcars.txt 	 model_5318dc8a-266b-48a6-9f04-e8911c1c7526
2022-03-08 13:38:51,808 fedbiomed INFO

2022-03-08 13:38:52,584 fedbiomed INFO - log from: node_1234 / DEBUG - Message received: {'command': 'model-status', 'researcher_id': 'researcher_id_1234'}
2022-03-08 13:38:52,585 fedbiomed INFO - log from: node_1234 / DEBUG - Message received: {'command': 'unknown', 'researcher_id': 'researcher_id_1234'}
2022-03-08 13:38:52,704 fedbiomed INFO - log from: node_1234 / DEBUG - Message received: {'command': 'ping', 'researcher_id': 'researcher_id_1234', 'sequence': 1234}
2022-03-08 13:38:52,705 fedbiomed INFO - log from: node_1234 / DEBUG - Message received: {'researcher_id': 'researcher_id_1234', 'sequence': 1234}
2022-03-08 13:38:52,706 fedbiomed INFO - log from: node_1234 / DEBUG - Message received: {'command': 'ping', 'researcher_id': 'researcher_id_1234', 'sequence': 1234}
2022-03-08 13:38:52,727 fedbiomed INFO - log from: node_1234 / ERROR - Did not found proper data in local datasets on node=mock_node_XXX
2022-03-08 13:38:52,756 fedbiomed INFO - log from: node_1234 / DEBUG - [TASKS

Local training results for each round and each node are available via `exp.training_replies()` (index 0 to (`rounds` - 1) ).

For example you can view the training results for the last round below.

Different timings (in seconds) are reported for each dataset of a node participating in a round :
- `rtime_training` real time (clock time) spent in the training function on the node
- `ptime_training` process time (user and system CPU) spent in the training function on the node
- `rtime_total` real time (clock time) spent in the researcher between sending the request and handling the response, at the `Job()` layer

In [None]:
print("\nList the training rounds : ", exp.training_replies().keys())

print("\nList the nodes for the last training round and their timings : ")
round_data = exp.training_replies()[rounds - 1].data()

# One entry per node reply: node id followed by its three timing figures.
timing_report = (
    "\t- {id} :"
    "    \n\t\trtime_training={rtraining:.2f} seconds"
    "    \n\t\tptime_training={ptraining:.2f} seconds"
    "    \n\t\trtime_total={rtotal:.2f} seconds"
)
for reply in round_data:
    timing = reply['timing']
    print(timing_report.format(id=reply['node_id'],
                               rtraining=timing['rtime_training'],
                               ptraining=timing['ptime_training'],
                               rtotal=timing['rtime_total']))
print('\n')

# Last expression of the cell: shown as a table of the last round's replies.
exp.training_replies()[rounds - 1].dataframe()

Federated parameters for each round are available via `exp.aggregated_params()` (index 0 to (`rounds` - 1) ).

For example you can view the federated parameters for the last round of the experiment :

In [None]:
aggregated = exp.aggregated_params()
print("\nList the training rounds : ", aggregated.keys())

print("\nAccess the federated params for the last training round :")
# Entry of the last round (rounds are indexed from 0 to rounds - 1).
last_round_params = aggregated[rounds - 1]
print("\t- params_path: ", last_round_params['params_path'])
print("\t- parameter data: ", last_round_params['params'].keys())


# Test Function

In [None]:
# Instantiate the model on the researcher side, then load the parameters
# aggregated in the last round into it.
fed_model = exp.model_instance()
final_round_params = exp.aggregated_params()[rounds - 1]['params']
fed_model.load_state_dict(final_round_params)

In [None]:
 # Display the representation of the federated model built above.
 fed_model

In [None]:
# The Ford listings were kept out of training; they serve as the hold-out
# set for evaluating the federated model.
test_dataset_path = os.path.join(
    data_dir,
    "ford_transformed.csv",
)

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd

def cal_rmse(actual, prediction):
    return ((actual- prediction)**2).mean()**0.5

def testing_rmse(model, data_loader):
    model.eval()
    test_loss = 0
    correct = 0
    device = 'cpu'
    preds = []
    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            preds.append(output.numpy().flatten())
    rmse = cal_rmse(data_loader.dataset.Y_train.numpy(),np.hstack(preds))
    return rmse

In [None]:
class csv_Dataset(Dataset):
    """Dataset reading a used-cars .csv file into float feature/target tensors."""

    def __init__(self, dataset_path):
        """Load `dataset_path` and store features (X_train) and prices (Y_train)."""
        self.input_file = pd.read_csv(dataset_path, sep=',', index_col=False)
        feature_names = ['year', 'transmission', 'mileage', 'tax', 'mpg', 'engineSize']
        features = self.input_file.loc[:, feature_names].values
        prices = self.input_file.loc[:, 'price'].values
        self.X_train = torch.from_numpy(features).float()
        self.Y_train = torch.from_numpy(prices).float()

    def __len__(self):
        """Number of samples, i.e. number of price entries."""
        return len(self.Y_train)

    def __getitem__(self, idx):
        """Return the (features, price) pair at position `idx`."""
        sample = self.X_train[idx]
        label = self.Y_train[idx]
        return (sample, label)

In [None]:
dataset = csv_Dataset(test_dataset_path)
# Evaluation loader: shuffling brings nothing at test time, and keeping the
# file order guarantees that predictions stay aligned with the dataset's targets.
train_kwargs = {'batch_size': 64, 'shuffle': False}
data_loader = DataLoader(dataset, **train_kwargs)

In [None]:
# Score the federated model on the hold-out loader and keep the result.
rmse = testing_rmse(model=fed_model, data_loader=data_loader)

In [None]:
# Display the RMSE returned by testing_rmse for the federated model.
rmse