# Fedbiomed Researcher to train a federated PPCA (Probabilistic PCA) model.

## Description of the exercise :

Three datasets `n1.csv`, `n2.csv` and `n3.csv` will be randomly generated using a 3-view PPCA model with a 4-dimensional latent space, view dimensions [15, 8, 10] and 2 groups. We will then distribute the 3 datasets to 3 distinct nodes and train with Fed-mv-PPCA. In each center we monitor the evolution of the expected log-likelihood (LL) during training.

## Data Generation

We will generate three datasets using mv-PPCA.
Then save them in a path of your choice on your machine.

In [1]:
import numpy as np
import pandas as pd
from typing import List, Union, Dict

def sample_x_n(N:int, q:int, random_state:int=None):
    """Draw an (N, q) array of independent standard Gaussian samples.

    Args:
        N: size of the first dimension of the returned array.
        q: size of the second dimension of the returned array.
        random_state: seed given to `np.random.RandomState`; with the
            default `None`, NumPy seeds the generator itself.

    Returns:
        The array produced by `RandomState.randn(N, q)`.
    """
    generator = np.random.RandomState(random_state)
    samples = generator.randn(N, q)
    return samples

def generate_data(N: int,
                  W: np.ndarray,
                  mu:float,
                  sigma2:float,
                  x_n,
                  view:int,
                  random_state=None):
    """Simulate the observations of one view with a linear-Gaussian model.

    Every row n is generated as

        y_n = W x_n + mu + epsilon,   epsilon ~ N(0, sigma2)

    where the noise is drawn independently for each entry.

    Params:
    :N: (int) number of samples (rows) to generate
    :W: (np.ndarray) loading matrix of shape (d, q), d being the number of
        features of the view and q the latent dimension
    :mu: offset added to each sample (must broadcast against an (N, d) array)
    :sigma2: variance of the additive Gaussian noise
    :x_n: latent samples, indexed as (sample, latent component)
    :view: (int) identifier of the view, only used to name the columns
    :random_state: seed of the `np.random.RandomState` drawing the noise

    Returns:
    :(pd.DataFrame) noisy observations of shape (N, d), with columns
        named 'var_<view>,1' ... 'var_<view>,d'
    """
    noise_rng = np.random.RandomState(random_state)

    n_features, _ = W.shape
    noise_std = np.sqrt(sigma2)

    # Noise-free part: row n holds W @ x_n[n] + mu.
    clean = np.empty((N, n_features))
    clean[:] = np.einsum("dq,nq->nd", W, x_n[:]) + mu

    column_names = [f'var_{view},{j + 1}' for j in range(n_features)]
    clean_df = pd.DataFrame(data=clean, columns=column_names)

    # The noise is drawn last, after the deterministic part is built.
    noise = noise_std * noise_rng.randn(N, n_features)
    return clean_df + noise

#start

def generate_ppca_nodes_dataset(n_nodes: int,
                           n_features: Union[List[int],int],
                           n_components: int,
                           absent_view: Dict[str, int]=None,
                           W_init: List[np.ndarray]=None,
                           mu_init: List[np.ndarray]=None,
                           sigma_init : List[np.ndarray]=None,
                           is_validation: bool=True,
                           n_sample_validation: int=None):
    """
    Prepare the PPCA parameters used to generate one synthetic dataset per node.

    NOTE(review): this function is unfinished. It only fills in default
    values for the parameters that were not provided; no dataset is generated
    and nothing is returned. `absent_view`, `is_validation` and
    `n_sample_validation` are currently unused.

    Params:
    :n_nodes: number of nodes for which parameters are prepared
    :n_features: number of features of each node's parameters, either one
        value per node or a single int applied to every node
    :n_components: latent space dimension (number of columns of each W)
    :W_init: optional list of loading matrices; when None, one matrix of
        shape (n_features[i], n_components) is drawn uniformly in [-10, 10)
        per node
    :mu_init: optional list of offsets; when None, one vector of length
        n_features[i] is drawn uniformly in [-10, 10) per node
    :sigma_init: optional list of noise parameters; when None, 1 is used
        for every node

    The random defaults are drawn from NumPy's global generator.
    """
    # A single int means "same number of features for every node"; the
    # signature allows it, but the default generation below indexes per node.
    if isinstance(n_features, (int, np.integer)):
        n_features = [n_features] * n_nodes

    # generate PPCA parameters if not defined
    if W_init is None:
        W_init = [np.random.uniform(-10, 10, (n_features[i], n_components))
                  for i in range(n_nodes)]

    if mu_init is None:
        mu_init = [np.random.uniform(-10, 10, n_features[i])
                   for i in range(n_nodes)]

    # sigma defaults to 1 for every node
    if sigma_init is None:
        sigma_init = [1] * n_nodes

In [2]:
from typing import Dict, Union

def create_multi_view_dataframe(datasets: Dict[str, pd.DataFrame]) -> pd.DataFrame:
    """Concatenate per-view dataframes into one dataframe with a two-level header.

    The columns of the result form a `pd.MultiIndex` whose first level
    ('views') is the key of each entry of `datasets` and whose second level
    ('feature_name') is the original column name. Views are laid out side
    by side, in the iteration order of `datasets`.

    Params:
    :datasets: mapping view name -> dataframe of that view. All dataframes
        must have the same number of rows.

    Returns:
    :pd.DataFrame with the values of all views and a default integer index
        (the indexes of the input dataframes are not kept).

    Raises:
    :ValueError: if the views cannot be concatenated column-wise, i.e. they
        do not all have the same number of samples.
    """
    _header_labels = ['views', 'feature_name']

    # 1. collect header entries and values view by view
    view_names = []     # view (ie modality) name, repeated once per feature
    feature_names = []  # original column names, kept with their own type
    view_values = []    # one 2-D array per view
    for view_name, view_frame in datasets.items():
        columns = list(view_frame.columns.values)
        feature_names.extend(columns)
        view_names.extend([view_name] * len(columns))
        view_values.append(view_frame.to_numpy())

    if view_values:
        try:
            # a single column-wise concatenation also works for views with 0 rows
            values = np.concatenate(view_values, axis=1)
        except ValueError as val_err:
            # catching case where nb_samples are differents
            raise ValueError('Cannot create multi view dataset: different number of samples for each modality have been detected') from val_err
    else:
        # no view at all: same placeholder as when nothing was concatenated
        values = np.array([])

    header = pd.MultiIndex.from_arrays([view_names, feature_names],
                                       names=_header_labels)

    # 2. create multi index dataframe
    return pd.DataFrame(values, columns=header)


def save_multi_view_dataframe(dataframe: pd.DataFrame, file_name: str):
    """Write `dataframe` to `file_name` as CSV, with pandas' default `to_csv` options."""
    dataframe.to_csv(path_or_buf=file_name)
    
def load_multi_view_dataframe(file_name: str) -> pd.DataFrame:
    """Read a comma-separated file whose first two rows are a two-level column header.

    The first column of the file is used as the index of the returned dataframe.
    """
    read_options = {'delimiter': ',', 'index_col': 0, 'header': [0, 1]}
    return pd.read_csv(file_name, **read_options)




In [3]:
# Seed NumPy's global generator: the np.random.* draws of the generation cell depend on it.
np.random.seed(100)

# Number of features of each view (used as the row count of W_gen1, W_gen2, W_gen3)
D_i = [15, 8, 10]
#nb_group = 2
# Number of centers (nodes) for which a dataset is generated
n_centers = 3
# Number of samples of the test dataset
testing_samples = 40

# Dimension of the latent space used to generate the data
n_components_generated = 4

In [6]:



# initializing PPCA variables
# One noise variance (sigma2), loading matrix (W) and offset (mu) per view.
sigma2_gen1, sigma2_gen2, sigma2_gen3 = 2, 1, 3
W_gen1 = np.random.uniform(-10, 10, (D_i[0], n_components_generated))
W_gen2 = np.random.uniform(-5, 5, (D_i[1], n_components_generated))
W_gen3 = np.random.uniform(-15, 15, (D_i[2], n_components_generated))
mu_gen1 = np.random.uniform(-10, 10, D_i[0])
mu_gen2 = np.random.uniform(-5, 5, D_i[1])
mu_gen3 = np.random.uniform(-15, 15, D_i[2])

W = [W_gen1,W_gen2,W_gen3]
mu = [mu_gen1,mu_gen2,mu_gen3]
# NB: despite its name, this list holds the variances passed as `sigma2` to generate_data
sigma = [sigma2_gen1,sigma2_gen2,sigma2_gen3]

# absent_views contains as key the id of a center in which we want to simulate absent views,
# and as argument the id of the missing view.
# While the data are generated, the id of an absent view is replaced in this dict by the
# dataframe that was generated (and withheld) for it.
absent_views = {'2': 2}

# Seeds. Every center, every view and the test set get their own seed: with a single
# shared seed, RandomState(seed).randn(N, q) starts with the same values whatever N is,
# so the test set would be a copy of the first rows of each center's training data and
# all views would share the same noise stream.
latent_seed_base = 150
noise_seed_base = 250
n_views = len(D_i)

for i in range(n_centers):
    center_id = str(i + 1)
    N = np.random.randint(50,600)
    # latent variables shared by all the views of this center
    x_n_gen = sample_x_n(N, n_components_generated, random_state=latent_seed_base + i)

    # one dataframe per view, keyed by view name
    Y = {}
    for d in range(n_views):
        view_id = d + 1
        y_t = generate_data(N, W[d], mu[d], sigma[d], x_n_gen, view=view_id,
                            random_state=noise_seed_base + i * n_views + d)
        if isinstance(absent_views.get(center_id), int) and absent_views[center_id] == view_id:
            # keep the withheld observations and store a NaN-filled view instead
            absent_views.update({center_id: y_t})
            y_abs = pd.DataFrame(np.nan, index=np.arange(N),
                                 columns=[f'var_{view_id},{j + 1}' for j in range(D_i[d])])
            Y['view_' + str(view_id)] = y_abs
        else:
            Y['view_' + str(view_id)] = y_t

    # one multi-view csv file per center; centers have different numbers of samples
    t_i = create_multi_view_dataframe(Y)
    t_i.to_csv('== Local path to node' + str(i+1) + '.csv',sep=',')

# building the test dataset (seeds never used for a training center)
N_test = testing_samples
x_n_gen = sample_x_n(N_test, n_components_generated, random_state=latent_seed_base + n_centers)
Y_test = {}
for d in range(n_views):
    y_t = generate_data(N_test, W[d], mu[d], sigma[d], x_n_gen, view=d+1,
                        random_state=noise_seed_base + n_centers * n_views + d)
    Y_test['view_' + str(d+1)] = y_t

t_test = pd.concat(Y_test, axis=1)

## Start the network and setting the client up
Before running this notebook:
1. You should start the network from fedbiomed-network, as detailed in :
https://gitlab.inria.fr/fedbiomed/fedbiomed

2. You need to configure at least 2 nodes: <br/>
* **Node 1 :** `./scripts/fedbiomed_run node add`
  * Select option 1 to add a csv file to the client
  * Choose the name, tags and description of the dataset (you can write 'ppca_data' always and it will be good)
  * Pick the .csv file where you stored t_0.
  * Check that your data has been added in node 1 by executing `./scripts/fedbiomed_run node list`
  * Run the node using `./scripts/fedbiomed_run node start`. <br/>

* **Node 2 :** Open a second terminal and run `./scripts/fedbiomed_run node config n2.ini add`
  * Select option 1 to add a csv file to the client
  * Choose the name, tags and description of the dataset (you can write 'ppca_data' always and it will be good)
  * Pick the .csv file where you stored t_1.
  * Check that your data has been added in node 2 by executing `./scripts/fedbiomed_run node config n2.ini list`
  * Run the node using `./scripts/fedbiomed_run node config n2.ini start`.
  


* **Node 3 :** Open a third terminal and run `./scripts/fedbiomed_run node config n3.ini add`
  * Select option 1 to add a csv file to the client
  * Choose the name, tags and description of the dataset (you can write 'ppca_data' always and it will be good)
  * Pick the .csv file where you stored t_2.
  * Check that your data has been added in node 3 by executing `./scripts/fedbiomed_run node config n3.ini list`
  * Run the node using `./scripts/fedbiomed_run node config n3.ini start `.

 Wait until you get `Connected with result code 0`: this means the node is online.


In [7]:
%load_ext autoreload
%autoreload 2

In [8]:
import numpy as np
from fedbiomed.researcher.environ import TMP_DIR
import tempfile
# Create a temporary directory under TMP_DIR; the model class file is written inside it.
tmp_dir_model = tempfile.TemporaryDirectory(dir=TMP_DIR+'/')
# Path of the file that the `%%writefile "$model_file"` cell fills in.
model_file = tmp_dir_model.name + '/fed_mv_ppca.py'

Hereafter the template of the class you should provide to Fedbiomed :
       
**training_data** : you must return here a tuple (X,X_k,ViewsX,y) or (X,X_k,ViewsX). Note that all centers should provide a dataset with the same view-specific columns. If a view has not been observed in a specific center, the corresponding columns will be filled with NaN. The training_data method takes care of identifying view-specific sub-datasets and collecting information concerning non-available observations. Data can also be normalized here.

In [9]:
%%writefile "$model_file"

from fedbiomed.common.ppca import PpcaPlan

import numpy as np
import pandas as pd


class Fed_MV_PPCA(PpcaPlan):
    """Training plan for multi-view PPCA, built on top of `PpcaPlan`.

    It reads the node's multi-view csv file and tells, view by view, whether
    the view is available on this node.
    """

    def __init__(self, kwargs):
        super(Fed_MV_PPCA, self).__init__(kwargs)
        # import statements registered as dependencies of this class
        self.add_dependency(['import numpy as np',
                             'import pandas as pd'])
        self.multi_view = True

    def training_data(self):
        """
            Load the local dataset and split it into view-specific datasets.

            A view is considered as not observed when its name is missing from
            the dataset, or when its columns contain at least one missing value.
            Observed views are normalized with `self.normalize_data` when
            `self.is_norm` is true.

            :return tuple (X_obs, Xk, ViewsX), where:
            X_obs is the dataset rebuilt from the observed views only,
            Xk is a list with, for each view k, its dataframe if the view is
            observed or np.nan otherwise,
            ViewsX is the indicator of observed views (ViewsX[k]=1 if view k
            is observed, 0 otherwise).
            Labels are not needed for the optimization and are not returned.
        """
        X = self.load_multi_view_dataframe(self.dataset_path)

        Xk = []        # one entry per view (dataframe or np.nan)
        ViewsX = []    # 1 / 0 flag per view
        observed = []  # dataframes of the observed views only
        for k in range(self.K):
            view_name = self.views_id[k]
            is_missing = (view_name not in X) or X[view_name].isnull().values.any()
            if is_missing:
                Xk.append(np.nan)
                ViewsX.append(0)
                continue

            view_data = X[view_name]
            if self.is_norm:
                view_data = self.normalize_data(view_data)
            Xk.append(view_data)
            observed.append(view_data)
            ViewsX.append(1)

        # The entire dataset is re-built without the non-observed views
        X_obs = pd.concat(observed, axis=1)

        return (X_obs, Xk, ViewsX)

    def load_multi_view_dataframe(self, file_name: str) -> pd.DataFrame:
        """Read a comma-separated file with a two-row column header and an index column."""
        return pd.read_csv(file_name, delimiter=',', index_col=0, header=[0,1])

    

Writing /Users/balelli/ownCloud/INRIA_EPIONE/FedBioMed/fedbiomed/var/tmp/tmpftbo2e4w/fed_mv_ppca.py


**model_args** is a dictionary containing the mv-ppca model arguments: the total number of views across all datasets (tot_views), the dimension of each view (dim_views), the latent space size (n_components), and a boolean (norm) for data preprocessing. Additionally, the researcher can provide priors for one or more global parameters.

**training_args** contains here the number of local iterations for EM/MAP. 

In [10]:
# Names of the views: they match the 'view_<k>' keys used when the datasets were generated
views_id = ['view_1','view_2','view_3']
# Total number of views across all datasets
tot_views = 3
# Number of features of each view
dim_views = [15, 8, 10]
# Size of the latent space
n_components = 4
# Whether the data are normalized (passed to the model as 'is_norm')
norm = True

model_args = {'views_id': views_id,'tot_views': tot_views, 'dim_views': dim_views, 'n_components': n_components, 'is_norm': norm}

# better to increase log interval if number of iteration is higher
training_args = {'n_iterations': 15, 'log_interval' : 1} #


# Tags used to search the datasets on the nodes
tags =  ['ppca_data']
# Number of training rounds
rounds = 5

In [11]:
from fedbiomed.researcher.experiment import Experiment
from fedbiomed.researcher.aggregators.ppca_aggregator import MLaggregator



# select nodes participating in this experiment
exp = Experiment(tags=tags,
                 #clients=None,
                 model_path=model_file,
                 model_args=model_args,
                 model_class='Fed_MV_PPCA',
                 training_args=training_args,
                 rounds=rounds,
                 aggregator=MLaggregator(),
                 client_selection_strategy=None,
                 tensorboard=True)

2021-11-05 16:57:51,986 fedbiomed INFO - Messaging researcher_1d8e4b13-c8cc-4a47-a8db-d6b3a2e64cce successfully connected to the message broker, object = <fedbiomed.common.messaging.Messaging object at 0x15740b310>
2021-11-05 16:57:52,114 fedbiomed INFO - Searching dataset with data tags: ['ppca_data'] for all nodes
2021-11-05 16:57:52,169 fedbiomed INFO - log from: node_5a749bac-ebb6-4acc-ae7b-fdf91cbec87d - DEBUG Message received: {'researcher_id': 'researcher_1d8e4b13-c8cc-4a47-a8db-d6b3a2e64cce', 'tags': ['ppca_data'], 'command': 'search'}
2021-11-05 16:57:52,193 fedbiomed INFO - log from: node_6909f8bc-5d7c-4b4e-9a67-651de68ae2c9 - DEBUG Message received: {'researcher_id': 'researcher_1d8e4b13-c8cc-4a47-a8db-d6b3a2e64cce', 'tags': ['ppca_data'], 'command': 'search'}
2021-11-05 16:57:52,508 fedbiomed INFO - log from: node_77fbcf1f-c0fc-4730-9f5a-85b6ffa6bb91 - DEBUG Message received: {'researcher_id': 'researcher_1d8e4b13-c8cc-4a47-a8db-d6b3a2e64cce', 'tags': ['ppca_data'], 'comman

In [12]:
from fedbiomed.researcher.requests import Requests

# List the datasets available on the nodes (verbose=True is passed to `list`)
req = Requests()
datasets = req.list(verbose=True)

2021-11-05 16:59:20,522 fedbiomed INFO - Listing available datasets in all nodes... 
2021-11-05 16:59:20,560 fedbiomed INFO - log from: node_5a749bac-ebb6-4acc-ae7b-fdf91cbec87d - DEBUG Message received: {'researcher_id': 'researcher_1d8e4b13-c8cc-4a47-a8db-d6b3a2e64cce', 'command': 'list'}
2021-11-05 16:59:20,569 fedbiomed INFO - log from: node_6909f8bc-5d7c-4b4e-9a67-651de68ae2c9 - DEBUG Message received: {'researcher_id': 'researcher_1d8e4b13-c8cc-4a47-a8db-d6b3a2e64cce', 'command': 'list'}
2021-11-05 16:59:20,575 fedbiomed INFO - log from: node_77fbcf1f-c0fc-4730-9f5a-85b6ffa6bb91 - DEBUG Message received: {'researcher_id': 'researcher_1d8e4b13-c8cc-4a47-a8db-d6b3a2e64cce', 'command': 'list'}
2021-11-05 16:59:30,543 fedbiomed INFO - 
 Node: node_5a749bac-ebb6-4acc-ae7b-fdf91cbec87d | Number of Datasets: 1 
+--------+-------------+---------------+---------------+-----------+--------------+
| name   | data_type   | tags          | description   | shape     | multi_view   |
|        |

In [None]:
from fedbiomed.researcher.environ import TENSORBOARD_RESULTS_DIR

In [None]:
%load_ext tensorboard

The following cell will launch tensorboard on `TENSORBOARD_RESULTS_DIR`. Since the experiment has not started yet, it won't show any results. After running the experiment, you can click the refresh button to see the changes.

In [None]:
tensorboard --logdir "$TENSORBOARD_RESULTS_DIR"

In [None]:
# start federated training
exp.run()

In [None]:
# exp.aggregated_params is indexed by round
print("\nList the training rounds : ", exp.aggregated_params.keys())


# The last round has index rounds - 1; each entry holds 'params_path' and 'params'
print("\nAccess the federated params for the last training round :")
print("\t- params_path: ", exp.aggregated_params[rounds - 1]['params_path'])
print("\t- parameter data: ", exp.aggregated_params[rounds - 1]['params'].keys())

## Test

Hereafter we test the performance of the aggregated parameters on a test dataset. In particular, for each round we use the global parameters to evaluate the mean absolute error and the separation in the latent space using LDA. Note that we have already defined the test dataset at the beginning of this notebook.

In [None]:

import numpy as np
import pandas as pd
from numpy.linalg import solve

from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error




def eval_MB(Wk, q, D_i, K, Sigma2, ViewsX):
    """
    Computes matrices M:=inv(I_q+sum_k Wk.T Wk/sigma2_k) and B:=[W1.T/sigma2_1,...,WK.T/sigma2_K],
    where the sum and the concatenation only run over the observed views.
    :param Wk: list of matrices; Wk[k] is reshaped to (D_i[k], q)
    :param q: latent space dimension
    :param D_i: list with the number of features of each view
    :param K: number of views
    :param Sigma2: list of float > 0
    :param ViewsX: list of indicators, ViewsX[k]=1 if view k is observed
    :return np.arrays M, of shape (q, q), and B, with q rows and one column
    per feature of the observed views
    :raise ValueError if no view is observed (no 1 in ViewsX)
    """
    first_observed = ViewsX.index(1)
    observed_views = [first_observed]
    observed_views += [k for k in range(first_observed + 1, K) if ViewsX[k] == 1]

    gram_sum = None          # running sum of Wk.T Wk / sigma2_k
    scaled_transposes = []   # the Wk.T / sigma2_k blocks of B
    for k in observed_views:
        W_k = Wk[k].reshape(D_i[k], q)
        gram_k = W_k.T.dot(W_k) / Sigma2[k]
        if gram_sum is None:
            gram_sum = gram_k
        else:
            gram_sum += gram_k
        scaled_transposes.append(W_k.T / Sigma2[k])

    if len(scaled_transposes) == 1:
        B = scaled_transposes[0]
    else:
        B = np.concatenate(scaled_transposes, axis=1)

    # inverse obtained by solving (I_q + sum) M = I_q
    M = solve(np.eye(q) + gram_sum, np.eye(q))

    return M, B

def concat_params(park, K, ViewsX):
    """
    This function stacks, along the first axis, the parameters of the observed views
    :param park: list of vectors/matrices to concatenate (one per view)
    :param K: number of views
    :param ViewsX: list of indicators, ViewsX[k]=1 if view k is observed
    :return np.array (park[k] itself when a single view k is observed)
    :raise ValueError if no view is observed (no 1 in ViewsX)
    """
    first_observed = ViewsX.index(1)

    pieces = [park[first_observed]]
    pieces.extend(park[k] for k in range(first_observed + 1, K) if ViewsX[k] == 1)

    if len(pieces) == 1:
        return pieces[0]
    return np.concatenate(pieces, axis=0)
[]
def simu_latent(q,dataset,ViewsX,global_params):
    """
    This function maps every sample of the dataset to the latent space using the
    global parameters: row n of the result is M.B.(t_n - mu), with M and B given by
    eval_MB and mu the concatenated offsets of the observed views. The mapping is
    deterministic (no random draw is performed).
    :param q: latent space dimension
    :param dataset: pandas dataframe holding the observed views only (one row per sample)
    :param ViewsX: list of indicators, ViewsX[k]=1 if view k is observed
    :param global_params: dict with keys 'tilde_muk', 'tilde_Wk' and 'tilde_Sigma2k'
    :return pandas dataframe with q columns and the same index as dataset
    """
    d = dataset.shape[1]
    N = dataset.shape[0]
    K = len(ViewsX)

    Wk = global_params['tilde_Wk']
    # Number of features of each view, read from the parameters themselves rather than
    # from a module-level variable: eval_MB reshapes Wk[k] to (dim, q), so dim = size / q.
    view_dims = [np.size(W_k) // q for W_k in Wk]

    mu = concat_params(global_params['tilde_muk'], K, ViewsX)
    M, B = eval_MB(Wk, q, view_dims, K, global_params['tilde_Sigma2k'],ViewsX)

    if N == 0:
        # nothing to project: np.vstack would fail on an empty list
        return pd.DataFrame(np.empty((0, q)), index=dataset.index)

    # the projection matrix is the same for every sample
    projection = M.dot(B)
    Xn = [(projection.dot(dataset.iloc[n].values.reshape(d, 1) - mu)).reshape(1, q) for n in range(N)]

    df = pd.DataFrame(np.vstack(Xn), index=dataset.index)

    return df

def MAE(dataset,ViewsX,q,global_params):
    """
    This function evaluates the MAE between the dataset and its reconstruction
    obtained with the global parameters: every sample t_n is mapped to the latent
    space as x_n = M.B.(t_n - mu) and reconstructed as W.x_n + mu.
    :param dataset: pandas dataframe holding the observed views only (one row per sample)
    :param ViewsX: list of indicators, ViewsX[k]=1 if view k is observed
    :param q: latent space dimension
    :param global_params: dict with keys 'tilde_muk', 'tilde_Wk' and 'tilde_Sigma2k'
    :return float
    """
    d = dataset.shape[1]
    N = dataset.shape[0]
    K = len(ViewsX)

    Wk = global_params['tilde_Wk']
    # Number of features of each view, read from the parameters themselves rather than
    # from a module-level variable: eval_MB reshapes Wk[k] to (dim, q), so dim = size / q.
    view_dims = [np.size(W_k) // q for W_k in Wk]

    mu = concat_params(global_params['tilde_muk'], K, ViewsX)
    W = concat_params(Wk, K, ViewsX)
    M, B = eval_MB(Wk, q, view_dims, K, global_params['tilde_Sigma2k'],ViewsX)

    T_true = dataset.values.tolist()

    # the projection matrix is the same for every sample
    projection = M.dot(B)
    T_pred = []
    for n in range(N):
        Xng = (projection.dot(dataset.iloc[n].values.reshape(d, 1) - mu)).reshape(q, 1)
        T_pred.append((W.dot(Xng) + mu).reshape(d))

    return mean_absolute_error(T_true, T_pred)


In [None]:
from fedbiomed.common.ppca import PpcaPlan

# Local PpcaPlan instance, passed to test_data below for its normalize_data method
ppca = PpcaPlan(model_args)
# NOTE(review): the Fed_MV_PPCA class sets `multi_view`, whereas `is_multi_view` is set
# here -- confirm which attribute PpcaPlan actually reads
ppca.is_multi_view = True

In [None]:
# Test dataset settings
# NOTE(review): a `np.random.seed(100)` call appears to have been fused into the comment
# above; restore it on its own line if reseeding was intended here

# Number of features of each view
D_i = [15, 8, 10]
nb_group = 2
# Number of centers (nodes)
n_centers = 3
# Number of samples of the test dataset
testing_samples = 40

# Dimension of the latent space used to generate the data
n_components_generated = 4


def test_data(ppca, dataset,norm,K,dim_views):
    """
        Equivalent to training_data, for a dataset that is already loaded.

        The views are expected side by side in `dataset`, in the order of
        `dim_views`. When `dataset` has exactly sum(dim_views[:K]) columns it
        holds features only; otherwise its last column is taken as the labels.

        :param ppca: object providing `normalize_data` (only used when norm is true)
        :param dataset: pandas dataframe with the views' columns (and possibly labels)
        :param norm: if true, every observed view is normalized with ppca.normalize_data
        :param K: number of views
        :param dim_views: list with the number of features of each view
        :return tuple (X_obs, Xk, ViewsX, y), where:
        X_obs is the dataset rebuilt from the observed views only,
        Xk is a list with, for each view k, its dataframe if the view is
        observed or np.nan otherwise,
        ViewsX is the indicator of observed views (ViewsX[k]=1 if view k is
        observed, 0 otherwise),
        y is the label column, or an empty Series when there is none
    """
    n_feature_columns = sum(dim_views[:K])
    if dataset.shape[1] == n_feature_columns:
        # features only: dropping the last column would remove a real feature
        X = dataset
        y = pd.Series(dtype='int64')
    else:
        X = dataset.iloc[:, :-1]
        y = dataset[dataset.columns[-1]]

    # Xk is a list containing the view-specific local datasets
    Xk = []
    ViewsX = []
    observed = []
    start = 0
    for k in range(K):
        stop = start + dim_views[k]
        X_k = X.iloc[:, start:stop]
        # as in training_data, a view with any missing value is not observed
        if X_k.isnull().values.any():
            Xk.append(np.nan)
            ViewsX.append(0)
        else:
            # if norm = true, data are normalized with min max scaler
            if norm:
                X_k = ppca.normalize_data(X_k)
            Xk.append(X_k)
            observed.append(X_k)
            ViewsX.append(1)
        start = stop

    # The entire dataset is re-built without empty columns
    X_obs = pd.concat(observed, axis=1)

    return (X_obs,Xk,ViewsX,y)


# The name starts with "test": tell pytest this is a helper, not a test to collect.
test_data.__test__ = False

In [None]:
from sklearn import preprocessing

# Evaluate the aggregated parameters of each round on the training data of every
# center and on the test dataset.
# NOTE(review): the loop starts at round 1, so the parameters of round 0 are never
# evaluated -- confirm this is intended
for r in range(1,rounds):
    global_params = exp.aggregated_params[r]['params']
    ######## Train data
    MAE_train = []
    latent_train_parts = []
    label_train_parts = []
    for c in range(n_centers):
        dataset_c = load_multi_view_dataframe('== Local path to node' + str(c+1) + '.csv')

        X_obs_c,Xk_c,ViewsX_c,y_c = test_data(ppca, dataset_c,norm,tot_views,dim_views)
        # Dataframe of latent space for LDA
        latent_train_parts.append(simu_latent(n_components,X_obs_c,ViewsX_c,global_params))
        label_train_parts.append(y_c)
        # MAE Train
        MAE_train.append(MAE(X_obs_c,ViewsX_c,n_components,global_params))

    # DataFrame.append / Series.append no longer exist in pandas >= 2.0: the per-center
    # pieces are collected in lists and concatenated once
    Latent_Train = pd.concat(latent_train_parts) if latent_train_parts else pd.DataFrame()
    Label_Train = pd.concat(label_train_parts) if label_train_parts else pd.Series(dtype='int64')

    ######## Test data

    X_obs_test,Xk_test,ViewsX_test,y_test = test_data(ppca, t_test,norm,tot_views,dim_views)

    # Dataframe of latent space for LDA
    Latent_Test = simu_latent(n_components,X_obs_test,ViewsX_test,global_params)
    Label_Test = y_test.copy()
    # MAE Test
    MAE_test = MAE(X_obs_test,ViewsX_test,n_components,global_params)


    print('Round {}:'.format(r))
    print('MAE train (mean,std) = ({:.4f},{:.4f}) \
    \t MAE test = {:.4f} \
    '.format(np.mean(np.array(MAE_train)), \
      np.std(np.array(MAE_train)), MAE_test))