# Fedbiomed Researcher to train a federated PPCA (Probabilistic PCA) model.

## Description of the exercise :

Three datasets `n1.csv` , `n2.csv` and `n3.csv` will be generated randomly using 3-views PPCA from a 4-dimensional latent space, with views dimensions [15,8,10] and 2 groups. Henceforth, we will distribute the 3 dataset to 3 distinct nodes and use Fed-mv-PPCA. In each center we check the evolution of expected LL during training.

## Data Generation

We will generate three datasets using mv-PPCA.
Then save them in a path of your choice on your machine.

In [21]:
import numpy as np
import pandas as pd
from typing import List

def sample_x_n(N:int, q:int, random_state:int=None):
    """samples from a Guassian dsitribution
    Args:
    
    :N first dimension array
    :q second dimesnion array
    """
    return np.random.RandomState(random_state).randn(N,q)

def generate_data(N_g: List[int],
                  W: np.ndarray,
                  a_g: np.ndarray,
                  mu:float,
                  sigma2:float,
                  x_n,
                  view:int,
                  random_state=None):
    """generates Gaussian dataset given several groups of data points, using the following
    gausian generative proccess (for a given view):
    
    Y = WX + mu + epsilon with epsilon ~ N(0, sigma2)
    
    where X is the latent space of size (q, n_features), Y the observation matrix, W the matrix used
    for data reconstruction
    Params:
    :N_g: List[int] number of data to generate per group (list of size number of group)
    :W: reconstruction matrix, of size (n_features, q)
    :a_g: (np.ndarray) array of size (nb_group, n_components), shifts when creating different group. 
    :mu: offset of the dataset
    :sigma2: variance used for generating
    :x_n: random variable 
    :view: (int) the given view
    :random_state:
    
    Returns:
    :Y (pd.DataFrame): synthetic dataset generated wrt given expression
    of size (n_features, n_samples)
    """
    rnd=np.random.RandomState(random_state)

    N=N_g.sum()
    d, q = W.shape
    sigma=np.sqrt(sigma2)
    G=len(N_g)

    g_ind=np.concatenate((np.zeros(1, dtype=np.int64), np.cumsum(N_g)))
    print(g_ind)

    y_n=np.empty((N, d))

    for g in range(G):
        y_n[g_ind[g]:g_ind[g+1]]= np.einsum("dq,nq->nd", W, x_n[g_ind[g]:g_ind[g+1]]+a_g[g]) + mu
        print(x_n[g_ind[g]:g_ind[g+1]])
    y_n = pd.DataFrame(data=y_n,
                     columns=[f'var_{view},{i + 1}' for i in range(d)])

    return y_n


In [22]:
np.random.seed(100)

D_i = [15, 8, 10]
nb_group = 2
n_centers = 3
testing_samples = 40

n_components_generated = 4


# initializing PPCA variables
sigma2_gen1, sigma2_gen2, sigma2_gen3 = 2, 1, 3
W_gen1 = np.random.uniform(-10, 10, (D_i[0], n_components_generated))
W_gen2 = np.random.uniform(-5, 5, (D_i[1], n_components_generated))
W_gen3 = np.random.uniform(-15, 15, (D_i[2], n_components_generated))
mu_gen1 = np.random.uniform(-10, 10, D_i[0])
mu_gen2 = np.random.uniform(-5, 5, D_i[1])
mu_gen3 = np.random.uniform(-15, 15, D_i[2])

a_g_gen = np.concatenate((np.zeros((1, n_components_generated)),
                          np.random.uniform(-10, 10, (nb_group - 1, n_components_generated))))

W = [W_gen1,W_gen2,W_gen3]
mu = [mu_gen1,mu_gen2,mu_gen3]
sigma = [sigma2_gen1,sigma2_gen2,sigma2_gen3]

# absent_views contains as key the id of a center in which we want to simulate absent views,
# and as argumet the id of the missing view. 
absent_views = {'2': 2}

for i in range(n_centers):
    N_g = np.array([np.random.randint(25,300),np.random.randint(25,300)]) # does it work with G != 2?
    # N_g = np.array([np.random.randint(25,300) for _ in range(nb_group)])?
    #g_ind = np.concatenate((np.zeros(1, dtype=np.int64), np.cumsum(N_g)))
    N = N_g.sum()
    x_n_gen = sample_x_n(N, n_components_generated, random_state=150)  # randomly generate a Gaussian
    # dataset
    Y = []
    for d in range(len(D_i)):
        y_t = generate_data(N_g, W[d], a_g_gen, mu[d], sigma[d], x_n_gen, view = d+1, random_state=250)
        if ((str(i+1) in absent_views.keys()) \
            and (type(absent_views[str(i+1)])== int) \
            and (absent_views[str(i+1)]==d+1)):
            absent_views.update({str(i+1): y_t})
            y_abs=pd.DataFrame(np.nan, index = np.arange(N_g.sum()), \
                               columns = [f'var_{d+1},{i + 1}' for i in range(D_i[d])])
            Y.append(y_abs)
        else:
            Y.append(y_t)

    gr = [int(0) for _ in range(N_g[0])]+[int(1) for _ in range(N_g[1])] # !!! BUG if G != 2
    gr = pd.Series(gr)
    Y.append(gr)

###
# the output will be a list of n_centers dataset containing: dataframe for each centers,of 
# different dimensions
# 
    t_i = pd.concat(Y, axis=1)
    t_i.columns.values[-1] = 'Label'
    t_i.to_csv('== Local path to node' + str(i+1) + '.csv',sep=',')
    #np.savetxt('== Local path to node' + str(i+1) + '.csv',t_i,delimiter=',')
               
# building the test dataset
N_g_test = np.array([testing_samples//2,testing_samples//2])
g_ind_test = np.concatenate((np.zeros(1, dtype=np.int64), np.cumsum(N_g_test)))
N_test = N_g_test.sum()
x_n_gen = sample_x_n(N_test, n_components_generated, random_state=150)
Y_test = []
for d in range(len(D_i)):
    y_t = generate_data(N_g_test, W[d], a_g_gen, mu[d], sigma[d], x_n_gen, view = d+1, random_state=250)
    Y_test.append(y_t)

gr_test = [0 for _ in range(N_g_test[0])]+[1 for _ in range(N_g_test[1])]
gr_test = pd.Series(gr_test)
Y_test.append(gr_test)

t_test = pd.concat(Y_test, axis=1)

[  0 148 215]
[[-2.31383247e-01  3.90613228e-01  3.58297467e-01  5.66244478e-01]
 [ 1.66233991e-01  1.91341213e+00  3.02902745e-01 -8.30826311e-01]
 [-6.01423295e-02 -9.42941361e-02 -1.36774478e+00 -5.92518086e-01]
 [-3.72465729e-01  1.72776709e+00  2.69285938e+00 -1.37586941e+00]
 [ 1.95731570e+00 -1.46636077e+00 -1.34942344e-01 -1.94972345e+00]
 [ 5.22374276e-01 -1.26113245e+00 -7.33598004e-02 -2.14093647e+00]
 [-7.33201872e-01 -1.71478494e+00  1.37022965e+00 -5.83354760e-04]
 [-2.67424747e+00  7.94300380e-01  8.84039507e-01  8.68761883e-01]
 [-1.83573278e-01 -1.26425175e-01 -2.73405281e-01 -1.08656921e+00]
 [ 2.33527568e-01 -7.01933604e-02  1.28957684e+00  1.18078268e+00]
 [ 7.63322273e-01  1.33704309e+00 -3.05126827e-01  6.70456271e-01]
 [-2.98222420e-01 -4.21632373e-01 -1.47005302e+00 -1.88429204e-01]
 [-2.24658843e-01 -3.43033335e-01  4.09266180e-01 -6.66287250e-01]
 [-6.64063007e-01 -2.52955360e-01  1.23729552e+00  1.78755783e-03]
 [ 1.45999923e+00 -2.04953130e-01 -5.55489928e-0

In [25]:
y_t.shape

(40, 10)

In [4]:
g_ind=np.concatenate((np.zeros(1, dtype=np.int64), np.cumsum(N_g)))
g_ind

array([  0, 206, 324])

In [10]:
x_n_gen[g_ind[g]:g_ind[g+1]].shape, W[0].shape, a_g_gen[g].shape

((40, 4), (15, 4), (4,))

In [19]:
g = 1
g_ind=np.concatenate((np.zeros(1, dtype=np.int64), np.cumsum(N_g)))

print(W[0].shape)
val = np.einsum("dq,nq->nd", W[0], x_n_gen[g_ind[g]:g_ind[g+1]]+a_g_gen[g])



(15, 4)


In [20]:
x_n_gen[g_ind[g]:g_ind[g+1]]+a_g_gen[g]

array([], shape=(0, 4), dtype=float64)

In [14]:
a_g_gen[1]

array([ 5.40179546, -2.49121505, -3.1252093 ,  3.10070412])

## Start the network and setting the client up
Before running this notebook:
1. You should start the network from fedbiomed-network, as detailed in :
https://gitlab.inria.fr/fedbiomed/fedbiomed

2. You need to configure at least 2 nodes: <br/>
* **Node 1 :** `./scripts/fedbiomed_run node add`
  * Select option 1 to add a csv file to the client
  * Choose the name, tags and description of the dataset (you can write 'sk' always and it will be good)
  * Pick the .csv file you stored the couple X[0],y[0].
  * Check that your data has been added in node 1 by executing `./scripts/fedbiomed_run node list`
  * Run the node using `./scripts/fedbiomed_run node start`. <br/>

* **Node 2 :** Open a second terminal and run ./scripts/fedbiomed_run node add config n2.ini
  * Select option 1 to add a csv file to the client
  * Choose the name, tags and description of the dataset (you can write 'sk' always and it will be good)
  * Pick the .csv file you stored the couple X[1],y[1].
  * Check that your data has been added in node 2 by executing `./scripts/fedbiomed_run node config n2.ini list`
  * Run the node using `./scripts/fedbiomed_run node config n2.ini start`.
  


 Wait until you get `Connected with result code 0`. it means node is online.


In [3]:
%load_ext autoreload
%autoreload 2

In [8]:
import numpy as np
from fedbiomed.researcher.environ import TMP_DIR
import tempfile
tmp_dir_model = tempfile.TemporaryDirectory(dir=TMP_DIR+'/')
model_file = tmp_dir_model.name + '/fed_mv_ppca.py'

Hereafter the template of the class you should provide to Fedbiomed :
       
**training_data** : you must return here a tuple (X,X_k,ViewsX,y) or (X,X_k,ViewsX). Note that all centers should provide a dataset with the same view-specific columns. If in a specific center a view has not been observed, then the corresponding columns will be filled of nan. The training_data method take care of identifying view-specific sub-datasets and collecting information concerning non-available observations. Data can also been normalized here.

In [9]:
%%writefile "$model_file"

from fedbiomed.common.ppca import PpcaPlan
import numpy as np


class Fed_MV_PPCA(PpcaPlan):
    def __init__(self, kwargs):
        super(Fed_MV_PPCA, self).__init__(kwargs)
    
    def training_data(self):
        """
            Perform in this method all data reading and data transformations you need.
            At the end you should provide a tuple (X_obs,Xk,ViewsX,y), where: 
            X_obs is the training dataset, 
            Xk is a list containing the k-specific dataframe if it exists or 'NaN' otherwise,
            ViewsX is the indicator function for observed vies (ViewsX[k]=1 if view k is observed, 0 otherwise)
            y the corresponding labels (optional)
            The dataset is normalized using min max scaler if model_args['norm'] is true
            Note: since labels are not needed for the optimization, 
            training_data can also simply return (X_obs,Xk,ViewsX)
            :raise NotImplementedError if researcher do not implement this method.
        """
        dataset = pd.read_csv(self.dataset_path,delimiter=',', index_col=0)
        X = dataset.iloc[:,:-1]
        y = dataset[dataset.columns[-1]]
        
        # Xk is a list contianing the view-specific local datasets
        Xk = []
        ViewsX = []
        ind = 0
        for k in range(self.K):
            if X.iloc[:, ind].isnull().values.any():
                Xk.append('NaN')
                ViewsX.append(0)
            else:
                # if norm = true, data are normalized with min max scaler
                X_k = self.normalize_data(X.iloc[:, ind:ind + self.dim_views[k]]) if self.norm \
                    else X.iloc[:, ind:ind + self.dim_views[k]]
                Xk.append(X_k)
                ViewsX.append(1)
            ind += self.dim_views[k]
        
        # The entire dataset is re-built without empty columns
        Xk_obs = [item for item in Xk if type(item) != str]
        X_obs = pd.concat(Xk_obs, axis=1)
        
        return (X_obs,Xk,ViewsX,y)
    

Writing /home/ybouilla/fedbiomed/var/tmp/tmpk1rgb5gu/fed_mv_ppca.py


**model_args** is a dictionary containing the mv-ppca model arguments: the total number of views across all datasets (tot_views), the dimension of each view (dim_views), the latent space size (n_components), and a boolean (norm) for data preprocessing. Additionaly, the researcher can provide priors for one ore more global parameters.

**training_args** contains here the number of local iterations for EM/MAP. 

In [10]:
tot_views = 3
dim_views = [15, 8, 10]
n_components = 4
norm = True

model_args = {'tot_views': tot_views, 'dim_views': dim_views, 'n_components': n_components, 'norm': norm}

training_args = {'n_iterations': 15}

In [11]:
from fedbiomed.researcher.experiment import Experiment
from fedbiomed.researcher.aggregators.mlaggregator import MLaggregator

tags =  ['ppca_data']
rounds = 5

# select nodes participing to this experiment
exp = Experiment(tags=tags,
                 #clients=None,
                 model_path=model_file,
                 model_args=model_args,
                 model_class='Fed_MV_PPCA',
                 training_args=training_args,
                 rounds=rounds,
                 aggregator=MLaggregator(),
                 client_selection_strategy=None)

2021-10-22 18:09:39,543 fedbiomed INFO - Messaging researcher_195e096a-732f-4f62-9986-94c5cf03ab80 successfully connected to the message broker, object = <fedbiomed.common.messaging.Messaging object at 0x7f21da476d00>
2021-10-22 18:09:39,571 fedbiomed INFO - Searching for clients with data tags: ['ppca_data']
2021-10-22 18:09:39,578 fedbiomed INFO - message received:{'researcher_id': 'researcher_195e096a-732f-4f62-9986-94c5cf03ab80', 'success': True, 'databases': [{'name': '', 'data_type': 'csv', 'tags': ['ppca_data'], 'description': '', 'shape': [351, 34], 'dataset_id': 'dataset_218a42b6-783a-42d9-9bba-3b8b75b47bb2'}], 'count': 1, 'client_id': 'client_0896ff8d-353b-4a96-bc17-f23de59f621d', 'command': 'search'}
2021-10-22 18:09:39,616 fedbiomed INFO - message received:{'researcher_id': 'researcher_195e096a-732f-4f62-9986-94c5cf03ab80', 'success': True, 'databases': [{'name': 'dd', 'data_type': 'csv', 'tags': ['ppca_data'], 'description': '', 'shape': [215, 34], 'dataset_id': 'dataset_d

In [12]:
# start federated training
exp.run()

2021-10-22 18:09:53,435 fedbiomed INFO - Sampled clients in round 0 ['client_0896ff8d-353b-4a96-bc17-f23de59f621d', 'client_3225bc32-4c70-4f7f-96c4-831ee8c4a933']
2021-10-22 18:09:53,438 fedbiomed INFO - Send message to client client_0896ff8d-353b-4a96-bc17-f23de59f621d - {'researcher_id': 'researcher_195e096a-732f-4f62-9986-94c5cf03ab80', 'job_id': '8a3939a5-a4b7-4714-93f9-1ba334d98b24', 'training_args': {'n_iterations': 15}, 'model_args': {'tot_views': 3, 'dim_views': [15, 8, 10], 'n_components': 4, 'norm': True}, 'command': 'train', 'model_url': 'http://localhost:8844/media/uploads/2021/10/22/my_model_6097cd4b-27d2-4f54-98f2-4b982d7ebfd6.py', 'params_url': 'http://localhost:8844/media/uploads/2021/10/22/my_model_9dde6da4-8284-4112-aeb6-197e541517db.pt', 'model_class': 'Fed_MV_PPCA', 'training_data': {'client_0896ff8d-353b-4a96-bc17-f23de59f621d': ['dataset_218a42b6-783a-42d9-9bba-3b8b75b47bb2']}}
2021-10-22 18:09:53,440 fedbiomed DEBUG - researcher_195e096a-732f-4f62-9986-94c5cf03ab

2021-10-22 18:10:13,760 fedbiomed DEBUG - researcher_195e096a-732f-4f62-9986-94c5cf03ab80
2021-10-22 18:10:16,663 fedbiomed INFO - message received:{'researcher_id': 'researcher_195e096a-732f-4f62-9986-94c5cf03ab80', 'job_id': '8a3939a5-a4b7-4714-93f9-1ba334d98b24', 'success': True, 'client_id': 'client_3225bc32-4c70-4f7f-96c4-831ee8c4a933', 'dataset_id': 'dataset_d1feed9a-aee3-4ee2-9108-42fdbce3ad4a', 'params_url': 'http://localhost:8844/media/uploads/2021/10/22/node_params_14296aa6-ea43-4884-b5bb-ae994180ddc5.pt', 'timing': {'rtime_training': 2.843824613999459, 'ptime_training': 10.312514669000002}, 'msg': '', 'command': 'train'}
2021-10-22 18:10:16,786 fedbiomed INFO - message received:{'researcher_id': 'researcher_195e096a-732f-4f62-9986-94c5cf03ab80', 'job_id': '8a3939a5-a4b7-4714-93f9-1ba334d98b24', 'success': True, 'client_id': 'client_0896ff8d-353b-4a96-bc17-f23de59f621d', 'dataset_id': 'dataset_218a42b6-783a-42d9-9bba-3b8b75b47bb2', 'params_url': 'http://localhost:8844/media/u

2021-10-22 18:10:43,973 fedbiomed INFO - Downloading model params after training on client_3225bc32-4c70-4f7f-96c4-831ee8c4a933 - from http://localhost:8844/media/uploads/2021/10/22/node_params_e88ba7de-6b82-4623-92d8-6fe8914306ac.pt
2021-10-22 18:10:43,989 fedbiomed INFO - Downloading model params after training on client_0896ff8d-353b-4a96-bc17-f23de59f621d - from http://localhost:8844/media/uploads/2021/10/22/node_params_714f21d7-040c-4d80-a358-ebd5696e1346.pt
2021-10-22 18:10:43,998 fedbiomed INFO - Clients that successfully reply in round 4 ['client_3225bc32-4c70-4f7f-96c4-831ee8c4a933', 'client_0896ff8d-353b-4a96-bc17-f23de59f621d']


In [13]:
print("\nList the training rounds : ", exp.aggregated_params.keys())

print("\nAccess the federated params for the last training round :")
print("\t- params_path: ", exp.aggregated_params[rounds - 1]['params_path'])
print("\t- parameter data: ", exp.aggregated_params[rounds - 1]['params'].keys())


List the training rounds :  dict_keys([0, 1, 2, 3, 4])

Access the federated params for the last training round :
	- params_path:  /home/ybouilla/fedbiomed/var/tmp/researcher_params_66edfdd3-da83-4c96-a203-0c00e5c7b734.pt
	- parameter data:  dict_keys(['tilde_muk', 'tilde_Wk', 'tilde_Sigma2k', 'Alpha', 'Beta', 'sigma_til_muk', 'sigma_til_Wk', 'sigma_til_sigma2k'])


## Test

Herafter we test the performance of the aggregated parameters on a test dataset. In particular, for each round we use the global parameters to evaluate the mean absolute error and the separation in the latent space using LDA. Note that we have already defined the test dataset at the beginning of this notebook.

In [14]:
import numpy as np
import pandas as pd
from numpy.linalg import solve
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error

def normalize_data(X):
    """
    This function normalize the dataset X
    :param X: pandas dataframe
    :return pandas dataframe
    """
    col_name = [col.strip() for col in list(X.columns)]
    x = X.values  # returns a numpy array
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x)
    norm_dataset = pd.DataFrame(x_scaled, index=X.index, columns=col_name)

    return norm_dataset

def eval_MB(Wk, q, D_i, K, Sigma2, ViewsX):
    """
    Computes matrices M:=inv(I_q+sum_k Wk.TWk/sigma2k) and B:= [W1.T/sigma2K,...,W1.T/sigma2K].
    :param Wk: list of matrices (d_k x q)
    :param Sigma2: list of float > 0
    :return np.arrays
    """
    index = ViewsX.index(1)

    M1 = Wk[index].reshape(D_i[index], q).T.dot(Wk[index].reshape(D_i[index],q)) / Sigma2[index]
    B = Wk[index].reshape(D_i[index], q).T / Sigma2[index]
    for k in range(index + 1, K):
        if ViewsX[k] == 1:
            M1 += Wk[k].reshape(D_i[k], q).T.dot(Wk[k].reshape(D_i[k],q)) / Sigma2[k]
            B = np.concatenate((B, (Wk[k].reshape(D_i[k], q)).T / Sigma2[k]), axis=1)

    M = solve(np.eye(q) + M1,np.eye(q))

    return M, B

def concat_params(park, K, ViewsX):
    """
    This function concatenates parameters from a list
    :param park: list of vectors/matrices to concatenate
    :return np.array
    """
    index = ViewsX.index(1)

    par = park[index]
    for k in range(index + 1, K):
        if ViewsX[k] == 1:
            par = np.concatenate((par, park[k]), axis=0)

    return par

def simu_latent(q,dataset,ViewsX,global_params):
    """
    This function allows sampling of x_n (latent variables) from the posterior distribution 
    (with global parameters).
    :return pandas dataframe
    """
    d = dataset.shape[1]
    N = dataset.shape[0]
    K = len(ViewsX)

    mu = concat_params(global_params['tilde_muk'], K, ViewsX)
    M, B = eval_MB(global_params['tilde_Wk'], q, D_i, K, global_params['tilde_Sigma2k'],ViewsX)

    Xn = [(M.dot(B).dot(dataset.iloc[n].values.reshape(d, 1) - mu)).reshape(1, q) for n in range(N)]

    df = pd.DataFrame(np.vstack(Xn), index=dataset.index)

    return df

def MAE(dataset,ViewsX,q,global_params):
    """
    This function evaluates the MAE using global parameters
    :return float
    """    
    d = dataset.shape[1]
    N = dataset.shape[0]
    K = len(ViewsX)
    
    mu = concat_params(global_params['tilde_muk'], K, ViewsX)
    W = concat_params(global_params['tilde_Wk'], K, ViewsX)
    M, B = eval_MB(global_params['tilde_Wk'], q, D_i, K, global_params['tilde_Sigma2k'],ViewsX)

    T_true = dataset.values.tolist()

    T_pred = []
    for n in range(N):
        Xng = (M.dot(B).dot(dataset.iloc[n].values.reshape(d, 1) - mu)).reshape(q, 1)
        T_pred.append((W.dot(Xng) + mu).reshape(d))

    MAE = mean_absolute_error(T_true, T_pred)

    return MAE

In [15]:
# Test dataset
from sklearn import preprocessing

def test_data(dataset,norm,K,dim_views):
        """
            Equivalent to training_data, for the test dataset
        """
        X = dataset.iloc[:,:-1]
        y = dataset[dataset.columns[-1]]
        
        # Xk is a list contianing the view-specific local datasets
        Xk = []
        ViewsX = []
        ind = 0
        for k in range(K):
            if X.iloc[:, ind].isnull().values.any():
                Xk.append('NaN')
                ViewsX.append(0)
            else:
                # if norm = true, data are normalized with min max scaler
                X_k = normalize_data(X.iloc[:, ind:ind + dim_views[k]]) if norm \
                    else X.iloc[:, ind:ind + dim_views[k]]
                Xk.append(X_k)
                ViewsX.append(1)
            ind += dim_views[k]
        
        # The entire dataset is re-built without empty columns
        Xk_obs = [item for item in Xk if type(item) != str]
        X_obs = pd.concat(Xk_obs, axis=1)
        
        return (X_obs,Xk,ViewsX,y)

In [16]:
for r in range(1,rounds):
    global_params = exp.aggregated_params[r]['params']
    ######## Train data
    MAE_train = []
    Latent_Train = pd.DataFrame()
    Label_Train = pd.Series(dtype='int64')
    for c in range(n_centers):
        dataset_c = pd.read_csv('== Local path to node' + str(c+1) + '.csv', delimiter=',', index_col=0)
        X_obs_c,Xk_c,ViewsX_c,y_c = test_data(dataset_c,norm,tot_views,dim_views)
        # Dataframe of latent space for LDA
        Latent_Train = Latent_Train.append(simu_latent(n_components,X_obs_c,ViewsX_c,global_params))
        Label_Train = Label_Train.append(y_c)
        # MAE Train
        MAE_train.append(MAE(X_obs_c,ViewsX_c,n_components,global_params))

    lda = LinearDiscriminantAnalysis()
    X_Train_lda = lda.fit_transform(Latent_Train, Label_Train.reindex(Latent_Train.index))

    ######## Test data
    X_obs_test,Xk_test,ViewsX_test,y_test = test_data(t_test,norm,tot_views,dim_views)

    Latent_Test = pd.DataFrame()
    Label_Test = pd.Series(dtype='int64')

    # Dataframe of latent space for LDA
    Latent_Test = Latent_Test.append(simu_latent(n_components,X_obs_test,ViewsX_test,global_params))
    Label_Test = Label_Test.append(y_test)
    # MAE Test
    MAE_test = MAE(X_obs_test,ViewsX_test,n_components,global_params)
    # LDA Test
    Size_tes = Latent_Test.shape[0]
    y_pred_test = lda.predict(Latent_Test)
    conf_LDA_Test = confusion_matrix(Label_Test.reindex(Latent_Test.index), y_pred_test)
    TP = np.diag(conf_LDA_Test)
    num_classes = len(np.unique(Label_Test))
    accuracy_LDA_Test = sum(TP) / Size_tes

    print('Round {}:'.format(r))
    print('MAE train (mean,std) = ({:.4f},{:.4f}) \
    \t MAE test = {:.4f} \
    \t Accuracy in latent space = {:.2f}'.format(np.mean(np.array(MAE_train)), \
                                                 np.std(np.array(MAE_train)), MAE_test, accuracy_LDA_Test))

Round 1:
MAE train (mean,std) = (0.0585,0.0035)     	 MAE test = 0.0827     	 Accuracy in latent space = 1.00
Round 2:
MAE train (mean,std) = (0.0298,0.0090)     	 MAE test = 0.0527     	 Accuracy in latent space = 1.00
Round 3:
MAE train (mean,std) = (0.0271,0.0068)     	 MAE test = 0.0470     	 Accuracy in latent space = 1.00
Round 4:
MAE train (mean,std) = (0.0253,0.0053)     	 MAE test = 0.0430     	 Accuracy in latent space = 1.00
