In [1]:
import pandas as pd
import os
import numpy as np

import Utils as ut
import CortesAlignmentFile as ca
import mySampler as ms
 
from sklearn.model_selection import StratifiedShuffleSplit

from threading import Thread, Lock

# The abstract base classes live in `collections.abc`; the old aliases on
# `collections` itself were removed in Python 3.10.
from collections.abc import Sequence


## Dataset Configuration

In [2]:
# Read the three cleaned feature tables and the outcome table from ./data,
# in the same order as the names on the left-hand side.
d_clinical, d_genetic, d_vampire, outputs = (
    pd.read_csv(os.path.join('data', csv_name))
    for csv_name in (
        'dataset_clinical_cleaned.csv',
        'dataset_genetic_cleaned_noOHE.csv',
        'dataset_vampire_cleaned.csv',
        'outputs_cleaned.csv',
    )
)

In [4]:
# Preview only the first rows instead of rendering the whole frame.
d_clinical.head(10)

Unnamed: 0,c_dbp,c_sbp,chol,dbp,dur_diab,e40,e41,e42,e_age,eversmoker,...,gh,hdl,pre,sbp,ther0,ther1,ther2,ther3,ther4,trig
0,74.000000,144.50000,4.225000,74.0,7.857632,1.0,-1.0,-1.0,80.720055,1,...,7.000000,1.620,1,144.5,1.0,-1.0,-1.0,-1.0,-1.0,1.855000
1,72.000000,138.00000,4.780000,72.0,20.824093,1.0,-1.0,-1.0,75.028061,1,...,7.000000,2.140,-1,138.0,1.0,-1.0,-1.0,-1.0,-1.0,0.930000
2,87.779999,151.78000,4.170000,81.0,9.987680,1.0,-1.0,-1.0,54.362766,1,...,7.350000,1.010,-1,146.0,-1.0,-1.0,-1.0,1.0,-1.0,4.460000
3,81.260002,142.25999,4.340000,79.0,4.479124,1.0,-1.0,-1.0,65.670090,1,...,7.000000,1.230,-1,140.0,-1.0,1.0,-1.0,-1.0,-1.0,2.160000
4,78.260002,139.25999,4.275000,76.0,3.181383,-1.0,1.0,-1.0,62.570843,1,...,7.100000,1.660,-1,137.0,-1.0,1.0,-1.0,-1.0,-1.0,1.090000
5,73.260002,152.52000,4.110000,71.0,18.811773,1.0,-1.0,-1.0,78.250511,1,...,9.197738,1.220,-1,148.0,-1.0,-1.0,1.0,-1.0,-1.0,2.430000
6,87.019997,161.52000,5.320000,82.5,5.062286,1.0,-1.0,-1.0,59.049965,1,...,6.200000,1.270,-1,157.0,-1.0,-1.0,1.0,-1.0,-1.0,1.820000
7,80.000000,143.00000,3.820000,80.0,2.154689,-1.0,1.0,-1.0,84.457222,1,...,6.500000,1.520,-1,143.0,1.0,-1.0,-1.0,-1.0,-1.0,1.765000
8,81.000000,144.00000,4.225000,81.0,6.310746,1.0,-1.0,-1.0,60.016426,1,...,5.900000,1.915,-1,144.0,1.0,-1.0,-1.0,-1.0,-1.0,1.250000
9,76.000000,134.00000,4.495000,76.0,8.506502,1.0,-1.0,-1.0,56.881588,1,...,7.600000,1.175,-1,134.0,1.0,-1.0,-1.0,-1.0,-1.0,1.360000


In [3]:
# NumPy arrays backing the clinical, genetic and vampire tables.
C, G, V = (frame.values for frame in (d_clinical, d_genetic, d_vampire))

In [4]:
# Target vectors taken from the "dement_fail" and "cvd_fail" columns.
y_d, y_c = (outputs[column].values for column in ("dement_fail", "cvd_fail"))

In [5]:
# A fixed seed keeps the 75/25 partition, and every result derived from it,
# reproducible across kernel restarts.
# Note: stratification uses y_d only; y_c is split with the same indices.
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=42)
tr_idx, ts_idx = next(splitter.split(C, y_d))

In [6]:
# Apply the same train/test row indices to each feature matrix.
C_, C_test = C[tr_idx], C[ts_idx]
G_, G_test = G[tr_idx], G[ts_idx]
V_, V_test = V[tr_idx], V[ts_idx]

In [7]:
# Split both target vectors with the indices used for the feature matrices.
y_d_, y_d_test = y_d[tr_idx], y_d[ts_idx]
y_c_, y_c_test = y_c[tr_idx], y_c[ts_idx]

In [8]:
# Parallel lists: position i of each list refers to the same dataset.
ds_names = ['clinic', 'genetic', 'vampire']
ds_list = [C_, G_, V_]
ds_test = [C_test, G_test, V_test]

## Kernel Definition

In [9]:
# Laplacian + Gaussian family; each dict maps a kernel name to the values
# handed to the sampler for that kernel.
kernel_names_3 = ['laplacian', 'gaussian']
kernel_type_3 = [
    dict(laplacian=[0.2, 0.6], gaussian=[0.3, 0.6]),
    dict(laplacian=[0.4, 0.9], gaussian=[0.5, 1]),
]

In [10]:
# Linear + Gaussian family; each dict maps a kernel name to the values
# handed to the sampler for that kernel.
kernel_names_4 = ['linear', 'gaussian']
kernel_type_4 = [
    dict(linear=[1], gaussian=[0.3, 0.6]),
    dict(linear=[1], gaussian=[0.5, 1]),
]

In [11]:
# Polynomial + Gaussian family; each dict maps a kernel name to the values
# handed to the sampler for that kernel.
kernel_names_5 = ['polynomial', 'gaussian']
kernel_type_5 = [
    dict(polynomial=[2, 7], gaussian=[0.3, 0.6]),
    dict(polynomial=[3, 5], gaussian=[0.5, 1]),
]

In [12]:
# Sigmoid + Gaussian family; each dict maps a kernel name to the values
# handed to the sampler for that kernel.
kernel_names_6 = ['sigmoid', 'gaussian']
kernel_type_6 = [
    dict(sigmoid=[0.2, 0.6], gaussian=[0.3, 0.6]),
    dict(sigmoid=[0.4, 0.9], gaussian=[0.5, 1]),
]

## Other shared parameters initialization

In [13]:
# Alignment estimator passed to the sampler and to the test routine.
estimator = ca.centeredKernelAlignment

# Penalty values given to ms.mySampleWrapper, and the validation-fold count
# forwarded to sampler.sample.
pen_params = [0.5, 0.7, 0.9, 1.3]
valid_fold = 3

# One lock per outcome; each is handed to the helpers that write that
# outcome's result files.
lock_dementia, lock_cardio = Lock(), Lock()

threads = []

## Thread

In [14]:
def child(sampler,estimator,ds_list,ds_test,y_d_,y_d_test,y_c_,y_c_test,lock_dementia,lock_cardio,valid_fold,exclusion_list,verbose,approach):
    """Run every kernel family on both outcomes and write the results.

    For each outcome (Dementia first, then Cardio) and each kernel family
    (Laplacian, Linear, Polynomial, Sigmoid - each paired with Gaussian) the
    function:

    1. calls ``sampler.sample`` on the training datasets and labels,
    2. calls ``votingOverCA`` on the returned result object,
    3. calls ``ut.testConfigurations`` to write to
       ``results_temp/<Outcome>_test.txt``,
    4. calls ``performancesFeatures`` on the result object to write to
       ``results_temp/<Outcome>_train.txt``.

    Parameters
    ----------
    sampler : object exposing ``sample(...)`` and ``normalize_kernels``.
    estimator : forwarded to ``sampler.sample`` and ``ut.testConfigurations``.
    ds_list, ds_test : training / test datasets, forwarded unchanged.
    y_d_, y_d_test : training / test labels for the Dementia runs.
    y_c_, y_c_test : training / test labels for the Cardio runs.
    lock_dementia, lock_cardio : locks handed to the writing helpers of the
        respective outcome.
    valid_fold : forwarded to ``sampler.sample``.
    exclusion_list : accepted for call compatibility; not used by this function.
    verbose : forwarded to ``sampler.sample`` and ``ut.testConfigurations``.
    approach : label appended to every header written to the result files.

    The kernel grids (``kernel_type_3`` .. ``kernel_type_6``), their name lists
    (``kernel_names_3`` .. ``kernel_names_6``), ``ds_names`` and ``ut`` are read
    from the notebook's global namespace at call time.
    """
    # (header label, kernel grid, kernel names), in execution order.
    kernel_setups = (
        ('Laplacian - Gaussian', kernel_type_3, kernel_names_3),
        ('Linear - Gaussian', kernel_type_4, kernel_names_4),
        ('Polynomial - Gaussian', kernel_type_5, kernel_names_5),
        ('Sigmoid - Gaussian', kernel_type_6, kernel_names_6),
    )
    # (outcome name used in headers and file names, train labels, test labels, lock)
    tasks = (
        ('Dementia', y_d_, y_d_test, lock_dementia),
        ('Cardio', y_c_, y_c_test, lock_cardio),
    )

    for task, y_train, y_test, lock in tasks:
        test_file = 'results_temp/' + task + '_test.txt'
        train_file = 'results_temp/' + task + '_train.txt'

        for label, kernel_grid, kernel_names in kernel_setups:
            title = task + ' ' + label

            result = sampler.sample(kernel_grid, estimator, ds_list, y_train, valid_fold = valid_fold, verbose=verbose)
            # The first returned value (the weight dict) is not needed here.
            _, w_list, lamb_list, sparsity = result.votingOverCA(ds_names, kernel_names)

            ut.testConfigurations(
                estimator, y_train, y_test, w_list, ds_list, ds_test,
                kernel_names, lamb_list, sparsity, 'classification', lock,
                fileToWrite = test_file,
                header = title + ' \n' + approach + '\n',
                normalize = sampler.normalize_kernels,
                verbose=verbose,
            )
            result.performancesFeatures(
                fileToWrite = train_file,
                header = '\n' + title + '\n' + approach + '\n',
                lock = lock,
            )
    

## L2 Penalty, Centering, Normalization

In [15]:
# L2 configuration: sparsity off, centering on, `normalizing` on.
sampler = ms.mySampleWrapper(
    pen_params,
    n_splits=3,
    test_size=0.25,
    sparsity=False,
    centering=True,
    normalizing=True,
)

In [16]:
# Run all kernel families on both outcomes with the sampler defined above.
child(
    sampler, estimator,
    ds_list, ds_test,
    y_d_, y_d_test,
    y_c_, y_c_test,
    lock_dementia, lock_cardio,
    valid_fold,
    # `child` accepts this argument but its body never reads it.
    exclusion_list=[
        [5,6,7,9,10,13,15,16,17,18,19],
        list(range(G.shape[1]-3)),
        [],
    ],
    verbose=False,
    approach='L2 - Centering - Normalizing',
)

## L2 Penalty, Centering, K-Normalization

In [17]:
# L2 configuration: sparsity off, centering on, `normalize_kernels` on.
sampler = ms.mySampleWrapper(
    pen_params,
    n_splits=3,
    test_size=0.25,
    sparsity=False,
    centering=True,
    normalize_kernels=True,
)

In [18]:
# Run all kernel families on both outcomes with the sampler defined above.
child(
    sampler, estimator,
    ds_list, ds_test,
    y_d_, y_d_test,
    y_c_, y_c_test,
    lock_dementia, lock_cardio,
    valid_fold,
    # `child` accepts this argument but its body never reads it.
    exclusion_list=[
        [5,6,7,9,10,13,15,16,17,18,19],
        list(range(G.shape[1]-3)),
        [],
    ],
    verbose=False,
    approach='L2 - Centering - K_Normalizing',
)

## L1 Penalty, Centering, Normalization

In [15]:
# L1 configuration: sparsity on, centering on, `normalizing` on.
sampler = ms.mySampleWrapper(
    pen_params,
    n_splits=3,
    test_size=0.25,
    sparsity=True,
    centering=True,
    normalizing=True,
)

In [16]:
# Run all kernel families on both outcomes with the sampler defined above.
child(
    sampler, estimator,
    ds_list, ds_test,
    y_d_, y_d_test,
    y_c_, y_c_test,
    lock_dementia, lock_cardio,
    valid_fold,
    # `child` accepts this argument but its body never reads it.
    exclusion_list=[
        [5,6,7,9,10,13,15,16,17,18,19],
        list(range(G.shape[1]-3)),
        [],
    ],
    verbose=False,
    approach='L1 - Centering - Normalizing',
)

## L1 Penalty, Centering, K-Normalization

In [17]:
# L1 configuration: sparsity on, centering on, `normalize_kernels` on.
sampler = ms.mySampleWrapper(
    pen_params,
    n_splits=3,
    test_size=0.25,
    sparsity=True,
    centering=True,
    normalize_kernels=True,
)

In [18]:
# Run all kernel families on both outcomes with the sampler defined above.
child(
    sampler, estimator,
    ds_list, ds_test,
    y_d_, y_d_test,
    y_c_, y_c_test,
    lock_dementia, lock_cardio,
    valid_fold,
    # `child` accepts this argument but its body never reads it.
    exclusion_list=[
        [5,6,7,9,10,13,15,16,17,18,19],
        list(range(G.shape[1]-3)),
        [],
    ],
    verbose=False,
    approach='L1 - Centering - K_Normalizing',
)