In [1]:
import pandas as pd
import os
import numpy as np

import Utils as ut
import CortesAlignmentFile as ca
import mySampler as ms
 
from sklearn.model_selection import StratifiedShuffleSplit

from threading import Thread, Lock

## Dataset Configuration

In [2]:
# Read the cleaned CSV tables from the local 'data' folder, in a fixed order:
# clinical features, genetic features (not one-hot encoded, per the file name),
# vampire features and the outcome columns.
# NOTE(review): d_genetic is loaded but not referenced by the cells visible here.
d_clinical, d_genetic, d_vampire, outputs = (
    pd.read_csv(os.path.join('data', csv_name))
    for csv_name in (
        'dataset_clinical_cleaned.csv',
        'dataset_genetic_cleaned_noOHE.csv',
        'dataset_vampire_cleaned.csv',
        'outputs_cleaned.csv',
    )
)

In [3]:
# Raw array views of the clinical (C) and vampire (V) feature tables.
C, V = (frame.values for frame in (d_clinical, d_vampire))

In [4]:
# Target vectors: y_d comes from the "dement_fail" column, y_c from "cvd_fail".
y_d, y_c = (outputs[column].values for column in ("dement_fail", "cvd_fail"))

In [5]:
# COMPUTATIONAL COMPLEXITY: Reduce #samples
# A single stratified shuffle split (stratified on the dementia labels y_d)
# keeps 75% of the rows for training and holds out 25% for testing.
# random_state is fixed so that "Restart Kernel -> Run All" reproduces the
# same split, and therefore the same downstream results, on every run.
tr_idx, ts_idx = next(
    StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=42).split(C, y_d)
)

In [6]:
# Slice each feature matrix into its training part (trailing underscore)
# and its held-out test part using the indices computed above.
C_, C_test = C[tr_idx], C[ts_idx]
V_, V_test = V[tr_idx], V[ts_idx]

In [7]:
# Same train/test slicing for the two target vectors
# (y_d from "dement_fail", y_c from "cvd_fail").
y_d_, y_d_test = y_d[tr_idx], y_d[ts_idx]
y_c_, y_c_test = y_c[tr_idx], y_c[ts_idx]

In [8]:
# Parallel lists: position i of ds_names labels position i of ds_list / ds_test.
ds_names = ['clinic', 'vampire']
ds_list, ds_test = [C_, V_], [C_test, V_test]

## Kernel Definition

In [9]:
# Laplacian + Gaussian kernel configuration.
# presumably one dict per dataset, in ds_list order, mapping a kernel name to
# its candidate parameter values -- TODO confirm against mySampler.
kernel_names_3 = ['laplacian', 'gaussian']
kernel_type_3 = [
    dict(laplacian=[0.2, 0.6], gaussian=[0.3, 0.7]),
    dict(laplacian=[0.4, 0.9], gaussian=[0.5, 1]),
]

In [10]:
# Linear + Gaussian kernel configuration (the one consumed by child()).
# presumably one dict per dataset, in ds_list order, mapping a kernel name to
# its candidate parameter values -- TODO confirm against mySampler.
kernel_names_4 = ['linear', 'gaussian']
kernel_type_4 = [
    dict(linear=[1], gaussian=[0.4, 0.7]),
    dict(linear=[1], gaussian=[0.5, 1]),
]

In [11]:
# Polynomial + Gaussian kernel configuration.
# presumably one dict per dataset, in ds_list order, mapping a kernel name to
# its candidate parameter values -- TODO confirm against mySampler.
kernel_names_5 = ['polynomial', 'gaussian']
kernel_type_5 = [
    dict(polynomial=[2, 7], gaussian=[0.4, 0.7]),
    dict(polynomial=[3, 5], gaussian=[0.5, 1]),
]

In [12]:
# Sigmoid + Gaussian kernel configuration.
# presumably one dict per dataset, in ds_list order, mapping a kernel name to
# its candidate parameter values -- TODO confirm against mySampler.
kernel_names_6 = ['sigmoid', 'gaussian']
kernel_type_6 = [
    dict(sigmoid=[0.2, 0.6], gaussian=[0.3, 0.7]),
    dict(sigmoid=[0.4, 0.9], gaussian=[0.5, 1]),
]

## Initialization of other shared parameters

In [13]:
# Objects shared by every worker thread started further down.
estimator = ca.centeredKernelAlignment  # stored as a reference; not called here
valid_fold = 3                          # forwarded to sampler.sample(valid_fold=...)

# One lock per outcome; each is handed to child() when a thread is created.
lock_dementia, lock_cardio = Lock(), Lock()

# Every started thread is recorded here so the final cell can join them all.
threads = list()

## Thread worker function

In [14]:
def child(sampler,estimator,ds_list,ds_test,y_d_,y_d_test,y_c_,y_c_test,lock_dementia,lock_cardio,valid_fold,exclusion_list,verbose,approach):
    """Run the dementia "Linear - Gaussian" experiment and write its reports.

    Intended to be used as a ``threading.Thread`` target. The function reads
    the module-level names ``kernel_type_4``, ``kernel_names_4`` and
    ``ds_names`` in addition to its arguments.

    Parameters
    ----------
    sampler : object exposing ``sample(...)``; the value it returns must offer
        ``votingOverCA(...)`` and ``performancesFeatures(...)``.
    estimator : forwarded to ``sampler.sample`` and ``ut.testConfigurations``.
    ds_list, ds_test : training / test datasets, forwarded unchanged.
    y_d_, y_d_test : training / test dementia targets.
    y_c_, y_c_test : accepted for interface compatibility; not used here.
    lock_dementia : lock forwarded to both report writers
        (presumably serializes writes to the report files -- TODO confirm in Utils).
    lock_cardio : accepted for interface compatibility; not used here.
    valid_fold : forwarded to ``sampler.sample`` as ``valid_fold``.
    exclusion_list : accepted for interface compatibility; not used here.
    verbose : verbosity flag forwarded to the sampler and to the test helper.
    approach : label inserted into the header of both reports.

    Returns
    -------
    None
    """
    results_dir = 'results_temp'
    test_report = results_dir + '/Dementia_test.txt'
    train_report = results_dir + '/Dementia_train.txt'

    # Both reports live under results_dir. Create it up front so a missing
    # folder cannot abort the worker thread part-way through the experiment
    # (assumes the writers open these paths directly -- TODO confirm in Utils).
    os.makedirs(results_dir, exist_ok=True)

    #DEMENTIA

    #Linear - Gaussian
    result1 = sampler.sample(kernel_type_4, estimator, ds_list, y_d_, valid_fold = valid_fold, verbose=verbose)
    w_dict, w_list, lamb_list, sparsity = result1.votingOverCA(ds_names, kernel_names_4)

    # Evaluate the selected configurations on the held-out data and log them.
    ut.testConfigurations(estimator, y_d_, y_d_test, w_list, ds_list, ds_test, kernel_names_4, lamb_list, sparsity, 'classification', lock_dementia, fileToWrite = test_report, header = 'Dementia Linear - Gaussian \n' + approach + '\n', verbose=verbose)

    # Log the training-side summary produced by the sampler result.
    result1.performancesFeatures(fileToWrite = train_report, header = '\nDementia Linear - Gaussian\n' + approach + '\n', lock = lock_dementia)
    

## L1 Penalty, Centering, K-Normalization

In [15]:
# Sampler for the "L1 - Centering - K Normalizing" approach named in the
# section heading: sparsity, centering and kernel normalization all enabled.
# NOTE(review): the meaning of the first positional list is defined by
# mySampler.mySampleWrapper -- confirm there.
sampler = ms.mySampleWrapper(
    [0.3, 0.5, 0.7],
    n_splits=3,
    test_size=0.25,
    sparsity=True,
    centering=True,
    normalize_kernels=True,
)

In [16]:
# Launch the experiment in a background thread. Arguments are passed by
# keyword so each value is visibly tied to the child() parameter it feeds.
t = Thread(
    target=child,
    kwargs=dict(
        sampler=sampler,
        estimator=estimator,
        ds_list=ds_list,
        ds_test=ds_test,
        y_d_=y_d_,
        y_d_test=y_d_test,
        y_c_=y_c_,
        y_c_test=y_c_test,
        lock_dementia=lock_dementia,
        lock_cardio=lock_cardio,
        valid_fold=valid_fold,
        exclusion_list=[[5,6,7,9,10,13,15,16,17,18,19], []],
        verbose=False,
        approach='L1 - Centering - K Normalizing',
    ),
)
t.start()
# Keep a handle on the running thread so the final cell can wait for it.
threads.append(t)

In [17]:
# Wait for every worker recorded in `threads` before reporting completion.
for worker in threads:
    worker.join()

print("Operations completed")

Operations completed
