In [1]:
# Standard library
import os
# `collections.Sequence` was a deprecated alias that was removed in Python 3.10;
# the ABC lives in `collections.abc`.
from collections.abc import Sequence

# Third-party
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit

# Project-local modules
import Utils as ut
import CortesAlignmentFile as ca
import mySampler as ms


## Dataset Configuration

In [2]:
# Read the three cleaned feature tables and the outcomes table from the local `data` folder.
d_clinical, d_genetic, d_vampire, outputs = (
    pd.read_csv(os.path.join('data', csv_name))
    for csv_name in (
        'dataset_clinical_cleaned.csv',
        'dataset_genetic_cleaned_noOHE.csv',
        'dataset_vampire_cleaned.csv',
        'outputs_cleaned.csv',
    )
)

In [3]:
# Array values of the clinical (C), genetic (G) and vampire (V) feature tables.
C, G, V = (table.values for table in (d_clinical, d_genetic, d_vampire))

In [4]:
# Target vectors, one per outcome column of the outcomes table:
# y_d <- "dement_fail", y_c <- "cvd_fail".
y_d, y_c = (outputs[label_col].values for label_col in ("dement_fail", "cvd_fail"))

In [5]:
# COMPUTATIONAL COMPLEXITY: reduce the number of samples.
# A single stratified shuffle split (stratified on y_d) keeps 75% of the rows as the
# working set and holds out the remaining 25% for the final test.
# random_state is pinned so that the split, and therefore every result computed from it,
# is identical after Restart Kernel -> Run All.
tr_idx, ts_idx = next(StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=42).split(C, y_d))

In [6]:
# Row-slice each feature matrix into its working part (`*_`) and held-out part (`*_test`).
C_, C_test = C[tr_idx], C[ts_idx]
G_, G_test = G[tr_idx], G[ts_idx]
V_, V_test = V[tr_idx], V[ts_idx]

In [7]:
# Apply the same row split to both outcome vectors.
y_d_, y_d_test = y_d[tr_idx], y_d[ts_idx]
y_c_, y_c_test = y_c[tr_idx], y_c[ts_idx]

In [8]:
# Names, working matrices and held-out matrices are kept in the same order:
# clinical, genetic, vampire.
ds_names = ['clinic', 'genetic', 'vampire']
ds_list, ds_test = [C_, G_, V_], [C_test, G_test, V_test]

## Kernel Definition

In [9]:
# Kernel families under evaluation, followed by the list of configurations handed to the
# sampler. Each configuration maps a kernel family to its list of candidate parameter
# values; the two configurations differ only in the gaussian candidates.
kernel_names = ['linear', 'gaussian']
kernel_type = [
    dict(linear=[1], gaussian=gaussian_grid)
    for gaussian_grid in ([0.4, 0.7], [0.5, 1])
]

In [10]:
# Shared handle to `centeredKernelAlignment` from the project module `CortesAlignmentFile`.
# It is bound without being called here; the same object is passed to every
# `sampler.sample(...)` and `ut.testConfigurations(...)` call in this notebook.
estimator = ca.centeredKernelAlignment

## Basic approach

In [11]:
# Baseline sampler: 3 splits with test_size 0.25; every other `mySampler` option is left
# at its default (no centering / normalizing / kernel-normalization flags are passed).
sampler = ms.mySampler(n_splits=3, test_size=.25)

### Dementia

In [12]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_d_, valid_fold = 3, verbose=True)


1 split out of 3 ...
	Working on config 1 of 2: {'linear': [1], 'gaussian': [0.4, 0.7]}
Fold no. 1
Fold no. 2
Fold no. 3
Validation complete, config selected:[[1, 0.4], [1, 0.7], [1, 0.7]]
	Result of 1:
CA: 0.015180968250613602
Accuracy: 0.4883268482490272
Precision: 0.15476190476190477
Recall: 0.43820224719101125
[linear:1, gaussian:0.4, ]
[linear:1, gaussian:0.7, ]
[linear:1, gaussian:0.7, ]

eta vector: [3.79175478e-05 3.54440968e-08 1.25911154e-04 3.54428078e-08
 2.06170103e-11 3.54428013e-08]


	Completed in 0.65 minutes
	Working on config 2 of 2: {'linear': [1], 'gaussian': [0.5, 1]}
Fold no. 1
Fold no. 2
Fold no. 3
Validation complete, config selected:[[1, 0.5], [1, 1], [1, 1]]
	Result of 1:
CA: 0.015180968250613602
Accuracy: 0.4883268482490272
Precision: 0.15476190476190477
Recall: 0.43820224719101125
[linear:1, gaussian:0.4, ]
[linear:1, gaussian:0.7, ]
[linear:1, gaussian:0.7, ]

eta vector: [3.79175478e-05 3.54440968e-08 1.25911154e-04 3.54428078e-08
 2.06170103e-11 3.54428

In [13]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_d_, y_d_test, w_list, ds_list, ds_test, kernel_names)

Perfomances computed for dictionary settings 1:
	Accuracy: 0.5189504373177842
	Precision: 0.20612813370473537
	Recall: 0.6218487394957983
Perfomances computed for dictionary settings 2:
	Accuracy: 0.4620991253644315
	Precision: 0.17616580310880828
	Recall: 0.5714285714285714


In [14]:
w_dict

[{'clinic': {'linear': 1, 'gaussian': 0.4},
  'genetic': {'linear': 1, 'gaussian': 0.4},
  'vampire': {'linear': 1, 'gaussian': 0.4}},
 {'clinic': {'linear': 1, 'gaussian': 0.5},
  'genetic': {'linear': 1, 'gaussian': 0.5},
  'vampire': {'linear': 1, 'gaussian': 1}}]

In [15]:
result1.performancesFeatures()

statistics of configuration 1
{'config': {'linear': [[1, 1, 1], [1, 1, 1], [1, 1, 1]], 'gaussian': [[0.4, 0.7, 0.7], [0.7, 0.4, 0.4], [0.4, 0.4, 0.4]]}, 'CA': (0.01571098627003439, 1.0665164690373546e-06), 'Accuracy': (0.49351491569390404, 0.0002657959326493283), 'Precision': (0.16286935286935286, 0.00014686930218065733), 'Recall': (0.46441947565543074, 0.0008696993926131663), 'eta': (array([3.92250104e-05, 4.13477957e-08, 1.52292874e-04, 4.13488508e-08,
       1.04989398e-11, 4.13469998e-08]), array([1.35197061e-11, 6.19684115e-17, 9.56168161e-10, 6.20172596e-17,
       7.18557348e-23, 6.19976830e-17]))}
statistics of configuration 2
{'config': {'linear': [[1, 1, 1], [1, 1, 1], [1, 1, 1]], 'gaussian': [[0.5, 1, 1], [1, 0.5, 1], [0.5, 0.5, 0.5]]}, 'CA': (0.015710986269934592, 1.0665164691404118e-06), 'Accuracy': (0.49351491569390404, 0.0002657959326493283), 'Precision': (0.16286935286935286, 0.00014686930218065733), 'Recall': (0.46441947565543074, 0.0008696993926131663), 'eta': (array(

### Cardio

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_c_, valid_fold = 3, verbose=True)


1 split out of 3 ...
	Working on config 1 of 2: {'linear': [1], 'gaussian': [0.4, 0.7]}
Fold no. 1
Fold no. 2
Fold no. 3
Validation complete, config selected:[[1, 0.7], [1, 0.7], [1, 0.7]]
	Result of 1:
CA: 0.01259490841709298
Accuracy: 0.4669260700389105
Precision: 0.30303030303030304
Recall: 0.47058823529411764
[linear:1, gaussian:0.7, ]
[linear:1, gaussian:0.7, ]
[linear:1, gaussian:0.7, ]

eta vector: [2.99031754e-05 6.69556518e-08 2.59919442e-04 6.69566558e-08
 1.75456113e-12 6.69566385e-08]


	Completed in 0.7 minutes
	Working on config 2 of 2: {'linear': [1], 'gaussian': [0.5, 1]}
Fold no. 1
Fold no. 2
Fold no. 3
Validation complete, config selected:[[1, 1], [1, 1], [1, 1]]
	Result of 1:
CA: 0.01259490841709298
Accuracy: 0.4669260700389105
Precision: 0.30303030303030304
Recall: 0.47058823529411764
[linear:1, gaussian:0.7, ]
[linear:1, gaussian:0.7, ]
[linear:1, gaussian:0.7, ]

eta vector: [2.99031754e-05 6.69556518e-08 2.59919442e-04 6.69566558e-08
 1.75456113e-12 6.69566385e-

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

## Kernel normalization

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, normalize_kernels = True)

### Dementia

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_d_, valid_fold = 3, verbose=True)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_d_, y_d_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

### Cardio

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_c_, valid_fold = 3, verbose=True)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

## Normalized data

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, normalizing = True)

### Dementia

In [None]:
# Run the sampler on the dementia labels with 3 validation folds.
# `exclusion_list` holds one list of column indices per dataset, in `ds_list` order:
#   - clinic:  the hard-coded indices below,
#   - genetic: every column except the last three,
#   - vampire: none.
# NOTE(review): presumably these are the columns left out of the normalization step
# (e.g. categorical / binary features) -- confirm against mySampler, and consider naming
# this list once in a config cell since it is repeated in every section that follows.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_d_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_d_, y_d_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

### Cardio

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_c_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

## Origin Data Centering

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, centering = True)

### Dementia

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_d_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_d_, y_d_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

### Cardio

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_c_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

## Origin Data Centering and Normalization

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, centering = True, normalizing = True)

### Dementia

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_d_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_d_, y_d_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

### Cardio

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_c_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

## Normalization, Kernel Normalization

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, normalizing = True, normalize_kernels = True)

### Dementia

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_d_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_d_, y_d_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

### Cardio

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_c_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

## Centering, Kernel Normalization

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, centering = True, normalize_kernels = True)

### Dementia

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_d_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_d_, y_d_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

### Cardio

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_c_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

## Centering, Normalization, Kernel Normalization

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, centering = True, normalizing = True, normalize_kernels = True)

### Dementia

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_d_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_d_, y_d_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

### Cardio

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_c_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

## L2 Penalty, Centering, K-Normalization

In [None]:
# Sampler for the "L2 Penalty, Centering, K-Normalization" section: 3 splits, test_size 0.25,
# lamb = 0.5 (presumably the L2 penalty weight -- confirm against mySampler), centering on.
# NOTE(review): this cell previously passed normalize_kernels = False, which contradicted the
# section title and the parallel "Sparsity in eta, Centering, K-Normalization" section, where
# the flag is True. It is enabled here to match the title; if the un-normalized run was
# intended, revert the flag and rename the section instead.
sampler = ms.mySampler(n_splits=3, test_size=0.25, lamb = 0.5, centering = True, normalize_kernels = True)

### Dementia

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_d_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_d_, y_d_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

### Cardio

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_c_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

## Sparsity in eta, Centering, K-Normalization

In [None]:
# COMPUTATIONAL COMPLEXITY: reduce the number of samples further for this section.
# A single stratified shuffle split (stratified on y_d) keeps 55% of the rows as the
# working set and holds out the remaining 45% for the final test.
# random_state is pinned so that the split, and therefore every result computed from it,
# is identical after Restart Kernel -> Run All.
tr_idx, ts_idx = next(StratifiedShuffleSplit(n_splits=1, test_size=0.45, random_state=42).split(C, y_d))

In [None]:
# Re-slice each feature matrix with the new indices: working part (`*_`) and
# held-out part (`*_test`). This overwrites the subsets built earlier in the notebook.
C_, C_test = C[tr_idx], C[ts_idx]
G_, G_test = G[tr_idx], G[ts_idx]
V_, V_test = V[tr_idx], V[ts_idx]

In [None]:
# Re-slice both outcome vectors with the new row indices.
y_d_, y_d_test = y_d[tr_idx], y_d[ts_idx]
y_c_, y_c_test = y_c[tr_idx], y_c[ts_idx]

In [None]:
# Rebuild the dataset lists from the re-sliced matrices; names, working matrices and
# held-out matrices stay in the same order: clinical, genetic, vampire.
ds_names = ['clinic', 'genetic', 'vampire']
ds_list, ds_test = [C_, G_, V_], [C_test, G_test, V_test]

In [None]:
# Sampler for the sparsity experiment: 3 splits with test_size 0.45, sparsity = 0.7,
# centering and kernel normalization both switched on.
# NOTE(review): the exact meaning of `sparsity` (e.g. an L1 weight on eta vs. a target
# fraction of zeros) is not visible from this notebook -- confirm against mySampler.
sampler = ms.mySampler(n_splits=3, test_size=.45, sparsity = 0.7, centering = True, normalize_kernels = True)

### Dementia

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_d_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_d_, y_d_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

### Cardio

In [None]:
result1 = sampler.sample(kernel_type, estimator, ds_list, y_c_, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()