In [1]:
import pandas as pd
import os
import numpy as np

import Utils as ut
import CortesAlignmentFile as ca
import mySampler as ms
 
from sklearn.model_selection import StratifiedShuffleSplit

In [2]:
# Load the four cleaned CSV tables from the local data/ folder:
# three feature sources plus the table holding the outcome columns.
d_clinical, d_genetic, d_vampire, outputs = (
    pd.read_csv(os.path.join('data', csv_name))
    for csv_name in (
        'dataset_clinical_cleaned.csv',
        'dataset_genetic_cleaned.csv',
        'dataset_vampire_cleaned.csv',
        'outputs_cleaned.csv',
    )
)

## Heart Attack

In [3]:
# Target vector ("cvd_fail" column) and array form of the three feature tables.
y = outputs["cvd_fail"].values
C, G, V = (table.values for table in (d_clinical, d_genetic, d_vampire))

In [4]:
# Draw one stratified split and keep only its "train" side, i.e. a 5% subsample
# of the rows that preserves the class proportions of y.
# random_state pins the draw: without it every run (and every section of this
# notebook) would work on a different subsample and the recorded results could
# not be reproduced or compared.
tr_idx, _ = next(
    StratifiedShuffleSplit(n_splits=1, test_size=0.95, random_state=0).split(C, y)
)

In [5]:
# Restrict the labels and the three feature matrices to the sampled rows.
# y is rebuilt from `outputs` instead of from itself so that re-running this
# cell never indexes an already-subsampled vector with full-size indices.
y = outputs["cvd_fail"].values[tr_idx]
C_ = C[tr_idx, :]
G_ = G[tr_idx, :]
V_ = V[tr_idx, :]

In [6]:
# Subsampled feature matrices handed to the sampler, ordered clinical, genetic, vampire.
ds_list = [C_, G_, V_]

## Basic approach

In [19]:
# Two hyper-parameter grids; each maps a kernel family name to the candidate
# values tried for it.
kernel_type = [
    {'linear': [0.5], 'polynomial': [2, 3, 7], 'gaussian': [0.1, 0.5, 0.7]},
    {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]},
]
# Alignment estimator and the sampler (3 outer splits, 25% held out) with default options.
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=0.25)

In [20]:
# Baseline run: search both grids on the three datasets with 3 validation folds.
result1 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y,
    valid_fold=3,
    verbose=True,
)

1 split out of 3 ...
	Working on config 1 of 2: {'linear': [0.5], 'polynomial': [2, 3, 7], 'gaussian': [0.1, 0.5, 0.7]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 2, 0.1], [0.5, 7, 0.7], [0.5, 2, 0.5]]
Triplet: 0.09234098812440253, <KernelFile.kernelWrapper object at 0x000002383A3D99E8>, [ 1.00490099e-11 -2.19870203e-17 -3.01066195e-02  5.58518592e-06
 -1.08303973e-06 -6.89268415e-01 -5.21428573e-09  4.31943381e-11
  7.23880269e-01]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]}
Fold no. 1
		Perfomances computed for 200
Fold no.

  'precision', 'predicted', average, warn_for)


In [21]:
# Report the statistics gathered for each configuration in the baseline run.
result1.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[2, 7, 2], [7, 7, 2], [3, 3, 2]], 'gaussian': [[0.1, 0.7, 0.5], [0.7, 0.7, 0.7], [0.7, 0.1, 0.5]]}, 'CA': (0.09371156738384001, 3.864378714677002e-06), 'Accuracy': (0.3142857142857143, 0.0), 'Precision': (0.3142857142857143, 0.0), 'Recall': (1.0, 0.0), 'eta': (array([ 1.36570444e-12, -7.32900676e-18, -5.05859001e-01,  4.43970397e-06,
       -9.57385799e-05, -1.54775623e-01, -6.11208005e-09,  4.79588004e-11,
        6.64758102e-01]), array([4.36027835e-23, 1.07428681e-34, 1.15190499e-01, 1.06232728e-11,
       1.81251458e-08, 1.55990455e-01, 2.90871695e-17, 1.70002109e-21,
       7.06686395e-03]))}
statistics of configuration 1
{'config': {'linear': [[0.2, 0.2, 0.2], [0.2, 0.2, 0.2], [0.2, 0.2, 0.2]], 'polynomial': [[4, 4, 5], [4, 5, 4], [8, 8, 5]], 'gaussian': [[1, 0.7, 0.7], [0.7, 1, 1], [0.7, 1, 0.7]]}, 'CA': (0.01436114825099443, 0.00046362046663154), 'Accuracy': (

## Kernel normalization

In [28]:
# wrapper testing
# Only one grid is searched in this experiment; the second grid
# ({'linear':[0.2], 'polynomial':[4, 5, 8], 'gaussian':[0.7, 1]}) is disabled.
kernel_type = [
    {'linear': [0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.3, 0.6]},
]
estimator = ca.centeredKernelAlignment
# Same split settings as the baseline, with kernel normalization switched on.
sampler = ms.mySampler(n_splits=3, test_size=0.25, normalize_kernels=True)

In [29]:
# Run the search with the kernel-normalizing sampler configured above.
# NOTE(review): the output recorded below ends with "LinAlgError: Singular matrix",
# so in the saved run this call did not return and result3 was presumably never bound.
result3 = sampler.sample(kernel_type, estimator, ds_list, y, valid_fold = 3, verbose=True)

1 split out of 3 ...
	Working on config 1 of 1: {'linear': [0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.3, 0.6]}
Fold no. 1
		Perfomances computed for 200
Fold no. 2
		Perfomances computed for 200
Fold no. 3
		Perfomances computed for 200
Validation complete, config selected:[[0.5, 3, 0.3], [0.5, 2, 0.6], [0.5, 3, 0.1]]
Triplet: 0.09028538758176574, <KernelFile.kernelWrapper object at 0x000002383832FF28>, [-5.03450656e-06  7.93548718e-06  7.02079141e-01  2.72812182e-05
 -1.40103560e-05 -7.12029240e-01  2.23289195e-05 -5.01685364e-06
  9.96191202e-03]
[ 1.  1. -1.  1. -1.  1. -1.  1.  1.  1. -1. -1.  1. -1.  1.  1.  1.  1.
  1.  1. -1. -1.  1. -1. -1.  1.  1.  1.  1. -1.  1.  1. -1. -1. -1.]
	Result of 0:
CA: 0.09028538758176574
[linear:0.5, polynomial:3, gaussian:0.3, ]
[linear:0.5, polynomial:2, gaussian:0.6, ]
[linear:0.5, polynomial:3, gaussian:0.1, ]

eta vector: [-5.03450656e-06  7.93548718e-06  7.02079141e-01  2.72812182e-05
 -1.40103560e-05 -7.12029240e-01  2.23289195e-05 -5

LinAlgError: Singular matrix

In [None]:
# NOTE(review): this cell has no execution count; result3 only exists if the
# preceding sampler.sample(...) cell runs to completion.
result3.performancesFeatures()

## Normalized data

In [7]:
# Same two grids as the baseline experiment.
kernel_type = [
    {'linear': [0.5], 'polynomial': [2, 3, 7], 'gaussian': [0.1, 0.5, 0.7]},
    {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# This time the sampler is asked to center the data.
sampler = ms.mySampler(n_splits=3, test_size=0.25, centering=True)

In [8]:
# exclusion_list carries one index list per dataset in ds_list (clinical,
# genetic, vampire): five clinical column indices, every genetic column index
# except the last three, and nothing for vampire.
# NOTE(review): presumably these columns are skipped by the centering step - confirm in mySampler.
result4 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y,
    valid_fold=3,
    verbose=True,
    exclusion_list=[
        [0, 3, 8, 13, 14],
        list(range(G.shape[1] - 3)),
        [],
    ],
)

1 split out of 3 ...
[-1.96078431e-02  8.87667597e+00  7.02138477e+01 -3.92156863e-01
  7.55931373e+01  1.37549020e+02  7.85350978e+01  1.40498232e+02
  1.92973529e+03  7.56601551e+00  4.40205882e+00  1.35960784e+00
  2.05779289e+00  6.86274510e-01  7.28235294e+01]
[9.99615532e-01 3.87754366e+01 8.30192207e+01 8.46212995e-01
 5.36163255e+01 1.37178970e+02 5.41519161e+01 1.42650677e+02
 1.22895658e+07 2.37507430e+00 5.54238388e-01 1.18347394e-01
 1.17778676e+00 5.29027297e-01 1.77720415e+03]
[1.54901961 1.82352941 1.51960784 0.91176471 1.73529412 1.48039216
 0.1372549  1.58823529 0.65686275 0.87254902 0.84313725 0.26470588
 0.32352941 0.34313725 1.69607843 0.1372549  0.74509804 0.40196078
 1.52941176 1.52941176 1.23529412 0.80392157 0.00980392 1.42156863
 0.20588235 1.53921569 1.05882353 1.78431373 1.09803922 0.66666667
 0.96078431 0.42156863 0.80392157 1.05882353 1.7745098  1.01960784
 0.55882353 1.81372549 0.5        0.37254902 0.87254902 0.91176471
 0.60784314 0.83333333 1.5        0

  X_c = np.divide(X-Mean, var)
  new_ts.append(np.divide(Xts-scale[0], scale[1]))


		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 3, 0.7], [0.5, 7, 0.1], [0.5, 7, 0.5]]
Triplet: 0.08397270868541006, <KernelFile.kernelWrapper object at 0x0000027542C606D8>, [-1.73616240e-14  4.96892360e-27  7.07105018e-01 -1.02074212e-09
  4.80525499e-10  4.14527898e-06  3.20081779e-16 -4.39760799e-36
 -7.07108545e-01]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]}
Fold no. 1
		Perfomances computed for 200
Fold no. 2
		Perfomances computed for 200
Fold no. 3
		Perfomances computed for 200
Validation complete, config selected:[[0.2, 4, 0.7], [0

		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 7, 0.1], [0.5, 2, 0.7], [0.5, 2, 0.7]]
Triplet: 0.09015437595910347, <KernelFile.kernelWrapper object at 0x0000027546FCE2E8>, [-2.19632861e-13  1.47935986e-53  1.22779534e-03 -5.51594402e-07
  4.77869230e-05  7.06602554e-01  2.37143005e-11 -6.21105930e-13
 -7.07609583e-01]
[ 1.  1. -1. -1.  1.  1. -1. -1.  1.  1. -1.  1.  1.  1. -1.  1. -1.  1.
  1.  1.  1. -1. -1.  1. -1.  1.  1.  1.  1.  1. -1.  1.  1. -1.  1.]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]}
Fold no. 1
		Perfomances computed for 200
Fold no. 2
		Perfomances computed for 200
Fold no. 3
		Perfomances computed for 200
Validation complete,

		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 3, 0.7], [0.5, 7, 0.5], [0.5, 3, 0.5]]
Triplet: 0.08994232273312291, <KernelFile.kernelWrapper object at 0x00000275450AC358>, [-1.33775634e-15  4.71107913e-28  6.96727155e-01 -3.58088331e-10
  3.40686675e-10  2.03216998e-02  2.06057903e-13 -1.97086171e-19
 -7.17048325e-01]
[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]}
Fold no. 1


  'precision', 'predicted', average, warn_for)


		Perfomances computed for 200
Fold no. 2
		Perfomances computed for 200
Fold no. 3
		Perfomances computed for 200
Validation complete, config selected:[[0.2, 8, 1], [0.2, 4, 0.7], [0.2, 4, 0.7]]
Triplet: 0.041488352749804835, <KernelFile.kernelWrapper object at 0x0000027548924358>, [-1.43493721e-13  3.39273295e-60  2.96723010e-01 -3.27746667e-09
  4.39038599e-08 -8.07122450e-01  3.21993184e-13 -3.43935501e-23
  5.10400633e-01]
[ 1.  1.  1.  1.  1.  1.  1.  1. -1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1. -1.  1.  1.  1.  1.  1.  1.  1.  1.]
	Result of 2:
CA: 0.08994232273312291
[linear:0.5, polynomial:3, gaussian:0.7, ]
[linear:0.5, polynomial:7, gaussian:0.5, ]
[linear:0.5, polynomial:3, gaussian:0.5, ]

eta vector: [-1.33775634e-15  4.71107913e-28  6.96727155e-01 -3.58088331e-10
  3.40686675e-10  2.03216998e-02  2.06057903e-13 -1.97086171e-19
 -7.17048325e-01]
CA: 0.041488352749804835
[linear:0.2, polynomial:8, gaussian:1, ]
[linear:0.2, polynomial:4, gaus

In [9]:
# Report the statistics gathered for each configuration in the centered-data run.
result4.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[3, 7, 7], [7, 2, 2], [3, 7, 3]], 'gaussian': [[0.7, 0.1, 0.5], [0.1, 0.7, 0.7], [0.7, 0.5, 0.5]]}, 'CA': (0.08802313579254549, 8.210474303550187e-06), 'Accuracy': (0.49523809523809526, 0.02303854875283447), 'Precision': (0.220703933747412, 0.024542605952273786), 'Recall': (0.5757575757575758, 0.17814508723599634), 'eta': (array([-7.94440804e-14,  1.81334384e-27,  4.68353323e-01, -1.84324411e-07,
        1.59292481e-05,  2.42309466e-01,  7.97355950e-12, -2.07035376e-13,
       -7.10588817e-01]), array([9.86924114e-27, 5.01583226e-54, 1.09121079e-01, 6.74436963e-14,
       5.07455727e-10, 1.07852836e-01, 1.23892519e-22, 8.57272119e-26,
       2.09044591e-05]))}
statistics of configuration 1
{'config': {'linear': [[0.2, 0.2, 0.2], [0.2, 0.2, 0.2], [0.2, 0.2, 0.2]], 'polynomial': [[4, 5, 8], [5, 8, 4], [8, 4, 4]], 'gaussian': [[0.7, 1, 0.7], [1, 1, 1], [1, 0.7, 0.7]]}, '

## Better data normalization with an improved one-hot encoder


In [3]:
# Encode the 'e4' and 'ther' columns of the clinical table with the v2 encoder.
# The raw table is re-read from disk instead of feeding d_clinical back into
# itself, so re-running this cell (or running the notebook top to bottom, where
# the same step appears in several sections) never re-encodes an
# already-encoded frame.
d_clinical = ut.oneHotEncoder_v2(
    pd.read_csv(os.path.join('data', 'dataset_clinical_cleaned.csv')),
    ['e4', 'ther'],
)

e4
[  1  10 100]
round n. 0
round n. 1
round n. 2
ther
[1.e+00 1.e+01 1.e+02 1.e+03 1.e+04]
round n. 0
round n. 1
round n. 2
round n. 3
round n. 4


In [4]:
# Target vector ("cvd_fail" column) and array form of the three feature tables.
y = outputs["cvd_fail"].values
C, G, V = (table.values for table in (d_clinical, d_genetic, d_vampire))

In [5]:
# Draw one stratified split and keep only its "train" side, i.e. a 5% subsample
# of the rows that preserves the class proportions of y.
# random_state pins the draw so the subsample is reproducible across runs and
# identical across the sections of this notebook.
tr_idx, _ = next(
    StratifiedShuffleSplit(n_splits=1, test_size=0.95, random_state=0).split(C, y)
)

In [6]:
# Restrict the labels and the three feature matrices to the sampled rows.
# y is rebuilt from `outputs` instead of from itself so that re-running this
# cell never indexes an already-subsampled vector with full-size indices.
y = outputs["cvd_fail"].values[tr_idx]
_C = C[tr_idx, :]
_G = G[tr_idx, :]
_V = V[tr_idx, :]

In [7]:
# Subsampled feature matrices handed to the sampler, ordered clinical, genetic, vampire.
ds_list = [_C, _G, _V]

In [8]:
# Two hyper-parameter grids; each maps a kernel family name to the candidate
# values tried for it.
kernel_type = [
    {'linear': [0.5], 'polynomial': [2, 3, 7], 'gaussian': [0.1, 0.5, 0.7]},
    {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# Sampler with data centering enabled.
sampler = ms.mySampler(n_splits=3, test_size=0.25, centering=True)

In [9]:
# exclusion_list carries one index list per dataset in ds_list (clinical,
# genetic, vampire): eleven clinical column indices, every genetic column index
# except the last three, and nothing for vampire.
# NOTE(review): presumably these columns are skipped by the centering step - confirm in mySampler.
result5 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y,
    valid_fold=3,
    verbose=True,
    exclusion_list=[
        [5, 6, 7, 9, 10, 13, 15, 16, 17, 18, 19],
        list(range(G.shape[1] - 3)),
        [],
    ],
)

1 split out of 3 ...
	Working on config 1 of 2: {'linear': [0.5], 'polynomial': [2, 3, 7], 'gaussian': [0.1, 0.5, 0.7]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 7, 0.7], [0.5, 2, 0.1], [0.5, 7, 0.5]]
[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]}
Fold no. 1


  'precision', 'predicted', average, warn_for)


		Perfomances computed for 200
Fold no. 2
		Perfomances computed for 200
Fold no. 3
		Perfomances computed for 200
Validation complete, config selected:[[0.2, 8, 1], [0.2, 8, 0.7], [0.2, 8, 0.7]]
[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.]
	Result of 0:
CA: 0.1061111389255871
[linear:0.5, polynomial:7, gaussian:0.7, ]
[linear:0.5, polynomial:2, gaussian:0.1, ]
[linear:0.5, polynomial:7, gaussian:0.5, ]

eta vector: [ 1.29253840e-09 -5.15768544e-31 -7.07091217e-01  3.08108890e-08
 -2.97565387e-06 -2.62337612e-05 -2.13607532e-13  1.25125172e-57
  7.07122344e-01]
CA: 0.09709107268060974
[linear:0.2, polynomial:8, gaussian:1, ]
[linear:0.2, polynomial:8, gaussian:0.7, ]
[linear:0.2, polynomial:8, gaussian:0.7, ]

eta vector: [ 2.53535628e-09 -2.99514522e-34  6.28410529e-01  7.66476389e-09
 -7.64698543e-09 -7.65668632e-01 -1.67613375e-12  1.79448301e-64
  1.37301688e-01]
2 split out of 3 ...
	

In [10]:
# Report the statistics gathered for each configuration in this run.
result5.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[7, 2, 7], [7, 7, 3], [2, 2, 7]], 'gaussian': [[0.7, 0.1, 0.5], [0.5, 0.7, 0.1], [0.5, 0.5, 0.1]]}, 'CA': (0.10391968137749624, 0.0001549367633434862), 'Accuracy': (0.5619047619047619, 0.03065759637188209), 'Precision': (0.10476190476190476, 0.021950113378684803), 'Recall': (0.3333333333333333, 0.22222222222222224), 'eta': (array([ 1.06884497e-09, -1.14517820e-12, -7.07234913e-01,  1.31023182e-08,
       -1.31240758e-06,  4.71262300e-01,  2.57150753e-14,  2.22502436e-28,
        2.35975225e-01]), array([6.57025498e-19, 2.62286621e-24, 2.03630214e-08, 1.69956038e-16,
       1.53732774e-12, 1.11056451e-01, 4.53087338e-26, 9.90146700e-56,
       1.10989844e-01]))}
statistics of configuration 1
{'config': {'linear': [[0.2, 0.2, 0.2], [0.2, 0.2, 0.2], [0.2, 0.2, 0.2]], 'polynomial': [[8, 8, 8], [8, 4, 8], [4, 5, 5]], 'gaussian': [[1, 0.7, 0.7], [0.7, 1, 1], [0.7, 1, 1]]}, 

## Original Data, Centering and Normalization

In [3]:
# Encode the 'e4' and 'ther' columns of the clinical table with the v2 encoder.
# The raw table is re-read from disk instead of feeding d_clinical back into
# itself: this same step appears in several sections, so a top-to-bottom run
# would otherwise pass an already-encoded frame to the encoder.
d_clinical = ut.oneHotEncoder_v2(
    pd.read_csv(os.path.join('data', 'dataset_clinical_cleaned.csv')),
    ['e4', 'ther'],
)

e4
[  1  10 100]
round n. 0
round n. 1
round n. 2
ther
[1.e+00 1.e+01 1.e+02 1.e+03 1.e+04]
round n. 0
round n. 1
round n. 2
round n. 3
round n. 4


In [4]:
# Target vector ("cvd_fail" column) and array form of the three feature tables.
y = outputs["cvd_fail"].values
C, G, V = (table.values for table in (d_clinical, d_genetic, d_vampire))

In [5]:
# Draw one stratified split and keep only its "train" side, i.e. a 5% subsample
# of the rows that preserves the class proportions of y.
# random_state pins the draw so the subsample is reproducible across runs and
# identical across the sections of this notebook.
tr_idx, _ = next(
    StratifiedShuffleSplit(n_splits=1, test_size=0.95, random_state=0).split(C, y)
)

In [6]:
# Restrict the labels and the three feature matrices to the sampled rows.
# y is rebuilt from `outputs` instead of from itself so that re-running this
# cell never indexes an already-subsampled vector with full-size indices.
y = outputs["cvd_fail"].values[tr_idx]
_C = C[tr_idx, :]
_G = G[tr_idx, :]
_V = V[tr_idx, :]

In [7]:
# Subsampled feature matrices handed to the sampler, ordered clinical, genetic, vampire.
ds_list = [_C, _G, _V]

In [8]:
# Two hyper-parameter grids; each maps a kernel family name to the candidate
# values tried for it.
kernel_type = [
    {'linear': [0.5], 'polynomial': [2, 3, 7], 'gaussian': [0.1, 0.5, 0.7]},
    {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# Sampler with data centering enabled.
sampler = ms.mySampler(n_splits=3, test_size=0.25, centering=True)

In [9]:
# exclusion_list carries one index list per dataset in ds_list (clinical,
# genetic, vampire): eleven clinical column indices, every genetic column index
# except the last three, and nothing for vampire.
# NOTE(review): presumably these columns are skipped by the centering step - confirm in mySampler.
result6 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y,
    valid_fold=3,
    verbose=True,
    exclusion_list=[
        [5, 6, 7, 9, 10, 13, 15, 16, 17, 18, 19],
        list(range(G.shape[1] - 3)),
        [],
    ],
)

1 split out of 3 ...
	Working on config 1 of 2: {'linear': [0.5], 'polynomial': [2, 3, 7], 'gaussian': [0.1, 0.5, 0.7]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 7, 0.7], [0.5, 3, 0.7], [0.5, 2, 0.7]]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]}
Fold no. 1
		Perfomances computed for 200
Fold no. 2
		Perfomances computed for 200
Fold no. 3
		Perfomances computed for 200
Validation complete, config selected:[[0.2, 4, 0.7], [0.2, 5, 1], [0.2, 5, 1]]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 

  'precision', 'predicted', average, warn_for)


		Perfomances computed for 200
Fold no. 2
		Perfomances computed for 200
Fold no. 3
		Perfomances computed for 200
Validation complete, config selected:[[0.2, 8, 0.7], [0.2, 5, 0.7], [0.2, 8, 0.7]]
[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.]
	Result of 1:
CA: 0.012434514218279425
[linear:0.5, polynomial:7, gaussian:0.1, ]
[linear:0.5, polynomial:3, gaussian:0.7, ]
[linear:0.5, polynomial:3, gaussian:0.5, ]

eta vector: [ 1.07658256e-10 -7.57824906e-32 -1.50659825e-07 -1.57816890e-10
  4.25875607e-09  7.07106852e-01  7.63499144e-17 -4.10447767e-30
 -7.07106710e-01]
CA: 0.04289669155428136
[linear:0.2, polynomial:8, gaussian:0.7, ]
[linear:0.2, polynomial:5, gaussian:0.7, ]
[linear:0.2, polynomial:8, gaussian:0.7, ]

eta vector: [ 2.88144921e-11 -8.47349738e-36  5.93796940e-02 -1.15781513e-10
  4.31202920e-10  6.75544572e-01  1.66859688e-16 -1.82423579e-68
 -7.34924203e-01]
3 split out of 3

In [10]:
# Report the statistics gathered for each configuration in this run.
result6.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[7, 3, 2], [7, 3, 3], [2, 7, 3]], 'gaussian': [[0.7, 0.7, 0.7], [0.1, 0.7, 0.5], [0.7, 0.1, 0.5]]}, 'CA': (0.03949602620026164, 0.0015031077655264877), 'Accuracy': (0.5619047619047619, 0.03065759637188209), 'Precision': (0.10476190476190476, 0.021950113378684803), 'Recall': (0.3333333333333333, 0.22222222222222224), 'eta': (array([ 6.07665491e-11, -4.13770287e-14,  2.11674414e-02, -1.18105823e-10,
        1.92877336e-09,  1.85171625e-01, -3.62880880e-14,  1.79935677e-20,
       -2.06338968e-01]), array([2.96555139e-21, 3.42411700e-27, 3.64163887e-01, 2.83694016e-21,
       2.85842461e-18, 1.40038051e-01, 2.63956367e-27, 6.48141172e-40,
       4.18485701e-01]))}
statistics of configuration 1
{'config': {'linear': [[0.2, 0.2, 0.2], [0.2, 0.2, 0.2], [0.2, 0.2, 0.2]], 'polynomial': [[4, 5, 5], [8, 5, 8], [8, 8, 8]], 'gaussian': [[0.7, 1, 1], [0.7, 0.7, 0.7], [1, 0.7, 0.7]

## Centering, Normalization, Kernel Normalization

In [7]:
# Encode the 'e4' and 'ther' columns of the clinical table with the v2 encoder.
# The raw table is re-read from disk instead of feeding d_clinical back into
# itself: this same step appears in several sections, so a top-to-bottom run
# would otherwise pass an already-encoded frame to the encoder.
d_clinical = ut.oneHotEncoder_v2(
    pd.read_csv(os.path.join('data', 'dataset_clinical_cleaned.csv')),
    ['e4', 'ther'],
)

e4
[  1  10 100]
round n. 0
round n. 1
round n. 2
ther
[1.e+00 1.e+01 1.e+02 1.e+03 1.e+04]
round n. 0
round n. 1
round n. 2
round n. 3
round n. 4


In [8]:
# Target vector ("cvd_fail" column) and array form of the three feature tables.
y = outputs["cvd_fail"].values
C, G, V = (table.values for table in (d_clinical, d_genetic, d_vampire))

In [9]:
# Draw one stratified split and keep only its "train" side, i.e. a 5% subsample
# of the rows that preserves the class proportions of y.
# random_state pins the draw so the subsample is reproducible across runs and
# identical across the sections of this notebook.
tr_idx, _ = next(
    StratifiedShuffleSplit(n_splits=1, test_size=0.95, random_state=0).split(C, y)
)

In [10]:
# Restrict the labels and the three feature matrices to the sampled rows.
# y is rebuilt from `outputs` instead of from itself so that re-running this
# cell never indexes an already-subsampled vector with full-size indices.
y = outputs["cvd_fail"].values[tr_idx]
_C = C[tr_idx, :]
_G = G[tr_idx, :]
_V = V[tr_idx, :]

In [11]:
# Subsampled feature matrices handed to the sampler, ordered clinical, genetic, vampire.
ds_list = [_C, _G, _V]

In [12]:
# Only one grid is searched in this experiment; the second grid
# ({'linear':[0.2], 'polynomial':[4, 5, 8], 'gaussian':[0.7, 1]}) is disabled.
kernel_type = [
    {'linear': [0.5], 'polynomial': [2, 3, 5], 'gaussian': [0.1, 0.3, 0.5]},
]
estimator = ca.centeredKernelAlignment
# Both data centering and kernel normalization are switched on.
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    centering=True,
    normalize_kernels=True,
)

In [9]:
# exclusion_list carries one index list per dataset in ds_list (clinical,
# genetic, vampire): eleven clinical column indices, every genetic column index
# except the last three, and nothing for vampire.
# NOTE(review): presumably these columns are skipped by the centering step - confirm in mySampler.
result7 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y,
    valid_fold=3,
    verbose=True,
    exclusion_list=[
        [5, 6, 7, 9, 10, 13, 15, 16, 17, 18, 19],
        list(range(G.shape[1] - 3)),
        [],
    ],
)

1 split out of 3 ...
	Working on config 1 of 1: {'linear': [0.5], 'polynomial': [2, 3, 5], 'gaussian': [0.1, 0.3, 0.5]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 5, 0.5], [0.5, 2, 0.5], [0.5, 5, 0.5]]
[-1. -1. -1.  1. -1. -1.  1.  1.  1.  1. -1. -1.  1.  1. -1. -1.  1.  1.
 -1. -1. -1. -1.  1.  1. -1.  1.  1. -1. -1.  1.  1.  1.  1.  1.  1.]
	Result of 0:
CA: 0.013378588765402537
Accuracy: 0.5428571428571428
Recall: 0.6363636363636364
[linear:0.5, polynomial:5, gaussian:0.5, ]
[linear:0.5, polynomial:2, gaussian:0.5, ]
[linear:0.5, polynomial:5, gaussian:0.5, ]

eta vector: [-1.51092697e-04 -1.09413839e-05 -6.52865460e-01  6.02626479e-08
 -2.03251862e-08 -9.82140691e-02 -3.6037

In [10]:
# Report the statistics gathered for each configuration in this run.
result7.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[5, 2, 5], [5, 5, 2], [2, 2, 2]], 'gaussian': [[0.5, 0.5, 0.5], [0.3, 0.1, 0.5], [0.3, 0.5, 0.1]]}, 'CA': (0.06672566692931318, 0.0014229870768741673), 'Accuracy': (0.6285714285714286, 0.004897959183673473), 'Precision': (0.4561403508771929, 0.010514004309018157), 'Recall': (0.42424242424242425, 0.023875114784205693), 'eta': (array([ 0.251892  , -0.12502566,  0.2258764 ,  0.00626228, -0.00204844,
       -0.17852045, -0.00039985, -0.00061236, -0.04456262]), array([5.66617999e-02, 1.58971239e-02, 3.87660273e-01, 3.52815180e-05,
       3.30347014e-05, 2.46452829e-02, 1.72553622e-07, 4.18461027e-07,
       3.51066144e-01]))}


## Sparsity in eta, Double normalization

In [3]:
# Encode the 'e4' and 'ther' columns of the clinical table with the v2 encoder.
# The raw table is re-read from disk instead of feeding d_clinical back into
# itself: this same step appears in several sections, so a top-to-bottom run
# would otherwise pass an already-encoded frame to the encoder.
d_clinical = ut.oneHotEncoder_v2(
    pd.read_csv(os.path.join('data', 'dataset_clinical_cleaned.csv')),
    ['e4', 'ther'],
)

e4
[  1  10 100]
round n. 0
round n. 1
round n. 2
ther
[1.e+00 1.e+01 1.e+02 1.e+03 1.e+04]
round n. 0
round n. 1
round n. 2
round n. 3
round n. 4


In [4]:
# Target vector ("cvd_fail" column) and array form of the three feature tables.
y = outputs["cvd_fail"].values
C, G, V = (table.values for table in (d_clinical, d_genetic, d_vampire))

In [5]:
# Draw one stratified split and keep only its "train" side, i.e. a 5% subsample
# of the rows that preserves the class proportions of y.
# random_state pins the draw so the subsample is reproducible across runs and
# identical across the sections of this notebook.
tr_idx, _ = next(
    StratifiedShuffleSplit(n_splits=1, test_size=0.95, random_state=0).split(C, y)
)

In [6]:
# Restrict the labels and the three feature matrices to the sampled rows.
# y is rebuilt from `outputs` instead of from itself so that re-running this
# cell never indexes an already-subsampled vector with full-size indices.
y = outputs["cvd_fail"].values[tr_idx]
_C = C[tr_idx, :]
_G = G[tr_idx, :]
_V = V[tr_idx, :]

In [7]:
# Subsampled feature matrices handed to the sampler, ordered clinical, genetic, vampire.
ds_list = [_C, _G, _V]

In [8]:
# wrapper testing
# Only one grid is searched in this experiment; the second grid
# ({'linear':[0.2], 'polynomial':[4, 5, 8], 'gaussian':[0.7, 1]}) is disabled.
kernel_type = [
    {'linear': [0.5], 'polynomial': [2, 3, 4], 'gaussian': [0.1, 0.25, 0.4]},
]
estimator = ca.centeredKernelAlignment
# Centering and kernel normalization as before, plus a sparsity setting of 0.1.
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    sparsity=0.1,
    centering=True,
    normalize_kernels=True,
)

In [9]:
# Run the search with the sparsity-enabled sampler configured above.
# exclusion_list carries one index list per dataset in ds_list (clinical, genetic, vampire).
# NOTE(review): the output recorded below ends with "KeyboardInterrupt", so in the
# saved run this call was stopped before returning and result8 was presumably never bound.
result8 = sampler.sample(kernel_type, estimator, ds_list, y, valid_fold = 3, verbose=True, exclusion_list = [[5,6,7,9,10,13,15,16,17,18,19], list(range(G.shape[1]-3)), []])

1 split out of 3 ...
	Working on config 1 of 1: {'linear': [0.5], 'polynomial': [2, 3, 4], 'gaussian': [0.1, 0.25, 0.4]}
Fold no. 1
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.80855216  0.          0.          0.         -0.40850048
  0.         -0.00366334  0.42350602]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.80854397  0.          0.          0.         -0.40850852
  0.         -0.0036633   0.4235139 ]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/

Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791128  0.         -0.06028242  0.          0.
  0.         -0.00506105  0.02266048]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791128  0.         -0.06028242  0.          0.
  0.         -0.00506105  0.02266048]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99303619  0.         -0.06035019  0.          0.
  0.06883927 -0.07064395  0.0225294 ]
Lasso configurat

Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791331  0.         -0.06028799  0.          0.
  0.         -0.00460366  0.0226536 ]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791331  0.         -0.0602881   0.          0.
  0.         -0.00460366  0.02265362]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791331  0.         -0.0602881   0.          0.
  0.        

Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791128  0.         -0.06028242  0.          0.
  0.         -0.00506105  0.02266048]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99303619  0.         -0.06035019  0.          0.
  0.06883927 -0.07064395  0.0225294 ]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99303618  0.         -0.06035031  0.          0.
  0.06883927

Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791331  0.         -0.0602881   0.          0.
  0.         -0.00460366  0.02265362]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791331  0.         -0.0602881   0.          0.
  0.         -0.00460366  0.02265362]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791129  0.         -0.0602823   0.          0.
  0.        

Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.8055347   0.          0.          0.         -0.40776896
  0.05481691 -0.05625623  0.42269217]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.8055266   0.          0.          0.         -0.40777699
  0.05481631 -0.05625561  0.42270002]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.8055266   0.          0.          0.         

Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.80854397  0.          0.          0.         -0.40850852
  0.         -0.0036633   0.4235139 ]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.80854508  0.          0.          0.         -0.40850283
  0.         -0.00402848  0.42351396]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.80853689  0.          0.          0.         

Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99303618  0.         -0.06035031  0.          0.
  0.06883927 -0.07064395  0.02252941]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99303618  0.         -0.06035031  0.          0.
  0.06883927 -0.07064395  0.02252941]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.80855216  0.          0.          0.         -0.40850048
  0.

Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791129  0.         -0.0602823   0.          0.
  0.         -0.00506105  0.02266047]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791128  0.         -0.06028242  0.          0.
  0.         -0.00506105  0.02266048]
Lasso configuration dropping
Lasso dropping completed
Lasso working on configuration 3000/12096
Lasso working on configuration 6000/12096
Lasso working on configuration 9000/12096
Lasso working on configuration 12000/12096
Smart approach converged. Picked: [ 0.          0.99791128  0.         -0.06028242  0.          0.
  0.        

KeyboardInterrupt: 

In [None]:
# NOTE(review): this cell has no execution count; result8 only exists if the
# preceding sampler.sample(...) cell runs to completion.
result8.performancesFeatures()