In [9]:
import pandas as pd
import os
import numpy as np

import Utils as ut
import CortesAlignmentFile as ca
import mySampler as ms

from sklearn.model_selection import StratifiedShuffleSplit

In [10]:
# Load the three cleaned feature tables and the outcome table from ./data.
# The order of the file names must match the order of the target names on the left.
d_clinical, d_genetic, d_vampire, outputs = (
    pd.read_csv(os.path.join('data', csv_name))
    for csv_name in (
        'dataset_clinical_cleaned.csv',
        'dataset_genetic_cleaned.csv',
        'dataset_vampire_cleaned.csv',
        'outputs_cleaned.csv',
    )
)

## Heart Attack

In [11]:
# Target vector: the "cvd_fail" column of the outcome table, as a NumPy array.
y = outputs["cvd_fail"].values

# One NumPy matrix per data source (clinical, genetic, vampire), same order throughout.
C, G, V = (frame.values for frame in (d_clinical, d_genetic, d_vampire))

In [12]:
# Draw a single stratified subsample of the data: with test_size=0.95 only the
# remaining 5% of the rows end up in tr_idx; the complementary indices are discarded.
# random_state is fixed so that a fresh kernel (Restart & Run All) selects the same
# rows every time; without it each run works on a different subsample and the
# results below cannot be reproduced.
tr_idx, _ = next(
    StratifiedShuffleSplit(n_splits=1, test_size=0.95, random_state=42).split(C, y)
)

In [13]:
# Restrict the labels and every feature matrix to the rows selected in tr_idx.
# NOTE: the names are rebound in place, so this cell is not idempotent -- re-running
# it without re-running the cells that define y, C, G, V indexes the already-reduced
# arrays again.
y = y[tr_idx]
C, G, V = (view[tr_idx, :] for view in (C, G, V))

In [14]:
# The feature matrices (clinical, genetic, vampire) gathered in one list; this list is
# what gets passed to sampler.sample(...) together with the labels y.
ds_list = [C, G, V]

In [19]:
# Two candidate configurations, one dict each. Keys name a kernel family and map to
# a list of values to try -- presumably the kernel hyper-parameters; confirm against
# mySampler / CortesAlignmentFile.
kernel_type = [
    dict(linear=[0.5], polynomial=[2, 3, 7], gaussian=[0.1, 0.5, 0.7]),
    dict(linear=[0.2], polynomial=[4, 5, 8], gaussian=[0.7, 1]),
]

# Sampler with 3 splits and a 25% test share; the estimator is passed to it by
# reference (not called here).
sampler = ms.mySampler(n_splits=3, test_size=0.25)
estimator = ca.centeredKernelAlignment

In [20]:
# Evaluate every configuration in kernel_type on the three data sources.
# With verbose=True the call prints per-split / per-config / per-fold progress
# (see the recorded output below); the returned object is kept for the summary cell.
result1 = sampler.sample(kernel_type, estimator, ds_list, y, valid_fold = 3, verbose=True)

1 split out of 3 ...
	Working on config 1 of 2: {'linear': [0.5], 'polynomial': [2, 3, 7], 'gaussian': [0.1, 0.5, 0.7]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 2, 0.1], [0.5, 7, 0.7], [0.5, 2, 0.5]]
Triplet: 0.09234098812440253, <KernelFile.kernelWrapper object at 0x000002383A3D99E8>, [ 1.00490099e-11 -2.19870203e-17 -3.01066195e-02  5.58518592e-06
 -1.08303973e-06 -6.89268415e-01 -5.21428573e-09  4.31943381e-11
  7.23880269e-01]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]}
Fold no. 1
		Perfomances computed for 200
Fold no.

  'precision', 'predicted', average, warn_for)


In [21]:
# Print, for each configuration, the selected parameters together with the CA,
# Accuracy, Precision, Recall and eta statistics (format as in the recorded output).
# NOTE(review): in the recorded run configuration 0 shows Recall 1.0 with Precision
# equal to Accuracy, and the vector printed during sampling is all ones -- this looks
# like a degenerate "always positive" predictor; verify before trusting the numbers.
result1.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[2, 7, 2], [7, 7, 2], [3, 3, 2]], 'gaussian': [[0.1, 0.7, 0.5], [0.7, 0.7, 0.7], [0.7, 0.1, 0.5]]}, 'CA': (0.09371156738384001, 3.864378714677002e-06), 'Accuracy': (0.3142857142857143, 0.0), 'Precision': (0.3142857142857143, 0.0), 'Recall': (1.0, 0.0), 'eta': (array([ 1.36570444e-12, -7.32900676e-18, -5.05859001e-01,  4.43970397e-06,
       -9.57385799e-05, -1.54775623e-01, -6.11208005e-09,  4.79588004e-11,
        6.64758102e-01]), array([4.36027835e-23, 1.07428681e-34, 1.15190499e-01, 1.06232728e-11,
       1.81251458e-08, 1.55990455e-01, 2.90871695e-17, 1.70002109e-21,
       7.06686395e-03]))}
statistics of configuration 1
{'config': {'linear': [[0.2, 0.2, 0.2], [0.2, 0.2, 0.2], [0.2, 0.2, 0.2]], 'polynomial': [[4, 4, 5], [4, 5, 4], [8, 8, 5]], 'gaussian': [[1, 0.7, 0.7], [0.7, 1, 1], [0.7, 1, 0.7]]}, 'CA': (0.01436114825099443, 0.00046362046663154), 'Accuracy': (

In [28]:
# Wrapper testing: same pipeline as above, but with kernel normalisation switched on
# and a single, smaller configuration. The second grid of the earlier run
# ({'linear':[0.2], 'polynomial':[4, 5, 8], 'gaussian':[0.7, 1]}) is deliberately
# left out here.
# These assignments replace the kernel_type / estimator / sampler defined earlier.
kernel_type = [
    dict(linear=[0.5], polynomial=[2, 3], gaussian=[0.1, 0.3, 0.6]),
]

sampler = ms.mySampler(n_splits=3, test_size=0.25, normalize_kernels=True)
estimator = ca.centeredKernelAlignment

In [29]:
# Run the sampler with the normalised-kernel configuration defined in the previous cell.
# NOTE(review): the recorded output of this cell ends with "LinAlgError: Singular matrix",
# i.e. the call did not complete in that run, so result3 may never have been assigned.
# The cause is inside the sampler/estimator code and is not visible from this notebook.
result3 = sampler.sample(kernel_type, estimator, ds_list, y, valid_fold = 3, verbose=True)

1 split out of 3 ...
	Working on config 1 of 1: {'linear': [0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.3, 0.6]}
Fold no. 1
		Perfomances computed for 200
Fold no. 2
		Perfomances computed for 200
Fold no. 3
		Perfomances computed for 200
Validation complete, config selected:[[0.5, 3, 0.3], [0.5, 2, 0.6], [0.5, 3, 0.1]]
Triplet: 0.09028538758176574, <KernelFile.kernelWrapper object at 0x000002383832FF28>, [-5.03450656e-06  7.93548718e-06  7.02079141e-01  2.72812182e-05
 -1.40103560e-05 -7.12029240e-01  2.23289195e-05 -5.01685364e-06
  9.96191202e-03]
[ 1.  1. -1.  1. -1.  1. -1.  1.  1.  1. -1. -1.  1. -1.  1.  1.  1.  1.
  1.  1. -1. -1.  1. -1. -1.  1.  1.  1.  1. -1.  1.  1. -1. -1. -1.]
	Result of 0:
CA: 0.09028538758176574
[linear:0.5, polynomial:3, gaussian:0.3, ]
[linear:0.5, polynomial:2, gaussian:0.6, ]
[linear:0.5, polynomial:3, gaussian:0.1, ]

eta vector: [-5.03450656e-06  7.93548718e-06  7.02079141e-01  2.72812182e-05
 -1.40103560e-05 -7.12029240e-01  2.23289195e-05 -5

LinAlgError: Singular matrix

In [None]:
# Summarise the normalised-kernel run. This cell has no execution count: it was never
# run, and it requires result3, which only exists if the sampler.sample(...) call that
# assigns it finished without raising.
result3.performancesFeatures()