In [1]:
import pandas as pd
import os
import numpy as np

import Utils as ut
import CortesAlignmentFile as ca
import mySampler as ms
 
from sklearn.model_selection import StratifiedShuffleSplit

  from collections import Sequence


In [2]:
from sklearn import datasets

# Synthetic binary classification problem: 300 samples, 30 features,
# 10 of them informative and none redundant.
# random_state is pinned so that "Restart & Run All" regenerates the same data;
# without it every run draws a different dataset and the results below cannot
# be reproduced.
# Reference signature (scikit-learn defaults):
# make_classification(n_samples=100, n_features=20, n_informative=2, n_redundant=2, n_repeated=0,
#                     n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0,
#                     hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)
ds, label = datasets.make_classification(
    n_samples=300,
    n_features=30,
    n_informative=10,
    n_redundant=0,
    n_classes=2,
    random_state=0,
)
# Names given to the three feature views that are sliced out of `ds` further down.
ds_names = ['ds1', 'ds2', 'ds3']

In [3]:
# Recode class 0 as -1, in place. A boolean mask does the same job as indexing
# with np.where(...), and the assignment is idempotent: re-running the cell
# finds no zeros left to change.
label[label == 0] = -1

## Toy

In [4]:
def random_sampling(X, y, n_samples, random_state=None):
    """Draw a single stratified shuffle split and return its index arrays.

    Parameters
    ----------
    X : array-like
        Samples to split; passed straight to ``StratifiedShuffleSplit.split``.
    y : array-like
        Class labels used for stratification.
    n_samples : float or int
        Forwarded unchanged as ``test_size`` of ``StratifiedShuffleSplit``.
        Despite the name, a float is therefore the *fraction* of samples put in
        the second (test) index array, and an int is their absolute number.
    random_state : int, RandomState instance or None, default=None
        Seed for the shuffle. The default keeps the previous behaviour (a
        different split on every call); pass an int for a reproducible split.

    Returns
    -------
    tuple of (ndarray, ndarray)
        ``(train_indices, test_indices)`` of the one split that is generated.
    """
    # StratifiedShuffleSplit is imported once in the notebook's import cell.
    splitter = StratifiedShuffleSplit(
        n_splits=1, test_size=n_samples, random_state=random_state
    )
    return next(splitter.split(X, y))

In [5]:
# Stratified hold-out: 0.25 is forwarded as the test fraction.
tr_idx, ts_idx = random_sampling(ds, label, n_samples=0.25)

# Three feature views of 10 columns each, carved out of the 30 columns of `ds`.
ds1 = np.hstack((ds[:, 0:5], ds[:, 10:15]))     # columns 0-4 and 10-14
ds2 = np.hstack((ds[:, 5:10], ds[:, 15:20]))    # columns 5-9 and 15-19
ds3 = ds[:, 20:]                                # columns 20 onwards
ds_list_complete = [ds1, ds2, ds3]

# Apply the same row split to every view; the individual *_tr / *_ts names
# refer to the very same arrays that are stored in the lists.
ds_list = [view[tr_idx] for view in ds_list_complete]
ds_test = [view[ts_idx] for view in ds_list_complete]
ds1_tr, ds2_tr, ds3_tr = ds_list
ds1_ts, ds2_ts, ds3_ts = ds_test

# Labels for the training part and for the held-out part.
y_ = label[tr_idx]
y_test = label[ts_idx]

## Basic approach

In [6]:
# Basic approach, laplacian family: sampler with no preprocessing options set.
kernel_names = ['laplacian', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=0.25)

In [7]:
# Run the sampler configured above on the training views (ds_list) and labels (y_);
# verbose=True produces the per-split / per-fold progress log stored below.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)


1 split out of 3 ...
	Working on config 1 of 2: {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Validation complete, config selected:[[0.7, 2, 0.1], [0.5, 2, 0.1], [0.7, 2, 0.5]]
	R

In [8]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.72
	Precision: 0.7352941176470589
	Recall: 0.6756756756756757
Perfomances computed for 2 dictionary settings:
	Accuracy: 0.6266666666666667
	Precision: 0.6153846153846154
	Recall: 0.6486486486486487


In [9]:
# Show the kernel parameters selected for each dataset (one dict per grid).
w_dict

[{'ds1': {'laplacian': 0.7, 'polynomial': 2, 'gaussian': 0.5},
  'ds2': {'laplacian': 0.5, 'polynomial': 2, 'gaussian': 0.1},
  'ds3': {'laplacian': 0.7, 'polynomial': 3, 'gaussian': 0.5}},
 {'ds1': {'laplacian': 0.7, 'polynomial': 5, 'gaussian': 0.7},
  'ds2': {'laplacian': 0.7, 'polynomial': 5, 'gaussian': 1},
  'ds3': {'laplacian': 1, 'polynomial': 5, 'gaussian': 0.7}}]

In [10]:
# Print the per-configuration statistics gathered during sampling (basic, laplacian).
result1.performancesFeatures()

statistics of configuration 1
{'config': {'laplacian': [[0.7, 0.5, 0.7], [0.7, 0.5, 0.1], [0.7, 0.5, 0.7]], 'polynomial': [[2, 2, 2], [3, 2, 3], [2, 2, 3]], 'gaussian': [[0.1, 0.1, 0.5], [0.5, 0.1, 0.5], [0.5, 0.1, 0.1]]}, 'CA': (0.20992099992347793, 0.00013493209654370412), 'Accuracy': (0.7836257309941521, 6.839711364180474e-05), 'Precision': (0.8135097001763668, 0.0004920081246947793), 'Recall': (0.7471264367816092, 0.00026423569824283197), 'eta': (array([ 6.14655515e-01,  2.62587780e-03, -6.87598633e-02, -7.58343223e-01,
        4.20339794e-03,  8.92650108e-02,  7.97663773e-02, -2.75938366e-04,
        4.96379394e-02]), array([5.17319203e-03, 3.14652425e-06, 3.05652368e-03, 3.09249022e-03,
       2.58075570e-06, 3.36084510e-04, 1.03178585e-02, 2.92671854e-07,
       3.58458466e-03]))}
statistics of configuration 2
{'config': {'laplacian': [[0.7, 0.7, 1], [0.7, 0.7, 1], [0.7, 0.7, 0.7]], 'polynomial': [[5, 5, 5], [5, 5, 5], [5, 5, 5]], 'gaussian': [[0.7, 1, 0.7], [0.7, 0.7, 0.7], [0.

In [11]:
# Basic approach, sigmoid family: sampler with no preprocessing options set.
kernel_names = ['sigmoid', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=0.25)

In [12]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
# NOTE(review): this rebinds `result1`, replacing the laplacian result of the
# basic run above; a distinct name would keep both available.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)


1 split out of 3 ...
	Working on config 1 of 2: {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Validation complete, config selected:[[0.1, 2, 0.5], [0.5, 2, 0.1], [0.1, 3, 0.5]]
	Res

In [13]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.68
	Precision: 0.7096774193548387
	Recall: 0.5945945945945946
Perfomances computed for 2 dictionary settings:
	Accuracy: 0.6266666666666667
	Precision: 0.6363636363636364
	Recall: 0.5675675675675675


In [14]:
# Show the kernel parameters selected for each dataset (one dict per grid).
w_dict

[{'ds1': {'sigmoid': 0.1, 'polynomial': 2, 'gaussian': 0.5},
  'ds2': {'sigmoid': 0.7, 'polynomial': 2, 'gaussian': 0.1},
  'ds3': {'sigmoid': 0.1, 'polynomial': 3, 'gaussian': 0.5}},
 {'ds1': {'sigmoid': 1, 'polynomial': 5, 'gaussian': 0.7},
  'ds2': {'sigmoid': 1, 'polynomial': 5, 'gaussian': 1},
  'ds3': {'sigmoid': 0.7, 'polynomial': 8, 'gaussian': 0.7}}]

In [15]:
# Print the per-configuration statistics gathered during sampling (basic, sigmoid).
result1.performancesFeatures()

statistics of configuration 1
{'config': {'sigmoid': [[0.1, 0.5, 0.1], [0.1, 0.7, 0.1], [0.1, 0.7, 0.1]], 'polynomial': [[2, 2, 3], [2, 2, 3], [2, 2, 3]], 'gaussian': [[0.5, 0.1, 0.5], [0.5, 0.1, 0.5], [0.5, 0.5, 0.1]]}, 'CA': (0.20399896753470548, 0.00011223200538800328), 'Accuracy': (0.8245614035087718, 0.00020519134092541295), 'Precision': (0.856172839506173, 0.0022778539856729125), 'Recall': (0.7931034482758621, 0.0007927070947284983), 'eta': (array([-0.12504215,  0.0429415 , -0.35054069,  0.03899163,  0.04438565,
       -0.11057872, -0.15040946,  0.00325964,  0.53593381]), array([5.62125962e-04, 2.09020886e-04, 3.44779262e-01, 4.14522297e-04,
       4.00645077e-04, 1.64060298e-01, 1.52988202e-02, 6.00092871e-06,
       8.33424411e-03]))}
statistics of configuration 2
{'config': {'sigmoid': [[1, 1, 0.7], [1, 1, 0.7], [0.7, 1, 0.7]], 'polynomial': [[5, 5, 5], [5, 5, 8], [5, 5, 8]], 'gaussian': [[0.7, 1, 0.7], [0.7, 1, 0.7], [1, 1, 0.7]]}, 'CA': (0.14951426934052173, 4.65596561519697

## Kernel normalization

In [16]:
# Kernel normalization, laplacian family: sampler created with normalize_kernels=True.
kernel_names = ['laplacian', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=0.25, normalize_kernels=True)

In [17]:
# Run the sampler configured above on the training views (ds_list) and labels (y_);
# verbose=True produces the per-split / per-fold progress log stored below.
result3 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)


1 split out of 3 ...
	Working on config 1 of 2: {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Validation complete, config selected:[[0.1, 2, 0.1], [0.1, 2, 0.1], [0.1, 3, 0.1]]
	R

In [18]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result3.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.6533333333333333
	Precision: 0.6571428571428571
	Recall: 0.6216216216216216
Perfomances computed for 2 dictionary settings:
	Accuracy: 0.6266666666666667
	Precision: 0.6097560975609756
	Recall: 0.6756756756756757


In [19]:
# Show the kernel parameters selected for each dataset (one dict per grid).
w_dict

[{'ds1': {'laplacian': 0.1, 'polynomial': 2, 'gaussian': 0.1},
  'ds2': {'laplacian': 0.1, 'polynomial': 2, 'gaussian': 0.1},
  'ds3': {'laplacian': 0.1, 'polynomial': 3, 'gaussian': 0.1}},
 {'ds1': {'laplacian': 0.7, 'polynomial': 5, 'gaussian': 0.7},
  'ds2': {'laplacian': 0.7, 'polynomial': 5, 'gaussian': 0.7},
  'ds3': {'laplacian': 1, 'polynomial': 8, 'gaussian': 0.7}}]

In [20]:
# Print the per-configuration statistics gathered during sampling (kernel normalization, laplacian).
result3.performancesFeatures()

statistics of configuration 1
{'config': {'laplacian': [[0.1, 0.1, 0.1], [0.1, 0.5, 0.1], [0.1, 0.1, 0.7]], 'polynomial': [[2, 2, 3], [2, 2, 3], [3, 2, 2]], 'gaussian': [[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.1, 0.1, 0.1]]}, 'CA': (0.18610658120373405, 6.857163696358553e-05), 'Accuracy': (0.8187134502923975, 0.0004787797954926293), 'Precision': (0.859047619047619, 0.0002684807256235833), 'Recall': (0.7701149425287356, 0.001849649887699828), 'eta': (array([-0.39429685,  0.3661883 ,  0.09406684,  0.01747295,  0.76457308,
        0.03044364,  0.03462316, -0.16471734,  0.13638624]), array([0.02971482, 0.00811473, 0.0011146 , 0.00121664, 0.01722287,
       0.00308383, 0.00186114, 0.00461318, 0.00190984]))}
statistics of configuration 2
{'config': {'laplacian': [[0.7, 0.7, 1], [0.7, 0.7, 1], [0.7, 0.7, 1]], 'polynomial': [[8, 5, 8], [5, 5, 8], [5, 5, 5]], 'gaussian': [[0.7, 0.7, 0.7], [0.7, 0.7, 1], [0.7, 0.7, 0.7]]}, 'CA': (0.14669048267466384, 2.2943917552913315e-05), 'Accuracy': (0.73099415

In [None]:
# Kernel normalization, sigmoid family: sampler created with normalize_kernels=True.
kernel_names = ['sigmoid', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=0.25, normalize_kernels=True)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
# NOTE(review): rebinds `result1`, which every sigmoid run in this notebook reuses;
# the result of the previous sigmoid run is lost.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (kernel normalization, sigmoid).
result1.performancesFeatures()

## Normalized data

In [None]:
# Normalized data, laplacian family: sampler created with normalizing=True.
kernel_names = ['laplacian', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=0.25, normalizing=True)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
result4 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result4.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (normalized data, laplacian).
result4.performancesFeatures()

In [None]:
# Normalized data, sigmoid family: sampler created with normalizing=True.
kernel_names = ['sigmoid', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=0.25, normalizing=True)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
# NOTE(review): rebinds `result1`, which every sigmoid run in this notebook reuses;
# the result of the previous sigmoid run is lost.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (normalized data, sigmoid).
result1.performancesFeatures()

## Origin Data Centering

In [None]:
# Data centering, laplacian family: sampler created with centering=True.
kernel_names = ['laplacian', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=0.25, centering=True)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
result5 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result5.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (data centering, laplacian).
result5.performancesFeatures()

In [None]:
# Data centering, sigmoid family: sampler created with centering=True.
kernel_names = ['sigmoid', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=0.25, centering=True)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
# NOTE(review): rebinds `result1`, which every sigmoid run in this notebook reuses;
# the result of the previous sigmoid run is lost.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (data centering, sigmoid).
result1.performancesFeatures()

## Origin Data Centering and Normalization

In [None]:
# Centering + normalization, laplacian family: centering=True and normalizing=True.
kernel_names = ['laplacian', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=0.25, centering=True, normalizing=True)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
result6 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result6.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (centering + normalization, laplacian).
result6.performancesFeatures()

In [None]:
# Centering + normalization, sigmoid family: centering=True and normalizing=True.
kernel_names = ['sigmoid', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=0.25, centering=True, normalizing=True)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
# NOTE(review): rebinds `result1`, which every sigmoid run in this notebook reuses;
# the result of the previous sigmoid run is lost.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (centering + normalization, sigmoid).
result1.performancesFeatures()

## Normalization, Kernel Normalization

In [None]:
# Normalization + kernel normalization, laplacian family:
# normalizing=True and normalize_kernels=True.
kernel_names = ['laplacian', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    normalizing=True,
    normalize_kernels=True,
)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
result10 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result10.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (normalization + kernel normalization, laplacian).
result10.performancesFeatures()

In [None]:
# Normalization + kernel normalization, sigmoid family:
# normalizing=True and normalize_kernels=True.
kernel_names = ['sigmoid', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    normalizing=True,
    normalize_kernels=True,
)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
# NOTE(review): rebinds `result1`, which every sigmoid run in this notebook reuses;
# the result of the previous sigmoid run is lost.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (normalization + kernel normalization, sigmoid).
result1.performancesFeatures()

## Centering, Kernel Normalization

In [None]:
# Centering + kernel normalization, laplacian family:
# centering=True and normalize_kernels=True.
kernel_names = ['laplacian', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    centering=True,
    normalize_kernels=True,
)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
result11 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result11.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (centering + kernel normalization, laplacian).
result11.performancesFeatures()

In [None]:
# Centering + kernel normalization, sigmoid family:
# centering=True and normalize_kernels=True.
kernel_names = ['sigmoid', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    centering=True,
    normalize_kernels=True,
)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
# NOTE(review): rebinds `result1`, which every sigmoid run in this notebook reuses;
# the result of the previous sigmoid run is lost.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (centering + kernel normalization, sigmoid).
result1.performancesFeatures()

## Centering, Normalization, Kernel Normalization

In [None]:
# Centering + normalization + kernel normalization, laplacian family:
# all three preprocessing flags are switched on.
kernel_names = ['laplacian', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    centering=True,
    normalizing=True,
    normalize_kernels=True,
)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
result7 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result7.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (all three preprocessing flags, laplacian).
result7.performancesFeatures()

In [None]:
# Centering + normalization + kernel normalization, sigmoid family:
# all three preprocessing flags are switched on.
kernel_names = ['sigmoid', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    centering=True,
    normalizing=True,
    normalize_kernels=True,
)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
# NOTE(review): rebinds `result1`, which every sigmoid run in this notebook reuses;
# the result of the previous sigmoid run is lost.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (all three preprocessing flags, sigmoid).
result1.performancesFeatures()

## L2 Penalty, Centering, K-Normalization

In [21]:
# L2 penalty, laplacian family: lamb=0.5 with centering=True.
# NOTE(review): the section title mentions "K-Normalization", but the sampler is
# created with normalize_kernels=False — confirm which of the two is intended.
kernel_names = ['laplacian', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    lamb=0.5,
    centering=True,
    normalize_kernels=False,
)

In [22]:
# Run the sampler configured above on the training views (ds_list) and labels (y_);
# verbose=True produces the per-split / per-fold progress log stored below.
result9 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)


1 split out of 3 ...
	Working on config 1 of 2: {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Validation complete, config selected:[[0.1, 2, 0.5], [0.5, 3, 0.5], [0.1, 2, 0.1]]
	R

In [23]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result9.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.64
	Precision: 0.6470588235294118
	Recall: 0.5945945945945946
Perfomances computed for 2 dictionary settings:
	Accuracy: 0.6
	Precision: 0.5813953488372093
	Recall: 0.6756756756756757


In [24]:
# Show the kernel parameters selected for each dataset (one dict per grid).
w_dict

[{'ds1': {'laplacian': 0.1, 'polynomial': 3, 'gaussian': 0.5},
  'ds2': {'laplacian': 0.5, 'polynomial': 3, 'gaussian': 0.5},
  'ds3': {'laplacian': 0.1, 'polynomial': 2, 'gaussian': 0.1}},
 {'ds1': {'laplacian': 0.7, 'polynomial': 5, 'gaussian': 0.7},
  'ds2': {'laplacian': 0.7, 'polynomial': 8, 'gaussian': 0.7},
  'ds3': {'laplacian': 1, 'polynomial': 5, 'gaussian': 1}}]

In [25]:
# Print the per-configuration statistics gathered during sampling (L2 penalty, laplacian).
# NOTE(review): the stored output reports a mean validation accuracy of about 0.16
# for configuration 1, well below chance for a two-class problem while the
# held-out accuracy is about 0.64 — worth checking (possible label-sign flip?).
result9.performancesFeatures()

statistics of configuration 1
{'config': {'laplacian': [[0.1, 0.5, 0.1], [0.1, 0.5, 0.1], [0.1, 0.1, 0.1]], 'polynomial': [[2, 3, 2], [3, 3, 2], [3, 3, 3]], 'gaussian': [[0.5, 0.5, 0.1], [0.5, 0.5, 0.1], [0.5, 0.5, 0.5]]}, 'CA': (0.18077790470796481, 0.00023040177045593088), 'Accuracy': (0.15789473684210525, 0.002667487432030368), 'Precision': (0.17257799671592777, 0.0024234727581083962), 'Recall': (0.1724137931034483, 0.0023781212841854937), 'eta': (array([-0.0900899 , -0.35997713,  0.30312374, -0.25570717, -0.51230399,
        0.62872117,  0.07178047, -0.00322436, -0.04793645]), array([0.00148177, 0.00613266, 0.00080319, 0.01754692, 0.00776831,
       0.00045487, 0.00030987, 0.00318854, 0.00213755]))}
statistics of configuration 2
{'config': {'laplacian': [[0.7, 0.7, 0.7], [0.7, 0.7, 1], [0.7, 0.7, 1]], 'polynomial': [[5, 5, 5], [5, 8, 5], [8, 8, 5]], 'gaussian': [[0.7, 0.7, 1], [0.7, 0.7, 1], [1, 1, 1]]}, 'CA': (0.17954257290908307, 0.00019000578769002644), 'Accuracy': (0.1637426900

In [31]:
# L2 penalty, sigmoid family: lamb=0.5 with centering=True.
# NOTE(review): the section title mentions "K-Normalization", but the sampler is
# created with normalize_kernels=False — confirm which of the two is intended.
kernel_names = ['sigmoid', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    lamb=0.5,
    centering=True,
    normalize_kernels=False,
)

In [32]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
# NOTE(review): rebinds `result1`, which every sigmoid run in this notebook reuses;
# the result of the previous sigmoid run is lost.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)


1 split out of 3 ...
	Working on config 1 of 2: {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
		Perfomances computed for 800
		Perfomances computed for 1000
		Perfomances computed for 1200
		Perfomances computed for 1400
		Perfomances computed for 1600
Validation complete, config selected:[[0.7, 2, 0.5], [0.7, 3, 0.1], [0.1, 2, 0.1]]
	Res

In [28]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
# NOTE(review): this cell was saved with execution count 28, while the sampler
# cells just above carry counts 31 and 32 — the stored output may come from an
# older `result1`; re-run the notebook top to bottom to confirm.
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.68
	Precision: 0.696969696969697
	Recall: 0.6216216216216216
Perfomances computed for 2 dictionary settings:
	Accuracy: 0.6133333333333333
	Precision: 0.6176470588235294
	Recall: 0.5675675675675675


In [29]:
# Show the kernel parameters selected for each dataset (one dict per grid).
w_dict

[{'ds1': {'sigmoid': 0.7, 'polynomial': 3, 'gaussian': 0.1},
  'ds2': {'sigmoid': 0.7, 'polynomial': 3, 'gaussian': 0.5},
  'ds3': {'sigmoid': 0.1, 'polynomial': 3, 'gaussian': 0.1}},
 {'ds1': {'sigmoid': 1, 'polynomial': 8, 'gaussian': 1},
  'ds2': {'sigmoid': 1, 'polynomial': 8, 'gaussian': 1},
  'ds3': {'sigmoid': 0.7, 'polynomial': 8, 'gaussian': 0.7}}]

In [30]:
# Print the per-configuration statistics gathered during sampling (L2 penalty, sigmoid).
result1.performancesFeatures()

statistics of configuration 1
{'config': {'sigmoid': [[0.7, 0.7, 0.1], [0.7, 0.7, 0.1], [0.1, 0.7, 0.1]], 'polynomial': [[3, 2, 3], [2, 3, 3], [3, 3, 2]], 'gaussian': [[0.1, 0.1, 0.1], [0.5, 0.5, 0.1], [0.1, 0.5, 0.1]]}, 'CA': (0.20019448391297434, 9.134122560720207e-05), 'Accuracy': (0.48538011695906436, 0.04787797954926303), 'Precision': (0.5152688172043011, 0.0485608972135507), 'Recall': (0.4597701149425288, 0.028801691108468752), 'eta': (array([ 0.2387452 , -0.09466533,  0.14012141, -0.18278146,  0.05005148,
        0.47719908, -0.09518994, -0.02334256,  0.01877751]), array([0.22641179, 0.11243009, 0.00967371, 0.05040529, 0.23235589,
       0.00521054, 0.00310299, 0.00040844, 0.00081467]))}
statistics of configuration 2
{'config': {'sigmoid': [[1, 1, 0.7], [1, 1, 1], [0.7, 0.7, 0.7]], 'polynomial': [[8, 8, 5], [8, 8, 8], [8, 5, 8]], 'gaussian': [[1, 1, 0.7], [1, 1, 1], [0.7, 0.7, 0.7]]}, 'CA': (0.20368636313169639, 0.0001111994985962757), 'Accuracy': (0.7309941520467836, 0.02489654

## Sparsity in eta, Centering, K-Normalization

In [None]:
# Sparsity in eta, laplacian family: sparsity=0.3 with centering=True and
# normalize_kernels=True.
kernel_names = ['laplacian', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'laplacian': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    sparsity=0.3,
    centering=True,
    normalize_kernels=True,
)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
result8 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result8.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (sparsity in eta, laplacian).
result8.performancesFeatures()

In [None]:
# Sparsity in eta, sigmoid family: sparsity=0.3 with centering=True and
# normalize_kernels=True.
kernel_names = ['sigmoid', 'polynomial', 'gaussian']
# Two candidate grids, each mapping a kernel name to the parameter values to try.
kernel_type = [
    {'sigmoid': [0.1, 0.5, 0.7], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    sparsity=0.3,
    centering=True,
    normalize_kernels=True,
)

In [None]:
# Run the sampler configured above on the training views (ds_list) and labels (y_).
# NOTE(review): rebinds `result1`, which every sigmoid run in this notebook reuses;
# the result of the previous sigmoid run is lost.
result1 = sampler.sample(kernel_type, estimator, ds_list, y_, valid_fold = 3, verbose=True)

In [None]:
# Derive per-dataset kernel settings with votingOverCA, then score each setting
# on the held-out split (ds_test, y_test).
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names)
ut.testConfigurations(estimator, y_, y_test, w_list, ds_list, ds_test, kernel_names)

In [None]:
# Show the kernel parameters selected for each dataset.
w_dict

In [None]:
# Report the statistics gathered during sampling (sparsity in eta, sigmoid).
result1.performancesFeatures()