In [1]:
# Standard library
import os
# The ABC aliases in `collections` were removed in Python 3.10; `collections.abc`
# is the supported location.
from collections.abc import Sequence

# Third-party
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit

# Project-local helpers
import CortesAlignmentFile as ca
import mySampler as ms
import Utils as ut


In [2]:
from sklearn import datasets

# Fixed seed so the synthetic data set (and everything derived from it) can be
# regenerated after a kernel restart.
SEED = 42

# 300 samples x 30 features, binary target; 10 features are informative and none
# are redundant. Every other make_classification argument stays at its default.
ds, label = datasets.make_classification(
    n_samples=300,
    n_features=30,
    n_informative=10,
    n_redundant=0,
    n_classes=2,
    random_state=SEED,
)
# Labels for the three feature views (ds1, ds2, ds3) cut from `ds` further down.
ds_names = ['ds1', 'ds2', 'ds3']

In [3]:
# Recode class 0 as -1 (in place) using a boolean mask; class 1 is left untouched.
label[label == 0] = -1

## Toy

In [4]:
def random_sampling(X, y, n_samples, random_state=None):
    """Draw one stratified shuffle split and return its row indices.

    Parameters
    ----------
    X : array-like
        Samples to split; forwarded to ``StratifiedShuffleSplit.split``.
    y : array-like
        Class labels used for stratification.
    n_samples : float or int
        Forwarded unchanged as ``test_size`` of ``StratifiedShuffleSplit``
        (the call sites in this notebook pass a fraction such as 0.25).
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``StratifiedShuffleSplit``. ``None`` keeps the previous
        unseeded behaviour; pass an int to obtain a reproducible split.

    Returns
    -------
    tuple
        ``(train_idx, test_idx)`` for the single generated split.
    """
    # Imported locally so the helper does not depend on the notebook's import cell.
    from sklearn.model_selection import StratifiedShuffleSplit

    splitter = StratifiedShuffleSplit(
        n_splits=1, test_size=n_samples, random_state=random_state
    )
    # n_splits=1, so the first yielded pair is the only one.
    return next(splitter.split(X, y))

In [5]:
# One stratified split of the rows; 0.25 is passed through as `n_samples`.
tr_idx, ts_idx = random_sampling(ds, label, n_samples=0.25)

# Three feature views cut from the columns of `ds`:
#   ds1 -> columns 0-4 and 10-14
#   ds2 -> columns 5-9 and 15-19
#   ds3 -> columns 20 onwards
ds1 = np.concatenate((ds[:, :5], ds[:, 10:15]), axis=1)
ds2 = np.concatenate((ds[:, 5:10], ds[:, 15:20]), axis=1)
ds3 = ds[:, 20:]
ds_list_complete = [ds1, ds2, ds3]

# Train / test rows of every view, kept in the same order as the views above.
ds_list = [view[tr_idx] for view in ds_list_complete]
ds_test = [view[ts_idx] for view in ds_list_complete]
ds1_tr, ds2_tr, ds3_tr = ds_list
ds1_ts, ds2_ts, ds3_ts = ds_test

# Targets matching the two row subsets.
y_, y_test = label[tr_idx], label[ts_idx]

## Basic approach

In [8]:
# Kernel labels used by the votingOverCA / testConfigurations calls of this
# section; defined here so those calls do not fail on a fresh kernel.
# NOTE(review): the grids below are keyed 'laplacian'/'sigmoid', not 'linear' --
# confirm these labels are the intended ones.
kernel_names = ['linear', 'polynomial', 'gaussian']

# One dict per configuration; presumably kernel name -> candidate parameter values.
kernel_type = [
    {'laplacian': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
    {'sigmoid': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
# Estimator handed to the sampler, and the sampler with no optional flags set.
estimator = ca.centeredKernelAlignment
sampler = ms.mySampler(n_splits=3, test_size=.25)

In [9]:
# Run the sampler over every grid in `kernel_type` on the training views;
# verbose=True prints the per-split / per-fold progress.
result1 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y_,
    valid_fold=3,
    verbose=True,
)


1 split out of 3 ...
	Working on config 1 of 4: {'laplacian': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
Validation complete, config selected:[[0.5, 2, 0.5], [0.1, 2, 0.5], [0.1, 2, 0.1]]
	Working on config 2 of 4: {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
Validation complete, config selected:[[1, 5, 1], [0.7, 5, 0.7], [0.7, 5, 1]]
	Working on config 3 of 4: {'sigmoid': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
Fold no. 2
		Per

In [10]:
# Inspect the stored best entries; each record carries CA, Accuracy, Precision,
# Recall, the kernel config object and the eta weight vector.
result1.global_best_

[[{'CA': 0.23573761202500557,
   'Accuracy': 0.7543859649122807,
   'Precision': 0.75,
   'Recall': 0.75,
   'config': <KernelFile.kernelWrapper at 0x120545b70>,
   'eta': array([-0.8480617 ,  0.01056839,  0.377632  ,  0.12833188,  0.00420076,
           0.33428676, -0.07563046,  0.00430986,  0.06403784])},
  {'CA': 0.11581397841650364,
   'Accuracy': 0.7192982456140351,
   'Precision': 0.7307692307692307,
   'Recall': 0.6785714285714286,
   'config': <KernelFile.kernelWrapper at 0x1a227a00b8>,
   'eta': array([-5.26420455e-01,  3.15339670e-06,  1.44550366e-01,  7.28777399e-01,
           3.37592828e-07, -3.58738849e-01,  1.53528554e-01,  5.08390662e-07,
          -1.36402419e-01])},
  {'CA': 0.2545326717003451,
   'Accuracy': 0.7719298245614035,
   'Precision': 0.7586206896551724,
   'Recall': 0.7857142857142857,
   'config': <KernelFile.kernelWrapper at 0x1a227f2048>,
   'eta': array([ 0.3204535 ,  0.03015456, -0.7221118 ,  0.16979901,  0.0293851 ,
           0.5385448 , -0.13738055,

In [None]:
# Vote on the settings found by the run, then score every voted setting on the
# held-out views (the call reports accuracy, precision and recall).
w_dict, w_list = result1.votingOverCA(
    ds_names,
    kernel_names,
)
ut.testConfigurations(
    estimator,
    y_,
    y_test,
    w_list,
    ds_list,
    ds_test,
    kernel_names,
)

In [11]:
# Print the per-configuration statistics (config, CA, Accuracy, Precision, Recall, eta).
result1.performancesFeatures()

statistics of configuration 0
{'config': {'laplacian': [[0.5, 0.1, 0.1], [0.1, 0.1, 0.5], [0.5, 0.1, 0.1]], 'polynomial': [[2, 2, 2], [2, 2, 3], [2, 2, 2]], 'gaussian': [[0.5, 0.5, 0.1], [0.1, 0.1, 0.5], [0.5, 0.1, 0.1]]}, 'CA': (0.23029475120573206, 1.5554451960119094e-05), 'Accuracy': (0.7660818713450293, 0.0002735884545672172), 'Precision': (0.7636752136752136, 0.001015048579151145), 'Recall': (0.7619047619047619, 0.0002834467120181403), 'eta': (array([-0.46649898,  0.0335352 ,  0.0742536 ,  0.2787281 ,  0.00865763,
        0.19964151, -0.00829271,  0.0097436 , -0.11853566]), array([1.79796882e-01, 2.64315400e-04, 5.81759239e-02, 2.32125644e-02,
       9.93253422e-06, 1.28653387e-02, 1.07574577e-01, 5.32287440e-05,
       2.61952284e-01]))}
statistics of configuration 1
{'config': {'laplacian': [[1, 0.7, 0.7], [0.7, 0.7, 0.7], [0.7, 0.7, 1]], 'polynomial': [[5, 5, 5], [5, 5, 5], [5, 5, 5]], 'gaussian': [[1, 0.7, 1], [0.7, 1, 0.7], [0.7, 0.7, 0.7]]}, 'CA': (0.1153164285193487, 1.9098

## Kernel normalization

In [None]:
# Labels passed to votingOverCA / testConfigurations for this experiment.
kernel_names = ['linear', 'polynomial', 'gaussian']
# One dict per configuration; presumably kernel name -> candidate parameter values.
kernel_type = [
    {'laplacian': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
    {'sigmoid': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# Only the kernel-normalization flag is switched on for this run.
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    normalize_kernels=True,
)

In [None]:
# Run the sampler over every grid in `kernel_type` on the training views;
# verbose=True prints the per-split / per-fold progress.
result3 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y_,
    valid_fold=3,
    verbose=True,
)

In [None]:
# Vote on the settings found by the run, then score every voted setting on the
# held-out views (the call reports accuracy, precision and recall).
w_dict, w_list = result3.votingOverCA(
    ds_names,
    kernel_names,
)
ut.testConfigurations(
    estimator,
    y_,
    y_test,
    w_list,
    ds_list,
    ds_test,
    kernel_names,
)

In [None]:
# Show the voted parameters returned by votingOverCA for this run.
w_dict

In [None]:
# Print the per-configuration statistics collected for the kernel-normalized run.
result3.performancesFeatures()

## Normalized data

In [None]:
# Labels passed to votingOverCA / testConfigurations for this experiment.
kernel_names = ['linear', 'polynomial', 'gaussian']
# One dict per configuration; presumably kernel name -> candidate parameter values.
kernel_type = [
    {'laplacian': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
    {'sigmoid': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# Only the `normalizing` flag is switched on for this run.
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    normalizing=True,
)

In [None]:
# Run the sampler over every grid in `kernel_type` on the training views;
# verbose=True prints the per-split / per-fold progress.
result4 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y_,
    valid_fold=3,
    verbose=True,
)

In [None]:
# Vote on the settings found by the run, then score every voted setting on the
# held-out views (the call reports accuracy, precision and recall).
w_dict, w_list = result4.votingOverCA(
    ds_names,
    kernel_names,
)
ut.testConfigurations(
    estimator,
    y_,
    y_test,
    w_list,
    ds_list,
    ds_test,
    kernel_names,
)

In [None]:
# Print the per-configuration statistics collected for the normalized-data run.
result4.performancesFeatures()

## Origin Data Centering

In [6]:
# Labels passed to votingOverCA / testConfigurations for this experiment.
kernel_names = ['linear', 'polynomial', 'gaussian']
# One dict per configuration; presumably kernel name -> candidate parameter values.
kernel_type = [
    {'laplacian': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
    {'sigmoid': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# Only the `centering` flag is switched on for this run.
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    centering=True,
)

In [7]:
# Run the sampler over every grid in `kernel_type` on the training views;
# verbose=True prints the per-split / per-fold progress.
result5 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y_,
    valid_fold=3,
    verbose=True,
)

1 split out of 3 ...
	Working on config 1 of 2: {'linear': [0.5], 'polynomial': [2, 3, 7], 'gaussian': [0.1, 0.5, 0.7]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 7, 0.7], [0.5, 2, 0.1], [0.5, 7, 0.7]]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]}
Fold no. 1
		Perfomances computed for 200
Fold no. 2
		Perfomances computed for 200
Fold no. 3
		Perfomances computed for 200
Validation complete, config selected:[[0.2, 8, 1], [0.2, 4, 0.7], [0.2, 8, 1]]
	Result of 1:
CA: 0.2445182945006547
Accuracy: 0.40350877192982454
Precision: 0.41935483870967744
Recall: 0.4482758620689655
[linear:0.5, polynomial:7, gaussian:0.7, ]
[linear:0.5, polynom

In [8]:
# Vote on the settings found by the run, then score every voted setting on the
# held-out views (the call reports accuracy, precision and recall).
w_dict, w_list = result5.votingOverCA(
    ds_names,
    kernel_names,
)
ut.testConfigurations(
    estimator,
    y_,
    y_test,
    w_list,
    ds_list,
    ds_test,
    kernel_names,
)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.7333333333333333
	Precision: 0.7297297297297297
	Recall: 0.7297297297297297
Perfomances computed for 2 dictionary settings:
	Accuracy: 0.7066666666666667
	Precision: 0.6923076923076923
	Recall: 0.7297297297297297


In [10]:
# Show the voted parameters: one dict per setting, keyed by data set name and
# then by kernel label.
w_dict

[{'ds1': {'linear': 0.5, 'polynomial': 7, 'gaussian': 0.7},
  'ds2': {'linear': 0.5, 'polynomial': 2, 'gaussian': 0.1},
  'ds3': {'linear': 0.5, 'polynomial': 7, 'gaussian': 0.7}},
 {'ds1': {'linear': 0.2, 'polynomial': 8, 'gaussian': 1},
  'ds2': {'linear': 0.2, 'polynomial': 4, 'gaussian': 0.7},
  'ds3': {'linear': 0.2, 'polynomial': 8, 'gaussian': 1}}]

In [9]:
# Print the per-configuration statistics collected for the centered-data run.
result5.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[7, 2, 7], [2, 2, 7], [7, 7, 7]], 'gaussian': [[0.7, 0.1, 0.7], [0.1, 0.1, 0.7], [0.7, 0.7, 0.1]]}, 'CA': (0.265830078855572, 0.00026218314515649883), 'Accuracy': (0.5730994152046783, 0.01566293902397319), 'Precision': (0.5758481646273638, 0.012989017557719756), 'Recall': (0.6091954022988506, 0.016118377592812785), 'eta': (array([-0.02276001, -0.16314961,  0.13695711, -0.05157704,  0.0632759 ,
        0.02797943, -0.02116098,  0.0051436 ,  0.2471284 ]), array([6.59701644e-03, 1.41530647e-01, 4.21358748e-02, 6.72422404e-03,
       3.18965914e-01, 2.04497578e-01, 2.67788008e-04, 4.39104377e-03,
       1.60003282e-01]))}
statistics of configuration 1
{'config': {'linear': [[0.2, 0.2, 0.2], [0.2, 0.2, 0.2], [0.2, 0.2, 0.2]], 'polynomial': [[8, 4, 8], [4, 4, 8], [8, 8, 4]], 'gaussian': [[1, 0.7, 1], [0.7, 0.7, 1], [1, 1, 0.7]]}, 'CA': (0.2658651026507633, 0.000260586928092

## Origin Data Centering and Normalization

In [11]:
# Labels passed to votingOverCA / testConfigurations for this experiment.
kernel_names = ['linear', 'polynomial', 'gaussian']
# One dict per configuration; presumably kernel name -> candidate parameter values.
kernel_type = [
    {'laplacian': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
    {'sigmoid': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# Both data-level flags (`centering` and `normalizing`) are switched on.
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    centering=True,
    normalizing=True,
)

In [16]:
# Run the sampler over every grid in `kernel_type` on the training views;
# verbose=True prints the per-split / per-fold progress.
result6 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y_,
    valid_fold=3,
    verbose=True,
)

1 split out of 3 ...
	Working on config 1 of 2: {'linear': [0.5], 'polynomial': [2, 3, 7], 'gaussian': [0.1, 0.5, 0.7]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 7, 0.7], [0.5, 2, 0.1], [0.5, 2, 0.7]]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1, 1.5]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.2, 4, 1.5], [0.2, 4, 0.7], [0.2, 4,

In [17]:
# Vote on the settings found by the run, then score every voted setting on the
# held-out views (the call reports accuracy, precision and recall).
w_dict, w_list = result6.votingOverCA(
    ds_names,
    kernel_names,
)
ut.testConfigurations(
    estimator,
    y_,
    y_test,
    w_list,
    ds_list,
    ds_test,
    kernel_names,
)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.7466666666666667
	Precision: 0.7368421052631579
	Recall: 0.7567567567567568
Perfomances computed for 2 dictionary settings:
	Accuracy: 0.6933333333333334
	Precision: 0.6944444444444444
	Recall: 0.6756756756756757


In [18]:
# Show the voted parameters: one dict per setting, keyed by data set name and
# then by kernel label.
w_dict

[{'ds1': {'linear': 0.5, 'polynomial': 2, 'gaussian': 0.7},
  'ds2': {'linear': 0.5, 'polynomial': 2, 'gaussian': 0.1},
  'ds3': {'linear': 0.5, 'polynomial': 7, 'gaussian': 0.7}},
 {'ds1': {'linear': 0.2, 'polynomial': 4, 'gaussian': 1.5},
  'ds2': {'linear': 0.2, 'polynomial': 4, 'gaussian': 0.7},
  'ds3': {'linear': 0.2, 'polynomial': 8, 'gaussian': 1.5}}]

In [19]:
# Print the per-configuration statistics collected for the centered + normalized run.
result6.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[7, 2, 2], [2, 2, 7], [2, 2, 7]], 'gaussian': [[0.7, 0.1, 0.7], [0.1, 0.1, 0.7], [0.7, 0.1, 0.7]]}, 'CA': (0.2562338509191033, 0.0001893960159368705), 'Accuracy': (0.9122807017543858, 1.232595164407831e-32), 'Precision': (0.8833333333333333, 0.00013888888888888916), 'Recall': (0.9540229885057472, 0.00026423569824283365), 'eta': (array([ 0.02041821, -0.25668014,  0.18250814, -0.01546459,  0.10891607,
       -0.01996857, -0.03747769,  0.2094214 , -0.01324796]), array([1.79495734e-03, 1.68788211e-01, 6.11464856e-02, 3.88820556e-03,
       3.83835052e-01, 1.42112640e-01, 2.43863699e-03, 7.83858689e-02,
       6.11071051e-05]))}
statistics of configuration 1
{'config': {'linear': [[0.2, 0.2, 0.2], [0.2, 0.2, 0.2], [0.2, 0.2, 0.2]], 'polynomial': [[4, 4, 4], [4, 4, 8], [4, 4, 8]], 'gaussian': [[1.5, 0.7, 1.5], [0.7, 0.7, 1.5], [1.5, 0.7, 1.5]]}, 'CA': (0.25636666060952035, 

## Normalization, Kernel Normalization

In [20]:
# Labels passed to votingOverCA / testConfigurations for this experiment.
kernel_names = ['linear', 'polynomial', 'gaussian']
# One dict per configuration; presumably kernel name -> candidate parameter values.
kernel_type = [
    {'laplacian': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
    {'sigmoid': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# `normalizing` and `normalize_kernels` are switched on; centering is not requested.
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    normalizing=True,
    normalize_kernels=True,
)

In [22]:
# Run the sampler over every grid in `kernel_type` on the training views;
# verbose=True prints the per-split / per-fold progress.
result10 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y_,
    valid_fold=3,
    verbose=True,
)

1 split out of 3 ...
	Working on config 1 of 2: {'linear': [0.5], 'polynomial': [2, 3, 4], 'gaussian': [0.1, 0.3, 0.6]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 4, 0.1], [0.5, 3, 0.1], [0.5, 4, 0.1]]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]}
Fold no. 1
		Perfomances computed for 200
Fold no. 2
		Perfomances computed for 200
Fold no. 3
		Perfomances computed for 200
Validation complete, config selected:[[0.2, 8, 0.7], [0.2, 8, 1], [0.2, 8, 1]]
	Result of 1:
CA: 0.25573628307992285
Accuracy: 0.9122807017543859
Precision: 0.875
Recall: 0.9655172413793104
[linear:0.5, polynomial:4, gaussian:0.1, ]
[linear:0.5, polynomial:3, gaussia

In [23]:
# Vote on the settings found by the run, then score every voted setting on the
# held-out views (the call reports accuracy, precision and recall).
w_dict, w_list = result10.votingOverCA(
    ds_names,
    kernel_names,
)
ut.testConfigurations(
    estimator,
    y_,
    y_test,
    w_list,
    ds_list,
    ds_test,
    kernel_names,
)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.72
	Precision: 0.7222222222222222
	Recall: 0.7027027027027027
Perfomances computed for 2 dictionary settings:
	Accuracy: 0.7333333333333333
	Precision: 0.7428571428571429
	Recall: 0.7027027027027027


In [25]:
# Show the voted parameters: one dict per setting, keyed by data set name and
# then by kernel label.
w_dict

[{'ds1': {'linear': 0.5, 'polynomial': 4, 'gaussian': 0.6},
  'ds2': {'linear': 0.5, 'polynomial': 3, 'gaussian': 0.1},
  'ds3': {'linear': 0.5, 'polynomial': 4, 'gaussian': 0.1}},
 {'ds1': {'linear': 0.2, 'polynomial': 4, 'gaussian': 0.7},
  'ds2': {'linear': 0.2, 'polynomial': 8, 'gaussian': 1},
  'ds3': {'linear': 0.2, 'polynomial': 8, 'gaussian': 1}}]

In [24]:
# Print the per-configuration statistics collected for the normalized-data,
# normalized-kernel run.
result10.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[4, 3, 4], [4, 3, 3], [2, 4, 4]], 'gaussian': [[0.1, 0.1, 0.1], [0.6, 0.1, 0.1], [0.3, 0.6, 0.1]]}, 'CA': (0.27725079718609735, 0.00023237797939343774), 'Accuracy': (0.8304093567251462, 0.0035566499093738235), 'Precision': (0.799074074074074, 0.003890603566529493), 'Recall': (0.896551724137931, 0.003170828378913994), 'eta': (array([ 0.00211001, -0.18142696, -0.06491551, -0.02118474, -0.06819569,
        0.44733447,  0.00637805, -0.17772248,  0.30767238]), array([2.31984722e-06, 1.58552667e-01, 7.43632699e-02, 4.15554808e-04,
       1.97717359e-01, 1.27891210e-01, 5.71132707e-05, 1.89205209e-02,
       5.34499393e-02]))}
statistics of configuration 1
{'config': {'linear': [[0.2, 0.2, 0.2], [0.2, 0.2, 0.2], [0.2, 0.2, 0.2]], 'polynomial': [[8, 8, 8], [4, 8, 8], [4, 8, 8]], 'gaussian': [[0.7, 1, 1], [0.7, 1, 1], [0.7, 1, 1]]}, 'CA': (0.2771037213555725, 0.000255990298438

## Centering, Kernel Normalization

In [26]:
# Labels passed to votingOverCA / testConfigurations for this experiment.
kernel_names = ['linear', 'polynomial', 'gaussian']
# One dict per configuration; presumably kernel name -> candidate parameter values.
kernel_type = [
    {'laplacian': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
    {'sigmoid': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# `centering` and `normalize_kernels` are switched on; `normalizing` is not requested.
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    centering=True,
    normalize_kernels=True,
)

In [29]:
# Run the sampler over every grid in `kernel_type` on the training views;
# verbose=True prints the per-split / per-fold progress.
result11 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y_,
    valid_fold=3,
    verbose=True,
)

1 split out of 3 ...
	Working on config 1 of 2: {'linear': [0.5], 'polynomial': [2, 3, 4], 'gaussian': [0.1, 0.3, 0.6]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 2, 0.1], [0.5, 4, 0.6], [0.5, 4, 0.6]]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1]}
Fold no. 1
		Perfomances computed for 200
Fold no. 2
		Perfomances computed for 200
Fold no. 3
		Perfomances computed for 200
Validation complete, config selected:[[0.2, 5, 0.7], [0.2, 8, 1], [0.2, 8, 1]]
	Result of 1:
CA: 0.2496631169403161
Accuracy: 0.8421052631578947
Precision: 0.7941176470588235
Recall: 0.9310344827586207
[linear:0.5, polynomial:2, gaussian:0.1, ]
[linear:0.5, polynomia

In [30]:
# Vote on the settings found by the run, then score every voted setting on the
# held-out views (the call reports accuracy, precision and recall).
w_dict, w_list = result11.votingOverCA(
    ds_names,
    kernel_names,
)
ut.testConfigurations(
    estimator,
    y_,
    y_test,
    w_list,
    ds_list,
    ds_test,
    kernel_names,
)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.7333333333333333
	Precision: 0.7428571428571429
	Recall: 0.7027027027027027
Perfomances computed for 2 dictionary settings:
	Accuracy: 0.72
	Precision: 0.7105263157894737
	Recall: 0.7297297297297297


In [31]:
# Show the voted parameters: one dict per setting, keyed by data set name and
# then by kernel label.
w_dict

[{'ds1': {'linear': 0.5, 'polynomial': 4, 'gaussian': 0.6},
  'ds2': {'linear': 0.5, 'polynomial': 4, 'gaussian': 0.6},
  'ds3': {'linear': 0.5, 'polynomial': 4, 'gaussian': 0.6}},
 {'ds1': {'linear': 0.2, 'polynomial': 4, 'gaussian': 0.7},
  'ds2': {'linear': 0.2, 'polynomial': 8, 'gaussian': 0.7},
  'ds3': {'linear': 0.2, 'polynomial': 8, 'gaussian': 1}}]

In [32]:
# Print the per-configuration statistics collected for the centered,
# normalized-kernel run.
result11.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[2, 4, 4], [4, 4, 4], [4, 4, 4]], 'gaussian': [[0.1, 0.6, 0.6], [0.6, 0.6, 0.6], [0.6, 0.6, 0.6]]}, 'CA': (0.2674891285506506, 0.0006290441388932356), 'Accuracy': (0.7485380116959064, 0.004582606614000885), 'Precision': (0.7526071169208425, 0.0013897742189481815), 'Recall': (0.7471264367816092, 0.01691108468754129), 'eta': (array([-0.00404827, -0.36816226,  0.29404096, -0.05322447,  0.66341477,
       -0.06191449,  0.02732085, -0.06914452,  0.00636662]), array([8.16585312e-06, 5.78240366e-02, 8.39834753e-02, 1.11750382e-03,
       1.71439314e-01, 1.65154898e-03, 3.70196099e-04, 9.06415631e-03,
       1.68354130e-04]))}
statistics of configuration 1
{'config': {'linear': [[0.2, 0.2, 0.2], [0.2, 0.2, 0.2], [0.2, 0.2, 0.2]], 'polynomial': [[5, 8, 8], [8, 8, 8], [4, 8, 8]], 'gaussian': [[0.7, 1, 1], [0.7, 0.7, 1], [0.7, 0.7, 1]]}, 'CA': (0.2683669494034853, 0.000623556041

## Centering, Normalization, Kernel Normalization

In [33]:
# Labels passed to votingOverCA / testConfigurations for this experiment.
kernel_names = ['linear', 'polynomial', 'gaussian']
# One dict per configuration; presumably kernel name -> candidate parameter values.
kernel_type = [
    {'laplacian': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
    {'sigmoid': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# All three flags (`centering`, `normalizing`, `normalize_kernels`) are switched on.
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    centering=True,
    normalizing=True,
    normalize_kernels=True,
)

In [34]:
# Run the sampler over every grid in `kernel_type` on the training views;
# verbose=True prints the per-split / per-fold progress.
result7 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y_,
    valid_fold=3,
    verbose=True,
)

1 split out of 3 ...
	Working on config 1 of 2: {'linear': [0.5], 'polynomial': [2, 3, 4], 'gaussian': [0.1, 0.3, 0.6]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 4, 0.6], [0.5, 4, 0.6], [0.5, 4, 0.6]]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1, 1.5]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.2, 8, 1], [0.2, 8, 0.7], [0.2, 8, 1

In [35]:
# Vote on the settings found by the run, then score every voted setting on the
# held-out views (the call reports accuracy, precision and recall).
w_dict, w_list = result7.votingOverCA(
    ds_names,
    kernel_names,
)
ut.testConfigurations(
    estimator,
    y_,
    y_test,
    w_list,
    ds_list,
    ds_test,
    kernel_names,
)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.7333333333333333
	Precision: 0.7428571428571429
	Recall: 0.7027027027027027
Perfomances computed for 2 dictionary settings:
	Accuracy: 0.72
	Precision: 0.7105263157894737
	Recall: 0.7297297297297297


In [36]:
# Show the voted parameters: one dict per setting, keyed by data set name and
# then by kernel label.
w_dict

[{'ds1': {'linear': 0.5, 'polynomial': 4, 'gaussian': 0.6},
  'ds2': {'linear': 0.5, 'polynomial': 4, 'gaussian': 0.6},
  'ds3': {'linear': 0.5, 'polynomial': 4, 'gaussian': 0.6}},
 {'ds1': {'linear': 0.2, 'polynomial': 8, 'gaussian': 0.7},
  'ds2': {'linear': 0.2, 'polynomial': 8, 'gaussian': 0.7},
  'ds3': {'linear': 0.2, 'polynomial': 8, 'gaussian': 1}}]

In [38]:
# Print the per-configuration statistics collected for the run with all three
# preprocessing flags enabled.
result7.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[4, 4, 4], [2, 4, 4], [4, 4, 4]], 'gaussian': [[0.6, 0.6, 0.6], [0.6, 0.6, 0.1], [0.3, 0.6, 0.6]]}, 'CA': (0.2862115195302434, 0.00014379341077753593), 'Accuracy': (0.8128654970760234, 0.0008891624773434565), 'Precision': (0.7917934415145368, 0.001634019071828678), 'Recall': (0.8620689655172414, 0.0007927070947284983), 'eta': (array([ 0.00606128, -0.3236106 ,  0.13136831, -0.06741176,  0.75012003,
       -0.05788777,  0.02384884,  0.09838397, -0.25847404]), array([3.62540788e-06, 1.31692842e-02, 1.41046128e-02, 3.37636181e-04,
       4.25007345e-02, 6.38093218e-04, 3.13317083e-05, 2.71498447e-02,
       1.32414244e-01]))}
statistics of configuration 1
{'config': {'linear': [[0.2, 0.2, 0.2], [0.2, 0.2, 0.2], [0.2, 0.2, 0.2]], 'polynomial': [[8, 8, 8], [4, 4, 8], [8, 8, 4]], 'gaussian': [[1, 0.7, 1.5], [0.7, 1.5, 1], [0.7, 0.7, 1]]}, 'CA': (0.2862012885092245, 0.0001429

## L2 Penalty, Centering, K-Normalization

In [40]:
# Labels passed to votingOverCA / testConfigurations for this experiment.
kernel_names = ['linear', 'polynomial', 'gaussian']
# One dict per configuration; presumably kernel name -> candidate parameter values.
kernel_type = [
    {'laplacian': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
    {'sigmoid': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# `lamb=0.5` is presumably the L2 penalty weight named in the section title.
# NOTE(review): the section title mentions K-Normalization, yet
# `normalize_kernels` is explicitly False here -- confirm which one is intended.
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    lamb=0.5,
    centering=True,
    normalize_kernels=False,
)

In [41]:
# Run the sampler over every grid in `kernel_type` on the training views;
# verbose=True prints the per-split / per-fold progress.
result9 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y_,
    valid_fold=3,
    verbose=True,
)

1 split out of 3 ...
	Working on config 1 of 2: {'linear': [0.5], 'polynomial': [2, 3, 4], 'gaussian': [0.1, 0.3, 0.6]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.5, 4, 0.3], [0.5, 4, 0.1], [0.5, 2, 0.1]]
	Working on config 2 of 2: {'linear': [0.2], 'polynomial': [4, 5, 8], 'gaussian': [0.7, 1, 1.5]}
Fold no. 1
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 2
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Fold no. 3
		Perfomances computed for 200
		Perfomances computed for 400
		Perfomances computed for 600
Validation complete, config selected:[[0.2, 4, 0.7], [0.2, 8, 1.5], [0.2, 5,

In [42]:
# Vote on the settings found by the run, then score every voted setting on the
# held-out views (the call reports accuracy, precision and recall).
w_dict, w_list = result9.votingOverCA(
    ds_names,
    kernel_names,
)
ut.testConfigurations(
    estimator,
    y_,
    y_test,
    w_list,
    ds_list,
    ds_test,
    kernel_names,
)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.72
	Precision: 0.7222222222222222
	Recall: 0.7027027027027027
Perfomances computed for 2 dictionary settings:
	Accuracy: 0.76
	Precision: 0.7567567567567568
	Recall: 0.7567567567567568


In [43]:
# Show the voted parameters: one dict per setting, keyed by data set name and
# then by kernel label.
w_dict

[{'ds1': {'linear': 0.5, 'polynomial': 4, 'gaussian': 0.6},
  'ds2': {'linear': 0.5, 'polynomial': 2, 'gaussian': 0.1},
  'ds3': {'linear': 0.5, 'polynomial': 2, 'gaussian': 0.1}},
 {'ds1': {'linear': 0.2, 'polynomial': 4, 'gaussian': 1.5},
  'ds2': {'linear': 0.2, 'polynomial': 4, 'gaussian': 1.5},
  'ds3': {'linear': 0.2, 'polynomial': 5, 'gaussian': 0.7}}]

In [44]:
# Print the per-configuration statistics collected for the `lamb=0.5` run.
result9.performancesFeatures()

statistics of configuration 0
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[4, 4, 2], [4, 2, 2], [2, 2, 2]], 'gaussian': [[0.3, 0.1, 0.1], [0.6, 0.1, 0.1], [0.6, 0.1, 0.1]]}, 'CA': (0.27727316284543096, 0.00014780526408407127), 'Accuracy': (0.8421052631578947, 0.00020519134092541295), 'Precision': (0.819220430107527, 0.00019601254480286823), 'Recall': (0.8850574712643677, 0.00026423569824283365), 'eta': (array([-0.07590169, -0.00067549,  0.19967228, -0.02732969,  0.32940538,
        0.12962775,  0.12310414, -0.05220473, -0.0946121 ]), array([0.00057981, 0.00150963, 0.00242562, 0.04433421, 0.28065632,
       0.37727453, 0.00293652, 0.022005  , 0.06975822]))}
statistics of configuration 1
{'config': {'linear': [[0.2, 0.2, 0.2], [0.2, 0.2, 0.2], [0.2, 0.2, 0.2]], 'polynomial': [[4, 8, 5], [4, 4, 8], [8, 4, 4]], 'gaussian': [[0.7, 1.5, 0.7], [1.5, 1.5, 1.5], [1.5, 1.5, 1]]}, 'CA': (0.274868708853115, 0.0002227383921581577), 'Accuracy': (0.637426

## Sparsity in eta, Centering, K-Normalization

In [6]:
# Labels passed to votingOverCA / testConfigurations for this experiment.
kernel_names = ['linear', 'polynomial', 'gaussian']
# One dict per configuration; presumably kernel name -> candidate parameter values.
# NOTE(review): the output recorded below this section was produced with a single
# {'linear', 'polynomial', 'gaussian'} grid, not with this list -- re-run to refresh it.
kernel_type = [
    {'laplacian': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'laplacian': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
    {'sigmoid': [0.1, 0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.5]},
    {'sigmoid': [0.7, 1], 'polynomial': [5, 8], 'gaussian': [0.7, 1]},
]
estimator = ca.centeredKernelAlignment
# `sparsity=0.3` together with `centering` and `normalize_kernels`.
sampler = ms.mySampler(
    n_splits=3,
    test_size=0.25,
    sparsity=0.3,
    centering=True,
    normalize_kernels=True,
)

In [7]:
# Run the sampler over every grid in `kernel_type` on the training views;
# verbose=True prints the per-split / per-fold progress.
result8 = sampler.sample(
    kernel_type,
    estimator,
    ds_list,
    y_,
    valid_fold=3,
    verbose=True,
)


1 split out of 3 ...
	Working on config 1 of 1: {'linear': [0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.3]}
Fold no. 1
Fold no. 2
Fold no. 3
Validation complete, config selected:[[0.5, 2, 0.3], [0.5, 3, 0.1], [0.5, 2, 0.1]]
	Result of 1:
CA: 0.2543693846590317
Accuracy: 0.8245614035087719
Precision: 0.8275862068965517
Recall: 0.8275862068965517
[linear:0.5, polynomial:2, gaussian:0.3, ]
[linear:0.5, polynomial:3, gaussian:0.1, ]
[linear:0.5, polynomial:2, gaussian:0.1, ]

eta vector: [ 0.          0.          0.10481314 -0.09794478  0.9896532   0.
  0.00275124  0.          0.        ]


	Completed in 1.6333333333333333 minutes

2 split out of 3 ...
	Working on config 1 of 1: {'linear': [0.5], 'polynomial': [2, 3], 'gaussian': [0.1, 0.3]}
Fold no. 1
Fold no. 2
Fold no. 3
Validation complete, config selected:[[0.5, 3, 0.1], [0.5, 3, 0.1], [0.5, 2, 0.3]]
	Result of 2:
CA: 0.2443662584823163
Accuracy: 0.8245614035087719
Precision: 0.8518518518518519
Recall: 0.7931034482758621
[linear:

In [8]:
# Vote on the settings found by the run, then score every voted setting on the
# held-out views (the call reports accuracy, precision and recall).
w_dict, w_list = result8.votingOverCA(
    ds_names,
    kernel_names,
)
ut.testConfigurations(
    estimator,
    y_,
    y_test,
    w_list,
    ds_list,
    ds_test,
    kernel_names,
)

Perfomances computed for 1 dictionary settings:
	Accuracy: 0.7466666666666667
	Precision: 0.7209302325581395
	Recall: 0.8157894736842105


In [9]:
# Show the voted parameters: one dict per setting, keyed by data set name and
# then by kernel label.
w_dict

[{'ds1': {'linear': 0.5, 'polynomial': 2, 'gaussian': 0.3},
  'ds2': {'linear': 0.5, 'polynomial': 3, 'gaussian': 0.1},
  'ds3': {'linear': 0.5, 'polynomial': 2, 'gaussian': 0.1}}]

In [10]:
# Print the per-configuration statistics collected for the `sparsity=0.3` run.
result8.performancesFeatures()

statistics of configuration 1
{'config': {'linear': [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 'polynomial': [[2, 3, 2], [3, 3, 2], [2, 3, 2]], 'gaussian': [[0.3, 0.1, 0.1], [0.1, 0.1, 0.3], [0.3, 0.1, 0.1]]}, 'CA': (0.24796546340238448, 2.061030574888471e-05), 'Accuracy': (0.7953216374269005, 0.0017099278410451096), 'Precision': (0.8042571306939124, 0.002613229436871684), 'Recall': (0.7931034482758621, 0.0007927070947284983), 'eta': (array([ 0.        ,  0.07490041,  0.07294075, -0.09521692,  0.98285313,
        0.        ,  0.00442709,  0.        ,  0.        ]), array([0.00000000e+00, 1.12201429e-02, 2.67427065e-03, 4.41083125e-06,
       8.28886216e-05, 0.00000000e+00, 1.73505454e-06, 0.00000000e+00,
       0.00000000e+00]))}
