In [1]:
import pandas as pd
import os
import numpy as np

import Utils as ut
import CortesAlignmentFile as ca
import mySampler as ms
 
from sklearn.model_selection import StratifiedShuffleSplit

## Dataset Configuration - Toy

In [2]:
from sklearn import datasets
# Reference signature, kept for quick lookup of the available knobs:
# make_classification(n_samples=100, n_features=20, n_informative=2, n_redundant=2, n_repeated=0, 
#                     n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, 
#                     hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)

# Fixed seed so that "Restart Kernel -> Run All" regenerates the same toy dataset.
RANDOM_STATE = 42

# Binary toy problem: 300 samples, 30 features, 10 of them informative, none redundant.
ds, label = datasets.make_classification(n_samples=300, n_features=30, n_informative=10, n_redundant=0,
                                         n_classes=2, random_state=RANDOM_STATE)
# Names of the three feature views that are carved out of `ds` further down.
ds_names = ['ds1', 'ds2', 'ds3']

In [3]:
# Recode class 0 as -1 so the two classes are labelled -1 / +1.
# The assignment mutates `label` in place; re-running the cell is harmless
# because no zeros remain after the first run.
label[label == 0] = -1

In [4]:
def random_sampling(X, y, n_samples, random_state=None):
    """Draw one stratified shuffle split and return its index arrays.

    Parameters
    ----------
    X : array-like
        Samples to split; forwarded to ``StratifiedShuffleSplit.split``.
    y : array-like
        Class labels used for stratification.
    n_samples : float or int
        Forwarded as ``test_size``. Per scikit-learn, a float is the fraction
        of samples placed in the test partition and an int is an absolute
        number of test samples.
    random_state : int, RandomState instance or None, default=None
        Seed forwarded to ``StratifiedShuffleSplit``. The default ``None``
        keeps the original unseeded behaviour; pass an int for a
        reproducible split.

    Returns
    -------
    tuple
        ``(train_indices, test_indices)`` of the single generated split.
    """
    # Local import keeps the helper usable when copied out of the notebook.
    from sklearn.model_selection import StratifiedShuffleSplit

    splitter = StratifiedShuffleSplit(n_splits=1, test_size=n_samples, random_state=random_state)
    return next(splitter.split(X, y))

In [5]:
# Stratified hold-out split; n_samples=0.25 is forwarded as the test size.
tr_idx, ts_idx = random_sampling(ds, label, n_samples=0.25)

# View 1: columns 0-4 together with columns 10-14.
ds1 = np.concatenate((ds[:, 0:5], ds[:, 10:15]), axis=1)
ds1_tr, ds1_ts = ds1[tr_idx], ds1[ts_idx]

# View 2: columns 5-9 together with columns 15-19.
ds2 = np.concatenate((ds[:, 5:10], ds[:, 15:20]), axis=1)
ds2_tr, ds2_ts = ds2[tr_idx], ds2[ts_idx]

# View 3: every column from index 20 onwards.
ds3 = ds[:, 20:]
ds3_tr, ds3_ts = ds3[tr_idx], ds3[ts_idx]

# Labels partitioned with the same indices as the feature views.
y_c_, y_c_test = label[tr_idx], label[ts_idx]

# Views grouped as full / train / test lists, in the same ds1, ds2, ds3 order.
ds_list_complete = [ds1, ds2, ds3]
ds_list = [ds1_tr, ds2_tr, ds3_tr]
ds_test = [ds1_ts, ds2_ts, ds3_ts]

## Kernel Definition

In [6]:
# Kernel families of configuration 1, in the order passed to the voting/testing helpers.
kernel_names_1 = ['laplacian', 'polynomial', 'gaussian']

# Two grids mapping kernel name -> list of candidate values.
# NOTE(review): presumably each list holds that kernel's hyper-parameter values and
# the two dicts are alternative grids consumed by mySampler.sample — confirm there.
kernel_type_1 = [
    {
        'laplacian': [0.1, 0.2, 0.3],
        'polynomial': [2, 3],
        'gaussian': [0.2, 0.3],
    },
    {
        'laplacian': [0.5, 0.7],
        'polynomial': [5, 8],
        'gaussian': [0.5, 0.7],
    },
]

In [7]:
# Kernel families of configuration 2, in the order passed to the voting/testing helpers.
kernel_names_2 = ['sigmoid', 'polynomial', 'gaussian']

# Two grids mapping kernel name -> list of candidate values.
# NOTE(review): presumably each list holds that kernel's hyper-parameter values and
# the two dicts are alternative grids consumed by mySampler.sample — confirm there.
kernel_type_2 = [
    {
        'sigmoid': [0.1, 0.2, 0.3],
        'polynomial': [2, 3],
        'gaussian': [0.2, 0.3],
    },
    {
        'sigmoid': [0.5, 0.7],
        'polynomial': [5, 8],
        'gaussian': [0.5, 0.7],
    },
]

In [8]:
# Kernel families of configuration 3, in the order passed to the voting/testing helpers.
kernel_names_3 = ['linear', 'polynomial', 'gaussian']

# Two grids mapping kernel name -> list of candidate values.
# NOTE(review): presumably each list holds that kernel's hyper-parameter values and
# the two dicts are alternative grids consumed by mySampler.sample — confirm there.
kernel_type_3 = [
    {
        'linear': [0.5],
        'polynomial': [2, 3, 7],
        'gaussian': [0.1, 0.5, 0.7],
    },
    {
        'linear': [0.2],
        'polynomial': [4, 5, 8],
        'gaussian': [0.7, 1],
    },
]

In [9]:
estimator = ca.centeredKernelAlignment

## Basic approach

In [10]:
sampler = ms.mySampler(n_splits=3, test_size=.25)

In [None]:
result1 = sampler.sample(kernel_type_1, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_1)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_1)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result1 = sampler.sample(kernel_type_2, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_2)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_2)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result13 = sampler.sample(kernel_type_3, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
# Vote on the kernel_type_3 run stored in `result13`; `result1` still holds the
# kernel_type_2 run at this point and must not be paired with kernel_names_3.
w_dict, w_list = result13.votingOverCA(ds_names, kernel_names_3)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_3)

In [None]:
w_dict

In [None]:
result13.performancesFeatures()

## Kernel normalization

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, normalize_kernels = True)

In [None]:
result1 = sampler.sample(kernel_type_1, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_1)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_1)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result1 = sampler.sample(kernel_type_2, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_2)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_2)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result13 = sampler.sample(kernel_type_3, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
# Vote on the kernel_type_3 run stored in `result13`; `result1` still holds the
# kernel_type_2 run at this point and must not be paired with kernel_names_3.
w_dict, w_list = result13.votingOverCA(ds_names, kernel_names_3)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_3)

In [None]:
w_dict

In [None]:
result13.performancesFeatures()

## Normalized data

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, normalizing = True)

In [None]:
result1 = sampler.sample(kernel_type_1, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_1)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_1)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result1 = sampler.sample(kernel_type_2, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_2)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_2)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result13 = sampler.sample(kernel_type_3, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
# Vote on the kernel_type_3 run stored in `result13`; `result1` still holds the
# kernel_type_2 run at this point and must not be paired with kernel_names_3.
w_dict, w_list = result13.votingOverCA(ds_names, kernel_names_3)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_3)

In [None]:
w_dict

In [None]:
result13.performancesFeatures()

## Origin Data Centering

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, centering = True)

In [None]:
result1 = sampler.sample(kernel_type_1, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_1)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_1)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result1 = sampler.sample(kernel_type_2, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_2)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_2)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result13 = sampler.sample(kernel_type_3, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
# Vote on the kernel_type_3 run stored in `result13`; `result1` still holds the
# kernel_type_2 run at this point and must not be paired with kernel_names_3.
w_dict, w_list = result13.votingOverCA(ds_names, kernel_names_3)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_3)

In [None]:
w_dict

In [None]:
result13.performancesFeatures()

## Origin Data Centering and Normalization

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, centering = True, normalizing = True)

In [None]:
result1 = sampler.sample(kernel_type_1, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_1)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_1)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result1 = sampler.sample(kernel_type_2, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_2)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_2)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result13 = sampler.sample(kernel_type_3, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
# Vote on the kernel_type_3 run stored in `result13`; `result1` still holds the
# kernel_type_2 run at this point and must not be paired with kernel_names_3.
w_dict, w_list = result13.votingOverCA(ds_names, kernel_names_3)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_3)

In [None]:
w_dict

In [None]:
result13.performancesFeatures()

## Normalization, Kernel Normalization

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, normalizing = True, normalize_kernels = True)

In [None]:
result1 = sampler.sample(kernel_type_1, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_1)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_1)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result1 = sampler.sample(kernel_type_2, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_2)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_2)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result13 = sampler.sample(kernel_type_3, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
# Vote on the kernel_type_3 run stored in `result13`; `result1` still holds the
# kernel_type_2 run at this point and must not be paired with kernel_names_3.
w_dict, w_list = result13.votingOverCA(ds_names, kernel_names_3)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_3)

In [None]:
w_dict

In [None]:
result13.performancesFeatures()

## Centering, Kernel Normalization

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, centering = True, normalize_kernels = True)

In [None]:
result1 = sampler.sample(kernel_type_1, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_1)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_1)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result1 = sampler.sample(kernel_type_2, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_2)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_2)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result13 = sampler.sample(kernel_type_3, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
# Vote on the kernel_type_3 run stored in `result13`; `result1` still holds the
# kernel_type_2 run at this point and must not be paired with kernel_names_3.
w_dict, w_list = result13.votingOverCA(ds_names, kernel_names_3)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_3)

In [None]:
w_dict

In [None]:
result13.performancesFeatures()

## Centering, Normalization, Kernel Normalization

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=.25, centering = True, normalizing = True, normalize_kernels = True)

In [None]:
result1 = sampler.sample(kernel_type_1, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_1)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_1)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result1 = sampler.sample(kernel_type_2, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_2)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_2)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result13 = sampler.sample(kernel_type_3, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
# Vote on the kernel_type_3 run stored in `result13`; `result1` still holds the
# kernel_type_2 run at this point and must not be paired with kernel_names_3.
w_dict, w_list = result13.votingOverCA(ds_names, kernel_names_3)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_3)

In [None]:
w_dict

In [None]:
result13.performancesFeatures()

## L2 Penalty, Centering, K-Normalization

In [None]:
sampler = ms.mySampler(n_splits=3, test_size=0.25, lamb = 0.7, centering = True, normalize_kernels = True)

In [None]:
result1 = sampler.sample(kernel_type_1, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_1)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_1)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result1 = sampler.sample(kernel_type_2, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_2)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_2)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result13 = sampler.sample(kernel_type_3, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
# Vote on the kernel_type_3 run stored in `result13`; `result1` still holds the
# kernel_type_2 run at this point and must not be paired with kernel_names_3.
w_dict, w_list = result13.votingOverCA(ds_names, kernel_names_3)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_3)

In [None]:
w_dict

In [None]:
result13.performancesFeatures()

## Sparsity in eta, Centering, K-Normalization

In [None]:
# NOTE(review): these arguments are identical to the sampler built in the
# "L2 Penalty, Centering, K-Normalization" section, so this section currently
# repeats that experiment. If mySampler exposes a sparsity option for eta, it is
# not set here — confirm the intended arguments.
sampler = ms.mySampler(n_splits=3, test_size=0.25, lamb = 0.7, centering = True, normalize_kernels = True)

In [None]:
result1 = sampler.sample(kernel_type_1, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_1)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_1)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result1 = sampler.sample(kernel_type_2, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
w_dict, w_list = result1.votingOverCA(ds_names, kernel_names_2)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_2)

In [None]:
w_dict

In [None]:
result1.performancesFeatures()

In [None]:
result13 = sampler.sample(kernel_type_3, estimator, ds_list, y_c_, valid_fold = 3, verbose=False)

In [None]:
# Vote on the kernel_type_3 run stored in `result13`; `result1` still holds the
# kernel_type_2 run at this point and must not be paired with kernel_names_3.
w_dict, w_list = result13.votingOverCA(ds_names, kernel_names_3)
ut.testConfigurations(estimator, y_c_, y_c_test, w_list, ds_list, ds_test, kernel_names_3)

In [None]:
w_dict

In [None]:
result13.performancesFeatures()