In [1]:
import pandas as pd
import os
import numpy as np

import Utils as ut
import CortesAlignmentFile as ca
import mySampler as ms
 
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold

from threading import Thread, Lock

  from collections import Sequence


## Dataset Configuration

In [2]:
# Toy feature matrix and its label vector, both read from the local `data` folder.
# The labels are flattened to one dimension; `ds_names` gives one name per view.
ds = pd.read_csv(os.path.join('data', 'toyDataset.csv')).values
label = pd.read_csv(os.path.join('data', 'toyLabel.csv')).values.ravel()
ds_names = ['ds%d' % view_no for view_no in (1, 2, 3)]

In [3]:
def random_sampling(X, y, n_samples, random_state=None):
    """Draw a single stratified shuffle split of ``X``.

    Parameters
    ----------
    X : array-like
        Samples to split.
    y : array-like
        Labels used for stratification.
    n_samples : float or int
        Forwarded as ``test_size`` to ``StratifiedShuffleSplit``.
    random_state : int, RandomState instance or None, default None
        Seed forwarded to ``StratifiedShuffleSplit``. ``None`` keeps the
        previous unseeded behaviour; pass an integer to make the split
        reproducible across kernel restarts.

    Returns
    -------
    tuple
        The ``(train_indices, test_indices)`` pair of the only split.
    """
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=n_samples, random_state=random_state)
    return next(splitter.split(X, y))

In [4]:
# Hold out 25% of the samples (stratified on the labels) as test rows.
tr_idx, ts_idx = random_sampling(ds, label, n_samples=0.25)

# Three column-wise views of the same samples.
ds1 = np.concatenate((ds[:, :5], ds[:, 10:15]), axis=1)
ds2 = np.concatenate((ds[:, 5:10], ds[:, 15:20]), axis=1)
ds3 = ds[:, 20:]
ds_list_complete = [ds1, ds2, ds3]

# Row-wise train / test portions of every view, in the same order as above.
ds_list = [view[tr_idx] for view in ds_list_complete]
ds_test = [view[ts_idx] for view in ds_list_complete]
ds1_tr, ds2_tr, ds3_tr = ds_list
ds1_ts, ds2_ts, ds3_ts = ds_test

# Labels matching the train / test rows.
y_t_, y_t_test = label[tr_idx], label[ts_idx]

## Kernel Definition

In [5]:
# Configuration 0: laplacian + gaussian kernels.
# `kernel_type_0` holds two dicts, each mapping a kernel name to a list of
# numeric values; it is later passed as the first argument of `sampler.sample`.
kernel_names_0 = ['laplacian', 'gaussian']
kernel_type_0 = [
    dict(laplacian=[0.2, 0.6], gaussian=[0.3, 0.7]),
    dict(laplacian=[0.4, 0.9], gaussian=[0.5, 1]),
]

In [6]:
# Configuration 1: linear + gaussian kernels.
# `kernel_type_1` holds two dicts, each mapping a kernel name to a list of
# numeric values; it is later passed as the first argument of `sampler.sample`.
kernel_names_1 = ['linear', 'gaussian']
kernel_type_1 = [
    dict(linear=[1], gaussian=[0.4, 0.7]),
    dict(linear=[1], gaussian=[0.5, 1]),
]

In [7]:
# Configuration 2: polynomial + gaussian kernels.
# `kernel_type_2` holds two dicts, each mapping a kernel name to a list of
# numeric values; it is later passed as the first argument of `sampler.sample`.
kernel_names_2 = ['polynomial', 'gaussian']
kernel_type_2 = [
    dict(polynomial=[2, 7], gaussian=[0.4, 0.7]),
    dict(polynomial=[3, 5], gaussian=[0.5, 1]),
]

In [8]:
# Configuration 3: sigmoid + gaussian kernels.
# `kernel_type_3` holds two dicts, each mapping a kernel name to a list of
# numeric values; it is later passed as the first argument of `sampler.sample`.
kernel_names_3 = ['sigmoid', 'gaussian']
kernel_type_3 = [
    dict(sigmoid=[0.2, 0.6], gaussian=[0.3, 0.7]),
    dict(sigmoid=[0.4, 0.9], gaussian=[0.5, 1]),
]

## Global parameters

In [9]:
# Parallel lists: position i of `kernel_names` and `kernel_types` describe the
# same kernel configuration. Both are read as globals by the functions below.
kernel_names, kernel_types = (
    [kernel_names_0, kernel_names_1, kernel_names_2, kernel_names_3],
    [kernel_type_0, kernel_type_1, kernel_type_2, kernel_type_3],
)

# Module-level lock; it is created here but never acquired in the visible cells.
lock_toy = Lock()

## Other shared parameters initialization

In [10]:
# Alignment routine that the launch cells pass to `selectParam`.
estimator = ca.centeredKernelAlignment

# Candidate penalty values handed to `selectParam` for the 'l1' and 'l2' runs.
# NOTE(review): 0.1 appears twice in each grid, so the last entry repeats the
# first one; it may have been meant to be a different value - confirm.
l1_params = [0.1, 0.3, 0.5, 0.7, 0.1]
l2_params = [0.1, 0.3, 0.5, 0.7, 0.1]

# Passed as `n_splits` to `selectParam` by every launch cell.
valid_fold = 3

# Forwarded untouched down to `sampler.sample`.
exclusion_list = None

# Every started worker thread is appended here so it can be joined at the end.
threads = []

## Thread

In [11]:
def executeKernels(sampler, estimator, penalty_type, parameter, ds_list, ds_test, y_, y_test, valid_fold, exclusion_list, verbose, approach):
    """Run the sampler once per global kernel configuration and collect a score.

    For every ``(names, grid)`` pair obtained by zipping the module-level
    ``kernel_names`` and ``kernel_types`` the function:

    1. calls ``sampler.sample`` on the training views,
    2. asks the result for its voting via ``votingOverCA``,
    3. passes the voted list to ``ut.testConfigurations`` together with the
       train / test views and labels (its return value is ignored),
    4. stores ``performancesFeatures(...)['CA'][0]`` as the score.

    ``approach`` is accepted but not used inside this function.

    Returns
    -------
    numpy.ndarray
        One score per entry of ``kernel_names``, in the same order.
    """
    scores = np.empty(len(kernel_names))

    for slot, (names, grid) in enumerate(zip(kernel_names, kernel_types)):
        sampled = sampler.sample(
            grid,
            estimator,
            ds_list,
            y_,
            valid_fold=valid_fold,
            verbose=verbose,
            exclusion_list=exclusion_list,
        )

        # The first value returned by the voting is not needed here.
        _, voted_list = sampled.votingOverCA(ds_names, names)

        ut.testConfigurations(
            estimator, penalty_type, parameter, y_, y_test,
            voted_list, ds_list, ds_test, names,
            'classification', verbose=verbose,
        )

        scores[slot] = sampled.performancesFeatures(verbose=verbose)['CA'][0]

    return scores


In [12]:
def selectParam(params, penalty_type, train_set_list, train_label, estimator, approach, n_splits=3, centering=False, normalizing=False, normalize_kernels=False, exclusion_list=None, verbose=False):
    """Cross-validate every candidate penalty value and report its mean score.

    For each stratified fold and each value in ``params`` a fresh
    ``ms.mySampler`` is built and handed to ``executeKernels``. The per-kernel
    scores it returns are accumulated, averaged over the folds and finally
    averaged over the kernel configurations, giving one score per candidate.

    Parameters
    ----------
    params : sequence
        Candidate penalty values. Each one is passed to ``ms.mySampler`` as
        ``sparsity`` when ``penalty_type`` is ``'l1'`` and as ``lamb`` when it
        is ``'l2'``.
    penalty_type : {'l1', 'l2'}
        Selects which sampler argument receives the candidate value.
    train_set_list : list of arrays
        One array per dataset view; each is indexed with the same fold
        indices. The folds are computed on the first view.
    train_label : array
        Labels used for stratification and indexed with the fold indices.
    estimator : object
        Forwarded unchanged to ``executeKernels``.
    approach : str
        Label printed in front of the scores.
    n_splits : int, default 3
        Number of stratified folds; also forwarded to ``executeKernels`` as
        its ``valid_fold`` argument.
    centering, normalizing, normalize_kernels : bool, default False
        Forwarded to ``ms.mySampler``.
    exclusion_list, verbose
        Forwarded to ``executeKernels``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(params),)`` with the mean score of every
        candidate value.

    Raises
    ------
    ValueError
        If ``penalty_type`` is neither ``'l1'`` nor ``'l2'``.
    """
    # Name of the sampler keyword that receives the candidate value.
    penalty_kwarg = {'l1': 'sparsity', 'l2': 'lamb'}.get(penalty_type)
    if penalty_kwarg is None:
        raise ValueError('Penalty type not set properly')

    skf = StratifiedKFold(n_splits=n_splits, shuffle=False)

    n_params = len(params)
    n_kernels = len(kernel_names)

    # Row = candidate parameter, column = kernel configuration.
    results = np.zeros((n_params, n_kernels))

    for tr_idx, val_idx in skf.split(train_set_list[0], train_label):
        tr_set_list = [X[tr_idx] for X in train_set_list]
        val_set_list = [X[val_idx] for X in train_set_list]
        tr_label = train_label[tr_idx]
        val_label = train_label[val_idx]

        for idx, param in enumerate(params):
            sampler = ms.mySampler(n_splits=3, test_size=.25, centering=centering, normalizing=normalizing, normalize_kernels=normalize_kernels, **{penalty_kwarg: param})

            results[idx] += executeKernels(sampler, estimator, penalty_type, param, tr_set_list, val_set_list, tr_label, val_label, n_splits, exclusion_list, verbose, approach)

    # get_n_splits is a method: it must be called, dividing by the bound
    # method itself raises TypeError.
    results /= skf.get_n_splits()

    # Average over the kernel axis so that there is one value per candidate
    # parameter (the previous axis=0 sum was divided by the wrong count).
    avg_results = results.mean(axis=1)

    # Build the whole message first: str + ndarray is not a valid operation.
    print(approach + "\n" + str(avg_results))

    return avg_results


## L2 Penalty, Original Data Centering and Normalization

In [14]:
# 'l2' search with data centering and normalization on, kernel normalization off.
t = Thread(
    target=selectParam,
    args=(l2_params, 'l2', ds_list, y_t_, estimator, 'Centering - Normalizing'),
    kwargs=dict(
        n_splits=valid_fold,
        centering=True,
        normalizing=True,
        normalize_kernels=False,
        exclusion_list=exclusion_list,
        verbose=False,
    ),
)
t.start()
threads.append(t)

## L1 Penalty, Original Data Centering and Normalization

In [16]:
# 'l1' search with data centering and normalization on, kernel normalization off.
t = Thread(
    target=selectParam,
    args=(l1_params, 'l1', ds_list, y_t_, estimator, 'Centering - Normalizing'),
    kwargs=dict(
        n_splits=valid_fold,
        centering=True,
        normalizing=True,
        normalize_kernels=False,
        exclusion_list=exclusion_list,
        verbose=False,
    ),
)
t.start()
threads.append(t)

## L2 Penalty, Normalization, Kernel Normalization

In [18]:
# 'l2' search with centering off, data normalization and kernel normalization on.
t = Thread(
    target=selectParam,
    args=(l2_params, 'l2', ds_list, y_t_, estimator, 'Normalizing - K Normalizing'),
    kwargs=dict(
        n_splits=valid_fold,
        centering=False,
        normalizing=True,
        normalize_kernels=True,
        exclusion_list=exclusion_list,
        verbose=False,
    ),
)
t.start()
threads.append(t)

## L1 Penalty, Normalization, Kernel Normalization

In [20]:
# 'l1' search with centering off, data normalization and kernel normalization on.
t = Thread(
    target=selectParam,
    args=(l1_params, 'l1', ds_list, y_t_, estimator, 'Normalizing - K Normalizing'),
    kwargs=dict(
        n_splits=valid_fold,
        centering=False,
        normalizing=True,
        normalize_kernels=True,
        exclusion_list=exclusion_list,
        verbose=False,
    ),
)
t.start()
threads.append(t)

## L2 Penalty, Centering, Normalization, Kernel Normalization

In [22]:
# 'l2' search with centering, data normalization and kernel normalization all on.
t = Thread(
    target=selectParam,
    args=(l2_params, 'l2', ds_list, y_t_, estimator, 'Centering - Normalizing - K Normalizing'),
    kwargs=dict(
        n_splits=valid_fold,
        centering=True,
        normalizing=True,
        normalize_kernels=True,
        exclusion_list=exclusion_list,
        verbose=False,
    ),
)
t.start()
threads.append(t)

## L1 Penalty, Centering, Normalization, Kernel Normalization

In [24]:
# 'l1' search with centering, data normalization and kernel normalization all on.
t = Thread(
    target=selectParam,
    args=(l1_params, 'l1', ds_list, y_t_, estimator, 'Centering - Normalizing - K Normalizing'),
    kwargs=dict(
        n_splits=valid_fold,
        centering=True,
        normalizing=True,
        normalize_kernels=True,
        exclusion_list=exclusion_list,
        verbose=False,
    ),
)
t.start()
threads.append(t)

## L2 Penalty, Centering, K-Normalization

In [26]:
# 'l2' search with centering and kernel normalization on, data normalization off.
t = Thread(
    target=selectParam,
    args=(l2_params, 'l2', ds_list, y_t_, estimator, 'Centering - K Normalizing'),
    kwargs=dict(
        n_splits=valid_fold,
        centering=True,
        normalizing=False,
        normalize_kernels=True,
        exclusion_list=exclusion_list,
        verbose=False,
    ),
)
t.start()
threads.append(t)

## L1 Penalty, Centering, K-Normalization

In [28]:
# 'l1' search with centering and kernel normalization on, data normalization off.
t = Thread(
    target=selectParam,
    args=(l1_params, 'l1', ds_list, y_t_, estimator, 'Centering - K Normalizing'),
    kwargs=dict(
        n_splits=valid_fold,
        centering=True,
        normalizing=False,
        normalize_kernels=True,
        exclusion_list=exclusion_list,
        verbose=False,
    ),
)
t.start()
threads.append(t)

## Waiting

In [29]:
# Block until every worker recorded in `threads` has finished, then report.
for worker in threads:
    worker.join()
print("Operations completed")

  eta_new /= np.linalg.norm(eta_new)


KeyboardInterrupt: 