In [1]:
#groupings: all(done elsewhere), (14-28)done elsewhere, (14-21), (21-28), (14-35), (28-35), (21-35)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

from sklearn.feature_selection import SelectKBest

In [2]:
def create_groupings(dates, rows, data):
    grouped = []
    for i in rows:
        for j in dates:
            if j in i:
                grouped.append(i)
    return data.loc[grouped].values

In [3]:
def create_dataset(sub, sup, dates):
    sub_group = create_groupings(dates, sub.index, sub)
    sup_group = create_groupings(dates, sup.index, sup)
    X = np.concatenate((sub_group, sup_group))
    y = ['sub'] * sub_group.shape[0] + ['sup'] * sup_group.shape[0]
    y = np.asarray(y)
    return X, y

In [4]:
#group the data

sub_all = pd.read_csv('all_subg_samples.csv', index_col=0)
sup_all = pd.read_csv('all_supg_samples.csv', index_col=0)

X_0_14, y_0_14 = create_dataset(sub_all, sup_all, ['-0','-14'])
X_14_21, y_14_21 = create_dataset(sub_all, sup_all, ['-14','-21'])
X_14_28, y_14_28 = create_dataset(sub_all, sup_all, ['-14','-21', '-28'])
X_21_28, y_21_28 = create_dataset(sub_all, sup_all, ['-21','-28'])
X_14_35, y_14_35 = create_dataset(sub_all, sup_all, ['-14', '-21', '-28', '-35'])
X_21_35, y_21_35 = create_dataset(sub_all, sup_all, ['-21', '-28', '-35'])
X_28_35, y_28_35 = create_dataset(sub_all, sup_all, ['-28','-35'])

In [9]:
#exhaustively search the param space
warnings.warn = warn


def run_parameter_search(X, y, dates):
    best = []    
    print('Search for {0} '.format(dates))
    for k in [None, 100,200,300,400,500,600,700,800,900,1000]:
        if k:
            X_new = SelectKBest(mutual_info_classif, k=k).fit_transform(X, y)
            desc = "{0} features".format(k)
        else:
            X_new = X
            desc = "All features"
        tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                         'C': [1, 10, 100, 1000]},
                            {'kernel': ['linear', 'poly','sigmoid'], 'C': [1, 10, 100, 1000]}]
        scores = ['accuracy']

        for score in scores:
            print("# Tuning hyper-parameters for %s" % score)
            print()

            clf = GridSearchCV(SVC(), tuned_parameters, cv=15,
                       scoring='%s' % score)
            clf.fit(X_new,y)

            print("Best parameters set found on development set: %s" % desc)
            print()
            print(clf.best_params_)
            print()
            print("Grid scores on development set:")
            print()
            means = clf.cv_results_['mean_test_score']
            best.append({'k':k, 'mean':np.max(means)})
            stds = clf.cv_results_['std_test_score']
            for mean, std, params in zip(means, stds, clf.cv_results_['params']):
                print("%0.3f (+/-%0.03f) for %r"
                      % (mean, std, params))
    print(best)

In [10]:
for data in [(X_0_14, y_0_14, "0-14"),
            (X_14_21, y_14_21, "14-21"),
            (X_14_28, y_14_28, "14-28"),
            (X_21_28, y_21_28, "21-28"),
            (X_14_35, y_14_35, "14-35"),
            (X_21_35, y_21_35, "21-35"),
            (X_28_35, y_28_35, "28-35")]:
    run_parameter_search(data[0], data[1], data[2])

Search for 0-14 
# Tuning hyper-parameters for accuracy

Best parameters set found on development set: All features

{'C': 10, 'kernel': 'linear'}

Grid scores on development set:

0.600 (+/-0.153) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.600 (+/-0.153) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.600 (+/-0.153) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.600 (+/-0.153) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.600 (+/-0.153) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.600 (+/-0.153) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.600 (+/-0.153) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.600 (+/-0.153) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.600 (+/-0.153) for {'C': 1, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 1, 'kernel': 'poly'}
0.600 (+/-0.153) for {'C': 1, 'kernel': 'sigmoid'}
0.617 (+/-0.180) for {'C': 10, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 10, 'kernel': 'poly'}
0.600 (+/-0.153) for {'C': 10, 'kernel': 'sigmoid'

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 700 features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.583 (+/-0.149) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.583 (+/-0.149) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.583 (+/-0.149) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.583 (+/-0.149) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.583 (+/-0.149) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.583 (+/-0.149) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.583 (+/-0.149) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.583 (+/-0.149) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.583 (+/-0.149) for {'C': 1, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 1, 'kernel': 'poly'}
0.583 (+/-0.149) for {'C': 1, 'kernel': 'sigmoid'}
0.583 (+/-0.149) for {'C': 10, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 10, 'kernel': 'poly'}
0.583 (+/-0.149) for {'C': 10, 'kernel': 'sigmoid'}
0.617 (+/-0.1

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 200 features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.633 (+/-0.201) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.633 (+/-0.201) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.633 (+/-0.201) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.633 (+/-0.201) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.633 (+/-0.201) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.633 (+/-0.201) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.633 (+/-0.201) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.633 (+/-0.201) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.633 (+/-0.201) for {'C': 1, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 1, 'kernel': 'poly'}
0.633 (+/-0.201) for {'C': 1, 'kernel': 'sigmoid'}
0.633 (+/-0.201) for {'C': 10, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 10, 'kernel': 'poly'}
0.633 (+/-0.201) for {'C': 10, 'kernel': 'sigmoid'}
0.883 (+/-0.1

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 900 features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.700 (+/-0.208) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.700 (+/-0.208) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.700 (+/-0.208) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.700 (+/-0.208) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.700 (+/-0.208) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.700 (+/-0.208) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.700 (+/-0.208) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.700 (+/-0.208) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.700 (+/-0.208) for {'C': 1, 'kernel': 'linear'}
0.533 (+/-0.125) for {'C': 1, 'kernel': 'poly'}
0.700 (+/-0.208) for {'C': 1, 'kernel': 'sigmoid'}
0.733 (+/-0.193) for {'C': 10, 'kernel': 'linear'}
0.533 (+/-0.125) for {'C': 10, 'kernel': 'poly'}
0.700 (+/-0.208) for {'C': 10, 'kernel': 'sigmoid'}
0.833 (+/-0.1

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 400 features

{'C': 100, 'kernel': 'linear'}

Grid scores on development set:

0.711 (+/-0.177) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.711 (+/-0.177) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.711 (+/-0.177) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.711 (+/-0.177) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.711 (+/-0.177) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.711 (+/-0.177) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.711 (+/-0.177) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.711 (+/-0.177) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.711 (+/-0.177) for {'C': 1, 'kernel': 'linear'}
0.500 (+/-0.061) for {'C': 1, 'kernel': 'poly'}
0.711 (+/-0.177) for {'C': 1, 'kernel': 'sigmoid'}
0.722 (+/-0.199) for {'C': 10, 'kernel': 'linear'}
0.500 (+/-0.061) for {'C': 10, 'kernel': 'poly'}
0.711 (+/-0.177) for {'C': 10, 'kernel': 'sigmoid'}
0.844 (+/-0.12

Best parameters set found on development set: All features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.683 (+/-0.213) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.683 (+/-0.213) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.683 (+/-0.213) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.683 (+/-0.213) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.683 (+/-0.213) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.683 (+/-0.213) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.683 (+/-0.213) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.683 (+/-0.213) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.683 (+/-0.213) for {'C': 1, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 1, 'kernel': 'poly'}
0.683 (+/-0.213) for {'C': 1, 'kernel': 'sigmoid'}
0.717 (+/-0.201) for {'C': 10, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 10, 'kernel': 'poly'}
0.683 (+/-0.213) for {'C': 10, 'kernel': 'sigmoid'}
0.683 (+/-0.193) for {'C': 100, 'kernel': 'linear'}
0

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 700 features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.650 (+/-0.200) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.650 (+/-0.200) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.650 (+/-0.200) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.650 (+/-0.200) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.650 (+/-0.200) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.650 (+/-0.200) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.650 (+/-0.200) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.650 (+/-0.200) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.650 (+/-0.200) for {'C': 1, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 1, 'kernel': 'poly'}
0.650 (+/-0.200) for {'C': 1, 'kernel': 'sigmoid'}
0.650 (+/-0.200) for {'C': 10, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 10, 'kernel': 'poly'}
0.650 (+/-0.200) for {'C': 10, 'kernel': 'sigmoid'}
0.733 (+/-0.2

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 200 features

{'C': 100, 'kernel': 'linear'}

Grid scores on development set:

0.692 (+/-0.128) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.692 (+/-0.128) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.692 (+/-0.128) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.692 (+/-0.128) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.692 (+/-0.128) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.692 (+/-0.128) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.692 (+/-0.128) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.692 (+/-0.128) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.692 (+/-0.128) for {'C': 1, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 1, 'kernel': 'poly'}
0.692 (+/-0.128) for {'C': 1, 'kernel': 'sigmoid'}
0.683 (+/-0.157) for {'C': 10, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 10, 'kernel': 'poly'}
0.692 (+/-0.128) for {'C': 10, 'kernel': 'sigmoid'}
0.725 (+/-0.13

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 900 features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.683 (+/-0.218) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.683 (+/-0.218) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.683 (+/-0.218) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.683 (+/-0.218) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.683 (+/-0.218) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.683 (+/-0.218) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.683 (+/-0.218) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.683 (+/-0.218) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.683 (+/-0.218) for {'C': 1, 'kernel': 'linear'}
0.508 (+/-0.031) for {'C': 1, 'kernel': 'poly'}
0.683 (+/-0.218) for {'C': 1, 'kernel': 'sigmoid'}
0.717 (+/-0.180) for {'C': 10, 'kernel': 'linear'}
0.508 (+/-0.031) for {'C': 10, 'kernel': 'poly'}
0.683 (+/-0.218) for {'C': 10, 'kernel': 'sigmoid'}
0.742 (+/-0.1

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 400 features

{'C': 100, 'kernel': 'linear'}

Grid scores on development set:

0.644 (+/-0.201) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.633 (+/-0.194) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.644 (+/-0.201) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.633 (+/-0.194) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.644 (+/-0.201) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.633 (+/-0.194) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.644 (+/-0.201) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.633 (+/-0.194) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.644 (+/-0.201) for {'C': 1, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 1, 'kernel': 'poly'}
0.644 (+/-0.201) for {'C': 1, 'kernel': 'sigmoid'}
0.667 (+/-0.149) for {'C': 10, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 10, 'kernel': 'poly'}
0.644 (+/-0.201) for {'C': 10, 'kernel': 'sigmoid'}
0.711 (+/-0.12

Best parameters set found on development set: All features

{'C': 10, 'kernel': 'linear'}

Grid scores on development set:

0.550 (+/-0.208) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 1, 'kernel': 'linear'}
0.533 (+/-0.085) for {'C': 1, 'kernel': 'poly'}
0.550 (+/-0.208) for {'C': 1, 'kernel': 'sigmoid'}
0.667 (+/-0.217) for {'C': 10, 'kernel': 'linear'}
0.533 (+/-0.085) for {'C': 10, 'kernel': 'poly'}
0.550 (+/-0.208) for {'C': 10, 'kernel': 'sigmoid'}
0.667 (+/-0.175) for {'C': 100, 'kernel': 'linear'}
0.5

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 700 features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.617 (+/-0.155) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 1, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 1, 'kernel': 'poly'}
0.617 (+/-0.155) for {'C': 1, 'kernel': 'sigmoid'}
0.617 (+/-0.155) for {'C': 10, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 10, 'kernel': 'poly'}
0.617 (+/-0.155) for {'C': 10, 'kernel': 'sigmoid'}
0.617 (+/-0.2

In [11]:
def partition(data):
    rows=data.index
    indices = []
    for p in range(11,26):
        train = []
        test = []
        sample = "{0}".format(p)
        for i in range(len(rows)):   
            if sample in rows[i]:
                test.append(i)
            else:
                train.append(i)
        indices.append((train, test))
    return indices

    

In [12]:
warnings.warn = warn


def run_partitioned_search(X, y, partition, dates):
    best = []
    print('Search for {0} '.format(dates))
    for k in [None, 100,200,300,400,500,600,700,800,900,1000]:
        if k:
            X_new = SelectKBest(mutual_info_classif, k=k).fit_transform(X.values, y)
            desc = "{0} features".format(k)
        else:
            X_new = X.values
            desc = "All features"
        tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                         'C': [1, 10, 100, 1000]},
                            {'kernel': ['linear', 'poly','sigmoid'], 'C': [1, 10, 100, 1000]}]
        scores = ['accuracy']

        for score in scores:
            print("# Tuning hyper-parameters for %s" % score)
            print()

            clf = GridSearchCV(SVC(), tuned_parameters, cv=partition,
                       scoring='%s' % score)
            clf.fit(X_new,y)

            print("Best parameters set found on development set: %s" % desc)
            print()
            print(clf.best_params_)
            print()
            print("Grid scores on development set:")
            print()
            means = clf.cv_results_['mean_test_score']
            best.append({'k':k, 'mean':np.max(means)})
            stds = clf.cv_results_['std_test_score']
            for mean, std, params in zip(means, stds, clf.cv_results_['params']):
                print("%0.3f (+/-%0.03f) for %r"
                      % (mean, std, params))
    print(best)
            

In [13]:
def create_groupings_df(dates, rows, data):
    grouped = []
    for i in rows:
        for j in dates:
            if j in i:
                grouped.append(i)
    return data.loc[grouped]

def create_dataset_df(sub, sup, dates):
    sub_group = create_groupings_df(dates, sub.index, sub)
    sup_group = create_groupings_df(dates, sup.index, sup)
    X = sub_group.append(sup_group)
    y = ['sub'] * sub_group.shape[0] + ['sup'] * sup_group.shape[0]
    y = np.asarray(y)
    return X, y

X_0_14, y_0_14 = create_dataset_df(sub_all, sup_all, ['-0','-14'])
X_14_21, y_14_21 = create_dataset_df(sub_all, sup_all, ['-14','-21'])
X_14_28, y_14_28 = create_dataset_df(sub_all, sup_all, ['-14','-21', '-28'])
X_21_28, y_21_28 = create_dataset_df(sub_all, sup_all, ['-21','-28'])
X_14_35, y_14_35 = create_dataset_df(sub_all, sup_all, ['-14', '-21', '-28', '-35'])
X_21_35, y_21_35 = create_dataset_df(sub_all, sup_all, ['-21', '-28', '-35'])
X_28_35, y_28_35 = create_dataset_df(sub_all, sup_all, ['-28','-35'])

In [14]:
for data in [(X_0_14, y_0_14, partition(X_0_14), "0-14"),
            (X_14_21, y_14_21,partition(X_14_21), "14-21"),
            (X_14_28, y_14_28, partition(X_14_28), "14-28"),
            (X_21_28, y_21_28, partition(X_21_28),"21-28"),
            (X_14_35, y_14_35, partition(X_14_35),"14-35"),
            (X_21_35, y_21_35, partition(X_21_35), "21-35"),
            (X_28_35, y_28_35, partition(X_28_35), "28-35")]:
    run_partitioned_search(data[0], data[1], data[2], data[3])

Search for 0-14 
# Tuning hyper-parameters for accuracy

Best parameters set found on development set: All features

{'C': 100, 'kernel': 'linear'}

Grid scores on development set:

0.591 (+/-0.126) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.591 (+/-0.126) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.591 (+/-0.126) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.591 (+/-0.126) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.591 (+/-0.126) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.591 (+/-0.126) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.591 (+/-0.126) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.591 (+/-0.126) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.591 (+/-0.126) for {'C': 1, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 1, 'kernel': 'poly'}
0.591 (+/-0.126) for {'C': 1, 'kernel': 'sigmoid'}
0.602 (+/-0.149) for {'C': 10, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 10, 'kernel': 'poly'}
0.591 (+/-0.126) for {'C': 10, 'kernel': 'sigmoid

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 700 features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.591 (+/-0.122) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.591 (+/-0.122) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.591 (+/-0.122) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.591 (+/-0.122) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.591 (+/-0.122) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.591 (+/-0.122) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.591 (+/-0.122) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.591 (+/-0.122) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.580 (+/-0.122) for {'C': 1, 'kernel': 'linear'}
0.523 (+/-0.052) for {'C': 1, 'kernel': 'poly'}
0.580 (+/-0.122) for {'C': 1, 'kernel': 'sigmoid'}
0.580 (+/-0.122) for {'C': 10, 'kernel': 'linear'}
0.523 (+/-0.052) for {'C': 10, 'kernel': 'poly'}
0.580 (+/-0.122) for {'C': 10, 'kernel': 'sigmoid'}
0.659 (+/-0.1

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 200 features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.655 (+/-0.180) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.655 (+/-0.180) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.655 (+/-0.180) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.655 (+/-0.180) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.655 (+/-0.180) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.655 (+/-0.180) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.655 (+/-0.180) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.655 (+/-0.180) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.655 (+/-0.180) for {'C': 1, 'kernel': 'linear'}
0.595 (+/-0.142) for {'C': 1, 'kernel': 'poly'}
0.655 (+/-0.180) for {'C': 1, 'kernel': 'sigmoid'}
0.655 (+/-0.180) for {'C': 10, 'kernel': 'linear'}
0.595 (+/-0.142) for {'C': 10, 'kernel': 'poly'}
0.655 (+/-0.180) for {'C': 10, 'kernel': 'sigmoid'}
0.828 (+/-0.1

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 900 features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.724 (+/-0.156) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.724 (+/-0.156) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.724 (+/-0.156) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.724 (+/-0.156) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.724 (+/-0.156) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.724 (+/-0.156) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.724 (+/-0.156) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.724 (+/-0.156) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.724 (+/-0.156) for {'C': 1, 'kernel': 'linear'}
0.534 (+/-0.089) for {'C': 1, 'kernel': 'poly'}
0.724 (+/-0.156) for {'C': 1, 'kernel': 'sigmoid'}
0.733 (+/-0.150) for {'C': 10, 'kernel': 'linear'}
0.534 (+/-0.089) for {'C': 10, 'kernel': 'poly'}
0.724 (+/-0.156) for {'C': 10, 'kernel': 'sigmoid'}
0.784 (+/-0.1

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 400 features

{'C': 100, 'kernel': 'linear'}

Grid scores on development set:

0.753 (+/-0.126) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.747 (+/-0.135) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.753 (+/-0.126) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.747 (+/-0.135) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.753 (+/-0.126) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.747 (+/-0.135) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.753 (+/-0.126) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.747 (+/-0.135) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.753 (+/-0.126) for {'C': 1, 'kernel': 'linear'}
0.555 (+/-0.125) for {'C': 1, 'kernel': 'poly'}
0.753 (+/-0.126) for {'C': 1, 'kernel': 'sigmoid'}
0.671 (+/-0.115) for {'C': 10, 'kernel': 'linear'}
0.555 (+/-0.125) for {'C': 10, 'kernel': 'poly'}
0.753 (+/-0.126) for {'C': 10, 'kernel': 'sigmoid'}
0.822 (+/-0.11

Best parameters set found on development set: All features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.636 (+/-0.180) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.636 (+/-0.180) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.636 (+/-0.180) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.636 (+/-0.180) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.636 (+/-0.180) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.636 (+/-0.180) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.636 (+/-0.180) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.636 (+/-0.180) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.636 (+/-0.180) for {'C': 1, 'kernel': 'linear'}
0.511 (+/-0.052) for {'C': 1, 'kernel': 'poly'}
0.636 (+/-0.180) for {'C': 1, 'kernel': 'sigmoid'}
0.693 (+/-0.167) for {'C': 10, 'kernel': 'linear'}
0.511 (+/-0.052) for {'C': 10, 'kernel': 'poly'}
0.636 (+/-0.180) for {'C': 10, 'kernel': 'sigmoid'}
0.693 (+/-0.130) for {'C': 100, 'kernel': 'linear'}
0

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 700 features

{'C': 100, 'kernel': 'linear'}

Grid scores on development set:

0.659 (+/-0.194) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.648 (+/-0.195) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.659 (+/-0.194) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.648 (+/-0.195) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.659 (+/-0.194) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.648 (+/-0.195) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.659 (+/-0.194) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.648 (+/-0.195) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.659 (+/-0.194) for {'C': 1, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 1, 'kernel': 'poly'}
0.659 (+/-0.194) for {'C': 1, 'kernel': 'sigmoid'}
0.625 (+/-0.165) for {'C': 10, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 10, 'kernel': 'poly'}
0.659 (+/-0.194) for {'C': 10, 'kernel': 'sigmoid'}
0.739 (+/-0.18

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 200 features

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

0.705 (+/-0.119) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.699 (+/-0.115) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.705 (+/-0.119) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.699 (+/-0.115) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.705 (+/-0.119) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.699 (+/-0.115) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.705 (+/-0.119) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.699 (+/-0.115) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.705 (+/-0.119) for {'C': 1, 'kernel': 'linear'}
0.517 (+/-0.052) for {'C': 1, 'kernel': 'poly'}
0.705 (+/-0.119) for {'C': 1, 'kernel': 'sigmoid'}
0.682 (+/-0.127) for {'C': 10, 'kernel': 'linear'}
0.517 (+/-0.052) for {'C': 10, 'kernel': 'poly'}
0.705 (+/-0.119) for {'C': 10, 'kernel': 'sigmoid'}
0.756 (+/-0.0

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 900 features

{'C': 100, 'kernel': 'linear'}

Grid scores on development set:

0.716 (+/-0.104) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.716 (+/-0.104) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.716 (+/-0.104) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.716 (+/-0.104) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.716 (+/-0.104) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.716 (+/-0.104) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.716 (+/-0.104) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.716 (+/-0.104) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.716 (+/-0.104) for {'C': 1, 'kernel': 'linear'}
0.506 (+/-0.026) for {'C': 1, 'kernel': 'poly'}
0.716 (+/-0.104) for {'C': 1, 'kernel': 'sigmoid'}
0.722 (+/-0.130) for {'C': 10, 'kernel': 'linear'}
0.506 (+/-0.026) for {'C': 10, 'kernel': 'poly'}
0.716 (+/-0.104) for {'C': 10, 'kernel': 'sigmoid'}
0.756 (+/-0.09

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 400 features

{'C': 100, 'kernel': 'linear'}

Grid scores on development set:

0.686 (+/-0.129) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.686 (+/-0.129) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.686 (+/-0.129) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.686 (+/-0.129) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.686 (+/-0.129) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.686 (+/-0.129) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.686 (+/-0.129) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.686 (+/-0.129) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.686 (+/-0.129) for {'C': 1, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 1, 'kernel': 'poly'}
0.686 (+/-0.129) for {'C': 1, 'kernel': 'sigmoid'}
0.686 (+/-0.129) for {'C': 10, 'kernel': 'linear'}
0.500 (+/-0.000) for {'C': 10, 'kernel': 'poly'}
0.686 (+/-0.129) for {'C': 10, 'kernel': 'sigmoid'}
0.712 (+/-0.12

Best parameters set found on development set: All features

{'C': 10, 'kernel': 'linear'}

Grid scores on development set:

0.550 (+/-0.208) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.550 (+/-0.208) for {'C': 1, 'kernel': 'linear'}
0.533 (+/-0.085) for {'C': 1, 'kernel': 'poly'}
0.550 (+/-0.208) for {'C': 1, 'kernel': 'sigmoid'}
0.667 (+/-0.217) for {'C': 10, 'kernel': 'linear'}
0.533 (+/-0.085) for {'C': 10, 'kernel': 'poly'}
0.550 (+/-0.208) for {'C': 10, 'kernel': 'sigmoid'}
0.667 (+/-0.175) for {'C': 100, 'kernel': 'linear'}
0.5

# Tuning hyper-parameters for accuracy

Best parameters set found on development set: 700 features

{'C': 100, 'kernel': 'linear'}

Grid scores on development set:

0.617 (+/-0.155) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.617 (+/-0.155) for {'C': 1, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 1, 'kernel': 'poly'}
0.617 (+/-0.155) for {'C': 1, 'kernel': 'sigmoid'}
0.617 (+/-0.155) for {'C': 10, 'kernel': 'linear'}
0.517 (+/-0.062) for {'C': 10, 'kernel': 'poly'}
0.617 (+/-0.155) for {'C': 10, 'kernel': 'sigmoid'}
0.633 (+/-0.18