In [1]:
%run startup.py

%matplotlib inline

%run lib/load_uci.py

# Logistic Regression

In [100]:
lr_pipe = Pipeline([
    ('scaler1', StandardScaler()),
    ('pca', PCA(5)),
    ('scaler2', StandardScaler()),
    ('clf', LogisticRegression())
])

In [123]:
lr_pipe_params = { 'clf__C':np.logspace(-3, 0, 4), 
                  'clf__solver':('sag', 'liblinear')
                 }

In [124]:
gs_lr = GridSearchCV(lr_pipe, lr_pipe_params, cv=5, n_jobs=-1, verbose=1)

## Sample 1

In [106]:
gs_lr.fit(X_tr_1[top_corrs], y_tr_1)

Fitting 5 folds for each of 14 candidates, totalling 70 fits


[Parallel(n_jobs=-1)]: Done  70 out of  70 | elapsed:    0.3s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', LogisticRegressi...ty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__C': array([  1.00000e-03,   1.00000e-02,   1.00000e-01,   1.00000e+00,
         1.00000e+01,   1.00000e+02,   1.00000e+03]), 'clf__solver': ('sag', 'liblinear')},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [107]:
gs_lr.best_score_

0.6166666666666667

In [108]:
gs_lr.best_params_

{'clf__C': 0.01, 'clf__solver': 'sag'}

In [109]:
gs_lr.score(X_tr_1[top_corrs], y_tr_1)

0.62666666666666671

In [110]:
gs_lr.score(X_test[top_corrs], y_test)

0.56666666666666665

## Sample 2

In [111]:
gs_lr.fit(X_tr_2[top_corrs], y_tr_2)

Fitting 5 folds for each of 14 candidates, totalling 70 fits


[Parallel(n_jobs=-1)]: Done  70 out of  70 | elapsed:    0.5s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', LogisticRegressi...ty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__C': array([  1.00000e-03,   1.00000e-02,   1.00000e-01,   1.00000e+00,
         1.00000e+01,   1.00000e+02,   1.00000e+03]), 'clf__solver': ('sag', 'liblinear')},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [112]:
gs_lr.best_score_

0.58333333333333337

In [113]:
gs_lr.best_params_

{'clf__C': 0.10000000000000001, 'clf__solver': 'sag'}

In [114]:
gs_lr.score(X_tr_2[top_corrs], y_tr_2)

0.59666666666666668

In [115]:
gs_lr.score(X_test[top_corrs], y_test)

0.56999999999999995

## Sample 3

In [116]:
gs_lr.fit(X_tr_3[top_corrs], y_tr_3)

Fitting 5 folds for each of 14 candidates, totalling 70 fits


[Parallel(n_jobs=-1)]: Done  70 out of  70 | elapsed:    0.5s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', LogisticRegressi...ty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__C': array([  1.00000e-03,   1.00000e-02,   1.00000e-01,   1.00000e+00,
         1.00000e+01,   1.00000e+02,   1.00000e+03]), 'clf__solver': ('sag', 'liblinear')},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [117]:
gs_lr.best_score_

0.64000000000000001

In [118]:
gs_lr.best_params_

{'clf__C': 0.001, 'clf__solver': 'liblinear'}

In [119]:
gs_lr.score(X_tr_3[top_corrs], y_tr_3)

0.63833333333333331

In [120]:
gs_lr.score(X_test[top_corrs], y_test)

0.61499999999999999

# Decision Tree

In [131]:
dtc_pipe = Pipeline([
    ('scaler1', StandardScaler()),
    ('pca', PCA(5)),
    ('scaler2', StandardScaler()),
    ('clf', DecisionTreeClassifier())
])

In [155]:
dtc_params = {'clf__criterion':('gini', 'entropy'), 
              'clf__max_depth':(8, 9, 10, 11, 12, None), 
              'clf__min_samples_leaf':(2,3,4,5,6)}

In [156]:
gs_dtc = GridSearchCV(dtc_pipe, dtc_params, cv=5, n_jobs=-1, verbose=1)

## Sample 1

In [157]:
gs_dtc.fit(X_tr_1[top_corrs], y_tr_1)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:    1.3s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', DecisionTreeClas...it=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__criterion': ('gini', 'entropy'), 'clf__max_depth': (8, 9, 10, 11, 12, None), 'clf__min_samples_leaf': (2, 3, 4, 5, 6)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [158]:
gs_dtc.best_score_

0.73666666666666669

In [159]:
gs_dtc.best_params_

{'clf__criterion': 'gini', 'clf__max_depth': 11, 'clf__min_samples_leaf': 2}

In [160]:
gs_dtc.score(X_tr_1[top_corrs], y_tr_1)

0.91333333333333333

In [161]:
gs_dtc.score(X_test[top_corrs], y_test)

0.71999999999999997

## Sample 2

In [162]:
gs_dtc.fit(X_tr_2[top_corrs], y_tr_2)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:    1.4s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', DecisionTreeClas...it=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__criterion': ('gini', 'entropy'), 'clf__max_depth': (8, 9, 10, 11, 12, None), 'clf__min_samples_leaf': (2, 3, 4, 5, 6)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [163]:
gs_dtc.best_score_

0.7466666666666667

In [164]:
gs_dtc.best_params_

{'clf__criterion': 'gini', 'clf__max_depth': None, 'clf__min_samples_leaf': 5}

In [165]:
gs_dtc.score(X_tr_2[top_corrs], y_tr_2)

0.89166666666666672

In [166]:
gs_dtc.score(X_test[top_corrs], y_test)

0.76500000000000001

## Sample 3

In [167]:
gs_dtc.fit(X_tr_3[top_corrs], y_tr_3)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:    1.4s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', DecisionTreeClas...it=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__criterion': ('gini', 'entropy'), 'clf__max_depth': (8, 9, 10, 11, 12, None), 'clf__min_samples_leaf': (2, 3, 4, 5, 6)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [168]:
gs_dtc.best_score_

0.72166666666666668

In [169]:
gs_dtc.best_params_

{'clf__criterion': 'gini', 'clf__max_depth': None, 'clf__min_samples_leaf': 3}

In [170]:
gs_dtc.score(X_tr_3[top_corrs], y_tr_3)

0.93166666666666664

In [171]:
gs_dtc.score(X_test[top_corrs], y_test)

0.75

# Decision Tree without PCA

In [190]:
dtc_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', DecisionTreeClassifier())
])

In [191]:
dtc_params = {'clf__criterion':('gini', 'entropy'), 
              'clf__max_depth':(8, 9, 10, 11, 12, None), 
              'clf__min_samples_leaf':(2,3,4,5,6)}

In [192]:
gs_dtc = GridSearchCV(dtc_pipe, dtc_params, cv=5, n_jobs=-1, verbose=1)

## Sample 1

In [193]:
gs_dtc.fit(X_tr_1[top_corrs], y_tr_1)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:    1.4s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__criterion': ('gini', 'entropy'), 'clf__max_depth': (8, 9, 10, 11, 12, None), 'clf__min_samples_leaf': (2, 3, 4, 5, 6)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [194]:
gs_dtc.best_score_

0.75666666666666671

In [195]:
gs_dtc.best_params_

{'clf__criterion': 'entropy', 'clf__max_depth': 12, 'clf__min_samples_leaf': 2}

In [196]:
gs_dtc.score(X_tr_1[top_corrs], y_tr_1)

0.97333333333333338

In [197]:
gs_dtc.score(X_test[top_corrs], y_test)

0.72999999999999998

## Sample 2

In [198]:
gs_dtc.fit(X_tr_2[top_corrs], y_tr_2)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:    1.6s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__criterion': ('gini', 'entropy'), 'clf__max_depth': (8, 9, 10, 11, 12, None), 'clf__min_samples_leaf': (2, 3, 4, 5, 6)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [199]:
gs_dtc.best_score_

0.75666666666666671

In [200]:
gs_dtc.best_params_

{'clf__criterion': 'gini', 'clf__max_depth': 8, 'clf__min_samples_leaf': 4}

In [201]:
gs_dtc.score(X_tr_2[top_corrs], y_tr_2)

0.93000000000000005

In [202]:
gs_dtc.score(X_test[top_corrs], y_test)

0.73166666666666669

## Sample 3

In [203]:
gs_dtc.fit(X_tr_3[top_corrs], y_tr_3)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:    1.7s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__criterion': ('gini', 'entropy'), 'clf__max_depth': (8, 9, 10, 11, 12, None), 'clf__min_samples_leaf': (2, 3, 4, 5, 6)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [204]:
gs_dtc.best_score_

0.71499999999999997

In [205]:
gs_dtc.best_params_

{'clf__criterion': 'entropy', 'clf__max_depth': 12, 'clf__min_samples_leaf': 4}

In [206]:
gs_dtc.score(X_tr_3[top_corrs], y_tr_3)

0.92833333333333334

In [207]:
gs_dtc.score(X_test[top_corrs], y_test)

0.73833333333333329

# K Neighbors Classifier

In [208]:
knc_pipe = Pipeline([
    ('scaler1', StandardScaler()),
    ('pca', PCA(5)),
    ('scaler2', StandardScaler()),
    ('clf', KNeighborsClassifier())
])

In [210]:
knc_params = {'clf__n_neighbors':(5, 9, 13, 17, 21, 25),
              'clf__weights':('uniform', 'distance'),
              'clf__algorithm':('auto', 'ball_tree', 'kd_tree'),
              'clf__leaf_size':(2, 5, 10, 15, 20, 25, 30)}

In [211]:
gs_knc = GridSearchCV(knc_pipe, knc_params, cv=5, n_jobs=-1, verbose=1)

## Sample 1

In [212]:
gs_knc.fit(X_tr_1[top_corrs], y_tr_1)

Fitting 5 folds for each of 252 candidates, totalling 1260 fits


[Parallel(n_jobs=-1)]: Done 248 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 1260 out of 1260 | elapsed:    8.9s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__n_neighbors': (5, 9, 13, 17, 21, 25), 'clf__weights': ('uniform', 'distance'), 'clf__algorithm': ('auto', 'ball_tree', 'kd_tree'), 'clf__leaf_size': (2, 5, 10, 15, 20, 25, 30)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [213]:
gs_knc.best_score_

0.875

In [214]:
gs_knc.best_params_

{'clf__algorithm': 'auto',
 'clf__leaf_size': 2,
 'clf__n_neighbors': 5,
 'clf__weights': 'distance'}

In [215]:
gs_knc.score(X_tr_1[top_corrs], y_tr_1)

1.0

In [216]:
gs_knc.score(X_test[top_corrs], y_test)

0.91333333333333333

## Sample 2

In [217]:
gs_knc.fit(X_tr_2[top_corrs], y_tr_2)

Fitting 5 folds for each of 252 candidates, totalling 1260 fits


[Parallel(n_jobs=-1)]: Done 248 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done 1260 out of 1260 | elapsed:    9.4s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__n_neighbors': (5, 9, 13, 17, 21, 25), 'clf__weights': ('uniform', 'distance'), 'clf__algorithm': ('auto', 'ball_tree', 'kd_tree'), 'clf__leaf_size': (2, 5, 10, 15, 20, 25, 30)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [218]:
gs_knc.best_score_

0.85166666666666668

In [219]:
gs_knc.best_params_

{'clf__algorithm': 'auto',
 'clf__leaf_size': 2,
 'clf__n_neighbors': 5,
 'clf__weights': 'distance'}

In [220]:
gs_knc.score(X_tr_2[top_corrs], y_tr_2)

1.0

In [221]:
gs_knc.score(X_test[top_corrs], y_test)

0.89333333333333331

## Sample 3

In [222]:
gs_knc.fit(X_tr_3[top_corrs], y_tr_3)

Fitting 5 folds for each of 252 candidates, totalling 1260 fits


[Parallel(n_jobs=-1)]: Done 248 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 1260 out of 1260 | elapsed:    9.4s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__n_neighbors': (5, 9, 13, 17, 21, 25), 'clf__weights': ('uniform', 'distance'), 'clf__algorithm': ('auto', 'ball_tree', 'kd_tree'), 'clf__leaf_size': (2, 5, 10, 15, 20, 25, 30)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [223]:
gs_knc.best_score_

0.85499999999999998

In [224]:
gs_knc.best_params_

{'clf__algorithm': 'auto',
 'clf__leaf_size': 2,
 'clf__n_neighbors': 13,
 'clf__weights': 'distance'}

In [225]:
gs_knc.score(X_tr_3[top_corrs], y_tr_3)

1.0

In [226]:
gs_knc.score(X_test[top_corrs], y_test)

0.88166666666666671

# Support Vector Classifier

In [228]:
svc_pipe = Pipeline([
    ('scaler1', StandardScaler()),
    ('pca', PCA(5)),
    ('scaler2', StandardScaler()),
    ('clf', SVC())
])

In [229]:
svc_params = {'clf__C':(100.0, 10.0, 1.0, 0.1, .01, .001),
              'clf__gamma':(.1, .2, .3, .4, .5, .6, .7, .8, .9, 1.0),
              'clf__decision_function_shape':('ovo', 'ovr')}

In [230]:
gs_svc = GridSearchCV(svc_pipe, svc_params, cv=5, n_jobs=-1, verbose=1)

## Sample 1

In [231]:
gs_svc.fit(X_tr_1[top_corrs], y_tr_1)

Fitting 5 folds for each of 120 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    6.5s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', SVC(C=1.0, cache...,
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__C': (100.0, 10.0, 1.0, 0.1, 0.01, 0.001), 'clf__gamma': (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0), 'clf__decision_function_shape': ('ovo', 'ovr')},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [232]:
gs_svc.best_score_

0.87666666666666671

In [233]:
gs_svc.best_params_

{'clf__C': 1.0, 'clf__decision_function_shape': 'ovo', 'clf__gamma': 0.7}

In [234]:
gs_svc.score(X_tr_1[top_corrs], y_tr_1)

0.94499999999999995

In [235]:
gs_svc.score(X_test[top_corrs], y_test)

0.88500000000000001

## Sample 2

In [236]:
gs_svc.fit(X_tr_2[top_corrs], y_tr_2)

Fitting 5 folds for each of 120 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    6.5s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', SVC(C=1.0, cache...,
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__C': (100.0, 10.0, 1.0, 0.1, 0.01, 0.001), 'clf__gamma': (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0), 'clf__decision_function_shape': ('ovo', 'ovr')},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [237]:
gs_svc.best_score_

0.87

In [238]:
gs_svc.best_params_

{'clf__C': 1.0, 'clf__decision_function_shape': 'ovo', 'clf__gamma': 1.0}

In [239]:
gs_svc.score(X_tr_2[top_corrs], y_tr_2)

0.96499999999999997

In [240]:
gs_svc.score(X_test[top_corrs], y_test)

0.88500000000000001

## Sample 3

In [241]:
gs_svc.fit(X_tr_3[top_corrs], y_tr_3)

Fitting 5 folds for each of 120 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    6.5s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(steps=[('scaler1', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('scaler2', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', SVC(C=1.0, cache...,
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'clf__C': (100.0, 10.0, 1.0, 0.1, 0.01, 0.001), 'clf__gamma': (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0), 'clf__decision_function_shape': ('ovo', 'ovr')},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [242]:
gs_svc.best_score_

0.88

In [243]:
gs_svc.best_params_

{'clf__C': 1.0, 'clf__decision_function_shape': 'ovo', 'clf__gamma': 0.9}

In [244]:
gs_svc.score(X_tr_3[top_corrs], y_tr_3)

0.96999999999999997

In [245]:
gs_svc.score(X_test[top_corrs], y_test)

0.89000000000000001