In [1]:
%run data_package_loading.py # Code loads data as well as packages that are relevant across most project phases
%matplotlib inline

uci_features = ['28',  '48',  '64', '105', '128', '153', '241', '281', '318', '336', 
                '338', '378', '433', '442', '451', '453', '455', '472', '475', '493']

madelon_features = ['feat_257', 'feat_269', 'feat_308', 'feat_315', 'feat_336',
                   'feat_341', 'feat_395', 'feat_504', 'feat_526', 'feat_639',
                   'feat_681', 'feat_701', 'feat_724', 'feat_736', 'feat_769',
                   'feat_808', 'feat_829', 'feat_867', 'feat_920', 'feat_956']

Xuci_1 = Xuci_1[uci_features]
Xuci_2 = Xuci_2[uci_features]
Xuci_3 = Xuci_3[uci_features]

Xdb_1 = Xdb_1[madelon_features]
Xdb_2 = Xdb_2[madelon_features]
Xdb_3 = Xdb_3[madelon_features]

# !conda install -y psycopg2

from sklearn.feature_selection import SelectKBest, RFE, SelectFromModel, RFECV 
from sklearn.decomposition import PCA

from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from sklearn.model_selection import train_test_split
from sklearn.grid_search import GridSearchCV

from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler



In [35]:
from sklearn.ensemble import RandomForestClassifier
from tqdm import tqdm

In [74]:
dtc_pipe = Pipeline([('scaler', StandardScaler()),
                     ('rfe', SelectKBest()),
                     ('pca', PCA()),
                     ('classifier', DecisionTreeClassifier())])

lr_pipe = Pipeline([('scaler', StandardScaler()),
                     ('rfe', SelectKBest()),
                     ('pca', PCA()),
                     ('classifier', LogisticRegression())])

knn_pipe = Pipeline([('scaler', StandardScaler()),
                     ('rfe', SelectKBest()),
                     ('pca', PCA()),
                     ('classifier', KNeighborsClassifier())])

rfc_pipe = Pipeline([('scaler', StandardScaler()),
                     ('rfe', SelectKBest()),
                     ('pca', PCA()),
                     ('classifier', RandomForestClassifier())])

svc_pipe = Pipeline([('scaler', StandardScaler()),
                     ('rfe', SelectKBest()),
                     ('pca', PCA()),
                     ('classifier', SVC())])

### Some naive testing of the pipes

In [58]:
def pipe_test(X, y, pipeline):
    X_test, X_train, y_test, y_train = train_test_split(X, y, test_size = 0.25, random_state=42)
    
    pipe = pipeline
    pipe.fit(X_train, y_train)
    
    print(pipe.score(X_train, y_train))
    print(pipe.score(X_test, y_test))
    

In [59]:
print("\nTesting UCI 1")
pipe_test(Xuci_1, yuci_1, dtc_pipe)

print("\nTesting UCI 2")
pipe_test(Xuci_2, yuci_2, dtc_pipe)

print("\nTesting UCI 3")
pipe_test(Xuci_3, yuci_3, dtc_pipe)

print("\nTesting DB 1")
pipe_test(Xdb_1, ydb_1, dtc_pipe)

print("\nTesting DB 2")
pipe_test(Xdb_2, ydb_2, dtc_pipe)

print("\nTesting DB 2")
pipe_test(Xdb_3, ydb_3, dtc_pipe)


Testing UCI 1
1.0
0.606060606061

Testing UCI 2
1.0
0.572727272727

Testing UCI 3
1.0
0.590909090909

Testing DB 1
1.0
0.622895622896

Testing DB 2
1.0
0.659519168291

Testing DB 2
1.0
0.654104979812


In [60]:
print("\nTesting UCI 1")
pipe_test(Xuci_1, yuci_1, lr_pipe)

print("\nTesting UCI 2")
pipe_test(Xuci_2, yuci_2, lr_pipe)

print("\nTesting UCI 3")
pipe_test(Xuci_3, yuci_3, lr_pipe)

print("\nTesting DB 1")
pipe_test(Xdb_1, ydb_1, lr_pipe)

print("\nTesting DB 2")
pipe_test(Xdb_2, ydb_2, lr_pipe)

print("\nTesting DB 2")
pipe_test(Xdb_3, ydb_3, lr_pipe)


Testing UCI 1
0.690909090909
0.533333333333

Testing UCI 2
0.690909090909
0.59696969697

Testing UCI 3
0.581818181818
0.578787878788

Testing DB 1
0.608080808081
0.57037037037

Testing DB 2
0.620622568093
0.585445094217

Testing DB 2
0.622983870968
0.609017496635


In [61]:
print("\nTesting UCI 1")
pipe_test(Xuci_1, yuci_1, knn_pipe)

print("\nTesting UCI 2")
pipe_test(Xuci_2, yuci_2, knn_pipe)

print("\nTesting UCI 3")
pipe_test(Xuci_3, yuci_3, knn_pipe)

print("\nTesting DB 1")
pipe_test(Xdb_1, ydb_1, knn_pipe)

print("\nTesting DB 2")
pipe_test(Xdb_2, ydb_2, knn_pipe)

print("\nTesting DB 2")
pipe_test(Xdb_3, ydb_3, knn_pipe)


Testing UCI 1
0.8
0.590909090909

Testing UCI 2
0.809090909091
0.630303030303

Testing UCI 3
0.809090909091
0.672727272727

Testing DB 1
0.832323232323
0.713131313131

Testing DB 2
0.84046692607
0.689408706953

Testing DB 2
0.822580645161
0.728129205922


In [62]:
print("\nTesting UCI 1")
pipe_test(Xuci_1, yuci_1, rfc_pipe)

print("\nTesting UCI 2")
pipe_test(Xuci_2, yuci_2, rfc_pipe)

print("\nTesting UCI 3")
pipe_test(Xuci_3, yuci_3, rfc_pipe)

print("\nTesting DB 1")
pipe_test(Xdb_1, ydb_1, rfc_pipe)

print("\nTesting DB 2")
pipe_test(Xdb_2, ydb_2, rfc_pipe)

print("\nTesting DB 2")
pipe_test(Xdb_3, ydb_3, rfc_pipe)


Testing UCI 1
0.990909090909
0.587878787879

Testing UCI 2
0.990909090909
0.557575757576

Testing UCI 3
0.981818181818
0.60303030303

Testing DB 1
0.991919191919
0.653872053872

Testing DB 2
0.994163424125
0.641325536062

Testing DB 2
0.985887096774
0.693135935397


### Almost all naive testing results in test scores ranging from ~0.55 - 0.68

In [94]:
dtc_params = {'rfe__k': [5, 10, 15, 20],
             'pca__n_components': [1, 2, 3, 4, 5],
             'classifier__max_depth': [1, 3, 5, 10, 15, None],
             'classifier__splitter': ['random', 'best']}

lr_params = {'rfe__k': [5, 10, 15, 20],
             'pca__n_components': [1, 2, 3, 4, 5],
             'classifier__penalty': ['l1', 'l2'],
             'classifier__max_iter': [100, 500],
             'classifier__C': np.logspace(-3,3,7)}

knn_params = {'rfe__k': [5, 10, 15, 20],
             'pca__n_components': [1, 2, 3, 4, 5],
             'classifier__n_neighbors': [1, 5, 9, 15, 25]}

rfc_params = {'rfe__k': [5, 10, 15, 20],
             'pca__n_components': [1, 2, 3, 4, 5],
             'classifier__n_estimators': [10, 50, 100, 200, 500],
             'classifier__max_depth': [1, 5, None]}

svc_params = {'rfe__k': [5, 10, 15, 20],
             'pca__n_components': [1, 2, 3, 4, 5],
              'classifier__C': np.logspace(-3,3,7)}




In [95]:
def gridsearch_pipe(X, y, pipeline, params, cv=5):
    X_test, X_train, y_test, y_train = train_test_split(X, y, test_size = 0.25, random_state=42)
    
    gs = GridSearchCV(pipeline, params, cv=cv, n_jobs=-1)
    gs.fit(X_train, y_train)
    
    results = {'best_params': gs.best_params_,
               'cv_score': gs.best_score_,
              'train_score': gs.score(X_train, y_train),
              'test_score': gs.score(X_test, y_test),
              'estimator': gs.best_estimator_.named_steps['classifier']}

    return results

In [96]:
def test_all_pipes(X, y):
    scores = []
    
    for pipe, param in tqdm([(dtc_pipe, dtc_params), (lr_pipe, lr_params), 
                        (knn_pipe, knn_params), (rfc_pipe, rfc_params),
                            (svc_pipe, svc_params)]):
        scores.append(gridsearch_pipe(X, y, pipe, param))
    
    scores_df = pd.DataFrame(scores)
    return scores_df
        

In [97]:
uci1_gs_results = test_all_pipes(Xuci_1, yuci_1)
uci2_gs_results = test_all_pipes(Xuci_2, yuci_2)
uci3_gs_results = test_all_pipes(Xuci_3, yuci_3)
db1_gs_results = test_all_pipes(Xdb_1, ydb_1)
db2_gs_results = test_all_pipes(Xdb_2, ydb_2)
db3_gs_results = test_all_pipes(Xdb_3, ydb_3)

100%|██████████| 5/5 [02:11<00:00, 26.39s/it]
100%|██████████| 5/5 [02:09<00:00, 25.90s/it]
100%|██████████| 5/5 [02:21<00:00, 28.35s/it]
100%|██████████| 5/5 [02:33<00:00, 30.70s/it]
100%|██████████| 5/5 [02:33<00:00, 30.61s/it]
100%|██████████| 5/5 [02:32<00:00, 30.50s/it]


In [98]:
uci1_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': 3, 'classifier__spli...",0.727273,"DecisionTreeClassifier(class_weight=None, crit...",0.630303,0.881818
1,"{'classifier__C': 0.001, 'classifier__max_iter...",0.681818,"LogisticRegression(C=0.001, class_weight=None,...",0.554545,0.663636
2,"{'classifier__n_neighbors': 5, 'pca__n_compone...",0.809091,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.639394,0.845455
3,"{'classifier__max_depth': None, 'classifier__n...",0.8,"(DecisionTreeClassifier(class_weight=None, cri...",0.6,0.963636
4,"{'classifier__C': 1.0, 'pca__n_components': 5,...",0.818182,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.727273,0.963636


In [99]:
uci2_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': 10, 'classifier__spl...",0.709091,"DecisionTreeClassifier(class_weight=None, crit...",0.612121,0.918182
1,"{'classifier__C': 0.1, 'classifier__max_iter':...",0.681818,"LogisticRegression(C=0.10000000000000001, clas...",0.609091,0.681818
2,"{'classifier__n_neighbors': 1, 'pca__n_compone...",0.772727,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.69697,1.0
3,"{'classifier__max_depth': 5, 'classifier__n_es...",0.727273,"(DecisionTreeClassifier(class_weight=None, cri...",0.60303,0.9
4,"{'classifier__C': 100.0, 'pca__n_components': ...",0.781818,"SVC(C=100.0, cache_size=200, class_weight=None...",0.690909,1.0


In [100]:
uci3_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': 15, 'classifier__spl...",0.681818,"DecisionTreeClassifier(class_weight=None, crit...",0.654545,1.0
1,"{'classifier__C': 0.1, 'classifier__max_iter':...",0.536364,"LogisticRegression(C=0.10000000000000001, clas...",0.575758,0.627273
2,"{'classifier__n_neighbors': 1, 'pca__n_compone...",0.745455,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.715152,1.0
3,"{'classifier__max_depth': None, 'classifier__n...",0.690909,"(DecisionTreeClassifier(class_weight=None, cri...",0.645455,1.0
4,"{'classifier__C': 1.0, 'pca__n_components': 3,...",0.727273,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.654545,0.9


In [101]:
db1_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': None, 'classifier__s...",0.674747,"DecisionTreeClassifier(class_weight=None, crit...",0.657239,1.0
1,"{'classifier__C': 0.1, 'classifier__max_iter':...",0.610101,"LogisticRegression(C=0.10000000000000001, clas...",0.571044,0.587879
2,"{'classifier__n_neighbors': 15, 'pca__n_compon...",0.729293,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.707744,0.771717
3,"{'classifier__max_depth': None, 'classifier__n...",0.723232,"(DecisionTreeClassifier(class_weight=None, cri...",0.720539,1.0
4,"{'classifier__C': 1.0, 'pca__n_components': 5,...",0.731313,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.734007,0.852525


In [102]:
db2_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': None, 'classifier__s...",0.675097,"DecisionTreeClassifier(class_weight=None, crit...",0.593242,1.0
1,"{'classifier__C': 0.01, 'classifier__max_iter'...",0.620623,"LogisticRegression(C=0.01, class_weight=None, ...",0.591293,0.620623
2,"{'classifier__n_neighbors': 1, 'pca__n_compone...",0.717899,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.690058,1.0
3,"{'classifier__max_depth': None, 'classifier__n...",0.727626,"(DecisionTreeClassifier(class_weight=None, cri...",0.699805,1.0
4,"{'classifier__C': 10.0, 'pca__n_components': 5...",0.723735,"SVC(C=10.0, cache_size=200, class_weight=None,...",0.689409,0.970817


In [103]:
db3_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': 10, 'classifier__spl...",0.683468,"DecisionTreeClassifier(class_weight=None, crit...",0.670929,0.90121
1,"{'classifier__C': 0.01, 'classifier__max_iter'...",0.629032,"LogisticRegression(C=0.01, class_weight=None, ...",0.604307,0.608871
2,"{'classifier__n_neighbors': 5, 'pca__n_compone...",0.72379,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.720727,0.814516
3,"{'classifier__max_depth': None, 'classifier__n...",0.743952,"(DecisionTreeClassifier(class_weight=None, cri...",0.696501,1.0
4,"{'classifier__C': 1.0, 'pca__n_components': 5,...",0.745968,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.725437,0.854839


Trying without the PCAs

In [104]:
dtc_pipe = Pipeline([('scaler', StandardScaler()),
                     ('rfe', SelectKBest()),
                     ('classifier', DecisionTreeClassifier())])

lr_pipe = Pipeline([('scaler', StandardScaler()),
                     ('rfe', SelectKBest()),
                     ('classifier', LogisticRegression())])

knn_pipe = Pipeline([('scaler', StandardScaler()),
                     ('rfe', SelectKBest()),
                     ('classifier', KNeighborsClassifier())])

rfc_pipe = Pipeline([('scaler', StandardScaler()),
                     ('rfe', SelectKBest()),
                     ('classifier', RandomForestClassifier())])

svc_pipe = Pipeline([('scaler', StandardScaler()),
                     ('rfe', SelectKBest()),
                     ('classifier', SVC())])

In [105]:
dtc_params = {'rfe__k': [5, 10, 15, 20],
             'classifier__max_depth': [1, 3, 5, 10, 15, None],
             'classifier__splitter': ['random', 'best']}

lr_params = {'rfe__k': [5, 10, 15, 20],
             'classifier__penalty': ['l1', 'l2'],
             'classifier__max_iter': [100, 500],
             'classifier__C': np.logspace(-3,3,7)}

knn_params = {'rfe__k': [5, 10, 15, 20],
             'classifier__n_neighbors': [1, 5, 9, 15, 25]}

rfc_params = {'rfe__k': [5, 10, 15, 20],
             'classifier__n_estimators': [10, 50, 100, 200, 500],
             'classifier__max_depth': [1, 5, None]}

svc_params = {'rfe__k': [5, 10, 15, 20],
              'classifier__C': np.logspace(-3,3,7)}

In [106]:
uci1_gs_results = test_all_pipes(Xuci_1, yuci_1)
uci2_gs_results = test_all_pipes(Xuci_2, yuci_2)
uci3_gs_results = test_all_pipes(Xuci_3, yuci_3)
db1_gs_results = test_all_pipes(Xdb_1, ydb_1)
db2_gs_results = test_all_pipes(Xdb_2, ydb_2)
db3_gs_results = test_all_pipes(Xdb_3, ydb_3)

100%|██████████| 5/5 [00:27<00:00,  5.43s/it]
100%|██████████| 5/5 [00:26<00:00,  5.32s/it]
100%|██████████| 5/5 [00:27<00:00,  5.48s/it]
100%|██████████| 5/5 [00:34<00:00,  6.85s/it]
100%|██████████| 5/5 [00:34<00:00,  6.85s/it]
100%|██████████| 5/5 [00:33<00:00,  6.60s/it]


In [107]:
uci1_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': 15, 'classifier__spl...",0.672727,"DecisionTreeClassifier(class_weight=None, crit...",0.669697,1.0
1,"{'classifier__C': 1.0, 'classifier__max_iter':...",0.690909,"LogisticRegression(C=1.0, class_weight=None, d...",0.548485,0.690909
2,"{'classifier__n_neighbors': 5, 'rfe__k': 20}",0.790909,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.678788,0.881818
3,"{'classifier__max_depth': 5, 'classifier__n_es...",0.745455,"(DecisionTreeClassifier(class_weight=None, cri...",0.645455,1.0
4,"{'classifier__C': 1.0, 'rfe__k': 20}",0.809091,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.681818,0.9


In [108]:
uci2_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': 5, 'classifier__spli...",0.681818,"DecisionTreeClassifier(class_weight=None, crit...",0.6,0.909091
1,"{'classifier__C': 1.0, 'classifier__max_iter':...",0.690909,"LogisticRegression(C=1.0, class_weight=None, d...",0.60303,0.690909
2,"{'classifier__n_neighbors': 1, 'rfe__k': 20}",0.763636,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.693939,1.0
3,"{'classifier__max_depth': 5, 'classifier__n_es...",0.736364,"(DecisionTreeClassifier(class_weight=None, cri...",0.633333,0.954545
4,"{'classifier__C': 10.0, 'rfe__k': 20}",0.772727,"SVC(C=10.0, cache_size=200, class_weight=None,...",0.709091,0.954545


In [109]:
uci3_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': 10, 'classifier__spl...",0.627273,"DecisionTreeClassifier(class_weight=None, crit...",0.609091,1.0
1,"{'classifier__C': 10.0, 'classifier__max_iter'...",0.527273,"LogisticRegression(C=10.0, class_weight=None, ...",0.548485,0.645455
2,"{'classifier__n_neighbors': 1, 'rfe__k': 15}",0.727273,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.715152,1.0
3,"{'classifier__max_depth': None, 'classifier__n...",0.618182,"(DecisionTreeClassifier(class_weight=None, cri...",0.645455,1.0
4,"{'classifier__C': 10.0, 'rfe__k': 20}",0.663636,"SVC(C=10.0, cache_size=200, class_weight=None,...",0.706061,0.972727


In [110]:
db1_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': 10, 'classifier__spl...",0.676768,"DecisionTreeClassifier(class_weight=None, crit...",0.651178,0.993939
1,"{'classifier__C': 0.01, 'classifier__max_iter'...",0.606061,"LogisticRegression(C=0.01, class_weight=None, ...",0.574411,0.60404
2,"{'classifier__n_neighbors': 15, 'rfe__k': 20}",0.729293,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.707744,0.771717
3,"{'classifier__max_depth': None, 'classifier__n...",0.749495,"(DecisionTreeClassifier(class_weight=None, cri...",0.719192,1.0
4,"{'classifier__C': 1.0, 'rfe__k': 20}",0.727273,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.724579,0.783838


In [111]:
db2_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': 5, 'classifier__spli...",0.673152,"DecisionTreeClassifier(class_weight=None, crit...",0.602989,0.762646
1,"{'classifier__C': 0.01, 'classifier__max_iter'...",0.620623,"LogisticRegression(C=0.01, class_weight=None, ...",0.591293,0.620623
2,"{'classifier__n_neighbors': 1, 'rfe__k': 15}",0.717899,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.690058,1.0
3,"{'classifier__max_depth': None, 'classifier__n...",0.747082,"(DecisionTreeClassifier(class_weight=None, cri...",0.706953,1.0
4,"{'classifier__C': 1.0, 'rfe__k': 20}",0.719844,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.692658,0.764591


In [112]:
db3_gs_results

Unnamed: 0,best_params,cv_score,estimator,test_score,train_score
0,"{'classifier__max_depth': 5, 'classifier__spli...",0.683468,"DecisionTreeClassifier(class_weight=None, crit...",0.670929,0.806452
1,"{'classifier__C': 0.01, 'classifier__max_iter'...",0.618952,"LogisticRegression(C=0.01, class_weight=None, ...",0.606999,0.610887
2,"{'classifier__n_neighbors': 5, 'rfe__k': 20}",0.72379,"KNeighborsClassifier(algorithm='auto', leaf_si...",0.720727,0.814516
3,"{'classifier__max_depth': 5, 'classifier__n_es...",0.729839,"(DecisionTreeClassifier(class_weight=None, cri...",0.697174,0.881048
4,"{'classifier__C': 10.0, 'rfe__k': 15}",0.739919,"SVC(C=10.0, cache_size=200, class_weight=None,...",0.720054,0.846774


### Time for some brute force

In [113]:
import itertools

In [159]:
def brute_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)
    
#     X_train = pd.DataFrame(X_train, columns=X.columns)
#     X_test = pd.DataFrame(X_test, columns=X.columns)
    
#     skb = SelectKBest(k=10)
#     skb.fit(X_train, y_train)
    
#     X_k_train = pd.DataFrame(skb.transform(X_train), columns=X.columns[skb.get_support()])
#     X_k_test = pd.DataFrame(skb.transform(X_test), columns=X.columns[skb.get_support()])
    X_k_train = X_train
    X_k_test = X_test


    results = []
    
    for cols in tqdm(itertools.combinations(X_k_train, 5)):
        
        X_tr_tmp = X_k_train[list(cols)]
        X_te_tmp = X_k_test[list(cols)]
        
        model = RandomForestClassifier(n_estimators=10, n_jobs=-1)
        model.fit(X_tr_tmp, y_train)
        
        metrics = {'features': cols,
                  'train_score': model.score(X_tr_tmp, y_train),
                  'test_score': model.score(X_te_tmp, y_test)}
        
        results.append(metrics)
        
    return pd.DataFrame(results)

In [161]:
Xuci_1_df = brute_test(Xuci_1, yuci_1)
Xuci_2_df = brute_test(Xuci_2, yuci_2)
Xuci_3_df = brute_test(Xuci_3, yuci_3)
Xdb_1_df = brute_test(Xdb_1, ydb_1)
Xdb_2_df = brute_test(Xdb_2, ydb_2)
Xdb_3_df = brute_test(Xdb_3, ydb_3)


0it [00:00, ?it/s][A
1it [00:00,  3.09it/s][A
2it [00:00,  3.06it/s][A
3it [00:00,  3.04it/s][A
4it [00:01,  3.04it/s][A
5it [00:01,  3.04it/s][A
6it [00:01,  3.04it/s][A
7it [00:02,  3.03it/s][A
8it [00:02,  3.03it/s][A
9it [00:02,  3.03it/s][A
10it [00:03,  3.03it/s][A
11it [00:03,  3.03it/s][A
12it [00:03,  3.03it/s][A
13it [00:04,  3.03it/s][A
14it [00:04,  3.03it/s][A
15it [00:04,  3.03it/s][A
16it [00:05,  3.03it/s][A
17it [00:05,  3.03it/s][A
18it [00:05,  3.03it/s][A
19it [00:06,  3.03it/s][A
20it [00:06,  3.03it/s][A
21it [00:06,  3.03it/s][A
22it [00:07,  3.03it/s][A
Exception in thread Thread-69335:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/opt/conda/lib/python3.6/site-packages/tqdm/_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "/opt/conda/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
Run

KeyboardInterrupt: 

In [147]:
Xuci_1_df.sort_values('test_score', ascending=False).head()

Unnamed: 0,features,test_score,train_score
13,"(48, 64, 153, 336, 453)",0.7,1.0
101,"(48, 241, 336, 378, 433)",0.690909,1.0
146,"(64, 153, 281, 378, 453)",0.690909,1.0
217,"(153, 281, 336, 378, 453)",0.690909,1.0
114,"(48, 281, 336, 433, 453)",0.690909,1.0


In [148]:
Xuci_2_df.sort_values('test_score', ascending=False).head()

Unnamed: 0,features,test_score,train_score
160,"(128, 241, 472, 475, 493)",0.709091,1.0
142,"(128, 241, 338, 442, 472)",0.7,1.0
55,"(64, 128, 472, 475, 493)",0.690909,1.0
184,"(128, 338, 442, 472, 475)",0.690909,1.0
155,"(128, 241, 442, 472, 493)",0.690909,1.0


In [149]:
Xuci_3_df.sort_values('test_score', ascending=False).head()

Unnamed: 0,features,test_score,train_score
168,"(64, 281, 318, 453, 475)",0.672727,1.0
137,"(64, 241, 281, 451, 493)",0.663636,1.0
121,"(28, 336, 451, 453, 475)",0.663636,1.0
33,"(28, 64, 281, 453, 475)",0.663636,1.0
54,"(28, 64, 451, 475, 493)",0.663636,1.0


In [150]:
Xdb_1_df.sort_values('test_score', ascending=False).head()

Unnamed: 0,features,test_score,train_score
203,"(feat_341, feat_681, feat_701, feat_808, feat_...",0.779798,1.0
52,"(feat_269, feat_336, feat_769, feat_808, feat_...",0.771717,1.0
107,"(feat_269, feat_681, feat_769, feat_808, feat_...",0.771717,1.0
104,"(feat_269, feat_681, feat_736, feat_808, feat_...",0.771717,1.0
68,"(feat_269, feat_341, feat_681, feat_808, feat_...",0.771717,1.0


In [151]:
Xdb_2_df.sort_values('test_score', ascending=False).head()

Unnamed: 0,features,test_score,train_score
65,"(feat_269, feat_341, feat_681, feat_769, feat_...",0.741245,1.0
121,"(feat_269, feat_736, feat_769, feat_808, feat_...",0.737354,1.0
69,"(feat_269, feat_341, feat_681, feat_808, feat_...",0.731518,1.0
242,"(feat_681, feat_736, feat_769, feat_808, feat_...",0.729572,1.0
50,"(feat_269, feat_336, feat_736, feat_808, feat_...",0.727626,1.0


In [152]:
Xdb_3_df.sort_values('test_score', ascending=False).head()

Unnamed: 0,features,test_score,train_score
198,"(feat_336, feat_341, feat_681, feat_701, feat_...",0.772177,1.0
146,"(feat_315, feat_336, feat_681, feat_769, feat_...",0.770161,1.0
218,"(feat_336, feat_681, feat_701, feat_769, feat_...",0.766129,1.0
77,"(feat_269, feat_336, feat_681, feat_769, feat_...",0.762097,1.0
118,"(feat_269, feat_681, feat_769, feat_808, feat_...",0.760081,1.0
