In [13]:
import pandas as pd
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import  Pipeline
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, ParameterGrid, cross_val_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import f1_score,  precision_score, recall_score
from sklearn.dummy import DummyClassifier

from skmultilearn.problem_transform import LabelPowerset, ClassifierChain

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [14]:
features = pd.read_csv('../data/features_plus_descriptions.csv')
features.set_index('Feature Type and Number', inplace=True)
features.drop(['S5', 'D21'], axis=0, inplace=True)

In [15]:
target = ['H0_Best','H1_Best', 'H2_Best', 'H3_Best', 'H4_Best', 'H5_Best']

df = pd.read_csv('../data/multiclass_LP_transformed_raw_data.csv')
X = df.loc[:, features.index]
y = df[target]
y[:14]

Unnamed: 0,H0_Best,H1_Best,H2_Best,H3_Best,H4_Best,H5_Best
0,1,0,0,0,0,0
1,0,1,1,0,1,1
2,1,0,0,0,0,0
3,1,0,0,0,0,0
4,1,0,0,0,0,0
5,1,0,0,0,0,0
6,1,0,0,0,0,0
7,1,0,0,0,0,0
8,1,0,0,0,0,0
9,1,0,0,0,0,0


In [16]:
print(len(features.index))

51


In [17]:
types = dict.fromkeys(features.index, 'stdsc')
mm_feats = ['S1','S3', 'S4', 'S6', 'S8','S11', 'S12', 'D3', 'D39']

for feat in mm_feats:
    types[feat] = 'minmax'

minmax_feats = []
std_feats = []

for feat in types.keys():
    if types[feat] == 'stdsc':
        std_feats.append(feat)
    elif types[feat] == 'minmax':
        minmax_feats.append(feat)

print('MinMax Scaler Features: ', minmax_feats)
print('Standard Scaler Features: ', std_feats)


preprocessor = ColumnTransformer(
    transformers=[
        ('mm_scaler', MinMaxScaler(), minmax_feats),
        ('std_scaler', StandardScaler(), std_feats)])

MinMax Scaler Features:  ['S1', 'S3', 'S4', 'S6', 'S8', 'S11', 'S12', 'D3', 'D39']
Standard Scaler Features:  ['S2', 'S7', 'S9', 'S10', 'S13', 'S14', 'D1', 'D2', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15', 'D16', 'D17', 'D18', 'D19', 'D20', 'D22', 'D23', 'D24', 'D25', 'D26', 'D27', 'D28', 'D29', 'D30', 'D31', 'D32', 'D33', 'D34', 'D35', 'D36', 'D37', 'D38']


In [12]:
def ML_pipeline_KFold_LP(X, y, preprocessor, param_grid, label_powerset, verbose=5):
    prec_scores = []
    rec_scores = []
    f1_scores = []
    best_models = []
    ys = []
    for i in range(10):
        random_state = 431 * i
        X_other, X_test, y_other, y_test = train_test_split(X, y, test_size = 0.2, random_state=random_state)
        kf = KFold(n_splits=4, shuffle=True, random_state=random_state)
        clf = Pipeline([
        ('preprocessor', preprocessor), 
        ('clf', label_powerset)
        ])
        grid = GridSearchCV(clf,  param_grid, scoring='f1_macro', cv=kf, return_train_score=True, verbose=verbose, n_jobs=-1)   
        grid.fit(X_other, y_other)
        y_pred = grid.predict(X_test)
        prec = precision_score(y_test, y_pred, average="macro")
        rec = recall_score(y_test, y_pred, average="macro")
        f1 = f1_score(y_test, y_pred, average="macro")
        print("best estimator", grid.best_params_, "precision score", prec, "recall score", rec, "f1 score", f1)
        best_models.append(grid.best_params_)
        prec_scores.append(prec)
        rec_scores.append(rec)
        f1_scores.append(f1)
        ys.append((label_powerset.transform(y_test), label_powerset.transform(y_pred), label_powerset.reverse_combinations_, label_powerset.unique_combinations_))
    test_scores = [prec_scores, rec_scores, f1_scores]
    return test_scores, best_models, ys

In [38]:
param_grid = {  'clf': [DummyClassifier()],
                'clf__strategy': ["stratified", "most_frequent", "prior", "uniform", "constant"]}

scores_LP_dummy, models_LP_dummy, xx = ML_pipeline_KFold_LP(X, y, preprocessor, param_grid, LabelPowerset())

Fitting 4 folds for each of 5 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    1.0s remaining:    1.0s
[Parallel(n_jobs=-1)]: Done  15 out of  20 | elapsed:    1.1s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    1.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    1.1s finished


best estimator {'clf': DummyClassifier(strategy='uniform'), 'clf__strategy': 'uniform'} precision score 0.2634201955086679 recall score 0.49893043988196356 f1 score 0.3367361922725059
Fitting 4 folds for each of 5 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  15 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s finished


best estimator {'clf': DummyClassifier(strategy='uniform'), 'clf__strategy': 'uniform'} precision score 0.26177685044361554 recall score 0.48332614061494916 f1 score 0.33551420992135533
Fitting 4 folds for each of 5 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  15 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s finished


best estimator {'clf': DummyClassifier(strategy='uniform'), 'clf__strategy': 'uniform'} precision score 0.263790476953163 recall score 0.5077632403482509 f1 score 0.3407032411181768
Fitting 4 folds for each of 5 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  15 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s finished


best estimator {'clf': DummyClassifier(strategy='uniform'), 'clf__strategy': 'uniform'} precision score 0.26253635581871826 recall score 0.5043401816683535 f1 score 0.3391278692412711
Fitting 4 folds for each of 5 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  15 out of  20 | elapsed:    0.2s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.2s finished


best estimator {'clf': DummyClassifier(strategy='uniform'), 'clf__strategy': 'uniform'} precision score 0.27312442978548446 recall score 0.5103229441750087 f1 score 0.34921553993023363
Fitting 4 folds for each of 5 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  15 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s finished


best estimator {'clf': DummyClassifier(strategy='uniform'), 'clf__strategy': 'uniform'} precision score 0.2630864840211404 recall score 0.5016672737071005 f1 score 0.33921749423797093
Fitting 4 folds for each of 5 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  15 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s finished


best estimator {'clf': DummyClassifier(strategy='uniform'), 'clf__strategy': 'uniform'} precision score 0.2656011339109125 recall score 0.5024977580324437 f1 score 0.340550796871949
Fitting 4 folds for each of 5 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  15 out of  20 | elapsed:    0.2s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.2s finished


best estimator {'clf': DummyClassifier(strategy='uniform'), 'clf__strategy': 'uniform'} precision score 0.2742457769109228 recall score 0.49752505673954667 f1 score 0.3476578747872765
Fitting 4 folds for each of 5 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  15 out of  20 | elapsed:    0.2s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.2s finished


best estimator {'clf': DummyClassifier(strategy='uniform'), 'clf__strategy': 'uniform'} precision score 0.2732857142691926 recall score 0.5009334266880511 f1 score 0.34840574233649724
Fitting 4 folds for each of 5 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  15 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s finished


best estimator {'clf': DummyClassifier(strategy='uniform'), 'clf__strategy': 'uniform'} precision score 0.2589097964804005 recall score 0.4964716562364111 f1 score 0.33485364382470917


## Random Forest Classifier

In [39]:
param_grid = {  'clf': [RandomForestClassifier()],
                'clf__max_depth': [15, 20, 25, 30, 35, 40], # 5, 7, 
                'clf__max_features': [15, 20, 25, 30, 35, 40, 45, 51]} # 4, 8, 

scores_LP_RF, models_LP_RF, ys = ML_pipeline_KFold_LP(X, y, preprocessor, param_grid, LabelPowerset(), verbose=5)


Fitting 4 folds for each of 48 candidates, totalling 192 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:   50.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 192 out of 192 | elapsed:  3.1min finished


best estimator {'clf': RandomForestClassifier(max_depth=25, max_features=51), 'clf__max_depth': 25, 'clf__max_features': 51} precision score 0.7409578923490715 recall score 0.5992742421523504 f1 score 0.6606785525717488
Fitting 4 folds for each of 48 candidates, totalling 192 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:   51.5s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 192 out of 192 | elapsed:  3.1min finished


best estimator {'clf': RandomForestClassifier(max_depth=35, max_features=35), 'clf__max_depth': 35, 'clf__max_features': 35} precision score 0.7704478527629613 recall score 0.5924918681042989 f1 score 0.6675486089300251
Fitting 4 folds for each of 48 candidates, totalling 192 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:   53.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 192 out of 192 | elapsed:  3.2min finished


best estimator {'clf': RandomForestClassifier(max_depth=40, max_features=40), 'clf__max_depth': 40, 'clf__max_features': 40} precision score 0.746864875577428 recall score 0.5891012458623174 f1 score 0.6573480007320912
Fitting 4 folds for each of 48 candidates, totalling 192 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:   54.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 192 out of 192 | elapsed:  3.2min finished


best estimator {'clf': RandomForestClassifier(max_depth=25, max_features=35), 'clf__max_depth': 25, 'clf__max_features': 35} precision score 0.7289148630401011 recall score 0.5783718761756497 f1 score 0.6437175885996266
Fitting 4 folds for each of 48 candidates, totalling 192 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:   52.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 192 out of 192 | elapsed:  3.2min finished


best estimator {'clf': RandomForestClassifier(max_depth=25, max_features=15), 'clf__max_depth': 25, 'clf__max_features': 15} precision score 0.7491813797290602 recall score 0.599244597215084 f1 score 0.6647912179598328
Fitting 4 folds for each of 48 candidates, totalling 192 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:   52.1s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 192 out of 192 | elapsed:  3.2min finished


best estimator {'clf': RandomForestClassifier(max_depth=35, max_features=51), 'clf__max_depth': 35, 'clf__max_features': 51} precision score 0.7436739883511115 recall score 0.5936681605417241 f1 score 0.6593691503763403
Fitting 4 folds for each of 48 candidates, totalling 192 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:   53.4s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 192 out of 192 | elapsed:  3.2min finished


best estimator {'clf': RandomForestClassifier(max_depth=25, max_features=25), 'clf__max_depth': 25, 'clf__max_features': 25} precision score 0.7598547810260672 recall score 0.6012978540299326 f1 score 0.6692242246144025
Fitting 4 folds for each of 48 candidates, totalling 192 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:   51.3s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 192 out of 192 | elapsed:  3.1min finished


best estimator {'clf': RandomForestClassifier(max_depth=20, max_features=35), 'clf__max_depth': 20, 'clf__max_features': 35} precision score 0.7491240822179236 recall score 0.5859880302927093 f1 score 0.6557835010138127
Fitting 4 folds for each of 48 candidates, totalling 192 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:   50.3s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 192 out of 192 | elapsed:  3.1min finished


best estimator {'clf': RandomForestClassifier(max_depth=25, max_features=20), 'clf__max_depth': 25, 'clf__max_features': 20} precision score 0.7568708039253399 recall score 0.6000475710265033 f1 score 0.6683760190392501
Fitting 4 folds for each of 48 candidates, totalling 192 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:   50.9s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 192 out of 192 | elapsed:  3.1min finished


best estimator {'clf': RandomForestClassifier(max_depth=20, max_features=45), 'clf__max_depth': 20, 'clf__max_features': 45} precision score 0.7502400716118086 recall score 0.5836876337780571 f1 score 0.654117559539748


## K Nearest Neighbors Classifier

In [40]:
param_grid ={ 'clf': [KNeighborsClassifier()], 
              'clf__n_neighbors': [5,10,15,20,30,40,50,60],
              'clf__weights': ['distance', 'uniform'] }

scores_LP_KNN, models_LP_KNN, ys_LP_KNN = ML_pipeline_KFold_LP(X, y, preprocessor, param_grid, LabelPowerset(), verbose=5)


Fitting 4 folds for each of 16 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  62 out of  64 | elapsed:   10.2s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   10.3s finished


best estimator {'clf': KNeighborsClassifier(weights='distance'), 'clf__n_neighbors': 5, 'clf__weights': 'distance'} precision score 0.6837683718396629 recall score 0.5990183309870312 f1 score 0.6377527823277211
Fitting 4 folds for each of 16 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  62 out of  64 | elapsed:   10.1s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   10.2s finished


best estimator {'clf': KNeighborsClassifier(weights='distance'), 'clf__n_neighbors': 5, 'clf__weights': 'distance'} precision score 0.7108053998963872 recall score 0.5991383499554908 f1 score 0.6481764410973766
Fitting 4 folds for each of 16 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  62 out of  64 | elapsed:   10.4s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   10.4s finished


best estimator {'clf': KNeighborsClassifier(weights='distance'), 'clf__n_neighbors': 5, 'clf__weights': 'distance'} precision score 0.6879775811570709 recall score 0.6209547407025001 f1 score 0.6518742731520786
Fitting 4 folds for each of 16 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  62 out of  64 | elapsed:   10.4s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   10.5s finished


best estimator {'clf': KNeighborsClassifier(weights='distance'), 'clf__n_neighbors': 5, 'clf__weights': 'distance'} precision score 0.6723382492601756 recall score 0.5958014439547328 f1 score 0.6314950157675446
Fitting 4 folds for each of 16 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  62 out of  64 | elapsed:   10.4s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   10.5s finished


best estimator {'clf': KNeighborsClassifier(weights='distance'), 'clf__n_neighbors': 5, 'clf__weights': 'distance'} precision score 0.7018835838934923 recall score 0.6102520607841965 f1 score 0.6519558233215222
Fitting 4 folds for each of 16 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  62 out of  64 | elapsed:   10.3s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   10.4s finished


best estimator {'clf': KNeighborsClassifier(weights='distance'), 'clf__n_neighbors': 5, 'clf__weights': 'distance'} precision score 0.6789116779025265 recall score 0.6046637711767389 f1 score 0.6383819031181458
Fitting 4 folds for each of 16 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  62 out of  64 | elapsed:   10.3s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   10.4s finished


best estimator {'clf': KNeighborsClassifier(weights='distance'), 'clf__n_neighbors': 5, 'clf__weights': 'distance'} precision score 0.7071932015263092 recall score 0.6196120156425385 f1 score 0.6592409065257201
Fitting 4 folds for each of 16 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  62 out of  64 | elapsed:   10.7s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   10.7s finished


best estimator {'clf': KNeighborsClassifier(weights='distance'), 'clf__n_neighbors': 5, 'clf__weights': 'distance'} precision score 0.6892854967703195 recall score 0.592806025127644 f1 score 0.6364880718533638
Fitting 4 folds for each of 16 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  62 out of  64 | elapsed:   10.3s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   10.4s finished


best estimator {'clf': KNeighborsClassifier(weights='distance'), 'clf__n_neighbors': 5, 'clf__weights': 'distance'} precision score 0.6912060542906682 recall score 0.6221273737124694 f1 score 0.6539230573867166
Fitting 4 folds for each of 16 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  62 out of  64 | elapsed:   10.4s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   10.7s finished


best estimator {'clf': KNeighborsClassifier(weights='distance'), 'clf__n_neighbors': 5, 'clf__weights': 'distance'} precision score 0.6758868561110928 recall score 0.5890445095448369 f1 score 0.6282457774557305


## Logistic Regression

In [26]:
param_grid = { 'penalty': ['l2'],
               'C': np.logspace(0, 4, 5),
               'solver': ['newton-cg'] }
param_grid = ParameterGrid(param_grid)
test_scores = np.zeros(10)
final_models = []

for i in range(10):
    random_state = 431 * i 
    print('random state: 431 * {}'.format(i))
    X_other, X_test, y_other, y_test = train_test_split(X, y, test_size = 0.2, random_state=random_state)
    kf = KFold(n_splits=4, shuffle=True, random_state=random_state)
            
    models = []
    train_scores = np.zeros(shape=(4, len(param_grid)))
    val_scores = np.zeros(shape=(4, len(param_grid)))
    
    for p in range(len(param_grid)):
        params = param_grid[p]
        print('working on params:  ', params) 
        clf = LabelPowerset(LogisticRegression(**params,random_state=random_state, max_iter=10000, 
                                            warm_start=True, multi_class='multinomial',  n_jobs=-1))
        j = 0
        for train_index, val_index in kf.split(X_other):
            X_train, X_val = X.iloc[train_index], X.iloc[val_index]
            y_train, y_val = y.iloc[train_index], y.iloc[val_index]
            X_train_prep = preprocessor.fit_transform(X_train)
            X_val_prep = preprocessor.transform(X_val)
            X_test_prep = preprocessor.transform(X_test)
            clf.fit(X_train_prep, y_train)
            
            y_train_pred = clf.predict(X_train_prep)
            prec = precision_score(y_train, y_train_pred, average="macro", zero_division=0)
            rec = recall_score(y_train, y_train_pred, average="macro")
            f1 = f1_score(y_train, y_train_pred, average="macro")
         #   print('train:', i, "precision score", prec, "recall score", rec, "f1 score", f1)
            train_scores[j][p] = f1
            
            y_val_pred = clf.predict(X_val_prep)
            prec_val = precision_score(y_val, y_val_pred, average="macro", zero_division=0)
            rec_val = recall_score(y_val, y_val_pred, average="macro")
            f1_val = f1_score(y_val, y_val_pred, average="macro")
          #  print('validation:', i, "precision score", prec_val, "recall score", rec_val, "f1 score", f1_val)
            val_scores[j][p] = f1_val
            j += 1
        models.append(clf) 
            
        print('mean train f1 score:', np.mean(train_scores[:, p]), '    mean validation f1 score:', np.mean(val_scores[:, p]))
    amax_mean = np.argmax(np.mean(val_scores, axis=0))
    print('The best model parameters were:', param_grid[amax_mean])
    print('The corresponding mean validation score is:',np.max(np.mean(val_scores, axis=0)))
    final_models.append(models[amax_mean])
    y_test_pred = final_models[-1].predict(X_test_prep)
    test_scores[i] = f1_score(y_test, y_test_pred, average="macro")
    print('Test Score: ', test_scores[i])
 


random state: 431 * 0
working on params:   {'solver': 'newton-cg', 'penalty': 'l2', 'C': 1.0}
mean train f1 score: 0.3722376197766682 mean validation f1 score: 0.33712264519633056
working on params:   {'solver': 'newton-cg', 'penalty': 'l2', 'C': 10.0}
mean train f1 score: 0.4006042069559428 mean validation f1 score: 0.3599816193346029
working on params:   {'solver': 'newton-cg', 'penalty': 'l2', 'C': 100.0}
mean train f1 score: 0.4175288492320535 mean validation f1 score: 0.37131775727072924
working on params:   {'solver': 'newton-cg', 'penalty': 'l2', 'C': 1000.0}
mean train f1 score: 0.4285598031052149 mean validation f1 score: 0.37421435098172995
working on params:   {'solver': 'newton-cg', 'penalty': 'l2', 'C': 10000.0}


KeyboardInterrupt: 

## Support Vector Classifier

In [None]:

L = LabelPowerset()

y_svc = L.transform(y)

scores_LP_SVC, models_LP_SVC, ys_LP_SVC = ML_pipeline_KFold_LP(X, y_svc, preprocessor, param_grid, LabelPowerset(), verbose=5)


In [34]:
param_grid = { 'C': np.logspace(1,4,num=4),
               'tol': np.logspace(-5, -2,num=4) }

param_grid = ParameterGrid(param_grid)
test_scores = np.zeros(10)
final_models = []

for i in range(10):
    random_state = 431 * i 
    print('random state: 431 * {}'.format(i))
    X_other, X_test, y_other, y_test = train_test_split(X, y, test_size = 0.2, random_state=random_state)
    kf = KFold(n_splits=4, shuffle=True, random_state=random_state)
            
    models = []
    train_scores = np.zeros(shape=(4, len(param_grid)))
    val_scores = np.zeros(shape=(4, len(param_grid)))
    
    for p in range(len(param_grid)):
        params = param_grid[p]
        print('working on params:  ', params) 
        clf = LabelPowerset(SVC(**params,random_state=random_state, max_iter=100000, cache_size=1000))
        j = 0
        for train_index, val_index in kf.split(X_other):
            X_train, X_val = X.iloc[train_index], X.iloc[val_index]
            y_train, y_val = y.iloc[train_index], y.iloc[val_index]
            X_train_prep = preprocessor.fit_transform(X_train)
            X_val_prep = preprocessor.transform(X_val)
            X_test_prep = preprocessor.transform(X_test)
            clf.fit(X_train_prep, y_train)
            
            y_train_pred = clf.predict(X_train_prep)
            prec = precision_score(y_train, y_train_pred, average="macro", zero_division=0)
            rec = recall_score(y_train, y_train_pred, average="macro")
            f1 = f1_score(y_train, y_train_pred, average="macro")
         #   print('train:', i, "precision score", prec, "recall score", rec, "f1 score", f1)
            train_scores[j][p] = f1
            
            y_val_pred = clf.predict(X_val_prep)
            prec_val = precision_score(y_val, y_val_pred, average="macro", zero_division=0)
            rec_val = recall_score(y_val, y_val_pred, average="macro")
            f1_val = f1_score(y_val, y_val_pred, average="macro")
          #  print('validation:', i, "precision score", prec_val, "recall score", rec_val, "f1 score", f1_val)
            val_scores[j][p] = f1_val
            j += 1
        models.append(clf) 
            
        print('mean train f1 score:', np.mean(train_scores[:, p]), '    mean validation f1 score:', np.mean(val_scores[:, p]))
    amax_mean = np.argmax(np.mean(val_scores, axis=0))
    print('The best model parameters were:', param_grid[amax_mean])
    print('The corresponding mean validation score is:',np.max(np.mean(val_scores, axis=0)))
    final_models.append(models[amax_mean])
    y_test_pred = final_models[-1].predict(X_test_prep)
    test_scores[i] = f1_score(y_test, y_test_pred, average="macro")
    print('Test Score: ', test_scores[i])
 



random state: 431 * 0
working on params:   {'tol': 1e-05, 'C': 10.0}
mean train f1 score: 0.5803715570447965 mean validation f1 score: 0.4535622916690111
working on params:   {'tol': 0.0001, 'C': 10.0}
mean train f1 score: 0.5803715570447965 mean validation f1 score: 0.4535622916690111
working on params:   {'tol': 0.001, 'C': 10.0}
mean train f1 score: 0.5803715570447965 mean validation f1 score: 0.4535622916690111
working on params:   {'tol': 0.01, 'C': 10.0}
mean train f1 score: 0.580227548030398 mean validation f1 score: 0.4534683804972424
working on params:   {'tol': 1e-05, 'C': 39.810717055349734}
mean train f1 score: 0.6807236753677993 mean validation f1 score: 0.5053965695399787
working on params:   {'tol': 0.0001, 'C': 39.810717055349734}
mean train f1 score: 0.6807236753677993 mean validation f1 score: 0.5053965695399787
working on params:   {'tol': 0.001, 'C': 39.810717055349734}
mean train f1 score: 0.6807236753677993 mean validation f1 score: 0.5053788536119304
working on p

KeyboardInterrupt: 

In [36]:
param_grid = { 'C': np.logspace(5,7,num=3),
               'tol': np.logspace(-5, -2,num=4) }

param_grid = ParameterGrid(param_grid)
test_scores = np.zeros(10)
final_models = []

for i in range(10):
    random_state = 431 * i 
    print('random state: 431 * {}'.format(i))
    X_other, X_test, y_other, y_test = train_test_split(X, y, test_size = 0.2, random_state=random_state)
    kf = KFold(n_splits=4, shuffle=True, random_state=random_state)
            
    models = []
    train_scores = np.zeros(shape=(4, len(param_grid)))
    val_scores = np.zeros(shape=(4, len(param_grid)))
    
    for p in range(len(param_grid)):
        params = param_grid[p]
        print('working on params:  ', params) 
        clf = LabelPowerset(SVC(**params,random_state=random_state, max_iter=10000000, cache_size=1000))
        j = 0
        for train_index, val_index in kf.split(X_other):
            X_train, X_val = X.iloc[train_index], X.iloc[val_index]
            y_train, y_val = y.iloc[train_index], y.iloc[val_index]
            X_train_prep = preprocessor.fit_transform(X_train)
            X_val_prep = preprocessor.transform(X_val)
            X_test_prep = preprocessor.transform(X_test)
            clf.fit(X_train_prep, y_train)
            
            y_train_pred = clf.predict(X_train_prep)
            prec = precision_score(y_train, y_train_pred, average="macro", zero_division=0)
            rec = recall_score(y_train, y_train_pred, average="macro")
            f1 = f1_score(y_train, y_train_pred, average="macro")
         #   print('train:', i, "precision score", prec, "recall score", rec, "f1 score", f1)
            train_scores[j][p] = f1
            
            y_val_pred = clf.predict(X_val_prep)
            prec_val = precision_score(y_val, y_val_pred, average="macro", zero_division=0)
            rec_val = recall_score(y_val, y_val_pred, average="macro")
            f1_val = f1_score(y_val, y_val_pred, average="macro")
          #  print('validation:', i, "precision score", prec_val, "recall score", rec_val, "f1 score", f1_val)
            val_scores[j][p] = f1_val
            j += 1
        models.append(clf) 
            
        print('mean train f1 score:', np.mean(train_scores[:, p]), '    mean validation f1 score:', np.mean(val_scores[:, p]))
    amax_mean = np.argmax(np.mean(val_scores, axis=0))
    print('The best model parameters were:', param_grid[amax_mean])
    print('The corresponding mean validation score is:',np.max(np.mean(val_scores, axis=0)))
    final_models.append(models[amax_mean])
    y_test_pred = final_models[-1].predict(X_test_prep)
    test_scores[i] = f1_score(y_test, y_test_pred, average="macro")
    print('Test Score: ', test_scores[i])
 


random state: 431 * 0
working on params:   {'tol': 1e-05, 'C': 100000.0}
mean train f1 score: 0.9455544801372688 mean validation f1 score: 0.572010051170791
working on params:   {'tol': 0.0001, 'C': 100000.0}
mean train f1 score: 0.9455544801372688 mean validation f1 score: 0.572010051170791
working on params:   {'tol': 0.001, 'C': 100000.0}
mean train f1 score: 0.9455544801372688 mean validation f1 score: 0.572010051170791
working on params:   {'tol': 0.01, 'C': 100000.0}
mean train f1 score: 0.9455167881131996 mean validation f1 score: 0.5721401719842774
working on params:   {'tol': 1e-05, 'C': 1000000.0}
mean train f1 score: 0.9659223760396642 mean validation f1 score: 0.5700681635023653
working on params:   {'tol': 0.0001, 'C': 1000000.0}
mean train f1 score: 0.9659223760396642 mean validation f1 score: 0.5700681635023653
working on params:   {'tol': 0.001, 'C': 1000000.0}
mean train f1 score: 0.9659223760396642 mean validation f1 score: 0.5703308081079496
working on params:   {'to

KeyboardInterrupt: 

### RF Method 2

In [37]:
param_grid = {  'max_depth': [15, 20, 25, 30, 35, 40], # 5, 7, 
                'max_features': [15, 20, 25, 30, 35, 40, 45, 51]} # 4, 8, 

param_grid = ParameterGrid(param_grid)
test_scores = np.zeros(10)
final_models = []

for i in range(10):
    random_state = 431 * i 
    print('random state: 431 * {}'.format(i))
    X_other, X_test, y_other, y_test = train_test_split(X, y, test_size = 0.2, random_state=random_state)
    kf = KFold(n_splits=4, shuffle=True, random_state=random_state)
            
    models = []
    train_scores = np.zeros(shape=(4, len(param_grid)))
    val_scores = np.zeros(shape=(4, len(param_grid)))
    
    for p in range(len(param_grid)):
        params = param_grid[p]
        print('working on params:  ', params) 
        clf = LabelPowerset(RandomForestClassifier(**params,random_state=random_state))
        j = 0
        for train_index, val_index in kf.split(X_other):
            X_train, X_val = X.iloc[train_index], X.iloc[val_index]
            y_train, y_val = y.iloc[train_index], y.iloc[val_index]
            X_train_prep = preprocessor.fit_transform(X_train)
            X_val_prep = preprocessor.transform(X_val)
            X_test_prep = preprocessor.transform(X_test)
            clf.fit(X_train_prep, y_train)
            
            y_train_pred = clf.predict(X_train_prep)
            prec = precision_score(y_train, y_train_pred, average="macro", zero_division=0)
            rec = recall_score(y_train, y_train_pred, average="macro")
            f1 = f1_score(y_train, y_train_pred, average="macro")
         #   print('train:', i, "precision score", prec, "recall score", rec, "f1 score", f1)
            train_scores[j][p] = f1
            
            y_val_pred = clf.predict(X_val_prep)
            prec_val = precision_score(y_val, y_val_pred, average="macro", zero_division=0)
            rec_val = recall_score(y_val, y_val_pred, average="macro")
            f1_val = f1_score(y_val, y_val_pred, average="macro")
          #  print('validation:', i, "precision score", prec_val, "recall score", rec_val, "f1 score", f1_val)
            val_scores[j][p] = f1_val
            j += 1
        models.append(clf) 
            
        print('mean train f1 score:', np.mean(train_scores[:, p]), '    mean validation f1 score:', np.mean(val_scores[:, p]))
    amax_mean = np.argmax(np.mean(val_scores, axis=0))
    print('The best model parameters were:', param_grid[amax_mean])
    print('The corresponding mean validation score is:',np.max(np.mean(val_scores, axis=0)))
    final_models.append(models[amax_mean])
    y_test_pred = final_models[-1].predict(X_test_prep)
    test_scores[i] = f1_score(y_test, y_test_pred, average="macro")
    print('Test Score: ', test_scores[i])
 

random state: 431 * 0
working on params:   {'max_features': 15, 'max_depth': 15}
mean train f1 score: 0.9706086773681125 mean validation f1 score: 0.6057641000240094
working on params:   {'max_features': 20, 'max_depth': 15}
mean train f1 score: 0.9693094336629514 mean validation f1 score: 0.6063666200834701
working on params:   {'max_features': 25, 'max_depth': 15}
mean train f1 score: 0.9699166184068558 mean validation f1 score: 0.6068555807340391
working on params:   {'max_features': 30, 'max_depth': 15}
mean train f1 score: 0.9709466212179398 mean validation f1 score: 0.6100749534105043
working on params:   {'max_features': 35, 'max_depth': 15}
mean train f1 score: 0.9702581398932802 mean validation f1 score: 0.6052003623426836
working on params:   {'max_features': 40, 'max_depth': 15}
mean train f1 score: 0.969100747780782 mean validation f1 score: 0.6012020766671179
working on params:   {'max_features': 45, 'max_depth': 15}
mean train f1 score: 0.9706105925967108 mean validation 

mean train f1 score: 0.9883349953366736 mean validation f1 score: 0.6148239665652093
working on params:   {'max_features': 20, 'max_depth': 20}
mean train f1 score: 0.9884503495601563 mean validation f1 score: 0.6188052330813328
working on params:   {'max_features': 25, 'max_depth': 20}
mean train f1 score: 0.988477366094987 mean validation f1 score: 0.6164139244110325
working on params:   {'max_features': 30, 'max_depth': 20}
mean train f1 score: 0.9883963540158724 mean validation f1 score: 0.6167458944215195
working on params:   {'max_features': 35, 'max_depth': 20}
mean train f1 score: 0.9885481473097927 mean validation f1 score: 0.6156524711690121
working on params:   {'max_features': 40, 'max_depth': 20}
mean train f1 score: 0.9883634860212205 mean validation f1 score: 0.6129063424482906
working on params:   {'max_features': 45, 'max_depth': 20}
mean train f1 score: 0.9884846014640484 mean validation f1 score: 0.6172994291638938
working on params:   {'max_features': 51, 'max_depth

mean train f1 score: 0.988418946525428 mean validation f1 score: 0.6314706710170386
working on params:   {'max_features': 20, 'max_depth': 25}
mean train f1 score: 0.9884200197534511 mean validation f1 score: 0.6299696506766109
working on params:   {'max_features': 25, 'max_depth': 25}
mean train f1 score: 0.9885639502521673 mean validation f1 score: 0.6301643188638043
working on params:   {'max_features': 30, 'max_depth': 25}
mean train f1 score: 0.9885656333593675 mean validation f1 score: 0.632387625524575
working on params:   {'max_features': 35, 'max_depth': 25}
mean train f1 score: 0.9883838572760943 mean validation f1 score: 0.6307768614605387
working on params:   {'max_features': 40, 'max_depth': 25}
mean train f1 score: 0.9884254766314261 mean validation f1 score: 0.6295425885435982
working on params:   {'max_features': 45, 'max_depth': 25}
mean train f1 score: 0.9885296310798196 mean validation f1 score: 0.6297251447071781
working on params:   {'max_features': 51, 'max_depth'

KeyboardInterrupt: 

In [39]:
param_grid = {  'max_depth': [25, 30, 35], # 5, 7, 
                'max_features': [27, 35, 43, 51]} # 4, 8, 

param_grid = ParameterGrid(param_grid)
test_scores = np.zeros(10)
final_models = []

for i in range(10):
    random_state = 700 * i 
    print('random state: 700 * {}'.format(i))
    X_other, X_test, y_other, y_test = train_test_split(X, y, test_size = 0.2, random_state=random_state)
    kf = KFold(n_splits=4, shuffle=True, random_state=random_state)
            
    models = []
    train_scores = np.zeros(shape=(4, len(param_grid)))
    val_scores = np.zeros(shape=(4, len(param_grid)))
    
    for p in range(len(param_grid)):
        params = param_grid[p]
        print('working on params:  ', params) 
        clf = LabelPowerset(RandomForestClassifier(**params,random_state=random_state))
        j = 0
        for train_index, val_index in kf.split(X_other):
            X_train, X_val = X.iloc[train_index], X.iloc[val_index]
            y_train, y_val = y.iloc[train_index], y.iloc[val_index]
            X_train_prep = preprocessor.fit_transform(X_train)
            X_val_prep = preprocessor.transform(X_val)
            X_test_prep = preprocessor.transform(X_test)
            clf.fit(X_train_prep, y_train)
            
            y_train_pred = clf.predict(X_train_prep)
            prec = precision_score(y_train, y_train_pred, average="macro", zero_division=0)
            rec = recall_score(y_train, y_train_pred, average="macro")
            f1 = f1_score(y_train, y_train_pred, average="macro")
         #   print('train:', i, "precision score", prec, "recall score", rec, "f1 score", f1)
            train_scores[j][p] = f1
            
            y_val_pred = clf.predict(X_val_prep)
            prec_val = precision_score(y_val, y_val_pred, average="macro", zero_division=0)
            rec_val = recall_score(y_val, y_val_pred, average="macro")
            f1_val = f1_score(y_val, y_val_pred, average="macro")
          #  print('validation:', i, "precision score", prec_val, "recall score", rec_val, "f1 score", f1_val)
            val_scores[j][p] = f1_val
            j += 1
        models.append(clf) 
            
        print('mean train f1 score:', np.mean(train_scores[:, p]), '    mean validation f1 score:', np.mean(val_scores[:, p]))
    amax_mean = np.argmax(np.mean(val_scores, axis=0))
    print('The best model parameters were:', param_grid[amax_mean])
    print('The corresponding mean validation score is:',np.max(np.mean(val_scores, axis=0)))
    final_models.append(models[amax_mean])
    y_test_pred = final_models[-1].predict(X_test_prep)
    test_scores[i] = f1_score(y_test, y_test_pred, average="macro")
    print('Test Score: ', test_scores[i])
 

random state: 700 * 0
working on params:   {'max_features': 27, 'max_depth': 25}
mean train f1 score: 0.9888331374317167     mean validation f1 score: 0.6232604101330355
working on params:   {'max_features': 35, 'max_depth': 25}
mean train f1 score: 0.9888349416981183     mean validation f1 score: 0.619706221773355
working on params:   {'max_features': 43, 'max_depth': 25}
mean train f1 score: 0.9889282109841334     mean validation f1 score: 0.6204870248606764
working on params:   {'max_features': 51, 'max_depth': 25}
mean train f1 score: 0.9889241098752963     mean validation f1 score: 0.6260557226947525
working on params:   {'max_features': 27, 'max_depth': 30}
mean train f1 score: 0.9888376896912703     mean validation f1 score: 0.6209472468792516
working on params:   {'max_features': 35, 'max_depth': 30}
mean train f1 score: 0.9888313706416364     mean validation f1 score: 0.6232280358331659
working on params:   {'max_features': 43, 'max_depth': 30}
mean train f1 score: 0.988925453

mean train f1 score: 0.9891658119424833     mean validation f1 score: 0.6196198259508181
working on params:   {'max_features': 51, 'max_depth': 25}
mean train f1 score: 0.9891809681988171     mean validation f1 score: 0.6171713037536423
working on params:   {'max_features': 27, 'max_depth': 30}
mean train f1 score: 0.9890823649333008     mean validation f1 score: 0.6229033574296124
working on params:   {'max_features': 35, 'max_depth': 30}
mean train f1 score: 0.9891363743746321     mean validation f1 score: 0.6226952716986194
working on params:   {'max_features': 43, 'max_depth': 30}
mean train f1 score: 0.9891636091414272     mean validation f1 score: 0.6229229904228455
working on params:   {'max_features': 51, 'max_depth': 30}
mean train f1 score: 0.9891816898574083     mean validation f1 score: 0.6148727590726404
working on params:   {'max_features': 27, 'max_depth': 35}
mean train f1 score: 0.989081625175251     mean validation f1 score: 0.6234089309818479
working on params:   {'m

mean train f1 score: 0.9887501533230167     mean validation f1 score: 0.6254192484974966
working on params:   {'max_features': 43, 'max_depth': 30}
mean train f1 score: 0.9887514476013176     mean validation f1 score: 0.6258392437440687
working on params:   {'max_features': 51, 'max_depth': 30}
mean train f1 score: 0.9887519639673181     mean validation f1 score: 0.6191408434388492
working on params:   {'max_features': 27, 'max_depth': 35}
mean train f1 score: 0.9887484012564689     mean validation f1 score: 0.6257326778545091
working on params:   {'max_features': 35, 'max_depth': 35}
mean train f1 score: 0.9887493662239905     mean validation f1 score: 0.6250345689609921
working on params:   {'max_features': 43, 'max_depth': 35}
mean train f1 score: 0.9887510944829403     mean validation f1 score: 0.6242989436263486
working on params:   {'max_features': 51, 'max_depth': 35}
mean train f1 score: 0.9887519639673181     mean validation f1 score: 0.618566236986564
The best model parameter