In [1]:
import json
from datetime import datetime
from pprint import pprint

from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from tqdm.notebook import tqdm
from xgboost import XGBClassifier

In [2]:
from heart import *
from hotel import *
from airline import *

data_and_transformers = [
    ('heart', load_heart(), transformers_heart()),
    ('airline', load_airline(), transformers_airline()),
    ('hotel', load_hotel(), transformers_hotel())
]

In [3]:
models_and_param_grids = [
    ('nb', BernoulliNB(binarize=False), {
        'bernoullinb__alpha': [1.0,]
    }),
    ('xgb', XGBClassifier(random_state=0, objective='binary:logistic'), {
        'xgbclassifier__n_estimators': [100,],
    }),
    ('rf', RandomForestClassifier(random_state=0), {
        'randomforestclassifier__n_estimators': [10,],
    }),
    ('logreg', LogisticRegression(random_state=0), {
        'logisticregression__C': [1,], 
    }),
    ('knn', KNeighborsClassifier(), {
        'kneighborsclassifier__n_neighbors': [5,],
        'kneighborsclassifier__weights': ['distance'],
        # 'kneighborsclassifier__metric': ['hamming'],
    }),
    ('catboost', CatBoostClassifier(verbose=0, random_state=0), {
        'catboostclassifier__n_estimators': [100,],
    }),
]

In [4]:
results = []

for data_name, data, transformers in tqdm(data_and_transformers):
    for model_name, model, param_grid in tqdm(models_and_param_grids):

        pipeline = make_pipeline(*transformers, model)
        grid_search = GridSearchCV(
            estimator=pipeline,
            cv=StratifiedKFold(n_splits=5),
            param_grid=param_grid,
            n_jobs=3,
            verbose=100,
            scoring=['accuracy', 'f1', 'precision', 'recall'],
            refit='f1',
            error_score='raise'
        )
        display(grid_search)

        X_train, X_test, y_train, y_test = data
        grid_search.fit(X=X_train, y=y_train)
        y_pred = grid_search.predict(X_test)

        res = {
            'dataset': data_name,
            'model': model_name,
            'f1': f1_score(y_test, y_pred),
            'accuracy': accuracy_score(y_test, y_pred)
        }
        pprint(res)
        with open(f'{datetime.now()}_{model_name}_{data_name}.json', 'wt') as f:
            json.dump(res, f)
        results.append(res)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 3/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 2/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 3/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.833) f1: (test=0.369) precision: (test=0.287) recall: (test=0.517) total time=   2.2s
[CV 2/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.835) f1: (test=0.370) precision: (test=0.289) recall: (test=0.513) total time=   2.1s
[CV 1/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.833) f1: (test=0.370) precision: (test=0.287) recall: (test=0.521) total time=   2.3s
[CV 4/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 5/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 5/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.834) f1: (test=0.374) precision: (test

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 2/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 3/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 1/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.907) f1: (test=0.171) precision: (test=0.520) recall: (test=0.102) total time=   6.2s
[CV 4/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 3/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.906) f1: (test=0.172) precision: (test=0.513) recall: (test=0.103) total time=   6.3s
[CV 5/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 2/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.907) f1: (test=0.193) precision: (test=0.532) recall: (test=0.118) total time=   6.6s
[CV 5/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.90

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 3/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 2/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 1/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 3/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.895) f1: (test=0.192) precision: (test=0.348) recall: (test=0.133) total time=  11.3s
[CV 4/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 1/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.895) f1: (test=0.190) precision: (test=0.350) recall: (test=0.130) total time=  11.6s
[CV 5/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 2/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.895) f1: (test=0.202) precision: (test=0.358) recall: (test=0.140) total time=  11.7s
[CV 4/5; 1/1] END randomforestclassifier__n_est

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START logisticregression__C=1.....................................
[CV 2/5; 1/1] START logisticregression__C=1.....................................
[CV 3/5; 1/1] START logisticregression__C=1.....................................


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5; 1/1] END logisticregression__C=1; accuracy: (test=0.907) f1: (test=0.185) precision: (test=0.538) recall: (test=0.112) total time=   8.7s
[CV 4/5; 1/1] START logisticregression__C=1.....................................
[CV 1/5; 1/1] END logisticregression__C=1; accuracy: (test=0.907) f1: (test=0.190) precision: (test=0.517) recall: (test=0.116) total time=   8.8s
[CV 5/5; 1/1] START logisticregression__C=1.....................................
[CV 2/5; 1/1] END logisticregression__C=1; accuracy: (test=0.908) f1: (test=0.200) precision: (test=0.561) recall: (test=0.121) total time=   8.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[CV 4/5; 1/1] END logisticregression__C=1; accuracy: (test=0.906) f1: (test=0.188) precision: (test=0.514) recall: (test=0.115) total time=   6.5s
[CV 5/5; 1/1] END logisticregression__C=1; accuracy: (test=0.907) f1: (test=0.204) precision: (test=0.534) recall: (test=0.126) total time=   6.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


{'accuracy': 0.9092754651529485,
 'dataset': 'heart',
 'f1': 0.22338451155728023,
 'model': 'logreg'}


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 2/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 3/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 2/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance; accuracy: (test=0.893) f1: (test=0.215) precision: (test=0.351) recall: (test=0.155) total time= 1.5min
[CV 4/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 1/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance; accuracy: (test=0.892) f1: (test=0.200) precision: (test=0.332) recall: (test=0.143) total time= 1.5min
[CV 5/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 3/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighbo

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 3/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 2/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 1/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.905) f1: (test=0.167) precision: (test=0.492) recall: (test=0.101) total time=  20.3s
[CV 4/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 3/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.906) f1: (test=0.171) precision: (test=0.495) recall: (test=0.103) total time=  20.4s
[CV 2/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.906) f1: (test=0.178) precision: (test=0.511) recall: (test=0.108) total time=  20.8s
[CV 5/5; 1/1] START catboostclassifier__n_estimators=100........................




[CV 4/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.906) f1: (test=0.175) precision: (test=0.507) recall: (test=0.106) total time=  15.2s
[CV 5/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.906) f1: (test=0.185) precision: (test=0.516) recall: (test=0.112) total time=  14.6s
{'accuracy': 0.9067526017029328,
 'dataset': 'heart',
 'f1': 0.18416968442834972,
 'model': 'catboost'}


  0%|          | 0/6 [00:00<?, ?it/s]

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 3/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 2/5; 1/1] START bernoullinb__alpha=1.0......................................




[CV 2/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.871) f1: (test=0.849) precision: (test=0.859) recall: (test=0.840) total time=   3.3s
[CV 1/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.872) f1: (test=0.850) precision: (test=0.861) recall: (test=0.839) total time=   3.3s
[CV 3/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.871) f1: (test=0.850) precision: (test=0.856) recall: (test=0.844) total time=   3.3s
[CV 4/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 5/5; 1/1] START bernoullinb__alpha=1.0......................................




[CV 4/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.870) f1: (test=0.848) precision: (test=0.858) recall: (test=0.839) total time=   2.6s
[CV 5/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.876) f1: (test=0.856) precision: (test=0.858) recall: (test=0.855) total time=   2.8s
{'accuracy': 0.8685324915306437,
 'dataset': 'airline',
 'f1': 0.8493138595949346,
 'model': 'nb'}


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 2/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 3/5; 1/1] START xgbclassifier__n_estimators=100.............................




[CV 3/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.960) f1: (test=0.953) precision: (test=0.966) recall: (test=0.941) total time=   6.7s
[CV 4/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 1/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.960) f1: (test=0.954) precision: (test=0.966) recall: (test=0.941) total time=   7.0s
[CV 5/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 2/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.962) f1: (test=0.956) precision: (test=0.969) recall: (test=0.943) total time=   7.0s
[CV 5/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.962) f1: (test=0.956) precision: (test=0.970) recall: (test=0.943) total time=   5.0s




[CV 4/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.961) f1: (test=0.955) precision: (test=0.972) recall: (test=0.937) total time=   6.0s
{'accuracy': 0.9618493994456422,
 'dataset': 'airline',
 'f1': 0.9559692540098635,
 'model': 'xgb'}


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 2/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 3/5; 1/1] START randomforestclassifier__n_estimators=10.....................




[CV 1/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.947) f1: (test=0.937) precision: (test=0.965) recall: (test=0.911) total time=   7.5s
[CV 4/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 2/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.948) f1: (test=0.938) precision: (test=0.963) recall: (test=0.915) total time=   7.8s
[CV 5/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 3/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.945) f1: (test=0.935) precision: (test=0.962) recall: (test=0.909) total time=   8.0s




[CV 4/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.949) f1: (test=0.939) precision: (test=0.970) recall: (test=0.910) total time=   6.5s
[CV 5/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.949) f1: (test=0.940) precision: (test=0.966) recall: (test=0.915) total time=   6.7s
{'accuracy': 0.9497998152140438,
 'dataset': 'airline',
 'f1': 0.9414669180357303,
 'model': 'rf'}


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START logisticregression__C=1.....................................
[CV 2/5; 1/1] START logisticregression__C=1.....................................
[CV 3/5; 1/1] START logisticregression__C=1.....................................


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5; 1/1] END logisticregression__C=1; accuracy: (test=0.933) f1: (test=0.922) precision: (test=0.932) recall: (test=0.911) total time=  11.1s
[CV 4/5; 1/1] START logisticregression__C=1.....................................


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5; 1/1] END logisticregression__C=1; accuracy: (test=0.933) f1: (test=0.922) precision: (test=0.931) recall: (test=0.912) total time=  12.2s
[CV 5/5; 1/1] START logisticregression__C=1.....................................


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5; 1/1] END logisticregression__C=1; accuracy: (test=0.931) f1: (test=0.920) precision: (test=0.928) recall: (test=0.913) total time=  12.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5; 1/1] END logisticregression__C=1; accuracy: (test=0.934) f1: (test=0.923) precision: (test=0.936) recall: (test=0.911) total time=   9.1s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5; 1/1] END logisticregression__C=1; accuracy: (test=0.934) f1: (test=0.924) precision: (test=0.933) recall: (test=0.915) total time=   9.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


{'accuracy': 0.9329765937788728,
 'dataset': 'airline',
 'f1': 0.9229270884058612,
 'model': 'logreg'}


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 2/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 3/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance




[CV 1/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance; accuracy: (test=0.926) f1: (test=0.913) precision: (test=0.928) recall: (test=0.898) total time=  33.9s
[CV 4/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 2/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance; accuracy: (test=0.927) f1: (test=0.915) precision: (test=0.928) recall: (test=0.902) total time=  34.0s
[CV 5/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 3/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance; accuracy: (test=0.928) f1: (test=0.915) precision: (test=0.929) recall: (test=0.902) total time=  34.1s




[CV 4/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance; accuracy: (test=0.929) f1: (test=0.917) precision: (test=0.933) recall: (test=0.901) total time=  20.4s
[CV 5/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance; accuracy: (test=0.931) f1: (test=0.920) precision: (test=0.930) recall: (test=0.909) total time=  22.5s
{'accuracy': 0.9307052663997536,
 'dataset': 'airline',
 'f1': 0.9200284343344589,
 'model': 'knn'}


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 2/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 1/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 3/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 2/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.961) f1: (test=0.954) precision: (test=0.966) recall: (test=0.942) total time=  12.4s
[CV 4/5; 1/1] START catboostclassifier__n_estimators=100........................




[CV 1/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.959) f1: (test=0.952) precision: (test=0.964) recall: (test=0.940) total time=  13.1s
[CV 5/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 3/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.958) f1: (test=0.951) precision: (test=0.964) recall: (test=0.939) total time=  13.2s




[CV 4/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.960) f1: (test=0.953) precision: (test=0.969) recall: (test=0.938) total time=   8.1s
[CV 5/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.961) f1: (test=0.955) precision: (test=0.968) recall: (test=0.943) total time=   7.8s
{'accuracy': 0.9614259316291962,
 'dataset': 'airline',
 'f1': 0.9554666666666666,
 'model': 'catboost'}


  0%|          | 0/6 [00:00<?, ?it/s]

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 2/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 3/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 1/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.742) f1: (test=0.582) precision: (test=0.725) recall: (test=0.486) total time=   2.8s
[CV 4/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 2/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.742) f1: (test=0.591) precision: (test=0.711) recall: (test=0.506) total time=   3.2s
[CV 5/5; 1/1] START bernoullinb__alpha=1.0......................................
[CV 3/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.743) f1: (test=0.584) precision: (test=0.727) recall: (test=0.488) total time=   3.1s
[CV 4/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.743) f1: (test=0.583) precision: (test



[CV 5/5; 1/1] END bernoullinb__alpha=1.0; accuracy: (test=0.746) f1: (test=0.584) precision: (test=0.741) recall: (test=0.481) total time=   2.7s
{'accuracy': 0.7403886422648464,
 'dataset': 'hotel',
 'f1': 0.585545229658354,
 'model': 'nb'}


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 2/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 3/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 1/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.789) f1: (test=0.648) precision: (test=0.842) recall: (test=0.527) total time=   5.3s
[CV 2/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.785) f1: (test=0.644) precision: (test=0.829) recall: (test=0.526) total time=   5.0s
[CV 4/5; 1/1] START xgbclassifier__n_estimators=100.............................
[CV 3/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.786) f1: (test=0.644) precision: (test=0.834) recall: (test=0.525) total time=   5.1s
[CV 5/5; 1/1] START xgbclassifier__n_estimators=100.............................




[CV 4/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.782) f1: (test=0.641) precision: (test=0.820) recall: (test=0.526) total time=   4.0s
[CV 5/5; 1/1] END xgbclassifier__n_estimators=100; accuracy: (test=0.785) f1: (test=0.640) precision: (test=0.837) recall: (test=0.518) total time=   3.6s
{'accuracy': 0.7818075215679705,
 'dataset': 'hotel',
 'f1': 0.6408382738177306,
 'model': 'xgb'}


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 2/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 3/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 1/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.774) f1: (test=0.667) precision: (test=0.731) recall: (test=0.613) total time=   7.2s
[CV 4/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 2/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.778) f1: (test=0.675) precision: (test=0.735) recall: (test=0.623) total time=   7.7s
[CV 5/5; 1/1] START randomforestclassifier__n_estimators=10.....................
[CV 3/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.779) f1: (test=0.676) precision: (test=0.736) recall: (test=0.625) total time=   7.8s
[CV 4/5; 1/1] END randomforestclassifier__n_est



[CV 5/5; 1/1] END randomforestclassifier__n_estimators=10; accuracy: (test=0.778) f1: (test=0.676) precision: (test=0.734) recall: (test=0.626) total time=   5.9s
{'accuracy': 0.7777033252366194,
 'dataset': 'hotel',
 'f1': 0.6782250242483026,
 'model': 'rf'}


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START logisticregression__C=1.....................................
[CV 2/5; 1/1] START logisticregression__C=1.....................................
[CV 3/5; 1/1] START logisticregression__C=1.....................................


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5; 1/1] END logisticregression__C=1; accuracy: (test=0.765) f1: (test=0.573) precision: (test=0.872) recall: (test=0.426) total time=   8.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5; 1/1] START logisticregression__C=1.....................................
[CV 2/5; 1/1] END logisticregression__C=1; accuracy: (test=0.763) f1: (test=0.568) precision: (test=0.867) recall: (test=0.422) total time=   9.0s
[CV 5/5; 1/1] START logisticregression__C=1.....................................
[CV 3/5; 1/1] END logisticregression__C=1; accuracy: (test=0.764) f1: (test=0.569) precision: (test=0.875) recall: (test=0.422) total time=   9.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5; 1/1] END logisticregression__C=1; accuracy: (test=0.763) f1: (test=0.570) precision: (test=0.866) recall: (test=0.425) total time=   7.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5; 1/1] END logisticregression__C=1; accuracy: (test=0.760) f1: (test=0.559) precision: (test=0.868) recall: (test=0.412) total time=   7.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


{'accuracy': 0.7605327079319876,
 'dataset': 'hotel',
 'f1': 0.5695573622402891,
 'model': 'logreg'}


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 2/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 3/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 1/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance; accuracy: (test=0.772) f1: (test=0.666) precision: (test=0.725) recall: (test=0.616) total time=  27.0s
[CV 4/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 2/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance; accuracy: (test=0.775) f1: (test=0.670) precision: (test=0.729) recall: (test=0.620) total time=  26.9s
[CV 5/5; 1/1] START kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance
[CV 3/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighbo



[CV 4/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance; accuracy: (test=0.767) f1: (test=0.663) precision: (test=0.713) recall: (test=0.619) total time=  21.8s
[CV 5/5; 1/1] END kneighborsclassifier__n_neighbors=5, kneighborsclassifier__weights=distance; accuracy: (test=0.773) f1: (test=0.669) precision: (test=0.723) recall: (test=0.623) total time=  21.3s
{'accuracy': 0.7713376329675852,
 'dataset': 'hotel',
 'f1': 0.6741076757789184,
 'model': 'knn'}


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 2/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 3/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 1/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.785) f1: (test=0.644) precision: (test=0.830) recall: (test=0.526) total time=   9.5s
[CV 2/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.789) f1: (test=0.652) precision: (test=0.832) recall: (test=0.536) total time=   9.3s
[CV 4/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 3/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.785) f1: (test=0.643) precision: (test=0.830) recall: (test=0.525) total time=   9.4s
[CV 5/5; 1/1] START catboostclassifier__n_estimators=100........................
[CV 4/5; 1/1] END catboostclassifier__n_estimators=100; 



[CV 5/5; 1/1] END catboostclassifier__n_estimators=100; accuracy: (test=0.786) f1: (test=0.643) precision: (test=0.841) recall: (test=0.520) total time=   6.3s
{'accuracy': 0.781221207806349,
 'dataset': 'hotel',
 'f1': 0.6427301326767885,
 'model': 'catboost'}


In [9]:
df_results = pd.DataFrame(results).pivot(
    index='model',
    columns='dataset',
    values=['f1', 'accuracy'],
).swaplevel(axis=1).sort_index(axis=1)
df_results.style.highlight_max().format(precision=3)

dataset,airline,airline,heart,heart,hotel,hotel
Unnamed: 0_level_1,accuracy,f1,accuracy,f1,accuracy,f1
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
catboost,0.961,0.955,0.907,0.184,0.781,0.643
knn,0.931,0.92,0.892,0.215,0.771,0.674
logreg,0.933,0.923,0.909,0.223,0.761,0.57
nb,0.869,0.849,0.835,0.376,0.74,0.586
rf,0.95,0.941,0.896,0.201,0.778,0.678
xgb,0.962,0.956,0.908,0.195,0.782,0.641


In [12]:
df_results.style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex('table.tex')