In [4]:
# -*- coding: UTF-8 -*-
"""Evaluation performance of the most popular classification algorithms"""
import time
import pandas as pd
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, cross_validate
from sklearn import metrics
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier, HistGradientBoostingClassifier, RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

In [5]:
def exec_time(start:float, end:float) -> float:
    """Measures code runtime, returns value in milliseconds rounded to 4 decimal places."""
    diff_time = (end - start) * 1000
    return round(diff_time, 4)

In [6]:
# Define test classification dataset
X, y = make_classification(n_samples=2000, n_features=7, n_informative=5, n_redundant=2, random_state=12)
# summarize the dataset
print(X.shape, y.shape)
# Split for train/test data as 80% and 20 %
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=12)
X, y

(2000, 7) (2000,)


(array([[ 0.98494061, -0.72572549,  0.22255677, ...,  1.8681878 ,
         -2.79671927,  1.97024335],
        [ 1.38692078,  1.73777061, -1.43049967, ...,  0.43809266,
          0.0605342 , -0.12174517],
        [ 0.76319675,  1.37060634, -1.46835557, ..., -0.11028388,
          0.1486933 ,  0.14285945],
        ...,
        [-1.2845685 , -0.80567947,  1.29232843, ...,  0.47524624,
         -0.36037226,  1.01687246],
        [ 1.34254657, -0.17345118, -1.61236257, ..., -0.41292282,
         -1.28293218,  0.42119896],
        [ 0.31942181, -1.80482244,  0.02086189, ..., -0.00609257,
         -1.48429554, -0.35664264]]),
 array([1, 1, 0, ..., 0, 0, 0]))

In [8]:
# instance the classification models
rf = RandomForestClassifier()
gb = GradientBoostingClassifier()
hist_gb = HistGradientBoostingClassifier()
xgb = XGBClassifier()
lgb = LGBMClassifier()
catgb = CatBoostClassifier(verbose=0, n_estimators=200)
mlp = MLPClassifier(max_iter=1000, solver='lbfgs')
models = (rf, gb, hist_gb, xgb, lgb, catgb, mlp)
models

(RandomForestClassifier(),
 GradientBoostingClassifier(),
 HistGradientBoostingClassifier(),
 XGBClassifier(base_score=None, booster=None, callbacks=None,
               colsample_bylevel=None, colsample_bynode=None,
               colsample_bytree=None, early_stopping_rounds=None,
               enable_categorical=False, eval_metric=None, gamma=None,
               gpu_id=None, grow_policy=None, importance_type=None,
               interaction_constraints=None, learning_rate=None, max_bin=None,
               max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
               max_leaves=None, min_child_weight=None, missing=nan,
               monotone_constraints=None, n_estimators=100, n_jobs=None,
               num_parallel_tree=None, predictor=None, random_state=None,
               reg_alpha=None, reg_lambda=None, ...),
 LGBMClassifier(),
 <catboost.core.CatBoostClassifier at 0x26dd34a5a20>,
 MLPClassifier(max_iter=1000, solver='lbfgs'))

In [13]:
def make_cross_validation(estimator, X_train, y_train, cv=5):
    """Perform cross validation and teturn metrics:
    accuracy, balanced_accuracy, f1, precision, recall, roc_auc"""
    scorings = ('accuracy', 'balanced_accuracy', 'f1', 'precision', 'recall', 'roc_auc')
    scores = cross_validate(estimator, X_train, y_train, cv=cv, n_jobs=-1, scoring=scorings)
    final_metrics = dict()
    for key, item in sorted(scores.items()):
        final_metrics[key] = item.mean().round(4)
    return final_metrics

In [11]:
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=12)
data_cross_val = []
for mdl in models:
    report_dict = make_cross_validation(mdl, X_train, y_train, cv=cv)
    data_cross_val.append(report_dict)
df_report = pd.DataFrame(data_cross_val, index=[str(mdl) for mdl in models])
df_report

Unnamed: 0,fit_time,score_time,test_accuracy,test_balanced_accuracy,test_f1,test_precision,test_recall,test_roc_auc
RandomForestClassifier(),0.2502,0.0358,0.9249,0.9249,0.9248,0.9249,0.9253,0.9747
GradientBoostingClassifier(),0.2728,0.006,0.9144,0.9144,0.9138,0.9182,0.9101,0.971
HistGradientBoostingClassifier(),0.3713,0.0149,0.9282,0.9283,0.928,0.929,0.9279,0.9792
"XGBClassifier(base_score=None, booster=None, callbacks=None,\n colsample_bylevel=None, colsample_bynode=None,\n colsample_bytree=None, early_stopping_rounds=None,\n enable_categorical=False, eval_metric=None, gamma=None,\n gpu_id=None, grow_policy=None, importance_type=None,\n interaction_constraints=None, learning_rate=None, max_bin=None,\n max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n max_leaves=None, min_child_weight=None, missing=nan,\n monotone_constraints=None, n_estimators=100, n_jobs=None,\n num_parallel_tree=None, predictor=None, random_state=None,\n reg_alpha=None, reg_lambda=None, ...)",0.1992,0.0088,0.9265,0.9265,0.9264,0.9265,0.9269,0.9785
LGBMClassifier(),0.1017,0.011,0.9285,0.9285,0.9285,0.928,0.9296,0.9792
<catboost.core.CatBoostClassifier object at 0x0000026DD34A5A20>,1.0294,0.0083,0.9331,0.9332,0.9331,0.933,0.9339,0.9817
"MLPClassifier(max_iter=1000, solver='lbfgs')",0.4852,0.0062,0.9264,0.9264,0.9262,0.9271,0.9258,0.973


In [14]:
def test_evaluation(estimator, X_train, y_train, X_test, y_test) -> dict:
    """Calculates the main model metrics - accuracy, balanced-Accuracy, 
    recall, precision, f1_score on the test set and returns them as a map object."""
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_test)

    accuracy = metrics.accuracy_score(y_test, y_pred).round(4)
    balanced_accuracy = metrics.balanced_accuracy_score(y_test, y_pred).round(4)
    precision = metrics.precision_score(y_test, y_pred).round(4)
    f_1 = metrics.f1_score(y_test, y_pred).round(4)
    recall = metrics.recall_score(y_test, y_pred).round(4)
    roc_auc = metrics.roc_auc_score(y_test, y_pred).round(4)

    final_metrics = {'accuracy': accuracy,
                     'balanced_accuracy': balanced_accuracy,
                     'f1': f_1,
                     'precision': precision,
                     'recall': recall,
                     'roc_auc': roc_auc,
                    }
    # Use dict comprehension for round dict values 
    final_metrics = {key:round(val, 4) for key, val in final_metrics.items()} 
    return final_metrics

In [17]:
# Evaluate the models on the test set
data_test = []
for mdl in models:
    report_dict_test = test_evaluation(mdl, X_train, y_train, X_test, y_test)
    data_test.append(report_dict_test)
print('Evaluate the models on the test set: ')
df_report_test = pd.DataFrame(data_test, index=[str(mdl) for mdl in models])
df_report_test

Evaluate the models on the test set: 


Unnamed: 0,accuracy,balanced_accuracy,f1,precision,recall,roc_auc
RandomForestClassifier(),0.9475,0.9475,0.9476,0.95,0.9453,0.9475
GradientBoostingClassifier(),0.945,0.9451,0.9444,0.959,0.9303,0.9451
HistGradientBoostingClassifier(),0.96,0.96,0.96,0.9648,0.9552,0.96
"XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,\n colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,\n early_stopping_rounds=None, enable_categorical=False,\n eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',\n importance_type=None, interaction_constraints='',\n learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,\n max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,\n missing=nan, monotone_constraints='()', n_estimators=100,\n n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,\n reg_alpha=0, reg_lambda=1, ...)",0.9475,0.9475,0.9474,0.9545,0.9403,0.9475
LGBMClassifier(),0.9525,0.9525,0.9531,0.9461,0.9602,0.9525
<catboost.core.CatBoostClassifier object at 0x0000026DD34A5A20>,0.9575,0.9575,0.9578,0.9554,0.9602,0.9575
"MLPClassifier(max_iter=1000, solver='lbfgs')",0.9575,0.9576,0.9572,0.9694,0.9453,0.9576
