In [1]:
import os
from pathlib import Path

from joblib import load
from sklearn.datasets._base import Bunch

In [2]:
# Directory holding the persisted models: "MODELS" under the current working directory.
MODEL_FOLDER = Path.cwd() / "MODELS"

In [3]:
# Names of every ".joblib" entry inside MODEL_FOLDER, in alphabetical order.
model_files_names = sorted(
    entry for entry in os.listdir(MODEL_FOLDER) if entry.endswith(".joblib")
)

NEAR_MISS_KEY = "under_sampling_near_miss"
SMOTE_KEY = "over_sampling_smote"

# Sampler key -> full paths of the model files whose name contains that key.
# Insertion order matters: a name is tested against NEAR_MISS_KEY first.
model_files_per_sampler = {NEAR_MISS_KEY: [], SMOTE_KEY: []}

for model_file_name in model_files_names:
    # First key found in the file name, or None when the file matches no sampler.
    matching_key = next(
        (key for key in model_files_per_sampler if key in model_file_name),
        None,
    )
    if matching_key is not None:
        model_files_per_sampler[matching_key].append(MODEL_FOLDER / model_file_name)
    # Files matching neither sampler key are ignored.

In [4]:
# For each sampler, print the mean and standard deviation of the fit time
# recorded for the best candidate of every persisted search object.
# NOTE(review): joblib.load unpickles arbitrary objects — only point
# MODEL_FOLDER at files coming from a trusted source.
for sampler, model_files in model_files_per_sampler.items():
    print(f"FIT Time (Avg +- Std) for {sampler}")
    for model_file in model_files:
        # Presumably a fitted scikit-learn search object (it must expose
        # cv_results_, best_index_ and best_estimator_) — confirm against
        # the notebook that produced the files.
        gs_model = load(model_file)
        cv_results = Bunch(**gs_model.cv_results_)
        best_index = gs_model.best_index_  # row of cv_results for the best candidate
        avg_time = cv_results.mean_fit_time[best_index]
        std_time = cv_results.std_fit_time[best_index]
        # The best estimator is indexed like a pipeline with a step named "model";
        # take that step's class name directly instead of parsing the class repr.
        model_name = type(gs_model.best_estimator_.named_steps["model"]).__name__
        print(f"\t{model_name}: {avg_time} +- {std_time}")

FIT Time (Avg += Std) for under_sampling_near_miss
	DecisionTreeClassifier: 0.3131646919250488 +- 0.02634543162487452
	RandomForestClassifier: 0.6344326686859131 +- 0.08920453610391588
FIT Time (Avg += Std) for over_sampling_smote
	DecisionTreeClassifier: 15.968973932266236 +- 1.349229143942705
	RandomForestClassifier: 102.11997676849366 +- 3.3096473230205175
