This notebook shows some examples of fitting different models to classification/regression datasets. We start by importing `imodels`, which provides the classifiers / regressors used below, along with a few supporting libraries.

In [44]:
# Reload edited modules automatically before each cell runs (IPython autoreload, mode 2).
# NOTE(review): re-running this cell in a live kernel prints an "already loaded"
# notice; `%reload_ext autoreload` would avoid it.
%load_ext autoreload
%autoreload 2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import plot_tree, DecisionTreeClassifier
from sklearn import metrics

# installable with: `pip install imodels`
import imodels
# Seed NumPy's global RNG so code relying on it is repeatable across runs.
np.random.seed(13)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


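Let's start by loading some data and splitting it into train and test sets. We then fit an `AutoInterpretableClassifier` on the training split and print the best configuration it found.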

In [45]:
# Load the cleaned "heart" dataset and report its dimensions and number of distinct labels.
X, y, feature_names = imodels.get_clean_dataset("heart")
print("shapes", X.shape, y.shape, "nunique", len(np.unique(y)))

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the automatic interpretable-model selector on the training split only.
m = imodels.AutoInterpretableClassifier()
m.fit(X_train, y_train)

# `m.est_` is the fitted search object; summarize the configuration it selected.
search = m.est_
print("best params", search.best_params_)
print("best score", search.best_score_)
print("best estimator", search.best_estimator_)
print("best estimator params", search.best_estimator_.get_params())

shapes (270, 15) (270,) nunique 2
best params {'est': LogisticRegression(C=0.1), 'est__C': 0.1, 'est__penalty': 'l2'}
best score 0.9041292106586225
best estimator Pipeline(steps=[('est', LogisticRegression(C=0.1))])
best estimator params {'memory': None, 'steps': [('est', LogisticRegression(C=0.1))], 'verbose': False, 'est': LogisticRegression(C=0.1), 'est__C': 0.1, 'est__class_weight': None, 'est__dual': False, 'est__fit_intercept': True, 'est__intercept_scaling': 1, 'est__l1_ratio': None, 'est__max_iter': 100, 'est__multi_class': 'auto', 'est__n_jobs': None, 'est__penalty': 'l2', 'est__random_state': None, 'est__solver': 'lbfgs', 'est__tol': 0.0001, 'est__verbose': 0, 'est__warm_start': False}


30 fits failed out of a total of 100.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "/home/chansingh/imodelsx/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/chansingh/imodelsx/.venv/lib/python3.11/site-packages/sklearn/base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/chansingh/imodelsx/.venv/lib/python3.11/site-packages/sklearn/pipeline.py", line 420, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/home/chansing

In [43]:
# Requires `m` to have been fitted by an earlier cell; re-run this cell after
# refitting so the table reflects the current search results.
first_cols = ["rank_test_score", "mean_test_score", "std_test_score"]
df = (
    pd.DataFrame(m.est_.cv_results_)
    # best-ranked configurations first
    .sort_values("rank_test_score")
    # move the summary columns to the front, keeping the rest in their original order
    .pipe(lambda t: t[first_cols + [c for c in t.columns if c not in first_cols]])
    .round(3)
    # remove std_ cols
    .pipe(lambda t: t[[c for c in t.columns if "std_" not in c]])
)
df

Unnamed: 0,rank_test_score,mean_test_score,mean_fit_time,mean_score_time,param_est,param_est__max_leaf_nodes,param_est__C,param_est__max_rules,param_est__n_estimators,param_est__n_boosting_rounds,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score
3,1,0.904,0.002,0.001,LogisticRegression(C=0.1),,0.1,,,,"{'est': LogisticRegression(C=0.1), 'est__C': 0.1}",0.871,0.88,0.971,0.917,0.882
8,2,0.902,0.126,0.011,TreeGAMClassifier(),,,,,10.0,"{'est': TreeGAMClassifier(), 'est__n_boosting_...",0.885,0.88,0.951,0.91,0.884
9,3,0.895,1.939,0.055,TreeGAMClassifier(),,,,,100.0,"{'est': TreeGAMClassifier(), 'est__n_boosting_...",0.868,0.894,0.942,0.905,0.866
4,4,0.895,0.003,0.001,LogisticRegression(C=0.1),,1.0,,,,"{'est': LogisticRegression(C=0.1), 'est__C': 1}",0.858,0.878,0.958,0.9,0.88
5,5,0.879,0.003,0.001,LogisticRegression(C=0.1),,10.0,,,,"{'est': LogisticRegression(C=0.1), 'est__C': 10}",0.852,0.873,0.938,0.855,0.875
6,6,0.877,0.371,0.015,RuleFitClassifier(max_rules=30),,,10.0,20.0,,"{'est': RuleFitClassifier(max_rules=30), 'est_...",0.825,0.857,0.962,0.88,0.86
12,7,0.845,0.013,0.001,FIGSClassifier(max_rules=12),,,5.0,,,"{'est': FIGSClassifier(max_rules=12), 'est__ma...",0.792,0.868,0.954,0.833,0.778
13,8,0.806,0.032,0.001,FIGSClassifier(max_rules=12),,,10.0,,,"{'est': FIGSClassifier(max_rules=12), 'est__ma...",0.76,0.716,0.95,0.85,0.755
1,9,0.798,0.001,0.001,DecisionTreeClassifier(),5.0,,,,,"{'est': DecisionTreeClassifier(), 'est__max_le...",0.708,0.821,0.919,0.8,0.74
7,10,0.769,3.917,0.087,RuleFitClassifier(max_rules=30),,,100.0,20.0,,"{'est': RuleFitClassifier(max_rules=30), 'est_...",0.879,0.667,0.909,0.73,0.662
