# EBM benchmark with skorecard

This benchmark was adjusted from https://raw.githubusercontent.com/interpretml/interpret/master/benchmarks/EBM%20Classification%20Comparison.ipynb.



In [8]:
# To run benchmark script, you will need to install XGBoost 
# (pip install XGBoost)

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
import warnings
warnings.filterwarnings("ignore")

def load_breast_data():
    breast = load_breast_cancer()
    feature_names = list(breast.feature_names)
    X, y = pd.DataFrame(breast.data, columns=feature_names), breast.target
    dataset = {
        'problem': 'classification',
        'full': {
            'X': X,
            'y': y,
        },
    }
    return dataset


def load_adult_data():
    df = pd.read_csv(
        "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
        header=None)
    df.columns = [
        "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
        "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
        "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income"
    ]
    train_cols = df.columns[0:-1]
    label = df.columns[-1]
    X_df = df[train_cols]
    y_df = df[label]

    dataset = {
        'problem': 'classification',
        'full': {
            'X': X_df,
            'y': y_df,
        },
    }

    return dataset

def load_heart_data():
    # https://www.kaggle.com/ronitf/heart-disease-uci
    df = pd.read_csv(r'heart.csv')
    train_cols = df.columns[0:-1]
    label = df.columns[-1]
    X_df = df[train_cols]
    y_df = df[label]
    dataset = {
        'problem': 'classification',
        'full': {
            'X': X_df,
            'y': y_df,
        },
    }
    
    return dataset


def load_credit_data():
    # https://www.kaggle.com/mlg-ulb/creditcardfraud
    df = pd.read_csv(r'creditcard.csv')
    train_cols = df.columns[0:-1]
    label = df.columns[-1]
    X_df = df[train_cols]
    y_df = df[label]
    dataset = {
        'problem': 'classification',
        'full': {
            'X': X_df,
            'y': y_df,
        },
    }
    
    return dataset


def load_telco_churn_data():
    # https://www.kaggle.com/blastchar/telco-customer-churn
    df = pd.read_csv(r'WA_Fn-UseC_-Telco-Customer-Churn.csv')
    train_cols = df.columns[1:-1] # First column is an ID
    label = df.columns[-1]
    X_df = df[train_cols]
    y_df = df[label] # 'Yes, No'
    dataset = {
        'problem': 'classification',
        'full': {
            'X': X_df,
            'y': y_df,
        },
    }
    
    return dataset

In [9]:
from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedShuffleSplit, cross_validate
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OrdinalEncoder

from xgboost import XGBClassifier

from sklearn.linear_model import LogisticRegression

from interpret.glassbox import ExplainableBoostingClassifier

from skorecard import Skorecard

from optbinning import BinningProcess
from optbinning import Scorecard


def format_n(x):
    return "{0:.3f}".format(x)

def process_model(clf, name, X, y, n_splits=3):
    # Evaluate model
    ss = StratifiedShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=1337)
    scores = cross_validate(
        clf, X, y, scoring='roc_auc', cv=ss,
        n_jobs=-1, return_estimator=True
    )

    record = dict()
    record['model_name'] = name
    record['fit_time_mean'] = format_n(np.mean(scores['fit_time']))
    record['fit_time_std'] = format_n(np.std(scores['fit_time']))
    record['test_score_mean'] = format_n(np.mean(scores['test_score']))
    record['test_score_std'] = format_n(np.std(scores['test_score']))

    return record



def benchmark_models(dataset_name, X, y, ct=None, n_splits=3, random_state=1337):
    if ct is None:
        is_cat = np.array([dt.kind == 'O' for dt in X.dtypes])
        cat_cols = X.columns.values[is_cat]
        num_cols = X.columns.values[~is_cat]

        cat_ohe_step = ('ohe', OneHotEncoder(sparse=False,
                                             handle_unknown='ignore'))

        cat_pipe = Pipeline([cat_ohe_step])
        num_pipe = Pipeline([('identity', FunctionTransformer())])
        transformers = [
            ('cat', cat_pipe, cat_cols),
            ('num', num_pipe, num_cols)
        ]
        ct = ColumnTransformer(transformers=transformers)

        cat_ord_step = ('ord_enc', OrdinalEncoder())
        cat_pipe = Pipeline([cat_ord_step])
        transformers = [
            ('cat', cat_pipe, cat_cols),
            ('num', num_pipe, num_cols)
        ]
        ot = ColumnTransformer(transformers=transformers)

    records = []

    summary_record = {}
    summary_record['dataset_name'] = dataset_name
    print()
    print('-' * 78)
    print(dataset_name)
    print('-' * 78)
    print(summary_record)
    print()

    pipe = Pipeline([
        ('ct', ct),
        ('std', StandardScaler()),
        ('lr', LogisticRegression(random_state=random_state)),
    ])
    record = process_model(pipe, 'lr_ohe', X, y, n_splits=n_splits)
    print(record)
    record.update(summary_record)
    records.append(record)

    pipe = Pipeline([
        ('ot', ot),
        ('std', StandardScaler()),
        ('lr', LogisticRegression(max_iter=7000, random_state=random_state)),
    ])
    record = process_model(pipe, 'lr_ordinal', X, y, n_splits=n_splits)
    print(record)
    record.update(summary_record)
    records.append(record)

    # Scorecard
    num_features = list(X._get_numeric_data().columns)
    cat_features = [f for f in X.columns if f not in num_features]
    scorecard = Scorecard(
        binning_process=BinningProcess(
            variable_names=num_features+cat_features,
            categorical_variables=cat_features,
            selection_criteria= {"iv": {"min": 0.02, "max": 1},"quality_score": {"min": 0.01}}),
        estimator=LogisticRegression(solver="lbfgs"),
        scaling_method="min_max",
        scaling_method_params={"min": 300, "max": 850}
    )
    record = process_model(scorecard, 'optbinning.scorecard', X, y, n_splits=n_splits)
    print(record)
    record.update(summary_record)
    records.append(record)

    # Skorecard
    skorecard = Skorecard()
    record = process_model(skorecard, 'skorecard', X, y, n_splits=n_splits)
    print(record)
    record.update(summary_record)
    records.append(record)

    pipe = Pipeline([
        ('ct', ct),
        # n_estimators updated from 10 to 100 due to sci-kit defaults changing in future versions
        ('rf-100', RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=random_state)),
    ])
    record = process_model(pipe, 'rf-100', X, y, n_splits=n_splits)
    print(record)
    record.update(summary_record)
    records.append(record)
    
    pipe = Pipeline([
        ('ct', ct),
        ('xgb', XGBClassifier(random_state=random_state, eval_metric='logloss')),
    ])
    record = process_model(pipe, 'xgb', X, y, n_splits=n_splits)
    print(record)
    record.update(summary_record)
    records.append(record)

    # No pipeline needed due to EBM handling string datatypes
    ebm_inter = ExplainableBoostingClassifier(n_jobs=-1, random_state=random_state)
    record = process_model(ebm_inter, 'ebm', X, y, n_splits=n_splits)
    print(record)
    record.update(summary_record)
    records.append(record)

    return records

In [10]:
results = []
n_splits = 3

In [11]:
from skorecard.datasets import load_uci_credit_card

X,y = load_uci_credit_card(return_X_y=True)
result = benchmark_models('UCI-creditcard', X, y, n_splits=n_splits)
results.append(result)


------------------------------------------------------------------------------
UCI-creditcard
------------------------------------------------------------------------------
{'dataset_name': 'UCI-creditcard'}

{'model_name': 'lr_ohe', 'fit_time_mean': '0.009', 'fit_time_std': '0.001', 'test_score_mean': '0.621', 'test_score_std': '0.023'}
{'model_name': 'lr_ordinal', 'fit_time_mean': '0.008', 'fit_time_std': '0.000', 'test_score_mean': '0.621', 'test_score_std': '0.023'}


Traceback (most recent call last):
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 334, in _score
    y_pred = method_caller(clf, "decision_function", X)
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 53, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
AttributeError: 'Scorecard' object has no attribute 'decision_function'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 199, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true,
  File "/Users/ue

{'model_name': 'optbinning.scorecard', 'fit_time_mean': '0.265', 'fit_time_std': '0.007', 'test_score_mean': 'nan', 'test_score_std': 'nan'}
{'model_name': 'skorecard', 'fit_time_mean': '1.042', 'fit_time_std': '0.235', 'test_score_mean': '0.627', 'test_score_std': '0.018'}
{'model_name': 'rf-100', 'fit_time_mean': '0.344', 'fit_time_std': '0.005', 'test_score_mean': '0.588', 'test_score_std': '0.013'}




{'model_name': 'xgb', 'fit_time_mean': '0.588', 'fit_time_std': '0.002', 'test_score_mean': '0.596', 'test_score_std': '0.005'}


  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,


{'model_name': 'ebm', 'fit_time_mean': '1.141', 'fit_time_std': '0.094', 'test_score_mean': '0.644', 'test_score_std': '0.012'}


In [12]:
dataset = load_breast_data()
result = benchmark_models('breast-cancer', dataset['full']['X'], dataset['full']['y'], n_splits=n_splits)
results.append(result)


------------------------------------------------------------------------------
breast-cancer
------------------------------------------------------------------------------
{'dataset_name': 'breast-cancer'}

{'model_name': 'lr_ohe', 'fit_time_mean': '0.012', 'fit_time_std': '0.001', 'test_score_mean': '0.994', 'test_score_std': '0.006'}
{'model_name': 'lr_ordinal', 'fit_time_mean': '0.011', 'fit_time_std': '0.000', 'test_score_mean': '0.994', 'test_score_std': '0.006'}


Traceback (most recent call last):
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 334, in _score
    y_pred = method_caller(clf, "decision_function", X)
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 53, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
AttributeError: 'Scorecard' object has no attribute 'decision_function'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 199, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true,
  File "/Users/ue

{'model_name': 'optbinning.scorecard', 'fit_time_mean': '2.184', 'fit_time_std': '0.027', 'test_score_mean': 'nan', 'test_score_std': 'nan'}
{'model_name': 'skorecard', 'fit_time_mean': '9.318', 'fit_time_std': '0.915', 'test_score_mean': '0.996', 'test_score_std': '0.004'}
{'model_name': 'rf-100', 'fit_time_mean': '0.159', 'fit_time_std': '0.001', 'test_score_mean': '0.992', 'test_score_std': '0.009'}




{'model_name': 'xgb', 'fit_time_mean': '0.185', 'fit_time_std': '0.026', 'test_score_mean': '0.992', 'test_score_std': '0.010'}


  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,


{'model_name': 'ebm', 'fit_time_mean': '3.863', 'fit_time_std': '1.145', 'test_score_mean': '0.995', 'test_score_std': '0.006'}


In [13]:
dataset = load_adult_data()
result = benchmark_models('adult', dataset['full']['X'], dataset['full']['y'], n_splits=n_splits)
results.append(result)
# 0.888


------------------------------------------------------------------------------
adult
------------------------------------------------------------------------------
{'dataset_name': 'adult'}

{'model_name': 'lr_ohe', 'fit_time_mean': '0.909', 'fit_time_std': '0.030', 'test_score_mean': '0.906', 'test_score_std': '0.003'}
{'model_name': 'lr_ordinal', 'fit_time_mean': '0.136', 'fit_time_std': '0.000', 'test_score_mean': '0.855', 'test_score_std': '0.002'}


Traceback (most recent call last):
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/optbinning/scorecard/scorecard.py", line 284, in fit
    return self._fit(X, y, metric_special, metric_missing, show_digits,
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/optbinning/scorecard/scorecard.py", line 503, in _fit
    X_t = self.binning_process_.fit_transform(
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/optbinning/binning/binning_process.py", line 676, in fit_transform
    return self.fit(X, y, check_input).transform(X, metric, metric_special,
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/optbinning/binning/binning_process.py", line 589, in fit
    return self

{'model_name': 'optbinning.scorecard', 'fit_time_mean': '0.184', 'fit_time_std': '0.003', 'test_score_mean': 'nan', 'test_score_std': 'nan'}
{'model_name': 'skorecard', 'fit_time_mean': '2.831', 'fit_time_std': '0.026', 'test_score_mean': '0.888', 'test_score_std': '0.004'}
{'model_name': 'rf-100', 'fit_time_mean': '2.224', 'fit_time_std': '0.013', 'test_score_mean': '0.903', 'test_score_std': '0.002'}




{'model_name': 'xgb', 'fit_time_mean': '7.538', 'fit_time_std': '0.089', 'test_score_mean': '0.927', 'test_score_std': '0.001'}


  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,


{'model_name': 'ebm', 'fit_time_mean': '49.284', 'fit_time_std': '2.831', 'test_score_mean': '0.928', 'test_score_std': '0.002'}


In [14]:
dataset = load_telco_churn_data()
result = benchmark_models('telco_churn', dataset['full']['X'], dataset['full']['y'], n_splits=n_splits)
results.append(result)


------------------------------------------------------------------------------
telco_churn
------------------------------------------------------------------------------
{'dataset_name': 'telco_churn'}



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

{'model_name': 'lr_ohe', 'fit_time_mean': '9.855', 'fit_time_std': '0.033', 'test_score_mean': '0.809', 'test_score_std': '0.014'}
{'model_name': 'lr_ordinal', 'fit_time_mean': '0.078', 'fit_time_std': '0.001', 'test_score_mean': 'nan', 'test_score_std': 'nan'}


Traceback (most recent call last):
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 199, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true,
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 334, in _score
    y_pred = method_caller(clf, "decision_function", X)
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 53, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/utils/metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **k

{'model_name': 'optbinning.scorecard', 'fit_time_mean': '0.028', 'fit_time_std': '0.001', 'test_score_mean': 'nan', 'test_score_std': 'nan'}
{'model_name': 'skorecard', 'fit_time_mean': '3.390', 'fit_time_std': '0.059', 'test_score_mean': '0.764', 'test_score_std': '0.014'}
{'model_name': 'rf-100', 'fit_time_mean': '7.481', 'fit_time_std': '0.009', 'test_score_mean': '0.824', 'test_score_std': '0.002'}




{'model_name': 'xgb', 'fit_time_mean': '66.219', 'fit_time_std': '0.255', 'test_score_mean': '0.825', 'test_score_std': '0.003'}


  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,


{'model_name': 'ebm', 'fit_time_mean': '53.015', 'fit_time_std': '7.524', 'test_score_mean': '0.852', 'test_score_std': '0.004'}


In [15]:
dataset = load_heart_data()
result = benchmark_models('heart', dataset['full']['X'], dataset['full']['y'], n_splits=n_splits)
results.append(result)


------------------------------------------------------------------------------
heart
------------------------------------------------------------------------------
{'dataset_name': 'heart'}

{'model_name': 'lr_ohe', 'fit_time_mean': '0.007', 'fit_time_std': '0.000', 'test_score_mean': '0.895', 'test_score_std': '0.030'}
{'model_name': 'lr_ordinal', 'fit_time_mean': '0.007', 'fit_time_std': '0.000', 'test_score_mean': '0.895', 'test_score_std': '0.030'}


Traceback (most recent call last):
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 334, in _score
    y_pred = method_caller(clf, "decision_function", X)
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 53, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
AttributeError: 'Scorecard' object has no attribute 'decision_function'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/ue86yw/Developer/miniconda3/envs/py38/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 199, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true,
  File "/Users/ue

{'model_name': 'optbinning.scorecard', 'fit_time_mean': '0.561', 'fit_time_std': '0.045', 'test_score_mean': 'nan', 'test_score_std': 'nan'}
{'model_name': 'skorecard', 'fit_time_mean': '1.955', 'fit_time_std': '0.003', 'test_score_mean': '0.911', 'test_score_std': '0.015'}
{'model_name': 'rf-100', 'fit_time_mean': '0.177', 'fit_time_std': '0.002', 'test_score_mean': '0.890', 'test_score_std': '0.008'}




{'model_name': 'xgb', 'fit_time_mean': '0.239', 'fit_time_std': '0.044', 'test_score_mean': '0.851', 'test_score_std': '0.018'}


  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
  n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,


{'model_name': 'ebm', 'fit_time_mean': '1.147', 'fit_time_std': '0.327', 'test_score_mean': '0.906', 'test_score_std': '0.011'}


In [16]:
records = [item for result in results for item in result]
record_df = pd.DataFrame.from_records(records)[['dataset_name', 'model_name', 'test_score_mean', 'test_score_std']]

In [21]:
record_df = record_df.sort_values(['dataset_name','test_score_mean'], ascending=False)

In [26]:
print(record_df[record_df['model_name'].isin(['lr_ohe','lr_ordinal','rf-100','skorecard','xgb'])].drop(['test_score_std'],axis=1).to_markdown(tablefmt="github",showindex=False))

| dataset_name   | model_name   |   test_score_mean |
|----------------|--------------|-------------------|
| telco_churn    | lr_ordinal   |           nan     |
| telco_churn    | xgb          |             0.825 |
| telco_churn    | rf-100       |             0.824 |
| telco_churn    | lr_ohe       |             0.809 |
| telco_churn    | skorecard    |             0.764 |
| heart          | skorecard    |             0.911 |
| heart          | lr_ohe       |             0.895 |
| heart          | lr_ordinal   |             0.895 |
| heart          | rf-100       |             0.89  |
| heart          | xgb          |             0.851 |
| breast-cancer  | skorecard    |             0.996 |
| breast-cancer  | lr_ohe       |             0.994 |
| breast-cancer  | lr_ordinal   |             0.994 |
| breast-cancer  | rf-100       |             0.992 |
| breast-cancer  | xgb          |             0.992 |
| adult          | xgb          |             0.927 |
| adult          | lr_ohe   