In [90]:
import dalex as dx
from sklearn.tree import DecisionTreeClassifier
from interpret.glassbox import ExplainableBoostingClassifier
from interpret.glassbox import DecisionListClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB


clfs = {
    "CART": DecisionTreeClassifier(random_state=1234),
    "EBM": ExplainableBoostingClassifier(),
    "LR_l2": LogisticRegression(penalty="l2",random_state=1234),
    "GNB": GaussianNB(),
    "LR": LogisticRegression(penalty="none", random_state=1234),
    "DL": DecisionListClassifier(random_state=1234) 
}

dataset = 'heart'

In [91]:
import worstcase_helper
import importlib
importlib.reload(worstcase_helper)

preprocess, X, y = worstcase_helper.load_dataset_with_preprocess("CART", dataset)

In [92]:
preprocess

ColumnTransformer(transformers=[('onehotencoder',
                                 OneHotEncoder(handle_unknown='ignore',
                                               sparse=False),
                                 ['Sex', 'cp', 'Place']),
                                ('standardscaler', StandardScaler(),
                                 ['Age', 'trestbps', 'chol', 'fbs', 'restecg',
                                  'thalach', 'exang', 'oldpeak'])])

## Make Pipelines

In [93]:
from sklearn.pipeline import make_pipeline
def make_pipeline_clf(clf_name):
    clf = make_pipeline(
        preprocess,
        clfs[clf_name]
    )
    return clf

In [94]:
clf_cart = make_pipeline_clf("CART")
clf_cart.fit(X, y)

clf_ebm = make_pipeline_clf("EBM")
clf_ebm.fit(X, y)

clf_lr_l2 = make_pipeline_clf("LR_l2")
clf_lr_l2.fit(X, y)

clf_gnb = make_pipeline_clf("GNB")
clf_gnb.fit(X, y)

clf_lr = make_pipeline_clf("LR")
clf_lr.fit(X, y)

clf_dl = make_pipeline_clf("DL")
clf_dl.fit(X, y)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('onehotencoder',
                                                  OneHotEncoder(handle_unknown='ignore',
                                                                sparse=False),
                                                  ['Sex', 'cp', 'Place']),
                                                 ('standardscaler',
                                                  StandardScaler(),
                                                  ['Age', 'trestbps', 'chol',
                                                   'fbs', 'restecg', 'thalach',
                                                   'exang', 'oldpeak'])])),
                ('decisionlistclassifier',
                 <interpret.glassbox.skoperules.DecisionListClassifier object at 0x7f16dc05b790>)])

## Add Explainers

In [95]:
clf_cart_exp = dx.Explainer(clf_cart, X, y, label="CART")
clf_ebm_exp = dx.Explainer(clf_ebm, X, y, label="EBM")
clf_lr_l2_exp = dx.Explainer(clf_lr_l2, X, y, label="LR_l2")
clf_gnb_exp = dx.Explainer(clf_gnb, X, y, label="GNB")
clf_lr_exp = dx.Explainer(clf_lr, X, y, label="LR")
clf_dl_exp = dx.Explainer(clf_dl, X, y, label="DL")

Preparation of a new explainer is initiated

  -> data              : 457 rows 11 cols
  -> target variable   : Parameter 'y' was a pandas.DataFrame. Converted to a numpy.ndarray.
  -> target variable   : 457 values
  -> model_class       : sklearn.tree._classes.DecisionTreeClassifier (default)
  -> label             : CART
  -> predict function  : <function yhat_proba_default at 0x7f16e2023dc0> will be used (default)
  -> predict function  : Accepts only pandas.DataFrame, numpy.ndarray causes problems.
  -> predicted values  : min = 0.0, mean = 0.46, max = 1.0
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = 0.0, mean = 0.0, max = 0.0
  -> model_info        : package sklearn

A new explainer has been created!
Preparation of a new explainer is initiated

  -> data              : 457 rows 11 cols
  -> target variable   : Parameter 'y' was a pandas.DataFrame. Converted to a numpy

## Model partial dependence profiles

In [96]:
cat_feat = ['Sex', 'cp', 'Place']
cont_feat = ['Age', 'trestbps', 'chol',
             'fbs', 'restecg', 'thalach',
             'exang', 'oldpeak']

In [97]:
#categorical pd profiles
pd_cart_cat = clf_cart_exp.model_profile(variable_type="categorical", variables=["Sex"])

# pd_ebm_cat = clf_ebm_exp.model_profile(variables= cat_feat,
#                                          variable_type="categorical")

# pd_lr_l2_cat = clf_lr_l2_exp.model_profile(variables= cat_feat,
#                                          variable_type="categorical")

# pd_gnb_cat = clf_gnb_exp.model_profile(variables= cat_feat,
#                                          variable_type="categorical")

# pd_lr_cat = clf_lr_exp.model_profile(variables= cat_feat,
#                                          variable_type="categorical")

# pd_dl_cat = clf_dl_exp.model_profile(variables= cat_feat,
#                                          variable_type="categorical")


Calculating ceteris paribus: 100%|██████████| 1/1 [00:00<00:00, 38.02it/s]


In [98]:
#continous pd profiles
pd_cart = clf_cart_exp.model_profile(variables= cont_feat)

pd_ebm = clf_ebm_exp.model_profile(variables= cont_feat)

pd_lr_l2 = clf_lr_l2_exp.model_profile(variables= cont_feat)

pd_gnb = clf_gnb_exp.model_profile(variables= cont_feat)

pd_lr = clf_lr_exp.model_profile(variables= cont_feat)

pd_dl = clf_dl_exp.model_profile(variables= cont_feat)

Calculating ceteris paribus: 100%|██████████| 8/8 [00:00<00:00, 11.98it/s]
Calculating ceteris paribus: 100%|██████████| 8/8 [00:07<00:00,  1.05it/s]
Calculating ceteris paribus: 100%|██████████| 8/8 [00:00<00:00, 11.58it/s]
Calculating ceteris paribus: 100%|██████████| 8/8 [00:00<00:00, 11.97it/s]
Calculating ceteris paribus: 100%|██████████| 8/8 [00:01<00:00,  7.63it/s]
Calculating ceteris paribus: 100%|██████████| 8/8 [00:01<00:00,  4.01it/s]


### Plot Aggregated Profiles for Categorical

In [99]:
pd_cart_cat.plot(variables=["Sex"])

In [100]:
pd_cart_cat.plot(variable_type="categorical",variables=['Sex'])

TypeError: plot() got an unexpected keyword argument 'variable_type'

### Plot Aggregated Profiles for Continous

In [None]:
pd_cart