In [1]:
import pandas as pd
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_recall_fscore_support

In [2]:
# imports for onnx conversion and inference
import onnx
import ebm2onnx
import onnxruntime as rt
import numpy as np
import tempfile

# Binary classification

## Load dataset

In [3]:
df = pd.read_csv('titanic_train.csv')
df = df.dropna()
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
10,11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7,G6,S
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.55,C103,S


## Train model

In [10]:
feature_columns = ['Age', 'Fare', 'Pclass', 'Embarked']
label_column = "Survived"

y = df[[label_column]]
le = LabelEncoder()
y_enc = le.fit_transform(y)
x = df[feature_columns]
x_train, x_test, y_train, y_test = train_test_split(x, y_enc)
ebm = ExplainableBoostingClassifier(
    interactions=2,
    feature_types=['continuous', 'continuous', 'continuous','categorical']
)
ebm.fit(x_train, y_train)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



ExplainableBoostingClassifier(feature_names=['Age', 'Fare', 'Pclass',
                                             'Embarked', 'Age x Fare',
                                             'Age x Pclass'],
                              feature_types=['continuous', 'continuous',
                                             'continuous', 'categorical',
                                             'interaction', 'interaction'],
                              interactions=2)

In [11]:
# A lookup at the generated model
ebm_global = ebm.explain_global()
show(ebm_global)

## Convert model

In [12]:
onnx_model = ebm2onnx.to_onnx(
    model=ebm,
    dtype={
        'Age': 'double',
        'Fare': 'double',
        'Pclass': 'int',
    },
    name="ebm",
)

## Predict with EBM implementation

In [14]:
ebm_pred = ebm.predict(x_test)
pd.DataFrame(precision_recall_fscore_support(y_test, ebm_pred, average=None), index=['Precision', 'Recall', 'FScore', 'Support'])

Unnamed: 0,0,1
Precision,1.0,0.75
Recall,0.153846,1.0
FScore,0.266667,0.857143
Support,13.0,33.0


## Predict with ONNX Runtime

In [15]:
_, filename = tempfile.mkstemp()
onnx.save_model(onnx_model, filename)

sess = rt.InferenceSession(filename)
onnx_pred = sess.run(None, {
    'Age': x_test['Age'].values,
    'Fare': x_test['Fare'].values,
    'Pclass': x_test['Pclass'].values,
    'Embarked': x_test['Embarked'].values,
})

print("metrics of output {}:".format(sess.get_outputs()[0].name))
pd.DataFrame(precision_recall_fscore_support(y_test, onnx_pred[0], average=None), index=['Precision', 'Recall', 'FScore', 'Support'])

metrics of output predict_0:


Unnamed: 0,0,1
Precision,1.0,0.75
Recall,0.153846,1.0
FScore,0.266667,0.857143
Support,13.0,33.0
