In [1]:
import pandas as pd
import numpy as np
import interpret

Read the data

In [2]:
X_train = pd.read_csv('data/X_train.csv')
y_train = pd.read_csv('data/y_train.csv').to_numpy().ravel()

X_test = pd.read_csv('data/X_test.csv')
y_test = pd.read_csv('data/y_test.csv').to_numpy().ravel()

In [3]:
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show

ebm = ExplainableBoostingClassifier(interactions=0)
ebm.fit(X_train, y_train)

Explain global

In [4]:
ebm_global = ebm.explain_global()
show(ebm_global)

EBM class use 'ebm.term_scores' to create the graph. Each term has a list of scores, each one representing a bin of the variable (or term). In the case of glucose, a low (0-120) value would mean a negative score, suggesting no-diabetes prediction; a high score (120-180) would likely mean that the person has diabetes.

The number of scores in the list is determined by the bins of the variable. And the number of bins depend on the distribution of the variable and the discretization algorithm used. For example, in the case of glucose would be the following bins:

In [5]:
print("Number of bins in glucose: ", len(ebm.bins_[1][0]))
ebm.bins_[1][0]

Number of bins in glucose:  127


array([ 22. ,  50. ,  56.5,  62. ,  67.5,  69.5,  72. ,  73.5,  74.5,
        75.5,  76.5,  77.5,  78.5,  79.5,  80.5,  81.5,  82.5,  83.5,
        84.5,  85.5,  86.5,  87.5,  88.5,  89.5,  90.5,  91.5,  92.5,
        93.5,  94.5,  95.5,  96.5,  97.5,  98.5,  99.5, 100.5, 101.5,
       102.5, 103.5, 104.5, 105.5, 106.5, 107.5, 108.5, 109.5, 110.5,
       111.5, 112.5, 113.5, 114.5, 115.5, 116.5, 117.5, 118.5, 119.5,
       120.5, 121.5, 122.5, 123.5, 124.5, 125.5, 126.5, 127.5, 128.5,
       129.5, 130.5, 131.5, 132.5, 133.5, 134.5, 135.5, 136.5, 137.5,
       138.5, 139.5, 140.5, 141.5, 142.5, 143.5, 144.5, 145.5, 146.5,
       147.5, 148.5, 149.5, 150.5, 151.5, 152.5, 153.5, 154.5, 155.5,
       156.5, 157.5, 158.5, 159.5, 160.5, 161.5, 162.5, 163.5, 164.5,
       165.5, 166.5, 167.5, 168.5, 169.5, 170.5, 172. , 173.5, 174.5,
       175.5, 176.5, 177.5, 178.5, 179.5, 180.5, 182. , 183.5, 185. ,
       186.5, 187.5, 188.5, 189.5, 190.5, 192.5, 194.5, 195.5, 196.5,
       197.5])

And the constant value associated to each bin:

In [6]:
ebm.term_scores_[1]

array([ 0.        , -0.63099025, -0.96340995, -1.07419818, -1.08696046,
       -1.09622566, -1.10274483, -1.10289811, -1.10208151, -1.09683749,
       -1.0895702 , -1.08023566, -1.06673348, -1.05791202, -1.05088763,
       -1.04238371, -1.03588866, -1.02504243, -1.01327663, -1.00053769,
       -0.99254221, -0.9866094 , -0.97700863, -0.96352772, -0.95094951,
       -0.87290783, -0.85902529, -0.82093015, -0.80191835, -0.79039522,
       -0.73511625, -0.74355387, -0.72844433, -0.72850191, -0.70666284,
       -0.58605972, -0.55097082, -0.48845591, -0.47596127, -0.43548435,
       -0.44000901, -0.42161198, -0.27850545, -0.26367686, -0.25517668,
       -0.24383747, -0.22785207, -0.2121048 , -0.20165836, -0.19118097,
       -0.17604766, -0.16344234, -0.15081739, -0.1416803 , -0.12985512,
       -0.120627  , -0.09882757, -0.08985672, -0.09918228,  0.11258764,
        0.11835203,  0.09493013,  0.10516306,  0.12794073,  0.14430878,
        0.34647529,  0.36365193,  0.37696104,  0.39750485,  0.40

So this means that we can modify the value on each bin to visualize, for example, the weights of another model:

#### 1. Constant score

In [7]:
ebm.term_scores_[1] = np.ones(ebm.term_scores_[1].shape)

In [8]:
ebm.term_scores_[1]

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [9]:
ebm_global = ebm.explain_global()
show(ebm_global)

#### 2. Random scores

In [10]:
ebm.term_scores_[1] = np.random.rand(ebm.term_scores_[1].shape[0])

In [11]:
ebm.term_scores_[1]

array([0.18342473, 0.76233241, 0.56980602, 0.00765227, 0.30560962,
       0.70176711, 0.12378445, 0.14867272, 0.93948666, 0.78043207,
       0.96846679, 0.42258515, 0.28923432, 0.56345206, 0.19885715,
       0.97923451, 0.37668653, 0.0774525 , 0.31634093, 0.08541521,
       0.34701232, 0.43113668, 0.91336471, 0.98200716, 0.24825578,
       0.43148267, 0.38367878, 0.62129085, 0.53179358, 0.50298732,
       0.44591625, 0.37331837, 0.08289386, 0.56736462, 0.5007862 ,
       0.81070445, 0.55678201, 0.17616516, 0.73242227, 0.3985093 ,
       0.89245496, 0.12713812, 0.10716994, 0.63086086, 0.54260012,
       0.7859226 , 0.72886602, 0.96955017, 0.54435852, 0.20174199,
       0.57108359, 0.44216475, 0.53838268, 0.03577948, 0.50438323,
       0.13601878, 0.49202572, 0.82708388, 0.44039912, 0.7482375 ,
       0.58712075, 0.83940528, 0.12863823, 0.25859702, 0.58253875,
       0.32253435, 0.86551935, 0.74541791, 0.13262893, 0.69747836,
       0.29591096, 0.32929129, 0.88018727, 0.68122358, 0.97746

In [12]:
ebm_global = ebm.explain_global()
show(ebm_global)

So in this way we can use the same visualization for models like Naive Bayes.

We could also modify the histogram values using 'ebm.histogram_edges_' and 'ebm.histogram_weights_'.

In [13]:
ebm.histogram_edges_[1]

array([  0. ,  16.5,  33. ,  49.5,  66. ,  82.5,  99. , 115.5, 132. ,
       148.5, 165. , 181.5, 198. ])

In [14]:
ebm.histogram_weights_[1]

array([  0.,   4.,   0.,   1.,   3.,  36., 103., 145., 125.,  83.,  49.,
        38.,  27.,   0.])