In [1]:
!pip install shap

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting shap
  Downloading shap-0.41.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (572 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m572.4/572.4 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
Collecting slicer==0.0.7
  Downloading slicer-0.0.7-py3-none-any.whl (14 kB)
Installing collected packages: slicer, shap
Successfully installed shap-0.41.0 slicer-0.0.7


In [3]:
!pip install category-encoders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting category-encoders
  Downloading category_encoders-2.6.0-py2.py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.2/81.2 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: category-encoders
Successfully installed category-encoders-2.6.0


In [4]:
import numpy as np
import pandas as pd
from category_encoders import OrdinalEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import shap

In [6]:
!pip install shapash

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting shapash
  Downloading shapash-2.3.2-py2.py3-none-any.whl (652 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m652.6/652.6 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
Collecting dash-table>=5.0.0
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting dash-html-components>=2.0.0
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-renderer==1.8.3
  Downloading dash_renderer-1.8.3.tar.gz (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m45.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dash-core-components>=2.0.0
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-daq>=0.5.0
  Downloading dash_daq-0.5.0.tar.gz (642 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m642.7/642.7

In [7]:
from shapash.data.data_loader import data_loading

In [8]:
# Load shapash's bundled Titanic sample: the passenger table plus the dict that is
# later handed to SmartExplainer as features_dict. The 'Name' column is not used
# as a feature, so it is removed right away.
titan_df, titan_dict = data_loading('titanic')
titan_df = titan_df.drop(columns='Name')

In [9]:
# Preview the first rows to check the loaded columns and their raw (label) values.
titan_df.head()

Unnamed: 0_level_0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0,Third class,male,22.0,1,0,7.25,Southampton,Mr
2,1,First class,female,38.0,1,0,71.28,Cherbourg,Mrs
3,1,Third class,female,26.0,0,0,7.92,Southampton,Miss
4,1,First class,female,35.0,1,0,53.1,Southampton,Mrs
5,0,Third class,male,35.0,0,0,8.05,Southampton,Mr


In [10]:
# Features are every column except the target; the target is the 'Survived' column.
X = titan_df.drop(columns=['Survived'])
y = titan_df['Survived']

In [11]:
# Columns that hold category labels and must be ordinal-encoded before modelling.
varcat = [
    'Pclass',
    'Sex',
    'Embarked',
    'Title',
]

In [12]:
# Re-derive the raw feature frame from titan_df instead of fitting on `X`:
# this cell overwrites `X` with the encoded frame, so fitting on `X` would
# re-fit the encoder on already-encoded integers whenever the cell is re-run,
# losing the label <-> code mapping that is later passed to SmartExplainer.
X_raw = titan_df.drop('Survived', axis=1)

# NOTE(review): confirm 'ignore' is an accepted handle_unknown value for the
# installed category_encoders version (the documented options appear to be
# 'error', 'return_nan' and 'value').
categ_encoding = OrdinalEncoder(
    cols=varcat,
    handle_unknown='ignore',
    return_df=True,
).fit(X_raw)

# `X` keeps its role for the following cells: the encoded feature frame.
X = categ_encoding.transform(X_raw)

In [13]:
# Hold out 25% of the rows; the fixed seed keeps the split identical across runs.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.75, random_state=1)

# Seed the forest too: its bootstrap and feature sub-sampling are random, so
# without random_state each run trains a different model and every prediction
# and contribution shown further down changes between runs.
rf = RandomForestClassifier(n_estimators=100, min_samples_leaf=3, random_state=1)
rf.fit(Xtrain, ytrain)

In [14]:
# Predicted class for each held-out row, stored in a one-column frame ('pred')
# that shares Xtest's index so rows can be matched by label.
ypred = pd.DataFrame({'pred': rf.predict(Xtest)}, index=Xtest.index)

# Compute Shapley Contributions with Shap

In [None]:
# Explain the forest's class probabilities with SHAP's kernel explainer.
# Xtest is passed twice: once as the explainer's data, once as the rows to explain.
# NOTE(review): the kernel explainer is model-agnostic and can be slow with the
# whole test set as its data; for a tree ensemble shap.TreeExplainer, or a
# sub-sampled data set, may be worth considering. Confirm before changing,
# since the contribution values would differ.
explainer = shap.KernelExplainer(model=rf.predict_proba, data=Xtest)
shap_contrib = explainer.shap_values(X=Xtest)

# Use Shapash With Shapley Contributions

In [17]:
from shapash import SmartExplainer

In [20]:
# Wrap the fitted forest for shapash, passing the fitted ordinal encoder as
# `preprocessing` and the dataset's dict as `features_dict`.
xpl = SmartExplainer(model=rf, preprocessing=categ_encoding, features_dict=titan_dict)

In [24]:
# Feed shapash the held-out rows together with the externally computed contributions.
xpl.compile(
    x=Xtest,
    contributions=shap_contrib,  # raw return value of explainer.shap_values(Xtest)
    y_pred=ypred,
    y_target=ytest,  # Optional: allows to display True Values vs Predicted Values
)

In [27]:
# Local explanation for a single row of the compiled data.
# NOTE(review): 85 presumably is the row's index label (it appears as an index
# value in the summary table below) rather than a position; confirm.
xpl.plot.local_plot(index=85)

In [23]:
# Export a per-row summary: the prediction, its probability, and up to three
# positive contributions (feature / value / contribution triplets).
summary_df = xpl.to_pandas(
    max_contrib=3,
    positive=True,
    proba=True,
)
summary_df.head()

Unnamed: 0,pred,proba,feature_1,value_1,contribution_1,feature_2,value_2,contribution_2,feature_3,value_3,contribution_3
863,1,0.809327,Sex,female,0.215846,Title of passenger,Mrs,0.136616,Ticket class,First class,0.135005
224,0,0.981119,Title of passenger,Mr,0.100721,Sex,male,0.07869,Passenger fare,7.9,0.067696
85,1,0.864417,Sex,female,0.19609,Title of passenger,Miss,0.171313,Ticket class,Second class,0.094955
681,1,0.695143,Title of passenger,Miss,0.155351,Port of embarkation,Queenstown,0.142937,Sex,female,0.140913
536,1,0.928014,Title of passenger,Miss,0.167905,Ticket class,Second class,0.13505,Sex,female,0.107437


In [30]:
# Show shapash's features-importance plot for the compiled explainer.
xpl.plot.features_importance()

In [31]:
# Show shapash's contribution plot for the 'Pclass' column.
xpl.plot.contribution_plot(col='Pclass')

In [32]:
# Launch the shapash web app for the compiled explainer and keep the handle so
# it can be stopped later. The title names the backend actually used here: the
# contributions come from shap.KernelExplainer, not from ACV.
app = xpl.run_app(title_story='Shap contributions')

Dash is running on http://0.0.0.0:8050/



INFO:dash.dash:Dash is running on http://0.0.0.0:8050/



 * Serving Flask app 'shapash.webapp.smart_app'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8050
 * Running on http://172.28.0.12:8050
INFO:werkzeug:[33mPress CTRL+C to quit[0m


In [33]:
# Stop the web app started by xpl.run_app above.
app.kill()