In [1]:
#practice dashboard explainer
#practice with our own data
import pandas as pd
import datetime as dt
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

from explainerdashboard import ClassifierExplainer, ExplainerDashboard


The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`

The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`


In [2]:
# Read the encoded dogs table from the CSV kept in the Resources folder
file_path = "Resources/dogs_encoded.csv"
dogs_enc_df = pd.read_csv(filepath_or_buffer=file_path)

# Bare last expression so the notebook renders the frame as the cell output
dogs_enc_df

Unnamed: 0.1,Unnamed: 0,AnimalName,AnimalBirthMonth,BreedName,ZipCode,CommunityDistrict,CensusTract2010,NTA,CityCouncilDistrict,CongressionalDistrict,StateSenatorialDistrict,LicenseIssuedDate,LicenseExpiredDate,gender_enc,Borough_enc
0,0,16555,1,28,11236,318.0,1014.0,25,46.0,8.0,19.0,1.419811e+09,1.454112e+09,0,1
1,1,15530,1,54,11210,314.0,756.0,21,45.0,9.0,17.0,1.420589e+09,1.454112e+09,0,1
2,2,10845,1,174,10464,210.0,516.0,58,13.0,14.0,34.0,1.421453e+09,1.454371e+09,0,0
3,3,14177,1,226,11221,304.0,419.0,38,34.0,7.0,18.0,1.425168e+09,1.459123e+09,1,1
4,4,15626,1,174,10451,201.0,65.0,70,17.0,15.0,32.0,1.425859e+09,1.457482e+09,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114285,117619,15568,1,52,10022,106.0,98.0,99,4.0,12.0,28.0,1.483142e+09,1.546214e+09,0,2
114286,117620,15778,1,89,11219,311.0,250.0,8,38.0,10.0,22.0,1.483142e+09,1.537229e+09,1,1
114287,117621,19190,1,128,10312,503.0,17008.0,190,51.0,11.0,24.0,1.483142e+09,1.514678e+09,1,4
114288,117622,19262,1,297,10455,201.0,79.0,70,8.0,15.0,29.0,1.483142e+09,1.496707e+09,1,0


In [3]:
# Feature matrix and target for the classifier.
# NOTE(review): an earlier note here said the location-related columns had been
# removed, but ZipCode and Borough_enc are still part of the selection below --
# confirm which feature set is actually intended.
feature_columns = [
    "AnimalName",
    "BreedName",
    "Borough_enc",
    "ZipCode",
    "AnimalBirthMonth",
    "LicenseIssuedDate",
    "LicenseExpiredDate",
]
# Selecting with a list of column names yields a new frame, so dogs_enc_df is left untouched
X = dogs_enc_df[feature_columns]
# Target: the encoded gender column
y = dogs_enc_df["gender_enc"]

In [4]:
# Split features and target into train / test partitions.
# The fixed random_state makes the split identical on every run.
split_parts = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [5]:
# Build and fit the random forest (50 trees, each limited to depth 5).
# random_state pins the forest's internal randomness so that a fresh
# Restart & Run All reproduces the same model and the same dashboard numbers;
# 42 mirrors the seed already used for the train/test split.
model = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)
# Left as the last expression so the fitted estimator is shown as the cell output
model.fit(X_train, y_train)

RandomForestClassifier(max_depth=5, n_estimators=50)

In [6]:
# Build the explainer from the fitted model plus the held-out X_test / y_test.
# NOTE(review): the labels are given positionally; presumably the first entry names
# class 0 of gender_enc -- confirm the encoding really maps 0 -> 'Male', 1 -> 'Female'.
# Optional, currently unused: cats=['AnimalName', 'BreedName', 'Borough_enc']
class_labels = ['Male', 'Female']
explainer = ClassifierExplainer(model, X_test, y_test, labels=class_labels)

# Dashboard tabs can be switched off with boolean keyword arguments;
# these three are disabled here.
disabled_tabs = dict(whatif=False, shap_interaction=False, decision_trees=False)
db = ExplainerDashboard(explainer, title="NYC_Dogs_Dash", **disabled_tabs)

Detected RandomForestClassifier model: Changing class type to RandomForestClassifierExplainer...
Note: model_output=='probability', so assuming that raw shap output of RandomForestClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
Generating layout...
Calculating shap values...
Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump

In [None]:
# Start serving the dashboard on the chosen port.
# NOTE(review): the build log suggests passing mode='external', 'inline' or
# 'jupyterlab' to keep the notebook interactive while the dashboard runs.
dashboard_port = 8051
db.run(port=dashboard_port)

Starting ExplainerDashboard on http://10.0.0.66:8051
Dash is running on http://0.0.0.0:8051/

 * Serving Flask app "explainerdashboard.dashboards" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on all addresses.
 * Running on http://10.0.0.66:8051/ (Press CTRL+C to quit)
10.0.0.66 - - [29/May/2022 16:06:09] "GET / HTTP/1.1" 200 -
10.0.0.66 - - [29/May/2022 16:06:09] "GET /_dash-dependencies HTTP/1.1" 200 -
10.0.0.66 - - [29/May/2022 16:06:09] "GET /_dash-layout HTTP/1.1" 200 -
10.0.0.66 - - [29/May/2022 16:06:09] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 200 -
10.0.0.66 - - [29/May/2022 16:06:10] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 200 -
10.0.0.66 - - [29/May/2022 16:06:10] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 200 -
10.0.0.66 - - [29/May/2022 16:06:10] "POST /_dash-update-component HTTP/1.1" 204 -
10.0.0.66 - - [29/May/2022 16:06:10] "POST /_dash-update-component HTTP/1.1" 200 -
10.0.0.66 - - [29/May/2022 16:06:10] "POST /_dash-update-component HTTP/1.1" 200 -
10.0.0.66 - - [29/May/2022 16:06:10] "POST /_dash-update-component HTTP/1.1" 200 -
10.0.0.66 - - [29/May/2022 16:06:10] "POST /_

In [None]:
# Write the dashboard configuration to YAML and dump the explainer alongside it
yaml_path = "templates/dashboard.yaml"
explainer_path = "templates/explainer.joblib"
db.to_yaml(yaml_path, explainerfile=explainer_path, dump_explainer=True)