# Machine Learning Dashboard with Pima Indians Diabetes Dataset

In [1]:
# https://explainerdashboard.readthedocs.io/en/latest/

from sklearn.ensemble import RandomForestClassifier
from explainerdashboard import ClassifierExplainer, ExplainerDashboard
import pandas as pd


The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`

The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`

The dash_table package is deprecated. Please replace
`import dash_table` with `from dash import dash_table`

Also, if you're using any of the table format helpers (e.g. Group), replace 
`from dash_table.Format import Group` with 
`from dash.dash_table.Format import Group`


In [2]:
# Column labels applied to the CSV, position by position. The last entry,
# 'class', is the column later split off as the prediction target.
names = [
    'Pregnancies',
    'Plasma_glucose',
    'Blood_pressure',
    'Skin_thickness',
    'Insulin',
    'BMI',
    'Diabetes_pedigree',
    'Age',
    'class',
]

# NOTE(review): with names= and no header= argument, pandas treats every row
# of the file as data -- confirm diabetes_data.csv has no header row.
dataframe = pd.read_csv('diabetes_data.csv', names=names)

In [3]:
# Separate the predictor columns (X) from the target column (y).
# `array` holds the whole table as a NumPy array; no later cell visible here
# reads it.
array = dataframe.values
y = dataframe['class']
X = dataframe.drop(columns='class')

In [4]:
# Human-readable description for each feature column, keyed by column name.
# Passed to ClassifierExplainer(descriptions=...) further down, so these
# strings are user-facing text. The target column 'class' is intentionally
# absent.
feature_descriptions = {
    "Pregnancies": "number of times pregnant",
    # "at 2 hours": the reading is taken two hours into the tolerance test.
    "Plasma_glucose": "Plasma glucose concentration at 2 hours in an oral glucose tolerance test",
    "Blood_pressure": "Diastolic blood pressure",
    "Skin_thickness": "Triceps skin fold thickness",
    # Unit is micro-units per millilitre, written "mu U/ml".
    "Insulin": "2-hour serum insulin (mu U/ml)",
    "BMI": "Body mass index",
    "Diabetes_pedigree": "Diabetes mellitus history in relatives and the genetic relationship of those relatives to the patient",
    "Age": "Age in years",
}

In [5]:
from sklearn.model_selection import train_test_split

# Divide the data set into 80% training and 20% testing.
# test_size is passed explicitly because, when it is omitted,
# train_test_split holds out 25% of the rows by default, which would give a
# 75/25 split instead of the intended 80/20.
# random_state pins the shuffle so the same rows land in each split on
# every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Feature column names: every entry of `names` except the trailing target.
test_names = names[:-1]
train_names = names[:-1]

In [6]:
# Random forest limited to depth-2 trees; the fixed seed makes the fitted
# forest reproducible across runs.
model = RandomForestClassifier(
    random_state=0,
    max_depth=2,
)
# fit() is left as the final expression so the cell displays the estimator.
model.fit(X_train, y_train)

RandomForestClassifier(max_depth=2, random_state=0)

In [7]:
# Wrap the fitted model and the held-out split in an explainer. `labels`
# supplies display names for the two classes, and `target` names the
# predicted column.
explainer = ClassifierExplainer(
    model,
    X_test,
    y_test,
    descriptions=feature_descriptions,
    labels=['No Diabetes', 'Diabetes'],
    target="class",
)

# Build the dashboard with SHAP interaction values switched off.
db = ExplainerDashboard(
    explainer,
    title="Pima Diabetes Explainer",
    shap_interaction=False,
)

# Serve on port 8050.
# NOTE(review): the library's own log for this cell suggests
# mode='external', mode='inline' or mode='jupyterlab' to keep the notebook
# interactive while the dashboard is running.
db.run(port=8050)

Detected RandomForestClassifier model: Changing class type to RandomForestClassifierExplainer...
Note: model_output=='probability', so assuming that raw shap output of RandomForestClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
Generating layout...
Calculating shap values...
Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating pred_percentiles...
Calculating predictions...
Calculating ShadowDecTree for each individual decision tree...
Reminder: you can store the ex

 * Running on all addresses.
 * Running on http://192.168.1.16:8050/ (Press CTRL+C to quit)
192.168.1.16 - - [09/Jan/2022 08:47:04] "GET / HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:47:04] "GET /_dash-layout HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:47:04] "GET /_dash-dependencies HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:47:04] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:47:04] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:47:04] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:47:04] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:47:04] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:47:04] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:47:04] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.1.16


                so no dtreeviz visualisation of decision trees will be shown on the shadow trees tab.

                See https://github.com/parrt/dtreeviz for info on how to properly install graphviz 
                for dtreeviz. 
                
No graphviz 'dot' executable available!


In [None]:
# Minimal variant: fit a forest with default hyper-parameters, wrap it in an
# explainer and launch the dashboard in a single expression.
# random_state is pinned because an unseeded RandomForestClassifier fits a
# different forest on every run, so the dashboard's numbers would not be
# reproducible.
# shap_interaction is not disabled here, so SHAP interaction values are
# computed as well; the library's log for this cell warns that this may
# take a while.
ExplainerDashboard(
    ClassifierExplainer(
        RandomForestClassifier(random_state=0).fit(X_train, y_train),
        X_test, y_test,
        descriptions=feature_descriptions
    )
).run()

Detected RandomForestClassifier model: Changing class type to RandomForestClassifierExplainer...
Note: model_output=='probability', so assuming that raw shap output of RandomForestClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
Generating layout...
Calculating shap values...
Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating shap interaction values... (this may take a while)
Reminder: TreeShap computational complexity is O(TLD^2), where T is the number of trees, L is the maximum number of leaves in any tree and D the maximal depth of any tree. So reducing these will s

 * Running on all addresses.
 * Running on http://192.168.1.16:8050/ (Press CTRL+C to quit)
192.168.1.16 - - [09/Jan/2022 08:57:17] "GET / HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:57:18] "GET /_dash-layout HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:57:18] "GET /_dash-dependencies HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:57:18] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:57:18] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:57:18] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:57:18] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:57:18] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:57:18] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.1.16 - - [09/Jan/2022 08:57:18] "POST /_dash-update-component HTTP/1.1" 200 -
192.168.1.16