In [0]:
# Change directory to the VSCode workspace root so that relative path loads work correctly.
# Turn this addition off with the DataScience.changeDirOnImportExport setting.
# ms-python.python added
import os

try:
    # Resolve the target only once per kernel session: re-running this cell
    # must not climb one more directory level on every execution.
    if "_WORKSPACE_ROOT" not in globals():
        _WORKSPACE_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
    os.chdir(_WORKSPACE_ROOT)
    print(os.getcwd())
except OSError as exc:
    # Best effort: keep the notebook running, but report why the working
    # directory was left unchanged instead of failing silently.
    print("Could not change to the workspace root: {}".format(exc))


In [3]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn_pandas import DataFrameMapper


In [4]:
# We are using the Titanic dataset for this example
data_url = (
    "https://raw.githubusercontent.com/amueller/"
    "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv"
)
# Fill missing values: forward-fill first, then back-fill whatever is still
# missing at the top of a column. `fillna(method=...)` is deprecated in recent
# pandas releases; `ffill()` / `bfill()` are the equivalent dedicated methods.
data = pd.read_csv(data_url).ffill().bfill()



In [5]:
# Run the model explainer locally with the full data
from sklearn.model_selection import train_test_split

# Feature columns, grouped by how they are preprocessed.
categorical_features = ["embarked", "sex", "pclass"]
numeric_features = ["age", "fare"]

# Inputs keep the categorical columns first, followed by the numeric ones;
# the target is the "survived" column as a NumPy array.
X = data[[*categorical_features, *numeric_features]]
y = data["survived"].values

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [6]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn_pandas import DataFrameMapper

# Impute, standardize the numeric features and one-hot encode the categorical features.


def _dense_one_hot_encoder():
    """Build a ``OneHotEncoder`` that returns dense arrays and tolerates unseen categories.

    scikit-learn 1.2 renamed the ``sparse`` argument to ``sparse_output`` and
    later releases dropped the old spelling, so the new name is tried first and
    the old one is used only when the installed version rejects it.

    ``handle_unknown="ignore"`` encodes a category that was absent when the
    encoder was fitted as an all-zero row instead of raising at transform time.

    Returns:
        OneHotEncoder: an unfitted encoder configured for dense output.
    """
    try:
        return OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    except TypeError:
        # Older scikit-learn: the keyword is still called ``sparse``.
        return OneHotEncoder(sparse=False, handle_unknown="ignore")


# Each entry pairs a list of input columns with the transformer applied to them.
transformations = [
    (
        ["age", "fare"],
        Pipeline(
            steps=[
                ("imputer", SimpleImputer(strategy="median")),
                ("scaler", StandardScaler()),
            ]
        ),
    ),
    (
        ["embarked"],
        Pipeline(
            steps=[
                ("imputer", SimpleImputer(strategy="constant", fill_value="missing")),
                ("encoder", _dense_one_hot_encoder()),
            ]
        ),
    ),
    (["sex", "pclass"], _dense_one_hot_encoder()),
]


# Append classifier to preprocessing pipeline.
# Now we have a full prediction pipeline.
clf = Pipeline(
    steps=[
        ("preprocessor", DataFrameMapper(transformations)),
        ("classifier", LogisticRegression(solver="lbfgs")),
    ]
)


In [7]:
# Train the logistic regression pipeline; this fitted model is what we want to explain.
# `Pipeline.fit` returns the pipeline itself, so `model` and `clf` are the same object.
clf.fit(x_train, y_train)
model = clf



In [8]:
from azureml.explain.model.tabular_explainer import TabularExplainer

# Explain predictions on the local machine.
# The explainer is given the final estimator of the pipeline (the "classifier"
# step) together with the list of transformations, and is initialized with the
# untransformed training frame and its column names.
tabular_explainer = TabularExplainer(
    clf.named_steps["classifier"],
    initialization_examples=x_train,
    transformations=transformations,
    features=x_train.columns,
)




In [9]:
# Display the explainer object's repr to confirm it was constructed.
tabular_explainer

<azureml.explain.model.tabular_explainer.TabularExplainer at 0x10860b7b8>

In [10]:
# Compute the global explanation, passing the test dataset as the evaluation examples.
# Note: the evaluation examples must be a representative sample of the original data.
# x_train can be passed as well; with more examples the explanations take longer
# to compute, although they may be more accurate.
global_explanation = tabular_explainer.explain_global(x_test)

  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change f

In [11]:
# Ranked feature names and their matching global importance values.
sorted_global_importance_names = global_explanation.get_ranked_global_names()
sorted_global_importance_values = global_explanation.get_ranked_global_values()

# Show the ranking as a {feature name: importance value} mapping.
{
    feature_name: importance
    for feature_name, importance in zip(
        sorted_global_importance_names, sorted_global_importance_values
    )
}

{'sex': 0.22296422271397756,
 'pclass': 0.11815577424634363,
 'age': 0.039967772698374274,
 'embarked': 0.03433597364011655,
 'fare': 0.0003459386884345606}

In [12]:
# Local (instance-level) explanations describe individual predictions.
# Here only the first member of the test set is explained; `.iloc[:1]` keeps it
# as a one-row DataFrame.
local_explanation = tabular_explainer.explain_local(x_test.iloc[:1])


  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
100%|██████████| 1/1 [00:00<00:00, 16.45it/s]


In [13]:
# Get the prediction for the first member of the test set and explain why the
# model made that prediction.
# Only the explained row is scored: predicting the whole test set just to read
# element 0 is wasted work.
prediction_value = clf.predict(x_test[:1])[0]

# The ranked local values/names are indexed by the predicted value.
# NOTE(review): this assumes class labels coincide with class indices (0/1 for
# "survived") - confirm before reusing with other label encodings.
sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]
sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]

# Sorted local SHAP values
print('ranked local importance values: {}'.format(sorted_local_importance_values))
# Corresponding feature names
print('ranked local importance names: {}'.format(sorted_local_importance_names))

ranked local importance values: [[0.16693835563143788, 0.08783046750083623, 0.017030040312320197, 0.015062827968749568, 0.00025115402120797284]]
ranked local importance names: [['sex', 'pclass', 'age', 'embarked', 'fare']]


In [14]:
# 2. Load visualization dashboard
# Install and enable the notebook extension of the visualization package.
# Note: the extensions must be enabled before the Jupyter kernel starts.
!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize
!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize
# Or, in JupyterLab, uncomment the two lines below
# (the leading `!` is required to run them as shell commands from a cell)
# !jupyter labextension install @jupyter-widgets/jupyterlab-manager
# !jupyter labextension install microsoft-mli-widget

Traceback (most recent call last):
  File "/anaconda3/envs/mlops/bin/jupyter-nbextension", line 11, in <module>
    sys.exit(main())
  File "/anaconda3/envs/mlops/lib/python3.7/site-packages/jupyter_core/application.py", line 266, in launch_instance
    return super(JupyterApp, cls).launch_instance(argv=argv, **kwargs)
  File "/anaconda3/envs/mlops/lib/python3.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/anaconda3/envs/mlops/lib/python3.7/site-packages/notebook/nbextensions.py", line 988, in start
    super(NBExtensionApp, self).start()
  File "/anaconda3/envs/mlops/lib/python3.7/site-packages/jupyter_core/application.py", line 255, in start
    self.subapp.start()
  File "/anaconda3/envs/mlops/lib/python3.7/site-packages/notebook/nbextensions.py", line 716, in start
    self.install_extensions()
  File "/anaconda3/envs/mlops/lib/python3.7/site-packages/notebook/nbextensions.py", line 695, in install_extensions
  

In [15]:
# 2. Load visualization dashboard
# NOTE(review): this cell repeats the install/enable commands of the preceding
# cell; a single copy is enough.
# Note: the extensions must be enabled before the Jupyter kernel starts.
!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize
!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize
# Or, in JupyterLab, uncomment the two lines below
# (the leading `!` is required to run them as shell commands from a cell)
# !jupyter labextension install @jupyter-widgets/jupyterlab-manager
# !jupyter labextension install microsoft-mli-widget


Failure while loading azureml_run_type_providers. Failed to load entrypoint hyperdrive = azureml.train.hyperdrive:HyperDriveRun._from_run_dto with exception (cryptography 2.4.2 (/anaconda3/envs/mlops/lib/python3.7/site-packages), Requirement.parse('cryptography>=2.5'), {'paramiko'}).
Failure while loading azureml_run_type_providers. Failed to load entrypoint azureml.PipelineRun = azureml.pipeline.core:PipelineRun._from_dto with exception (cryptography 2.4.2 (/anaconda3/envs/mlops/lib/python3.7/site-packages), Requirement.parse('cryptography>=2.5'), {'paramiko'}).
Failure while loading azureml_run_type_providers. Failed to load entrypoint azureml.ReusedStepRun = azureml.pipeline.core:StepRun._from_reused_dto with exception (cryptography 2.4.2 (/anaconda3/envs/mlops/lib/python3.7/site-packages), Requirement.parse('cryptography>=2.5'), {'paramiko'}).
Failure while loading azureml_run_type_providers. Failed to load entrypoint azureml.StepRun = azureml.pipeline.core:StepRun._from_dto wit

In [16]:
# 2. Load visualization dashboard
# NOTE(review): this cell repeats the install/enable commands of the two
# preceding cells; a single copy is enough.
# Note: the extensions must be enabled before the Jupyter kernel starts.
!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize
!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize
# Or, in JupyterLab, uncomment the two lines below
# (the leading `!` is required to run them as shell commands from a cell)
# !jupyter labextension install @jupyter-widgets/jupyterlab-manager
# !jupyter labextension install microsoft-mli-widget


Failure while loading azureml_run_type_providers. Failed to load entrypoint hyperdrive = azureml.train.hyperdrive:HyperDriveRun._from_run_dto with exception (cryptography 2.4.2 (/anaconda3/envs/mlops/lib/python3.7/site-packages), Requirement.parse('cryptography>=2.5'), {'paramiko'}).
Failure while loading azureml_run_type_providers. Failed to load entrypoint azureml.PipelineRun = azureml.pipeline.core:PipelineRun._from_dto with exception (cryptography 2.4.2 (/anaconda3/envs/mlops/lib/python3.7/site-packages), Requirement.parse('cryptography>=2.5'), {'paramiko'}).
Failure while loading azureml_run_type_providers. Failed to load entrypoint azureml.ReusedStepRun = azureml.pipeline.core:StepRun._from_reused_dto with exception (cryptography 2.4.2 (/anaconda3/envs/mlops/lib/python3.7/site-packages), Requirement.parse('cryptography>=2.5'), {'paramiko'}).
Failure while loading azureml_run_type_providers. Failed to load entrypoint azureml.StepRun = azureml.pipeline.core:StepRun._from_dto wit

In [17]:
from azureml.contrib.explain.model.visualize import ExplanationDashboard


In [18]:
# Build the explanation dashboard from the global explanation, the fitted
# pipeline (`model`) and the test data; as the last expression of the cell the
# resulting object is displayed.
ExplanationDashboard(global_explanation, model, x_test)

ExplanationWidget(value={'localExplanations': [[[0.015062827968749568, 0.16693835563143788, 0.0878304675008362…

<azureml.contrib.explain.model.visualize.ExplanationDashboard.ExplanationDashboard at 0x10860bcf8>