<a href="https://colab.research.google.com/github/balawillgetyou/dy/blob/master/ModelExplanaibility_Interpret20200223.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

We use the interpretability package of the Azure Machine Learning Python SDK to understand why a model made its predictions for individual observations (local explanations). This package can also produce feature importance for the model (global explanations).

In [0]:
pip install azureml-sdk
pip install azureml-interpret

import warnings
warnings.filterwarnings('ignore')

In [0]:
# load breast cancer dataset, a well-known small dataset that comes with scikit-learn
from sklearn.datasets import load_breast_cancer
from sklearn import svm
from sklearn.model_selection import train_test_split
breast_cancer_data = load_breast_cancer()
classes = breast_cancer_data.target_names.tolist()

# split data into train and test
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(breast_cancer_data.data,            
                                                    breast_cancer_data.target,  
                                                    test_size=0.2,
                                                    random_state=0)
clf = svm.SVC(gamma=0.001, C=100., probability=True)
model = clf.fit(x_train, y_train)

In [11]:
print('These are the labels in the breast cancer dataset', '\n', classes)
print('These are the features in the breast cancer dataset :', '\n', breast_cancer_data.feature_names)

These are the labels in the breast cancer dataset 
 ['malignant', 'benign']
These are the features in the breast cancer dataset : 
 ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


In [0]:
#TabularExplainer is a wrapper for SHAP
from interpret.ext.blackbox import TabularExplainer

# "features" and "classes" fields are optional
explainer = TabularExplainer(model, 
                             x_train, 
                             features=breast_cancer_data.feature_names, 
                             classes=classes)

In [12]:
# This provides feature importance across all the observations in a given dataset in sorted order of importance, with importance values
global_explanation = explainer.explain_global(x_train)

sorted_global_importance_values = global_explanation.get_ranked_global_values()
sorted_global_importance_names = global_explanation.get_ranked_global_names()
dict(zip(sorted_global_importance_names, sorted_global_importance_values))

HBox(children=(IntProgress(value=0, max=455), HTML(value='')))




{'area error': 0.043814437624615066,
 'compactness error': 0.0006282321618886724,
 'concave points error': 0.0006271786251183598,
 'concavity error': 0.0005989626101042306,
 'fractal dimension error': 0.0006256656631654055,
 'mean area': 0.0902336612866102,
 'mean compactness': 0.0006812267148136712,
 'mean concave points': 0.0007338528815766646,
 'mean concavity': 0.0005858642551401746,
 'mean fractal dimension': 0.0006878066573047346,
 'mean perimeter': 0.06544750653198986,
 'mean radius': 0.0011602798634139347,
 'mean smoothness': 0.0005753093743510319,
 'mean symmetry': 0.0006781118046351257,
 'mean texture': 0.0008290164646017652,
 'perimeter error': 0.0008015254400993534,
 'radius error': 0.0007144972919556728,
 'smoothness error': 0.0006563992100773597,
 'symmetry error': 0.0007172813284230385,
 'texture error': 0.0005979691022141256,
 'worst area': 0.12203088876772909,
 'worst compactness': 0.0006263388206721814,
 'worst concave points': 0.0007921989579662981,
 'worst concavity

Why was an observation predicted as benign? This local explanation shows the clear difference in support for the malignant v/s benign classes, by feature in the model.

In [22]:
labels = classes
X_test = x_test
tab_explainer = explainer


# Picking an observation to predict and explain
X_explain = X_test[9:10]

# Get predictions
predictions = model.predict(X_explain)

# Get local explanations
local_tab_explanation = tab_explainer.explain_local(X_explain)

# Get feature names and importance for each possible label
local_tab_features = local_tab_explanation.get_ranked_local_names()
local_tab_importance = local_tab_explanation.get_ranked_local_values()

for l in range(len(local_tab_features)):
    print('Support for', labels[l])
    label = local_tab_features[l]
    for o in range(len(label)):
        print("\tObservation", o + 1)
        feature_list = label[o]
        total_support = 0
        for f in range(len(feature_list)):
            print("\t\t", feature_list[f], ':', local_tab_importance[l][o][f])
            total_support += local_tab_importance[l][o][f]
        print("\t\t ----------\n\t\t Total:", total_support, "Prediction:", labels[predictions[o]])

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Support for malignant
	Observation 1
		 worst fractal dimension : 0.0
		 concavity error : 0.0
		 mean texture : 0.0
		 mean smoothness : 0.0
		 mean compactness : 0.0
		 mean concavity : 0.0
		 mean concave points : 0.0
		 mean symmetry : 0.0
		 mean fractal dimension : 0.0
		 texture error : 0.0
		 perimeter error : 0.0
		 worst symmetry : 0.0
		 compactness error : 0.0
		 smoothness error : 0.0
		 symmetry error : 0.0
		 fractal dimension error : 0.0
		 worst radius : 0.0
		 worst smoothness : 0.0
		 worst compactness : 0.0
		 worst concavity : 0.0
		 worst concave points : 0.0
		 mean radius : 0.0
		 radius error : -0.005893898197237124
		 concave points error : -0.0064490847024228914
		 worst texture : -0.01898206466039562
		 area error : -0.03477485528966803
		 mean area : -0.03525150503279448
		 worst area : -0.04320197799290498
		 mean perimeter : -0.07386858834265941
		 worst perimeter : -0.10596999721881284
		 ----------
		 Total: -0.32439197143689535 Prediction: benign
Suppo

Similarly, this example shows why another observation was marked as malignant.

In [23]:
labels = classes
X_test = x_test
tab_explainer = explainer


# Picking an observation to predict and explain
X_explain = X_test[10:11]

# Get predictions
predictions = model.predict(X_explain)

# Get local explanations
local_tab_explanation = tab_explainer.explain_local(X_explain)

# Get feature names and importance for each possible label
local_tab_features = local_tab_explanation.get_ranked_local_names()
local_tab_importance = local_tab_explanation.get_ranked_local_values()

for l in range(len(local_tab_features)):
    print('Support for', labels[l])
    label = local_tab_features[l]
    for o in range(len(label)):
        print("\tObservation", o + 1)
        feature_list = label[o]
        total_support = 0
        for f in range(len(feature_list)):
            print("\t\t", feature_list[f], ':', local_tab_importance[l][o][f])
            total_support += local_tab_importance[l][o][f]
        print("\t\t ----------\n\t\t Total:", total_support, "Prediction:", labels[predictions[o]])

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Support for malignant
	Observation 1
		 area error : 0.22988056389531608
		 worst area : 0.17270476815664704
		 worst perimeter : 0.07968193405302809
		 mean perimeter : 0.044044282732817096
		 mean texture : 0.014912476014857838
		 worst smoothness : 0.008197031747257999
		 concave points error : 0.006007558220564299
		 radius error : 0.002792821604301901
		 mean symmetry : 0.0
		 texture error : 0.0
		 mean fractal dimension : 0.0
		 worst fractal dimension : 0.0
		 mean concave points : 0.0
		 mean concavity : 0.0
		 perimeter error : 0.0
		 mean smoothness : 0.0
		 mean compactness : 0.0
		 smoothness error : 0.0
		 worst symmetry : 0.0
		 compactness error : 0.0
		 concavity error : 0.0
		 symmetry error : 0.0
		 fractal dimension error : 0.0
		 worst radius : 0.0
		 worst compactness : 0.0
		 worst concavity : 0.0
		 worst concave points : 0.0
		 mean radius : 0.0
		 worst texture : -0.024772812615167714
		 mean area : -0.04162892819564071
		 ----------
		 Total: 0.49181969561398