# Explainable AI

## SHAP and tree-based models

### Let's put it into practice with 🏠🏠🏠 and XGBoost 🚀

First, the usual imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets, ensemble
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import xgboost

Let's add SHAP

In [None]:
# !pip install shap
import shap
# Load the JavaScript that SHAP's interactive visualizations need inside the notebook.
# Run this once per session; the trailing semicolon keeps the notebook from echoing the call's result.
shap.initjs();

### Fit a model

We are using California house prices from 1990, expressed in units of $100,000

In [None]:
# Load the California housing data bundled with SHAP
# and hold out 20% of it as a test set
X, y = shap.datasets.california()
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=13,  # fixed seed so the split is the same on every run
)

# Fit a gradient-boosted ensemble of 100 shallow (depth-2) trees
model_tree = xgboost.XGBRegressor(max_depth=2, n_estimators=100).fit(X_train, y_train)

# Predictions on the held-out test set
y_test_pred = model_tree.predict(X_test)

### Let's have a look at the predictions

In [None]:
# Reference point for the error below: the mean target of the training data
# (targets are house prices in units of $100,000)
train_mean_price = y_train.mean()
print(f"The average house price in the train set is {train_mean_price:.3f}")


# Mean squared error between the true test targets and the model's predictions
mse = mean_squared_error(y_test, y_test_pred)
print(f"The Mean Squared Error on the test set is   {mse:.3f}")

In [None]:
# Visual check of the test-set predictions:
# true values on the x-axis, predicted values on the y-axis
plt.scatter(y_test, y_test_pred)
plt.title("Prediction vs true value")
plt.xlabel("y_test")
# The trailing semicolon stops the notebook from echoing the returned label object
plt.ylabel("y_test_predict");

### Local explanations

Select an instance, and calculate the prediction

In [None]:
# Position (within the test set) of the single instance we want to explain
row_to_show = 24

# The double brackets keep the selection as a one-row DataFrame rather than a Series
feature_values = X_test.iloc[[row_to_show]]
prediction = model_tree.predict(feature_values)

label = "The prediction for this instance:"
print(f"{label:<35}{prediction[0]:>7.3f}")

Now, calculate the SHAP values

In [None]:
# Step 1: Create a SHAP Explainer
# It wraps the fitted tree model; the same explainer object is reused
# for the single-instance and the whole-test-set explanations.
explainer = shap.Explainer(model_tree)

In [None]:
# Step 2: Calculate the SHAP values
# Using the explainer we just created
# and giving as input the feature values for our instance.
# feature_values holds a single row, so the result describes that one instance;
# its .values and .base_values attributes are inspected next.
shap_values_one = explainer(feature_values)

In [None]:
# Print the three numbers side by side so they can be compared:
# the base value plus the sum of the SHAP values is expected to match the prediction
summary = [
    ("Base value:", shap_values_one.base_values[0]),
    ("Sum of SHAP values:", shap_values_one.values.sum()),
    ("The prediction for this instance:", prediction[0]),
]
for label, value in summary:
    print(f"{label:<35}{value:>7.3f}")

Now that we have our SHAP values, let's visualize them

In [None]:
# Bar chart of the SHAP values for our instance ([0] selects the single explained row)
shap.plots.bar(shap_values_one[0])

In [None]:
# Waterfall chart of the same SHAP values ([0] selects the single explained row)
shap.plots.waterfall(shap_values_one[0])

In [None]:
# # For some tree based models this might not work
# # (encountered this with GradientBoostingRegressor)
# # Here is a workaround:
# # Create a wrapper object, that we will use as input for waterfall
# shap_object = shap.Explanation(
#     base_values = shap_values_one[0][0].base_values,
#     values = shap_values_one[0].values,
#     feature_names = X_train.columns,
#     data = shap_values_one[0].data)
# shap.plots.waterfall(shap_object)

In [None]:
# Force plot of the same SHAP values ([0] selects the single explained row).
# NOTE(review): presumably one of the visualizations that needs shap.initjs() to have been run - confirm
shap.plots.force(shap_values_one[0])

### Global explainability

In [None]:
# Let's calculate SHAP values for our whole test population
# We can use the same explainer
# This time we feed it our whole test population,
# so the result covers every row of X_test instead of a single instance

shap_values = explainer(X_test)

In [None]:
# Global view: with many explained rows the bar plot summarizes each feature
# by its mean absolute SHAP value over the test set
shap.plots.bar(shap_values)

In [None]:
# Beeswarm: shows the distribution of the SHAP values per feature across all test instances
shap.plots.beeswarm(shap_values)

## SHAP and images

### Preparing a model

Let's use a pretrained ResNet50 model

In [None]:
# !pip install opencv-python
import requests
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions

In [None]:
# Loading the class names from ImageNet 1000
url = "https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json"
# Use a timeout so a stalled connection cannot hang the notebook indefinitely
response = requests.get(url, timeout=30)
# Stop here with a clear HTTP error instead of a confusing JSON decoding error further down
response.raise_for_status()
# Each entry's second element is taken as the class name;
# the list follows the order of the entries in the downloaded file
class_names = [value[1] for value in response.json().values()]

In [None]:
# Load the pre-trained model and some sample data
model_deep = ResNet50(weights='imagenet')
# NOTE: X and y are rebound here - from now on they hold the image data, not the housing data
X, y = shap.datasets.imagenet50()
# Cast the pixel values to integers
# NOTE(review): presumably so the images can be displayed as 0-255 pixel values later - confirm
X = X.astype(int)

### Obtaining the SHAP values

In [None]:
# Function to preprocess the data and get the model output
# We will use this as input for the SHAP explainer
def model(X):
    """Preprocess a batch of images and return the ResNet50 output for it.

    The preprocessing runs on a copy, so the array passed in by the caller
    (here: the SHAP explainer) is left untouched.
    """
    preprocessed = preprocess_input(X.copy())
    return model_deep(preprocessed)

# Masker that hides partitions of the input image behind a blur
# ("blur(128,128)" sets the blur kernel size); it needs the shape of a single image
image_shape = X[0].shape
masker = shap.maskers.Image("blur(128,128)", image_shape)

# Build the explainer around our wrapper function
# NOTE: this rebinds `explainer` (and `shap_values` below), replacing the tree-model objects
explainer = shap.Explainer(model, masker, output_names=class_names)

# Explain four images, using 500 evaluations of the model to estimate
# the SHAP values, and keep only the four highest-ranked outputs per image
images_to_explain = X[1:5]
top_outputs = shap.Explanation.argsort.flip[:4]
shap_values = explainer(
    images_to_explain,
    max_evals=500,
    batch_size=50,
    outputs=top_outputs,
)

### And visualise them

In [None]:
# Plot the SHAP values together with the explained images
# (pixel_values is the same X[1:5] slice that was passed to the explainer)
shap.image_plot(shap_values, pixel_values=X[1:5])