**Analyze model and sample explanations from feature importances and SHAP values.**

# IMPORTS

In [1]:
import pandas as pd
import plotly.express as px
import shap

  from .autonotebook import tqdm as notebook_tqdm


# CONFIGS

In [2]:
# note: you must run the explanations job first to generate the output
MODELS_EXPLANATIONS = "../outputs/models_explanations.parquet"
SAMPLES_EXPLANATIONS = "../outputs/samples_explanations.parquet"

# DATASETS

In [3]:
models_explanations = pd.read_parquet(MODELS_EXPLANATIONS).sort_values(
    "importance", ascending=False
)
print(models_explanations.shape)
models_explanations.head()

(20, 2)


Unnamed: 0,feature,importance
19,numericals__registered,0.939335
18,numericals__casual,0.060139
8,numericals__yr,0.000161
16,numericals__hum,6.4e-05
10,numericals__hr,5.9e-05


In [4]:
samples_explanations = pd.read_parquet(SAMPLES_EXPLANATIONS)
print(samples_explanations.shape)
samples_explanations.head()

(100, 20)


Unnamed: 0,categoricals__season_1,categoricals__season_2,categoricals__season_3,categoricals__season_4,categoricals__weathersit_1,categoricals__weathersit_2,categoricals__weathersit_3,categoricals__weathersit_4,numericals__yr,numericals__mnth,numericals__hr,numericals__holiday,numericals__weekday,numericals__workingday,numericals__temp,numericals__atemp,numericals__hum,numericals__windspeed,numericals__casual,numericals__registered
0,0.006652,0.027727,0.006341,-0.004617,0.007796,-0.002922,0.001006,-5.551742e-07,0.079501,-0.032553,0.01973,-0.005449,-0.091014,-0.040552,0.024861,0.003903,-0.012946,0.083744,25.231182,93.615906
1,-6e-06,0.015149,0.013882,-0.003549,0.00962,0.034864,0.00473,-5.551742e-07,0.080114,-0.001672,0.052958,-0.005971,0.013171,-0.020272,0.069661,0.010426,0.060713,-0.05569,11.599885,79.890282
2,0.00151,0.008787,-0.031662,-0.00568,0.003407,0.007366,0.002644,-5.384581e-07,0.075888,-0.039858,0.066534,-0.008654,-0.043778,-0.015179,0.120411,0.004957,0.042915,-0.029475,17.027773,80.330391
3,0.002116,0.000101,-0.030228,-0.008386,-0.00124,-0.001726,0.001956,-5.384581e-07,0.089552,-0.000123,0.059446,-0.009938,0.050032,-0.025889,-0.006929,-0.059649,-0.040841,0.01791,36.772224,106.574913
4,0.012335,0.003437,0.013386,-0.027833,0.021164,-0.005834,-0.004372,-6.086659e-07,0.018141,0.174302,0.120672,-0.003116,0.040308,-0.074956,0.069545,0.015809,0.074836,-0.040882,38.722881,246.85347


In [5]:
shap_values = shap.Explanation(
    samples_explanations, feature_names=samples_explanations.columns.to_list()
)
print("Features:", shap_values.feature_names)
shap_values

Features: ['categoricals__season_1', 'categoricals__season_2', 'categoricals__season_3', 'categoricals__season_4', 'categoricals__weathersit_1', 'categoricals__weathersit_2', 'categoricals__weathersit_3', 'categoricals__weathersit_4', 'numericals__yr', 'numericals__mnth', 'numericals__hr', 'numericals__holiday', 'numericals__weekday', 'numericals__workingday', 'numericals__temp', 'numericals__atemp', 'numericals__hum', 'numericals__windspeed', 'numericals__casual', 'numericals__registered']


.values =
    categoricals__season_1  categoricals__season_2  categoricals__season_3  \
0                 0.006652                0.027727                0.006341   
1                -0.000006                0.015149                0.013882   
2                 0.001510                0.008787               -0.031662   
3                 0.002116                0.000101               -0.030228   
4                 0.012335                0.003437                0.013386   
..                     ...                     ...                     ...   
95                0.023137                0.013332               -0.107473   
96                0.004738               -0.013562               -0.005693   
97               -0.019759                0.010218               -0.009712   
98                0.003229               -0.000393               -0.161296   
99               -0.002694               -0.024382               -0.013047   

    categoricals__season_4  categoricals__weathersit_

# EXPLANATIONS

## Model

In [6]:
px.bar(models_explanations, x="feature", y="importance", title="Feature Importances")

## Samples

In [7]:
px.imshow(
    samples_explanations, height=700, color_continuous_scale="Bluered", title="Sample Explanations"
)