**Analyze model and sample explanations from feature importances and SHAP values.**

# IMPORTS

In [1]:
import shap
import pandas as pd
import plotly.express as px

  from .autonotebook import tqdm as notebook_tqdm


# CONFIGS

In [2]:
# note: you must run the explanations job first to generate the output
MODELS_EXPLANATIONS = '../outputs/models_explanations.parquet'
SAMPLES_EXPLANATIONS = '../outputs/samples_explanations.parquet'

# DATASETS

In [3]:
models_explanations = pd.read_parquet(MODELS_EXPLANATIONS).sort_values(
    "importance", ascending=False
)
print(models_explanations.shape)
models_explanations.head()

(19, 2)


Unnamed: 0,feature,importance
18,numericals__casual,0.579146
10,numericals__hr,0.250726
13,numericals__workingday,0.078542
8,numericals__yr,0.0386
9,numericals__mnth,0.012079


In [4]:
samples_explanations = pd.read_parquet(SAMPLES_EXPLANATIONS)
print(samples_explanations.shape)
samples_explanations.head()

(100, 19)


Unnamed: 0,categoricals__season_1,categoricals__season_2,categoricals__season_3,categoricals__season_4,categoricals__weathersit_1,categoricals__weathersit_2,categoricals__weathersit_3,categoricals__weathersit_4,numericals__yr,numericals__mnth,numericals__hr,numericals__holiday,numericals__weekday,numericals__workingday,numericals__temp,numericals__atemp,numericals__hum,numericals__windspeed,numericals__casual
0,0.169155,-0.12465,-0.203503,-0.758982,-1.474484,-1.076349,0.801956,0.000513,31.495134,2.905728,-74.036369,-0.019947,-7.118968,18.640795,2.613896,1.898717,-1.322058,-0.210363,147.338089
1,0.142295,-0.242301,-0.061819,-0.650959,-1.001739,-0.523719,0.824141,0.000501,32.251492,2.752874,-68.468292,-0.037579,-4.272335,19.723166,1.812452,1.462143,0.369532,0.374412,117.786423
2,0.168699,-0.173419,-0.054359,-0.723623,-1.554552,-1.033979,0.762942,0.000534,32.24543,3.19595,-82.515167,0.009938,-2.856529,19.824303,1.498975,2.04678,1.559438,2.603706,127.694664
3,0.253636,-0.791483,-0.218226,-0.612611,0.499581,0.039798,0.778988,0.000518,30.905354,2.808645,-70.180222,0.09768,-6.699857,19.206472,4.468694,7.614378,-0.776883,1.582501,180.403839
4,0.130902,-0.788029,-0.395371,-0.614574,-1.558409,0.111731,-3.9279,0.000904,47.191067,6.631704,-4.315243,0.029141,-0.15762,29.401426,4.704289,4.135256,-3.774523,0.58394,202.349609


In [5]:
shap_values = shap.Explanation(samples_explanations, feature_names=samples_explanations.columns.to_list())
shap_values.feature_names
shap_values

.values =
    categoricals__season_1  categoricals__season_2  categoricals__season_3  \
0                 0.169155               -0.124650               -0.203503   
1                 0.142295               -0.242301               -0.061819   
2                 0.168699               -0.173419               -0.054359   
3                 0.253636               -0.791483               -0.218226   
4                 0.130902               -0.788029               -0.395371   
..                     ...                     ...                     ...   
95                0.032898                0.004389               -0.022684   
96               -0.039833               -0.035329               -0.022280   
97               -0.029910               -0.054200               -0.015400   
98                0.002795               -0.055408               -0.033671   
99                0.001024               -0.061020               -0.034896   

    categoricals__season_4  categoricals__weathersit_

# EXPLANATIONS

## Model

In [6]:
px.bar(models_explanations, x='feature', y='importance', title='Feature Importances')

## Samples

In [7]:
px.imshow(samples_explanations, height=700, color_continuous_scale='Bluered', title="Sample Explanations")