## 4_model_explainability_with_shapash

A notebook that walks through explaining the electricity-consumption forecasting model with Shapash.

In [None]:
import random
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

import wandb

In [None]:
def _read_dated_csv(csv_path):
    """Read a CSV file and convert its 'date' column to pandas datetimes."""
    frame = pd.read_csv(csv_path)
    frame['date'] = pd.to_datetime(frame['date'])
    return frame


# Train, test and 2020 datasets, each with 'date' parsed to datetime.
dfp_train = _read_dated_csv('./data/rtu/model_train_data.csv')
dfp_test = _read_dated_csv('./data/rtu/model_test_data.csv')
dfp_2020 = _read_dated_csv('./data/rtu/2020_data.csv')

In [None]:
# Feature set: three calendar features plus per-city and weighted weather
# measurements.
#
# An earlier, smaller variant (not active) used only the calendar features,
# optionally with the four weighted weather columns, and labelled them as:
#   weekday          -> Day of the week encoded (datetime weekday)
#   month            -> Month
#   week_number      -> Week number of the year
#   weighted_t2m     -> Weighted temperature based on main city and population
#   weighted_t2m_min -> Weighted minimal temperature based on main city and population
#   weighted_t2m_max -> Weighted maximal temperature based on main city and population
#   weighted_prectot -> Weighted precipitation based on main city and population

# Cities and per-city measurement prefixes; the nesting order below (city
# first, then prefix) fixes the column order.
_weather_cities = (
    'bordeaux', 'lille', 'paris', 'rennes', 'nantes', 'toulouse',
    'marseille', 'lyon', 'nice', 'strasbourg', 'montpellier',
)
_city_measure_prefixes = ('t2m_min', 't2m', 't2m_max', 'prectot')

columns_weather = [
    f'{prefix}_{city}'
    for city in _weather_cities
    for prefix in _city_measure_prefixes
]
columns_weather += [
    'weighted_t2m',
    'weighted_t2m_min',
    'weighted_t2m_max',
    'weighted_prectot',
]

columns_features = ['weekday', 'month', 'week_number', *columns_weather]

# Identity mapping: every feature is labelled with its own column name.
dict_features = dict(zip(columns_features, columns_features))

In [None]:
# Split each dataset into model inputs (the columns listed in
# columns_features) and the target column.
column_target = 'daily_electrical_consumption'

X_train = dfp_train[columns_features]
y_train = dfp_train[column_target]

X_test = dfp_test[columns_features]
y_test = dfp_test[column_target]

# For the 2020 data only the feature matrix is extracted here.
X_2020 = dfp_2020[columns_features]

In [19]:
# Fetch the latest 'best_model_hyperopt' artifact from Weights & Biases and
# unpickle the model stored inside it.
run = wandb.init(project='french_electrical_consumption', entity='jmdaignan')
model_at = run.use_artifact('best_model_hyperopt:latest')
model_dir = model_at.download()
# Alternative left by the author (disabled): model_dir = './data'

model_path = f'{model_dir}/model.pkl'
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts from a project you trust.
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

[34m[1mwandb[0m: wandb version 0.10.32 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [20]:
# Predict on the test features and wrap the result in a one-column DataFrame
# ('pred') that shares X_test's index.
test_predictions = model.predict(X_test)
y_pred = pd.DataFrame(test_predictions, columns=['pred'], index=X_test.index)

In [None]:
# Imported here, next to its first use, rather than in the import cell.
from shapash.explainer.smart_explainer import SmartExplainer
# features_dict is optional: it maps each feature name to the label used for
# display (here every feature is labelled with its own name).
# NOTE(review): constructing the explainer without a model and passing the
# model to compile() looks like the older shapash call pattern; newer releases
# may expect the model in the constructor. Confirm against the installed version.
xpl = SmartExplainer(features_dict=dict_features)

In [None]:
# Compile the explainer on the test features with the loaded model.
# y_pred is optional.
xpl.compile(x=X_test, model=model, y_pred=y_pred)

In [None]:
# Plot feature importance for the data the explainer was compiled on
# (X_test).
xpl.plot.features_importance()

In [None]:
# Index labels of the test rows whose 'weekday' is 5 or 6
# (Saturday/Sunday assuming datetime.weekday() encoding; confirm).
weekend_mask = X_test['weekday'].isin([5, 6])
subset = X_test.loc[weekend_mask].index.tolist()

# Feature importance with that subset of rows passed as the selection.
xpl.plot.features_importance(selection=subset)

In [None]:
# Plot the contributions of the 'weekday' feature.
xpl.plot.contribution_plot("weekday")

In [None]:
# Configure how local explanations are filtered before the local plot.
# NOTE(review): per the shapash docs, max_contrib caps the number of
# contributions shown per row and threshold hides contributions whose
# absolute value is below it. Confirm against the installed version.
xpl.filter(max_contrib=8,threshold=100)

In [None]:
# Local explanation plot for a single observation.
# NOTE(review): 364 is hard-coded and presumably refers to an index label of
# X_test; verify that this label exists in the test data.
xpl.plot.local_plot(index=364)

In [None]:
# Start the shapash web app for this explainer with the given title; the
# returned handle is kept in `app`.
app = xpl.run_app(title_story='Electricity consumption forecast')

In [None]:
# Stop the web app started by xpl.run_app().
app.kill()

In [None]:
# Turn the compiled explainer into a predictor object, used below to explain
# the 2020 data.
# NOTE(review): presumably a shapash SmartPredictor; confirm in the shapash docs.
predictor = xpl.to_smartpredictor()

In [None]:
# Predict on the 2020 features, wrap the result in a one-column DataFrame
# ('pred') sharing X_2020's index, and hand both to the predictor.
ypred_2020 = pd.DataFrame(
    model.predict(X_2020),
    columns=['pred'],
    index=X_2020.index,
)
predictor.add_input(x=X_2020, ypred=ypred_2020)

In [None]:
# Per-feature contributions for the input added to the predictor.
# detailed_contributions is not used again in the visible cells.
detailed_contributions = predictor.detail_contributions()
# Update the predictor's mask with max_contrib=3.
# NOTE(review): presumably this limits explanations to the 3 largest
# contributions per row; confirm in the shapash docs.
predictor.modify_mask(max_contrib=3)

In [None]:
# Build the summarized explanation from the predictor.
explanation = predictor.summarize()
# Display the first 7 rows as the cell output.
explanation.head(7)