Ce notebook utilise la bibliothèque **SHAP** pour approfondir l'interprétabilité des différents modèles.

# Package & data

In [None]:
import pandas as pd
import joblib
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
import shap


In [None]:
# Load the serialized tuning results for every best-player model.
best_player_logistic_model = joblib.load(
    "../models/best_player/best_player_logistic_model.joblib"
)
best_player_decision_tree_model = joblib.load(
    "../models/best_player/best_player_decision_tree_model.joblib"
)
best_player_adaboost_model = joblib.load(
    "../models/best_player/best_player_adaboost_model.joblib"
)
best_player_gradient_boosting_model = joblib.load(
    "../models/best_player/best_player_gradient_boosting_model.joblib"
)
best_player_lightgbm_model = joblib.load(
    "../models/best_player/best_player_lightgbm_model.joblib"
)
best_player_xgboost_model = joblib.load(
    "../models/best_player/best_player_xgboost_model.joblib"
)

# Each loaded object is indexed with 'best_model' to obtain the estimator
# that the rest of the notebook explains.
(
    logistic_model,
    decision_tree_model,
    adaboost_model,
    gradient_boosting_model,
    lightgbm_model,
    xgboost_model,
) = (
    bundle['best_model']
    for bundle in (
        best_player_logistic_model,
        best_player_decision_tree_model,
        best_player_adaboost_model,
        best_player_gradient_boosting_model,
        best_player_lightgbm_model,
        best_player_xgboost_model,
    )
)

# Load the persisted train/test split and unpack its four parts.
data_split = joblib.load("../models/best_player/best_player_split.joblib")
X_train, X_test, y_train, y_test = (
    data_split[part] for part in ('X_train', 'X_test', 'y_train', 'y_test')
)




# Interpretability 

Dans ce notebook, nous nous focaliserons sur l'interprétabilité du X_test. Il serait possible à l'avenir de la comparer avec celle de X_train, par exemple.

La méthodologie est la même pour les différents modèles : 
- création d'un explainer
- calcul des shap values
- enregistrement au format joblib

## Logistic model 

In [None]:
# Compute SHAP values for the test data with the logistic model, then dump them.
# X_test is passed both when building the explainer and as the data to explain
# (presumably it serves as background data in the first call — confirm against the shap docs).
explainer = shap.Explainer(logistic_model, X_test)
shap_values = explainer(X_test)

# Persist the result so the plotting cell can reload it from disk.
joblib.dump(shap_values, '../models/shap_values/shap_values_logistic_model.joblib')


In [None]:
# Reload the SHAP values dumped for the logistic model
shap_values = joblib.load('../models/shap_values/shap_values_logistic_model.joblib')

# Global view: bar and beeswarm plots built from the full set of SHAP values
shap.plots.bar(shap_values)
shap.plots.beeswarm(shap_values)

# Local view: waterfall and force plots for the entry at index 0
shap.plots.waterfall(shap_values[0])
# initjs() is called before the force plot (presumably to load the JavaScript it needs in the notebook — confirm)
shap.initjs()
shap.plots.force(shap_values[0])



## Decision Tree

In [None]:
# Compute SHAP values for the test data with the decision tree, then dump them.
# NOTE(review): this section calls explainer.shap_values(...) and builds the explainer
# without X_test, whereas the other sections call explainer(X_test) on an explainer built
# with X_test. The saved object may therefore differ in type from the other sections —
# confirm before reusing their plotting code here.
explainer = shap.TreeExplainer(decision_tree_model)
shap_values = explainer.shap_values(X_test)
joblib.dump(shap_values, '../models/shap_values/shap_values_decision_tree_model.joblib')


In [None]:
# Reload the SHAP values dumped for the decision tree (consumed by the summary plot cell that follows)
shap_values = joblib.load('../models/shap_values/shap_values_decision_tree_model.joblib')

# Global view: draw the fitted tree, limited to the first 3 levels, labelled with the test-set column names
plt.figure(figsize=(15,12))
plot_tree(decision_tree_model, feature_names=X_test.columns.tolist(), filled=True, max_depth=3)
# Render explicitly, as the AdaBoost cell does, so that plot_tree's return value
# is not the cell's last expression and is not echoed as text output.
plt.show()


In [None]:
# Bar-type summary plot of the decision-tree SHAP values, with X_test supplying the features
shap.summary_plot(shap_values, X_test, plot_type="bar", show=True)


## Ada Boost Model

In [None]:
# Global view: horizontal bar chart of the ten largest AdaBoost feature importances.
feature_importance = adaboost_model.feature_importances_

# Pair each test-set column with its importance, sort ascending and keep the
# last ten rows, i.e. the ten largest values with the largest one last.
df_feature_importance = (
    pd.DataFrame({'Feature': X_test.columns, 'Importance': feature_importance})
    .sort_values(by='Importance', ascending=True)
    .tail(10)
)

plt.figure(figsize=(10, 6))
plt.barh(
    y=df_feature_importance['Feature'],
    width=df_feature_importance['Importance'],
)
plt.tight_layout()
plt.show()

## Gradient Boosting Model

In [None]:
# Compute SHAP values for the test data with the gradient boosting model, then dump them.
# X_test is passed both when building the explainer and as the data to explain
# (presumably it serves as background data in the first call — confirm against the shap docs).
explainer = shap.TreeExplainer(gradient_boosting_model, X_test)
shap_values = explainer(X_test)
joblib.dump(shap_values, '../models/shap_values/shap_values_gradient_boosting_model.joblib')

In [None]:
# Reload the SHAP values dumped for the gradient boosting model
shap_values = joblib.load('../models/shap_values/shap_values_gradient_boosting_model.joblib')

# Global view: bar and beeswarm plots built from the full set of SHAP values
shap.plots.bar(shap_values)
shap.plots.beeswarm(shap_values)

# Local view: waterfall and force plots for the entry at index 0
shap.plots.waterfall(shap_values[0])
# initjs() is called before the force plot (presumably to load the JavaScript it needs in the notebook — confirm)
shap.initjs()
shap.plots.force(shap_values[0])



## Light GBM model

In [None]:
# Compute SHAP values for the test data with the LightGBM model, then dump them.
# X_test is passed both when building the explainer and as the data to explain
# (presumably it serves as background data in the first call — confirm against the shap docs).
explainer = shap.TreeExplainer(lightgbm_model, X_test)
shap_values = explainer(X_test)
joblib.dump(shap_values, '../models/shap_values/shap_values_lightgbm_model.joblib')

In [None]:
# Reload the SHAP values dumped for the LightGBM model
shap_values = joblib.load('../models/shap_values/shap_values_lightgbm_model.joblib')

# Global view: bar and beeswarm plots built from the full set of SHAP values
shap.plots.bar(shap_values)
shap.plots.beeswarm(shap_values)

# Local view: waterfall and force plots for the entry at index 0
shap.plots.waterfall(shap_values[0])
# initjs() is called before the force plot (presumably to load the JavaScript it needs in the notebook — confirm)
shap.initjs()
shap.plots.force(shap_values[0])


## XG Boost model

In [None]:
# Compute SHAP values for the test data with the XGBoost model, then dump them.
# X_test is passed both when building the explainer and as the data to explain
# (presumably it serves as background data in the first call — confirm against the shap docs).
explainer = shap.TreeExplainer(xgboost_model, X_test)
shap_values = explainer(X_test)
joblib.dump(shap_values, '../models/shap_values/shap_values_xgboost_model.joblib')

In [None]:
# Reload the SHAP values dumped for the XGBoost model
shap_values = joblib.load('../models/shap_values/shap_values_xgboost_model.joblib')

# Global view: bar and beeswarm plots built from the full set of SHAP values
shap.plots.bar(shap_values)
shap.plots.beeswarm(shap_values)

# Local view: waterfall and force plots for the entry at index 0
shap.plots.waterfall(shap_values[0])
# initjs() is called before the force plot (presumably to load the JavaScript it needs in the notebook — confirm)
shap.initjs()
shap.plots.force(shap_values[0])
