In [None]:
import joblib
import pandas as pd
import xgboost
import shap

# Name of the target column; it is split off from the feature columns when the dataset is loaded.
LABEL_COLUMN = "churned"

# Load SHAP's JavaScript visualisation code into the notebook (used by the interactive force plots).
shap.initjs() 

# Load dataset

In [None]:
# Read the user-level dataset and separate the target column from the features.
# NOTE(review): the file is `users_train.csv` but the variables are named
# `*_test` - confirm whether the explanations are meant to be computed on the
# training split or on a held-out one.
data = pd.read_csv("../data/users_train.csv")
y_test = data[LABEL_COLUMN]
x_test = data.drop(columns=[LABEL_COLUMN])

# Load best model

In [None]:
# Load the serialized model pipeline from disk.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code - only
# load artifacts that come from a trusted source.
pipeline = joblib.load('models/xgb_model_full.joblib')

In [None]:
# Pull the preprocessing stage and the classifier out of the loaded pipeline by key.
# (`preprocesor` keeps its existing spelling because later cells refer to it.)
preprocesor, model = pipeline["preprocessor"], pipeline["classifier"]

In [None]:
# Prepare dataset

In [None]:
# Run the raw features through the preprocessing stage; the resulting matrix is
# what the SHAP explainers further down are given.
observations = preprocesor.transform(x_test)

In [None]:
# Rebuild column labels for the transformed matrix: the columns routed to the
# first transformer, followed by the expanded one-hot columns produced by the
# 'one-hot' step of the second transformer.
# NOTE(review): this assumes the first transformer emits exactly one output
# column per input column and that no further transformer/remainder adds
# columns - pd.DataFrame raises below if the label count does not match.
first_transformer_columns = list(preprocesor.transformers_[0][2])

# Read both entries from the fitted `transformers_` attribute so the encoder and
# the column list it was fitted on are taken from the same place.
one_hot_encoder = preprocesor.transformers_[1][1]['one-hot']
one_hot_input_columns = preprocesor.transformers_[1][2]

# `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2 in
# favour of `get_feature_names_out`; prefer the new API and fall back to the old
# one so the cell also runs in the environment the model was pickled with.
if hasattr(one_hot_encoder, "get_feature_names_out"):
    one_hot_columns = one_hot_encoder.get_feature_names_out(one_hot_input_columns)
else:
    one_hot_columns = one_hot_encoder.get_feature_names(one_hot_input_columns)

column_name = first_transformer_columns + list(one_hot_columns)

# Same values as `observations`, but with readable column labels for the plots.
observations_df = pd.DataFrame(observations, columns=column_name)

In [None]:
# Preview the first rows only; enough to check the reconstructed column labels
# without rendering the whole frame.
observations_df.head()

# Interpretability

## Feature importance from the model

In [None]:
# Rank the features by the importance scores exposed by the model and plot the
# ten highest as a bar chart.
importance_ax = (
    pd.Series(model.feature_importances_, index=observations_df.columns)
    .sort_values(ascending=False)
    .iloc[:10]
    .plot(kind="bar")
)
# Label the figure so it can be read on its own; the trailing `;` keeps the
# return value of `set` out of the cell output.
importance_ax.set(
    title="Top 10 feature importances (model)",
    xlabel="Feature",
    ylabel="Importance",
);

## Shap Values

### TreeExplainer

In [None]:
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(observations)

In [None]:
shap.summary_plot(shap_values, observations_df, plot_type="bar")

### Explainer

In [None]:
# Build an explainer through SHAP's generic `Explainer` entry point and evaluate
# it on the labelled DataFrame rather than on the bare matrix.
explainer = shap.Explainer(model)
shap_values = explainer(observations_df)

In [None]:
# Display the object returned by the explainer call above.
shap_values

#### Visualize the first prediction's explanation

In [None]:

# Waterfall plot for the first explained row.
shap.plots.waterfall(shap_values[0])

#### Visualize all predictions

In [None]:
# Beeswarm plot over all explained rows.
shap.plots.beeswarm(shap_values)

In [None]:
# Interactive force plot for the first explained row (relies on the
# `shap.initjs()` call made in the first cell).
shap.plots.force(shap_values[0])

In [None]:
# Disabled: stacked force plot across all explained rows.
#shap.plots.force(explainer.expected_value, shap_values.values)