In [3]:
from pathlib import Path

import joblib
import pandas as pd
import shap

# Load the processed train/test splits and the fitted model from disk.
# NOTE(review): joblib.load unpickles the file, so only load model
# artifacts that come from a trusted source.
train = pd.read_csv("../data/processed/train_pd.csv")
test = pd.read_csv("../data/processed/test_pd.csv")
model = joblib.load("../models/logistic_pd_model.pkl")

# Every training column except the ones listed here is used as a feature.
non_feature_cols = ("default", "PD", "score")
feature_names = [col for col in train.columns if col not in non_feature_cols]

# Select the same feature columns from both splits and replace any NaNs with 0.
X_train = train[feature_names].fillna(0)
X_test = test[feature_names].fillna(0)

In [4]:
# Build a SHAP linear explainer for the loaded model, passing X_train as its
# data argument, then compute SHAP values for the rows of X_test.
explainer = shap.LinearExplainer(
    model,
    X_train,
    feature_perturbation="interventional",
)
shap_values = explainer.shap_values(X_test)




In [5]:
import matplotlib.pyplot as plt

# Make sure the output directory exists before writing any figure:
# plt.savefig does not create missing parent directories and would raise
# FileNotFoundError on a fresh checkout.
reports_dir = Path("../reports")
reports_dir.mkdir(parents=True, exist_ok=True)

# Summary plot (bar variant). show=False keeps the figure open so it can be
# saved to disk instead of only being displayed.
shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
plt.tight_layout()
plt.savefig(reports_dir / "shap_summary_bar.png", bbox_inches='tight')
plt.close()  # release the figure so the next plot starts from a clean state

# Beeswarm plot (summary_plot called without an explicit plot_type).
shap.summary_plot(shap_values, X_test, show=False)
plt.savefig(reports_dir / "shap_summary_beeswarm.png", bbox_inches='tight')
plt.close()


In [None]:
# Explain a single prediction: the first row of the test set.
row_idx = 0
shap.force_plot(
    explainer.expected_value,
    shap_values[row_idx, :],
    X_test.iloc[row_idx, :],
    matplotlib=True,
    show=False,
)
# show=False leaves the matplotlib figure open so it can be written to disk.
plt.savefig("../reports/shap_force_example.png", bbox_inches='tight')
plt.close()
