In [None]:
from pathlib import Path
import joblib
import numpy as np
import pandas as pd
import xgboost as xgb
import shap
import matplotlib.pyplot as plt
import os

# Project root comes from the BASE_DIR environment variable.
# Fail early with an explicit message: Path(None) would otherwise raise an
# opaque "expected str, bytes or os.PathLike object, not NoneType" TypeError.
_base_dir_env = os.getenv("BASE_DIR")
if _base_dir_env is None:
    raise RuntimeError(
        "Environment variable BASE_DIR is not set; "
        "point it at the folder that contains 'Train_Final2'."
    )

DIR = Path(_base_dir_env)
BASE_DIR = DIR / "Train_Final2"
# Folder holding the model runs; SETTING names the sub-folder whose model is loaded.
OUT_DIR  = BASE_DIR / "ml_runs_pu_ensemble_xgb"
SETTING  = "pos1_bg2"

# Input table and the text file listing the feature column names.
DATA_TABLE = BASE_DIR / "grid_500m_ml_table.parquet"
FEATURE_COLS_TXT = BASE_DIR / "feature_cols.txt"

# Destination for the SHAP figures. Only the leaf folder is created here, so
# OUT_DIR must already exist (it is also where the model is loaded from).
XAI_DIR = OUT_DIR / "xai_outputs"
XAI_DIR.mkdir(exist_ok=True)

# Upper bound on the number of rows sampled for the SHAP computation.
BG_SAMPLE = 20_000

# Load the full table; the feature matrix below uses only the listed columns.
df = pd.read_parquet(DATA_TABLE)

# The feature list is parsed as a header-less CSV; column 0 holds the names.
_feature_name_table = pd.read_csv(FEATURE_COLS_TXT, header=None)
feature_cols = _feature_name_table[0].tolist()

# Feature matrix restricted to the listed columns and cast to float32.
X = df[feature_cols].astype(np.float32)

# NOTE: joblib.load unpickles the file, so only load model files you trust.
_model_path = OUT_DIR / SETTING / "best_model.joblib"
model = joblib.load(_model_path)
# Underlying Booster of the loaded model, used for prediction further down.
booster = model.get_booster()


# Best-effort switch to GPU prediction.
# XGBoost 2.0 replaced the "predictor" parameter with "device"; passing
# "predictor" to a newer release only produces a
# 'Parameters: { "predictor" } are not used' warning and prediction stays on
# the CPU. Pick the parameter that matches the installed version.
try:
    _xgb_major = int(xgb.__version__.split(".")[0])
    if _xgb_major >= 2:
        _gpu_param = {"device": "cuda"}
    else:
        _gpu_param = {"predictor": "gpu_predictor"}
    booster.set_param(_gpu_param)
except (xgb.core.XGBoostError, ValueError) as err:
    # Still best-effort: keep running on the CPU, but report why the switch
    # failed instead of silently ignoring it.
    print(f"GPU prediction not enabled, continuing on CPU: {err}")


# Seeded generator so the same rows are drawn on every run.
rng = np.random.default_rng(42)

# Draw at most BG_SAMPLE distinct row positions (all rows if X is smaller).
_n_rows = len(X)
_n_sample = min(BG_SAMPLE, _n_rows)
bg_idx = rng.choice(_n_rows, size=_n_sample, replace=False)

# Positional selection of the sampled rows.
X_bg = X.iloc[bg_idx]


# Wrap the sampled rows in a DMatrix and request per-feature contributions
# from the booster instead of plain predictions.
dmat = xgb.DMatrix(X_bg, feature_names=feature_cols)
contrib = booster.predict(dmat, pred_contribs=True)

# The last column is split off as the base value; all preceding columns are
# kept as the per-feature SHAP values.
shap_vals, base_vals = contrib[:, :-1], contrib[:, -1]

# Build an Explanation object so the shap plotting functions can be used.
exp = shap.Explanation(
    feature_names=feature_cols,
    data=X_bg.to_numpy(),
    base_values=base_vals,
    values=shap_vals,
)


# Render the two global summary figures (bar chart, then the default summary
# plot) with the same sequence: new figure -> draw -> tighten -> save -> close.
_global_plots = (
    ("shap_global_bar.png", {"plot_type": "bar"}),
    ("shap_global_beeswarm.png", {}),
)
for _file_name, _plot_kwargs in _global_plots:
    plt.figure()
    # show=False keeps the figure open so it can be saved below.
    shap.summary_plot(exp, show=False, **_plot_kwargs)
    plt.tight_layout()
    plt.savefig(XAI_DIR / _file_name, dpi=300)
    plt.close()

print("✅ DONE: Global SHAP saved to", XAI_DIR)


Parameters: { "predictor" } are not used.

  return func(**kwargs)


✅ DONE: Global SHAP saved to C:\Users\Phong\Desktop\GIS\Project 2\Train_Final2\ml_runs_pu_ensemble_xgb\xai_outputs
