In [None]:
import shap, numpy as np, pandas as pd, tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Seed both TensorFlow and NumPy's global generator from one constant.
# Seeding TensorFlow alone leaves every np.random.* call in the notebook
# unseeded, so results would change on each Restart & Run All.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Load the Pima Indians Diabetes dataset from an external URL.
# The CSV is read with header=None, so the column names are supplied here;
# the last column ("Outcome") is the label.
URL  = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.csv"
COLS = ["Pregnancies","Glucose","BloodPressure","SkinThickness",
        "Insulin","BMI","DiabetesPedigreeFunction","Age","Outcome"]

df = pd.read_csv(URL, header=None, names=COLS)

In [None]:
# Preprocess the dataset: separate features from the label, hold out a
# stratified 20% test split, then standardize the features.
X = df[COLS[:-1]].values
y = df["Outcome"].values

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit the scaler on the training split only; the test split is transformed
# with the training statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Build and train a small fully connected network for binary classification:
# two ReLU hidden layers (32 and 16 units) and a single sigmoid output unit.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(X_train, y_train, epochs=30, batch_size=32, verbose=0)

# evaluate() returns [loss, accuracy] because one metric was compiled in.
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print("Test accuracy:", test_accuracy)

In [None]:
# Explain the model with SHAP's GradientExplainer.
# (A GPU is not required; it runs on whatever device TensorFlow is using.)

# Draw the background sample from a dedicated, seeded generator so the
# attributions are reproducible and re-running this cell gives the same rows.
background_rng = np.random.default_rng(42)
# Cap the sample size at the number of training rows; sampling without
# replacement would otherwise fail on a small training set.
n_background = min(100, X_train.shape[0])
background  = X_train[background_rng.choice(X_train.shape[0], n_background, replace=False)]
explainer   = shap.GradientExplainer(model, background)

raw_shap    = explainer.shap_values(X_test)
# NOTE(review): the return type appears to depend on the SHAP release -- a
# list with one array per model output, or a single array with a trailing
# output axis. Both are normalized to (n_samples, n_features); confirm
# against the installed SHAP version.
shap_vals   = np.asarray(raw_shap[0] if isinstance(raw_shap, list) else raw_shap)
if shap_vals.ndim == 3:
    shap_vals = shap_vals.squeeze(-1)

# Baseline for the explanation: the mean model prediction over the background.
base_value  = model.predict(background, verbose=0).mean()

In [None]:
# Draw the SHAP summary plot for the test set, labelled with the feature
# column names (every column except the "Outcome" label).
shap.summary_plot(
    shap_vals,
    features=X_test,
    feature_names=COLS[:-1],
)

In [None]:
# Waterfall plot showing every feature's contribution for one test sample.
sample_idx = 0
exp = shap.Explanation(
    shap_vals[sample_idx],            # 1-D vector of attributions for this sample
    base_values=base_value,
    data=X_test[sample_idx],          # standardized feature values, as fed to the model
    feature_names=COLS[:-1],
)
# max_display is larger than the 8 feature columns, so all features are shown.
shap.plots.waterfall(exp, max_display=20)