In [None]:
import pandas as pd
import shap
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier


In [None]:
# Read the fraud dataset from disk (creditcard.csv can be substituted here
# if you prefer that dataset instead).
df = pd.read_csv("../data/Fraud_Data.csv")

# The "class" column is the prediction target; every remaining column is
# kept as a model feature.
y = df["class"]
X = df.drop(columns=["class"])


In [None]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit the XGBoost classifier (the best-performing model from Task 2).
xgb_params = {
    "use_label_encoder": False,
    "eval_metric": "logloss",
    "random_state": 42,
}
model = XGBClassifier(**xgb_params)
model.fit(X_train, y_train)


In [None]:
# Build the SHAP explainer for the fitted model, using the training features
# as the background (masker) data.
explainer = shap.Explainer(model, masker=X_train)

# Explain every row of the held-out set, then show the dimensions of the
# resulting Explanation object as the cell output.
shap_values = explainer(X_test)
shap_values.shape


In [None]:
# Global view: summary plot of the SHAP values computed for the test set.
shap.summary_plot(shap_values, features=X_test)


In [None]:
# Load SHAP's JavaScript bundle into the notebook; without it the interactive
# force plot is not rendered after a fresh kernel start.
shap.initjs()

# Row of the test set to explain.
sample_idx = 0

# Local explanation for a single prediction.
# force_plot expects plain numeric inputs here: the scalar base value for the
# row, the row's SHAP values as a numpy array (`.values`, not a sliced
# Explanation object), and the matching feature values.
shap.force_plot(
    shap_values.base_values[sample_idx],
    shap_values.values[sample_idx],
    X_test.iloc[sample_idx],
)


In [None]:
# Save the SHAP summary plot to disk.
# The figure has to be drawn and saved within the same cell: summary_plot
# shows (and thereby finishes) its figure by default, so calling plt.savefig
# in a later cell writes an empty image. Re-draw with show=False to keep the
# figure open, save it, then close it so it is not rendered a second time.
shap.summary_plot(shap_values, X_test, show=False)
summary_fig = plt.gcf()
summary_fig.savefig("../outputs/shap_summary_plot.png", bbox_inches="tight")
plt.close(summary_fig)
