# SHAP Analysis for Fraud Detection Models

In [None]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap

from src.utils.explainability import ModelExplainer

# Restore the persisted train/test split and the list of feature names.
# NOTE: joblib.load unpickles arbitrary objects -- only load trusted artifacts.
(
    X_train,
    X_test,
    y_train,
    y_test,
) = joblib.load('data/processed/processed_data.joblib')
feature_names = joblib.load('data/processed/feature_names.joblib')

# One explainer instance is reused for every model analysed below.
explainer = ModelExplainer()

## SHAP Analysis for Random Forest

In [None]:
# Load the persisted random forest and run the project's SHAP analysis on it.
# NOTE(review): the exact return type of shap_analysis is not visible here;
# later cells read `.values` from it -- confirm against ModelExplainer.
rf_model = joblib.load('models/random_forest_model.joblib')
rf_shap = explainer.shap_analysis(
    rf_model,
    X_train,
    X_test,
    'random_forest',
)

### Summary Plot

In [None]:
# Draw the SHAP summary for the random forest on a fresh figure.
plt.figure()
shap.summary_plot(
    rf_shap,
    X_test,
    feature_names=feature_names,
    show=False,  # keep the figure open so a title can still be attached
)
# Title the current axes (this is what plt.title does under the hood).
plt.gca().set_title('Random Forest SHAP Summary Plot')
plt.tight_layout()
plt.show()

### Dependence Plots

In [None]:
# Rank features by mean absolute SHAP value over the explained rows and draw
# one dependence plot for each of the strongest features.
# Requires `numpy` imported as `np` (see the notebook's import cell).
TOP_N_FEATURES = 5

rf_shap_values = np.asarray(rf_shap.values)
# NOTE(review): some SHAP versions return a per-class third axis for
# classifiers; fail with a clear message instead of an obscure pandas error.
if rf_shap_values.ndim != 2:
    raise ValueError(
        'Expected 2-D SHAP values (samples x features), got shape '
        f'{rf_shap_values.shape}'
    )

# Mean |SHAP| per feature column, labelled with the feature names so the
# largest entries can be read off directly.
mean_abs_shap = pd.Series(
    np.abs(rf_shap_values).mean(axis=0),
    index=feature_names,
)
top_features = mean_abs_shap.nlargest(TOP_N_FEATURES).index.tolist()

for feature in top_features:
    shap.dependence_plot(
        feature,
        rf_shap_values,
        X_test,
        feature_names=feature_names,
        interaction_index=None,  # no colouring by a second feature
        show=False,  # defer display so the title can be added first
    )
    plt.title(f'SHAP Dependence Plot for {feature}')
    plt.tight_layout()
    plt.show()

## SHAP Analysis for XGBoost

In [None]:
# Load the persisted XGBoost model and run the project's SHAP analysis on it.
# NOTE(review): the exact return type of shap_analysis is not visible here;
# later cells read `.values` and `.base_values` from it -- confirm.
xgb_model = joblib.load('models/xgboost_model.joblib')
xgb_shap = explainer.shap_analysis(
    xgb_model,
    X_train,
    X_test,
    'xgboost',
)

### Summary Plot

In [None]:
# Draw the SHAP summary for the XGBoost model on a fresh figure.
plt.figure()
shap.summary_plot(
    xgb_shap,
    X_test,
    feature_names=feature_names,
    show=False,  # keep the figure open so a title can still be attached
)
# Title the current axes (this is what plt.title does under the hood).
plt.gca().set_title('XGBoost SHAP Summary Plot')
plt.tight_layout()
plt.show()

### Force Plot for a Specific Instance

In [None]:
# Explain a single fraudulent test transaction with a SHAP force plot.
# Requires `numpy` imported as `np` (see the notebook's import cell).

# Locate the first fraud row by POSITION. SHAP values are stored row-by-row
# in the order of X_test, so using one positional index for both the feature
# row and the SHAP row guarantees they describe the same sample even if the
# pandas indexes of X_test and y_test are not aligned.
fraud_positions = np.flatnonzero(np.asarray(y_test) == 1)
if fraud_positions.size == 0:
    raise ValueError('y_test contains no fraud cases (label 1) to explain')
fraud_pos = int(fraud_positions[0])

fraud_idx = y_test.index[fraud_pos]  # index label, used only for the title
fraud_instance = X_test.iloc[fraud_pos]

# Generate force plot
shap.force_plot(
    # NOTE(review): uses the first base value for every row, as before;
    # presumably the base value is constant across rows -- confirm.
    xgb_shap.base_values[0],
    xgb_shap.values[fraud_pos],
    fraud_instance,
    feature_names=feature_names,
    matplotlib=True,
    show=False,  # defer display so the title can be added first
)
plt.title(f'SHAP Force Plot for Fraud Instance {fraud_idx}')
plt.tight_layout()
plt.show()