In [1]:
import shap
import joblib
import pickle
import matplotlib.pyplot as plt
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def generate_shap_explanations(model, X, output_dir="shap_visualizations"):
    """
    Generates SHAP explanations for the model and saves visualizations.

    Parameters:
        model: Trained model (e.g., XGBoost, RandomForest).
        X: Feature matrix (DataFrame or array).
        output_dir: Directory to save SHAP visualizations.

    Returns:
        shap_values: Computed SHAP values for the dataset.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Use TreeExplainer for tree-based models
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # Summary Plot
    shap.summary_plot(shap_values, X, show=False)
    summary_path = os.path.join(output_dir, "summary_plot.png")
    plt.savefig(summary_path, bbox_inches="tight")
    plt.clf()  # Clear the plot for the next visualization

    # Feature Importance Plot
    shap.summary_plot(shap_values, X, plot_type="bar", show=False)
    importance_path = os.path.join(output_dir, "feature_importance.png")
    plt.savefig(importance_path, bbox_inches="tight")
    plt.clf()

    print(f"SHAP visualizations saved in {output_dir}")
    return shap_values


In [3]:
def generate_force_plot_html(model, X_sample, shap_values, output_file="shap_visualizations/force_plot.html"):
    """
    Generates an interactive SHAP force plot and saves it as an HTML file.

    Parameters:
        model: Trained model (e.g., XGBoost, RandomForest).
        X_sample: A single sample (DataFrame row) from the dataset.
        shap_values: SHAP values corresponding to the sample.
        output_file: File path to save the interactive force plot.
    """
    explainer = shap.TreeExplainer(model)

    # Generate the force plot
    force_plot = shap.force_plot(
        base_value=explainer.expected_value,
        shap_values=shap_values,
        features=X_sample,
        matplotlib=False
    )

    # Save the force plot as an HTML file
    shap.save_html(output_file, force_plot)
    print(f"Force plot saved to {output_file}")

In [4]:
def generate_dependence_plot(feature, shap_values, X, output_file="shap_visualizations/dependence_plot.png"):
    """
    Generates a dependence plot for a specified feature and saves it as an image.

    Parameters:
        feature: The feature name for which the dependence plot is generated.
        shap_values: SHAP values corresponding to the dataset.
        X: Feature matrix (DataFrame or array).
        output_file: File path to save the dependence plot.
    """
    plt.figure(figsize=(12, 8))  # Adjust size for better visibility
    shap.dependence_plot(feature, shap_values, X, show=False)
    plt.savefig(output_file, bbox_inches="tight")
    plt.close()
    print(f"Dependence plot saved to {output_file}")

In [5]:
if __name__ == "__main__":
    # Load the best model and the test dataset
    best_model = joblib.load('../Models/fraud_detection_xgboost_model.pkl')
    X_test = pickle.load(open('../dataset/splits_pkl/X_test.pkl', 'rb'))

    # Generate and save SHAP visualizations
    shap_values = generate_shap_explanations(best_model, X_test)

    # Force Plot for the first sample
    X_sample = X_test.iloc[0, :]
    generate_force_plot_html(
        model=best_model,
        X_sample=X_sample,
        shap_values=shap_values[0],
        output_file="shap_visualizations/force_plot.html"
    )

    # Dependence Plot for feature "V1"
    generate_dependence_plot(
        feature="V1",
        shap_values=shap_values,
        X=X_test,
        output_file="shap_visualizations/dependence_plot_V1.png"
    )

SHAP visualizations saved in shap_visualizations
Force plot saved to shap_visualizations/force_plot.html
Dependence plot saved to shap_visualizations/dependence_plot_V1.png


<Figure size 800x950 with 0 Axes>

<Figure size 1200x800 with 0 Axes>