# 🌴🌊 SawitFlood Lab - SHAP Analysis

Interpretasi Model Klasifikasi Risiko Banjir menggunakan SHAP (SHapley Additive exPlanations)


In [None]:
import sys
from pathlib import Path
import pickle, json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shap

# Treat the parent of the current working directory as the project root, and
# put it first on sys.path so it takes precedence during import resolution.
PROJECT_ROOT = Path.cwd().parent
sys.path.insert(0, str(PROJECT_ROOT))

In [None]:
# Load the newest pickled model and, if present, its metadata (feature list).
models_dir = PROJECT_ROOT / "models"

# Reset to explicit defaults so a missing model/metadata file never leaves
# these names undefined (or stale from a previous run of this cell).
model = None
metadata = {}
feature_names = []

model_files = list(models_dir.glob("flood_risk_*.pkl"))
if model_files:
    # "Latest" means last in lexicographic path order, so this relies on the
    # file names being sortable (e.g. carrying a timestamp suffix).
    latest_model = max(model_files)
    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files that come from a trusted source.
    with open(latest_model, "rb") as f:
        model = pickle.load(f)
    metadata_path = models_dir / f"{latest_model.stem}_metadata.json"
    if metadata_path.exists():
        with open(metadata_path, "r", encoding="utf-8") as f:
            metadata = json.load(f)
        feature_names = metadata.get("feature_names", [])
    else:
        # Make the missing metadata visible instead of failing later.
        print(f"Metadata not found: {metadata_path.name} (feature_names is empty)")
    print(f"Loaded model: {latest_model.name}")
else:
    print("No model found. Run 02_modeling_risk.ipynb first.")

In [None]:
# Load the analysis dataset (Parquet preferred, CSV as fallback) and build the
# feature matrix X used for the SHAP computation.
processed_dir = PROJECT_ROOT / "data" / "processed"
parquet_path = processed_dir / "analysis_dataset.parquet"
csv_path = processed_dir / "analysis_dataset.csv"
if parquet_path.exists():
    df = pd.read_parquet(parquet_path)
elif csv_path.exists():
    df = pd.read_csv(csv_path)
else:
    # Fail with an actionable message instead of a NameError on `df` below.
    raise FileNotFoundError(
        f"No analysis dataset found in {processed_dir} "
        "(expected analysis_dataset.parquet or analysis_dataset.csv)."
    )

# Report every missing feature column at once rather than pandas' generic error.
missing_features = [name for name in feature_names if name not in df.columns]
if missing_features:
    raise KeyError(f"Dataset is missing model features: {missing_features}")

# Fill gaps with per-column medians. numeric_only=True keeps this working when
# a selected column is non-numeric (pandas >= 2.0 raises TypeError otherwise);
# such columns simply keep their missing values.
features = df[feature_names]
X = features.fillna(features.median(numeric_only=True))
print(f"Features shape: {X.shape}")

## Compute SHAP Values


In [None]:
# Compute SHAP values
# TreeExplainer is SHAP's explainer for tree-based models; `model` is
# presumably a tree ensemble — TODO confirm against the training notebook.
explainer = shap.TreeExplainer(model)
# NOTE(review): for classifiers, shap_values may come back per class (a list
# of arrays or a 3-D array, depending on the SHAP version) — confirm the
# plotting cells receive the shape they expect.
shap_values = explainer.shap_values(X)
print("SHAP values computed!")

In [None]:
# SHAP Summary Plot (bar type), saved to outputs/figures/shap_summary.png.
figures_dir = PROJECT_ROOT / "outputs" / "figures"
# savefig does not create missing directories, so make sure the target exists.
figures_dir.mkdir(parents=True, exist_ok=True)

# show=False keeps the figure open so the title can be added and the image
# saved before it is displayed.
shap.summary_plot(shap_values, X, plot_type="bar", show=False)
plt.title("SHAP Feature Importance")
plt.tight_layout()
plt.savefig(figures_dir / "shap_summary.png", dpi=150)
plt.show()

In [None]:
# SHAP Beeswarm Plot (default summary plot type), saved to
# outputs/figures/shap_beeswarm.png.
figures_dir = PROJECT_ROOT / "outputs" / "figures"
# savefig does not create missing directories, so make sure the target exists.
figures_dir.mkdir(parents=True, exist_ok=True)

# show=False keeps the figure open so the title can be added and the image
# saved before it is displayed.
shap.summary_plot(shap_values, X, show=False)
plt.title("SHAP Feature Impact")
plt.tight_layout()
plt.savefig(figures_dir / "shap_beeswarm.png", dpi=150)
plt.show()

## Key Insights

Dari analisis SHAP, kita dapat melihat:
- **Faktor dominan** yang mempengaruhi risiko banjir
- **Arah pengaruh** setiap fitur (positif/negatif)
- **Interaksi** antar fitur

---
*SawitFlood Lab - Environmental Risk Analysis*
