# 09 - Explainability & Interpretability

**Overview:** Run SHAP/LIME analyses, probe attention patterns, and create interpretable rule-based checks to validate model behavior.

In [None]:
# Setup: explainability skeleton (SHAP / LIME)
import numpy as np
import shap
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

from src.utils import set_seed

# Call the project's seeding helper with a fixed value of 42
# (presumably seeds the relevant RNGs for reproducibility — see src.utils).
set_seed(42)

# Checkpoint identifier passed to both from_pretrained calls below.
# NOTE(review): this looks like a base checkpoint rather than a fine-tuned
# classifier; if so, the sequence-classification head would be freshly
# initialised and any attributions computed from it would not be meaningful.
# Confirm, and point this at the trained model before interpreting results.
model_name = "distilbert-base-uncased"
print("Loading model and tokenizer for explainability (this may be slow)...")

# Tokenizer and model are loaded from the same identifier so they stay paired.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Define a prediction function for SHAP

def predict_fn(texts):
    """Return class probabilities for a batch of raw texts.

    Intended as the black-box prediction callable handed to SHAP / LIME.
    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        texts: A single string, or any iterable of strings (list, tuple,
            NumPy array, ...). Non-``str`` items are coerced with ``str()``.

    Returns:
        A NumPy array with one row per input text, holding the softmax of
        the model's logits over the last axis. An empty input yields an
        array with zero rows and ``model.config.num_labels`` columns.
    """
    # Explainers may pass NumPy arrays of (NumPy) strings, which the tokenizer
    # does not accept; normalise to a plain list of Python str. A lone string
    # is kept as a batch of one rather than being iterated per character.
    if isinstance(texts, str):
        batch = [texts]
    else:
        batch = [str(t) for t in texts]

    # Nothing to tokenize: return an empty result instead of failing inside
    # the tokenizer.
    if not batch:
        return np.empty((0, model.config.num_labels), dtype=np.float32)

    enc = tokenizer(batch, truncation=True, padding=True, return_tensors="pt")
    # Inference only: skip gradient tracking, and move every encoded tensor
    # to the device the model lives on.
    with torch.no_grad():
        out = model(**{k: v.to(model.device) for k, v in enc.items()})
    probs = torch.nn.functional.softmax(out.logits, dim=-1).cpu().numpy()
    return probs

# Status message marking the end of the setup cell; it recommends running
# SHAP/LIME on small subsets only.
print("Explainability skeleton ready. Use SHAP/LIME on small subsets for debugging.")

# 09 - Explainability & Interpretability

**Objectives:**
- Run SHAP / LIME analyses on the best-performing models
- Visualize attention and train probing classifiers
- Build an interpretable rule-based classifier