<a href="https://colab.research.google.com/github/appliedcode/mthree-c422/blob/mthree-c422-Likhitha/AI_Audit_Governance_Practice_3_md.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install shap scikit-learn pandas numpy matplotlib

import json, datetime as dt
import numpy as np
import pandas as pd
import shap
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (roc_auc_score, confusion_matrix, classification_report)

In [None]:
# Pull the Adult (Census Income) dataset from OpenML as pandas objects.
adult = fetch_openml(name="adult", version=2, as_frame=True)

# Work on a copy of the features; encode the income label as 1 (>50K) / 0 (<=50K).
label_to_int = {">50K": 1, "<=50K": 0}
X = adult.data.copy()
y = adult.target.map(label_to_int).astype(int)

print("Dataset shape:", X.shape)
print("Target distribution:\n", y.value_counts())

# Column used as the protected attribute in the fairness analysis (Male / Female).
PROTECTED_COL = "sex"

# Hold out 30% of the rows for evaluation; stratifying on y keeps the class
# balance the same in both partitions.
split_options = dict(test_size=0.30, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Partition the columns by dtype: object/category columns are treated as
# categorical, everything else as numeric.
cat_cols = list(X.select_dtypes(include=["object", "category"]).columns)
num_cols = [col for col in X.columns if col not in cat_cols]

# Categorical branch: fill gaps with the mode, then one-hot encode.
# NOTE: scikit-learn >=1.2 uses OneHotEncoder(sparse_output=...) instead of sparse=...;
# sparse_output=False makes the transformer emit a dense array (convenient for SHAP).
categorical_branch = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
])

# Numeric branch: median imputation only (no scaling).
numeric_branch = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
])

preproc = ColumnTransformer(
    transformers=[
        ("cats", categorical_branch, cat_cols),
        ("nums", numeric_branch, num_cols),
    ],
    remainder="drop",
)

clf = RandomForestClassifier(
    n_estimators=400,
    class_weight="balanced_subsample",
    n_jobs=-1,
    random_state=42,
)

# Preprocessing and classifier are fitted together as one estimator.
pipe = Pipeline(steps=[("prep", preproc), ("clf", clf)])
pipe.fit(X_train, y_train)

# Probability of the positive class, then hard labels at a 0.5 cut-off.
y_prob = pipe.predict_proba(X_test)[:, 1]
y_pred = (y_prob >= 0.5).astype(int)

baseline_auc = roc_auc_score(y_test, y_prob)
baseline_cm = confusion_matrix(y_test, y_pred)
baseline_report = classification_report(y_test, y_pred)

print("\n=== Performance ===")
print("ROC-AUC:", baseline_auc)
print("Confusion matrix:\n", baseline_cm)
print("Classification report:\n", baseline_report)

In [None]:
def demographic_parity(y_hat: np.ndarray, group: pd.Series):
    """Compute the demographic parity difference of predictions across groups.

    Predictions and group labels are matched **by position**; any pandas index
    on either input is ignored. (Grouping a Series by another Series aligns on
    index labels, which silently mismatches rows when, for example, ``group``
    is sliced from a shuffled test frame and ``y_hat`` is a plain array.)

    Args:
        y_hat: Predicted labels, one per sample (array-like or Series). The
            per-group mean is used as the positive rate, so 0/1 labels are
            expected.
        group: Group membership for each sample, in the same order and with
            the same length as ``y_hat``.

    Returns:
        A tuple ``(difference, rates)`` where ``rates`` maps each group value
        to its mean prediction and ``difference`` is ``max(rates) - min(rates)``.
    """
    predictions = pd.Series(np.asarray(y_hat))
    # A raw array grouper is applied positionally, unlike a Series grouper.
    rates = predictions.groupby(np.asarray(group)).mean()
    return float(rates.max() - rates.min()), rates.to_dict()

def equal_opportunity(y_true: np.ndarray, y_hat: np.ndarray, group: pd.Series):
    """Compute the equal-opportunity (true-positive-rate) gap across groups.

    All three inputs are matched **by position**; pandas indexes are ignored,
    so arrays and Series (with any index) can be mixed freely.

    Args:
        y_true: Ground-truth labels; samples equal to 1 are the positives.
        y_hat: Predicted labels; a prediction equal to 1 counts as a hit.
        group: Group membership for each sample, same order and length as
            the labels.

    Returns:
        A tuple ``(difference, tprs)``. ``tprs`` maps each group value to its
        TPR = TP / (TP + FN). A group with no positive samples has an
        undefined TPR, reported as ``nan``. ``difference`` is the max-minus-min
        TPR over the groups whose TPR is defined (``0.0`` if there is none), so
        a group without positives cannot fabricate a disparity.
    """
    df = pd.DataFrame({
        "y_true": np.asarray(y_true),
        "y_hat": np.asarray(y_hat),
        "group": np.asarray(group),
    })
    tprs = {}
    for g, sub in df.groupby("group"):
        # TPR is measured only among the actual positives of this group.
        pos = sub[sub["y_true"] == 1]
        tprs[g] = float((pos["y_hat"] == 1).mean()) if len(pos) > 0 else float("nan")
    # Undefined TPRs are left out of the gap instead of being counted as 0.
    defined = [v for v in tprs.values() if not np.isnan(v)]
    diff = max(defined) - min(defined) if defined else 0.0
    return float(diff), tprs

def disparate_impact_ratio(y_hat: np.ndarray, group: pd.Series, privileged_value="Male"):
    """Compute the disparate impact ratio ``min(rate) / max(rate)`` across groups.

    Predictions and group labels are matched **by position**; pandas indexes
    are ignored so a plain prediction array can be paired with a group column
    sliced from a shuffled frame.

    Args:
        y_hat: Predicted labels, one per sample (array-like or Series). The
            per-group mean is used as the positive rate.
        group: Group membership for each sample, same order and length as
            ``y_hat``.
        privileged_value: Accepted for interface compatibility but not used;
            the ratio is always the lowest group rate over the highest one,
            whichever groups those are.

    Returns:
        A tuple ``(ratio, rates)`` where ``rates`` maps each group value to its
        mean prediction. ``ratio`` is ``0.0`` when the highest rate is not
        positive (including when there are no groups).
    """
    predictions = pd.Series(np.asarray(y_hat))
    # A raw array grouper is applied positionally, unlike a Series grouper.
    rates = predictions.groupby(np.asarray(group)).mean()
    # General DI: min(rate)/max(rate); guard against division by zero.
    di = float(rates.min() / rates.max()) if rates.max() > 0 else 0.0
    return di, rates.to_dict()

# Policy thresholds (example values, adjust with Risk/Compliance)
DP_BOUND = 0.10        # largest acceptable demographic parity difference
EO_BOUND = 0.10        # largest acceptable equal opportunity difference
DI_LOWER_BOUND = 0.80  # smallest acceptable disparate impact ratio (80% rule)

# Put protected attribute, labels and predictions on the same fresh 0..n-1 index
# so the three Series line up row for row.
g_test = X_test[PROTECTED_COL].reset_index(drop=True)
y_test_s = pd.Series(y_test).reset_index(drop=True)
y_pred_s = pd.Series(y_pred).reset_index(drop=True)

# Each metric helper returns (summary value, per-group breakdown).
dp_diff, dp_rates = demographic_parity(y_pred_s, g_test)
eo_diff, eo_tprs = equal_opportunity(y_test_s, y_pred_s, g_test)
di_ratio, di_rates = disparate_impact_ratio(y_pred_s, g_test)

print("\n=== Fairness (by sex) ===")
print(f"Demographic Parity Difference: {dp_diff:.4f}")
print(f"Equal Opportunity Difference : {eo_diff:.4f}")
print(f"Disparate Impact Ratio       : {di_ratio:.4f}")
print("Group positive rates:", dp_rates)
print("Group TPRs:", eo_tprs)

# The fairness gate passes only when every individual criterion is met.
fairness_pass = all([
    dp_diff <= DP_BOUND,
    eo_diff <= EO_BOUND,
    di_ratio >= DI_LOWER_BOUND,
])


In [None]:
# Encode the test set exactly as the classifier saw it and recover the
# matching output column names from the fitted preprocessor.
X_test_enc = pipe.named_steps["prep"].transform(X_test)
feature_names = pipe.named_steps["prep"].get_feature_names_out()

# Train a SHAP TreeExplainer on the fitted RandomForest
explainer = shap.TreeExplainer(pipe.named_steps["clf"])
shap_values = explainer.shap_values(X_test_enc)

# Handle the SHAP return shapes seen across versions and reduce them to a
# 2-D (n_samples, n_features) array for the positive class:
# - list/tuple [neg, pos] (older SHAP)               -> take element 1
# - 3-D ndarray (n_samples, n_features, n_outputs)
#   (SHAP >= 0.45 for multi-output models such as
#   RandomForestClassifier)                          -> take output 1
# - 2-D ndarray (single-output models)               -> use as is
if isinstance(shap_values, (list, tuple)):
    shap_for_pos = shap_values[1] if len(shap_values) > 1 else shap_values[0]
else:
    shap_for_pos = np.asarray(shap_values)
    if shap_for_pos.ndim == 3:
        # Without this slice the per-feature mean below would be 2-D and the
        # top-features table could not be built.
        shap_for_pos = (
            shap_for_pos[:, :, 1] if shap_for_pos.shape[2] > 1 else shap_for_pos[:, :, 0]
        )

# Sanity check
if shap_for_pos.shape[1] != len(feature_names):
    raise ValueError(
        f"SHAP feature length mismatch: shap_features={shap_for_pos.shape[1]} "
        f"vs names={len(feature_names)}"
    )

# Mean |SHAP| per feature, then the 20 largest in descending order
mean_abs_shap = np.abs(shap_for_pos).mean(axis=0)
top_idx = np.argsort(mean_abs_shap)[::-1][:20]
top_features = pd.DataFrame({
    "feature": np.array(feature_names)[top_idx],
    "mean_abs_shap": mean_abs_shap[top_idx]
})

print("\n=== Top features by mean |SHAP| ===")
print(top_features.to_string(index=False))

# Plot summary bar (compact, stable in notebooks)
print("\nRendering SHAP summary bar plot...")
shap.summary_plot(shap_for_pos, features=X_test_enc, feature_names=feature_names,
                  plot_type="bar", show=True)


In [None]:
# Mitigation experiment: retrain the same model with the protected column
# removed from the inputs.
X2_train, X2_test = (
    frame.drop(columns=[PROTECTED_COL]) for frame in (X_train, X_test)
)

# The column lists must be rebuilt because the schema changed.
cat2 = list(X2_train.select_dtypes(include=["object", "category"]).columns)
num2 = [col for col in X2_train.columns if col not in cat2]

categorical_branch2 = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
])
numeric_branch2 = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
])

preproc2 = ColumnTransformer(
    transformers=[
        ("cats", categorical_branch2, cat2),
        ("nums", numeric_branch2, num2),
    ]
)

# Same forest configuration as the baseline so only the feature set differs.
forest2 = RandomForestClassifier(
    n_estimators=400, random_state=42, n_jobs=-1, class_weight="balanced_subsample"
)
pipe2 = Pipeline(steps=[("prep", preproc2), ("clf", forest2)])
pipe2.fit(X2_train, y_train)

y2_prob = pipe2.predict_proba(X2_test)[:, 1]
y2_pred = (y2_prob >= 0.5).astype(int)

# Fairness is still measured against the protected attribute taken from the
# original X_test (g_test), even though the model no longer receives it.
y2_pred_series = pd.Series(y2_pred)
y_test_reindexed = pd.Series(y_test).reset_index(drop=True)
dp2, _ = demographic_parity(y2_pred_series, g_test)
eo2, _ = equal_opportunity(y_test_reindexed, y2_pred_series, g_test)
di2, _ = disparate_impact_ratio(y2_pred_series, g_test)

print("\n=== Sex-blind Variant (removed 'sex') ===")
print("ROC-AUC:", roc_auc_score(y_test, y2_prob))
print(f"DP diff: {dp2:.4f}, EO diff: {eo2:.4f}, DI ratio: {di2:.4f}")


In [None]:
# Machine-readable audit record: run metadata, model configuration, performance,
# fairness results (with the policy thresholds applied), SHAP evidence and the
# outcome of the sex-blind mitigation experiment.
audit_log = {
    # Timezone-aware "now" in UTC (datetime.utcnow() is deprecated since
    # Python 3.12); the "+00:00" offset is rewritten to the "Z" suffix so the
    # string keeps the "...Z" form.
    "timestamp": dt.datetime.now(dt.timezone.utc).isoformat().replace("+00:00", "Z"),
    "dataset": "Adult (Census Income) — OpenML",
    "protected_attribute": PROTECTED_COL,
    "library_versions": {
        "sklearn": __import__("sklearn").__version__,
        "pandas": pd.__version__,
        "numpy": np.__version__,
        "shap": shap.__version__,
    },
    "model": {
        "family": "RandomForestClassifier",
        "n_estimators": 400,
        "class_weight": "balanced_subsample",
        "random_state": 42
    },
    "performance": {
        "roc_auc": float(roc_auc_score(y_test, y_prob)),
        # .tolist() turns the NumPy matrix into nested lists for JSON
        "confusion_matrix": confusion_matrix(y_test, y_pred).tolist(),
        "classification_report": classification_report(y_test, y_pred, output_dict=True),
    },
    "fairness": {
        "demographic_parity_difference": float(dp_diff),
        "equal_opportunity_difference": float(eo_diff),
        "disparate_impact_ratio": float(di_ratio),
        "group_positive_rates": dp_rates,
        "group_tprs": eo_tprs,
        "policy_thresholds": {
            "dp_max": DP_BOUND,
            "eo_max": EO_BOUND,
            "di_min": DI_LOWER_BOUND
        },
        "fairness_gate_pass": bool(fairness_pass),
    },
    "explainability": {
        "top_features_by_mean_abs_shap": top_features.to_dict(orient="records")
    },
    "mitigation_experiment": {
        "sex_blind_variant": {
            "roc_auc": float(roc_auc_score(y_test, y2_prob)),
            "dp_diff": float(dp2),
            "eo_diff": float(eo2),
            "di_ratio": float(di2),
        }
    }
}

# Print only the headline numbers; the full record is kept in `audit_log`.
print("\n=== Audit Log (preview) ===")
print(json.dumps({
    "timestamp": audit_log["timestamp"],
    "performance": {"roc_auc": audit_log["performance"]["roc_auc"]},
    "fairness": {
        "dp_diff": audit_log["fairness"]["demographic_parity_difference"],
        "eo_diff": audit_log["fairness"]["equal_opportunity_difference"],
        "di_ratio": audit_log["fairness"]["disparate_impact_ratio"],
        "fairness_gate_pass": audit_log["fairness"]["fairness_gate_pass"]
    }
}, indent=2))

# Optional: persist to file (uncomment to save in Colab)
# with open("adult_income_audit_log.json", "w") as f:
#     json.dump(audit_log, f, indent=2)

# Short aliases for the audit-log sections rendered in the report.
_perf = audit_log["performance"]
_fair = audit_log["fairness"]
_blind = audit_log["mitigation_experiment"]["sex_blind_variant"]


def _verdict(ok):
    """Render a gate outcome as the report's PASS/FAIL label."""
    return "PASS" if ok else "FAIL"


# Ten highest-ranked SHAP features as a plain-text table.
_top10_table = top_features.head(10).to_string(index=False)

# Human-readable governance report assembled from the audit log and the
# fairness thresholds.
report = f"""
AI Governance & Compliance Report — Adult Income (>50K) Model

Timestamp (UTC): {audit_log['timestamp']}
Protected Attribute: {PROTECTED_COL}

Performance:
- ROC-AUC: {_perf['roc_auc']:.3f}
- Confusion Matrix: {_perf['confusion_matrix']}

Fairness:
- Demographic Parity Difference: {_fair['demographic_parity_difference']:.3f} (<= {DP_BOUND} → {_verdict(dp_diff <= DP_BOUND)})
- Equal Opportunity Difference : {_fair['equal_opportunity_difference']:.3f} (<= {EO_BOUND} → {_verdict(eo_diff <= EO_BOUND)})
- Disparate Impact Ratio       : {_fair['disparate_impact_ratio']:.3f} (>= {DI_LOWER_BOUND} → {_verdict(di_ratio >= DI_LOWER_BOUND)})
- Group Positive Rates         : {_fair['group_positive_rates']}
- Group TPRs                   : {_fair['group_tprs']}
- Overall Fairness Gate        : {_verdict(fairness_pass)}

Explainability:
- Top features by mean |SHAP| (top 10):
{_top10_table}

Mitigation Experiment — sex-blind:
- ROC-AUC: {_blind['roc_auc']:.3f}
- DP diff: {_blind['dp_diff']:.3f}
- EO diff: {_blind['eo_diff']:.3f}
- DI ratio: {_blind['di_ratio']:.3f}

Recommendations:
1) Align fairness thresholds (DP/EO/DI) with your Risk & Compliance policy.
2) If fairness gate fails, consider: (a) removing/regularizing sensitive or proxy features,
   (b) reweighting or resampling to balance subgroups, (c) group-specific decision thresholds.
3) Log this audit (metrics + SHAP evidence) to a model registry and schedule periodic re-audits.
4) Publish a plain-language Model Card describing intended use, limits, and fairness findings.
"""
print("\n" + report)