<a href="https://colab.research.google.com/github/appliedcode/mthree-c422/blob/mthree-c422-Likhitha/AI_Audit_Governance_Practice_4_md.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -----------------------------
# AI Governance: Bias Detection & SHAP Explainability
# -----------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import shap

# -----------------------------
# 1. Load Data
# -----------------------------
# UCI Adult census file: no header row, fields separated by ", ".
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
cols = [
    "age", "workclass", "fnlwgt", "education", "education_num",
    "marital_status", "occupation", "relationship", "race", "sex",
    "capital_gain", "capital_loss", "hours_per_week", "native_country", "income"
]
# skipinitialspace=True strips the blank that follows every comma, so the
# file's missing-value marker reaches the NA matcher as "?" rather than " ?".
# Using " ?" here silently matches nothing and leaves every "?" row in place.
df = pd.read_csv(url, header=None, names=cols, na_values="?", skipinitialspace=True)

# Drop rows containing any missing value (rebinding instead of inplace keeps
# the step free of hidden in-place mutation).
df = df.dropna()

# -----------------------------
# 2. Preprocessing
# -----------------------------
# Target becomes 1 when the raw label contains ">50K", otherwise 0
df["income"] = df["income"].map(lambda label: int(">50K" in label))

# Expand categorical columns into indicator columns, dropping the first
# level of each to avoid a redundant column per category
df_encoded = pd.get_dummies(df, drop_first=True)

y = df_encoded["income"]
X = df_encoded.drop(columns="income")

# Stratified 80/20 split. The un-encoded frame is split alongside X and y so
# the readable demographic columns stay row-aligned with the test predictions.
split_parts = train_test_split(
    X, y, df, test_size=0.2, random_state=42, stratify=y
)
X_train, X_test, y_train, y_test, df_train, df_test = split_parts

# -----------------------------
# 3. Train Model
# -----------------------------
# fit() returns the estimator itself, so construction and training can chain
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Hard class predictions for the held-out split
y_pred = model.predict(X_test)

# -----------------------------
# 4. Model Performance
# -----------------------------
test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred)

print("\nModel Accuracy:", test_accuracy)
print("\nClassification Report:\n", per_class_report)

# -----------------------------
# 5. Bias Detection by Demographics
# -----------------------------
# Attach labels and predictions to the un-encoded test rows. y_test is a
# Series and aligns on the index; y_pred is positional and follows the row
# order of X_test, which train_test_split keeps identical to df_test.
test_data = df_test.assign(y_true=y_test, y_pred=y_pred)

# Per-group metrics via named aggregation rather than groupby().apply():
#   * apply() hands the grouping column to the callback, which pandas has
#     deprecated and warns about;
#   * building a pd.Series per group upcasts the row count to float.
# The mean of the boolean "correct" flag is the group's accuracy, and the
# mean of the 0/1 predictions is the share predicted as the positive class.
bias_stats = (
    test_data
    .assign(correct=test_data["y_true"].eq(test_data["y_pred"]))
    .groupby("sex")
    .agg(
        count=("y_pred", "size"),
        accuracy=("correct", "mean"),
        positive_rate=("y_pred", "mean"),
    )
)

print("\nBias Statistics by Sex:\n", bias_stats)

# -----------------------------
# 6. Explainability with SHAP
# -----------------------------
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# Reduce the SHAP output to a 2-D (samples, features) matrix for the positive
# class (index 1). The return type depends on the installed SHAP version:
#   * list of per-class arrays          -> take element 1
#   * 3-D array (samples, features,
#     classes), as newer releases return
#     for multi-output tree models       -> slice the last axis at 1
#   * already 2-D                        -> use unchanged
if isinstance(shap_values, list):
    shap_values_to_plot = shap_values[1]
elif np.ndim(shap_values) == 3:
    shap_values_to_plot = shap_values[:, :, 1]
else:
    shap_values_to_plot = shap_values

print("\nGenerating SHAP Summary Plot...")
shap.summary_plot(shap_values_to_plot, X_test, plot_type="bar")



Model Accuracy: 0.8607400583448488

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.93      0.91      4945
           1       0.75      0.63      0.69      1568

    accuracy                           0.86      6513
   macro avg       0.82      0.78      0.80      6513
weighted avg       0.86      0.86      0.86      6513


Bias Statistics by Sex:
          count  accuracy  positive_rate
sex                                    
Female  2158.0  0.936979       0.084801
Male    4355.0  0.822962       0.263146


  bias_stats = test_data.groupby("sex", group_keys=False).apply(
