In [1]:
!pip install shap scikit-learn pandas -q

In [2]:
# Task: Import Libraries
import json
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import shap
import datetime

In [3]:
# Load Dataset and Extract Sensitive Attribute
# # Load Adult Census dataset
adult_data = fetch_openml(name='adult', version=2, as_frame=True)

# Original DataFrame (to preserve original 'sex' column)
adult_original = adult_data.data.copy()

# Features and target
X_orig = adult_original.copy()
y = adult_data.target.map({'>50K': 1, '<=50K': 0})

print("Dataset shape:", X_orig.shape)
print("Target distribution:\n", y.value_counts())

# Extract sensitive attribute 'sex' (1=Male, 0=Female) for auditing
sensitive_attr = X_orig['sex'].map({'Male': 1, 'Female': 0}).astype(int)

Dataset shape: (48842, 14)
Target distribution:
 class
0    37155
1    11687
Name: count, dtype: int64


In [4]:
#  Encode Features (One-Hot) and Reattach Sensitive Attribute
# Drop the original 'sex' column before encoding so it isn't duplicated
X_to_encode = X_orig.drop(columns=['sex'])

# One-hot encode categorical variables into numeric form
X_encoded = pd.get_dummies(X_to_encode, drop_first=True)

# Reattach sensitive attribute (kept as a single column for bias checks)
X_encoded['sex_attr'] = sensitive_attr.values

# Inspect encoded shape and a small sample
print("Encoded feature shape:", X_encoded.shape)
display(X_encoded.columns[:40])  # first few columns for sanity


Encoded feature shape: (48842, 97)


Index(['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss',
       'hours-per-week', 'workclass_Local-gov', 'workclass_Never-worked',
       'workclass_Private', 'workclass_Self-emp-inc',
       'workclass_Self-emp-not-inc', 'workclass_State-gov',
       'workclass_Without-pay', 'education_11th', 'education_12th',
       'education_1st-4th', 'education_5th-6th', 'education_7th-8th',
       'education_9th', 'education_Assoc-acdm', 'education_Assoc-voc',
       'education_Bachelors', 'education_Doctorate', 'education_HS-grad',
       'education_Masters', 'education_Preschool', 'education_Prof-school',
       'education_Some-college', 'marital-status_Married-AF-spouse',
       'marital-status_Married-civ-spouse',
       'marital-status_Married-spouse-absent', 'marital-status_Never-married',
       'marital-status_Separated', 'marital-status_Widowed',
       'occupation_Armed-Forces', 'occupation_Craft-repair',
       'occupation_Exec-managerial', 'occupation_Farming-fishing',

In [5]:
# : Train-Test Split (encoded data)
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.3, random_state=42, stratify=y
)

print("Train shape:", X_train.shape, "Test shape:", X_test.shape)


Train shape: (34189, 97) Test shape: (14653, 97)


In [6]:
#  Train RandomForest Model
clf = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)
clf.fit(X_train, y_train)


In [7]:
#  Model Evaluation
y_pred = clf.predict(X_test)
print("Classification Report:\n")
print(classification_report(y_test, y_pred, digits=4))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Classification Report:

              precision    recall  f1-score   support

           0     0.8858    0.9307    0.9077     11147
           1     0.7375    0.6187    0.6729      3506

    accuracy                         0.8561     14653
   macro avg     0.8117    0.7747    0.7903     14653
weighted avg     0.8504    0.8561    0.8515     14653

Confusion Matrix:
 [[10375   772]
 [ 1337  2169]]


In [8]:
# Task: Create Audit Log Entry
audit_log = {
    "timestamp": datetime.datetime.now().isoformat(),
    "classification_report": classification_report(y_test, y_pred, output_dict=True),
    "confusion_matrix": confusion_matrix(y_test, y_pred).tolist()
}
print("Audit log created with timestamp:", audit_log["timestamp"])


Audit log created with timestamp: 2025-08-14T08:43:04.189398


In [9]:
#  Bias Detection by Sex (sensitive attribute)

# Compose test results with sensitive attribute
test_results = X_test.copy()

# Ensure numeric type for aggregations
test_results['actual'] = y_test.astype(int).values
test_results['predicted'] = y_pred.astype(int)

# Group by sensitive attribute and compute outcome rates
grouped = test_results.groupby('sex_attr').agg(
    total=('actual', 'count'),
    positive_rate_actual=('actual', lambda x: np.mean(x.astype(int))),
    positive_rate_predicted=('predicted', lambda x: np.mean(x.astype(int)))
)

# Rename index values for clarity
grouped.index = grouped.index.map({0: 'Female', 1: 'Male'})

print("Outcome rates by Sex:\n")
print(grouped)

# Append bias info to audit log
audit_log['bias_by_sex'] = grouped.to_dict()


Outcome rates by Sex:

          total  positive_rate_actual  positive_rate_predicted
sex_attr                                                      
Female     4857              0.113856                 0.084414
Male       9796              0.301450                 0.258371


In [None]:
# Interpret Model with SHAP (explainability)

# Ensure we only use the numeric feature set used for training
# (exclude sensitive columns like 'sex_attr' that were added later)
X_train_features = X_train.drop(columns=['sex_attr'])
X_test_features = X_test.drop(columns=['sex_attr'])

# Make sure all data is float for SHAP compatibility
X_train_features = X_train_features.astype(float)
X_test_features = X_test_features.astype(float)

# Create TreeExplainer
explainer = shap.TreeExplainer(clf, data=X_train_features, model_output="probability")
shap_values = explainer.shap_values(X_test_features)

# Handle SHAP output format differences
if isinstance(shap_values, (list, tuple)):
    shap_vals_for_pos_class = shap_values[1]  # Positive class (>50K)
else:
    shap_vals_for_pos_class = shap_values

# Confirm shapes align
print("SHAP values shape:", getattr(shap_vals_for_pos_class, "shape", None))
print("X_test_features shape:", X_test_features.shape)

# Show SHAP summary plots
shap.summary_plot(shap_vals_for_pos_class, X_test_features, plot_type="bar", show=True)
shap.summary_plot(shap_vals_for_pos_class, X_test_features, show=True)


  5%|=                   | 1528/29306 [04:47<86:57]       

In [10]:
# Task: Generate Compliance Report & Save Audit Artifacts
# Prepare textual compliance note
bias_report_text = grouped.to_string()

compliance_notes = f"""
Model Compliance and Audit Report
--------------------------------
Timestamp: {audit_log['timestamp']}

Bias Detection:
Outcome disparities between sex groups:
{bias_report_text}

Model Performance:
- Classification report available in audit log
- Confusion matrix: {audit_log['confusion_matrix']}

Transparency:
- SHAP explanations generated for the positive class (>50K)
- Top features can be reviewed via SHAP summary plots

Governance & Auditing:
- Audit log contains timestamped metrics, confusion matrix and bias summary
- Artifacts (audit_log.json and compliance_report.txt) saved for review
"""

print(compliance_notes)

# Save audit_log JSON and compliance note text to disk (Colab /drive or local runtime)
with open("audit_log.json", "w") as f:
    json.dump(audit_log, f, indent=2)

with open("compliance_report.txt", "w") as f:
    f.write(compliance_notes)

print("Saved files: audit_log.json, compliance_report.txt")



Model Compliance and Audit Report
--------------------------------
Timestamp: 2025-08-14T08:43:04.189398

Bias Detection:
Outcome disparities between sex groups:
          total  positive_rate_actual  positive_rate_predicted
sex_attr                                                      
Female     4857              0.113856                 0.084414
Male       9796              0.301450                 0.258371

Model Performance:
- Classification report available in audit log
- Confusion matrix: [[10375, 772], [1337, 2169]]

Transparency:
- SHAP explanations generated for the positive class (>50K)
- Top features can be reviewed via SHAP summary plots

Governance & Auditing:
- Audit log contains timestamped metrics, confusion matrix and bias summary
- Artifacts (audit_log.json and compliance_report.txt) saved for review

Saved files: audit_log.json, compliance_report.txt
