In [1]:
import pandas as pd
import numpy as np

np.random.seed(42)

# Synthetic Healthcare Dataset
n = 1000
genders = ["Male", "Female", "Other"]
regions = ["Urban", "Rural"]

data = pd.DataFrame({
    "Patient_ID": range(1, n+1),
    "Age": np.random.randint(18, 90, n),
    "Gender": np.random.choice(genders, n, p=[0.48, 0.48, 0.04]),
    "Region": np.random.choice(regions, n, p=[0.65, 0.35]),
    "BMI": np.round(np.random.normal(27, 5, n), 1),
    "Blood_Pressure": np.random.randint(80, 180, n),
    "Cholesterol": np.random.randint(150, 300, n),
})

# Simulate a diagnosis variable with bias — urban patients are more often diagnosed
data["Heart_Disease"] = np.where(
    (data["Cholesterol"] > 220) & (data["Blood_Pressure"] > 140),
    np.random.choice([1, 0], n, p=[0.7, 0.3]),
    np.random.choice([1, 0], n, p=[0.3, 0.7])
)

# Introduce small bias: slightly higher positive rate for Urban patients
mask_urban = data["Region"] == "Urban"
random_choices_urban = np.random.choice([1, 0], n, p=[0.55, 0.45]) # Generate choices for all rows
data.loc[mask_urban, "Heart_Disease"] = random_choices_urban[mask_urban] # Apply choices only to urban rows


data.head()

Unnamed: 0,Patient_ID,Age,Gender,Region,BMI,Blood_Pressure,Cholesterol,Heart_Disease
0,1,69,Female,Rural,19.9,167,216,0
1,2,32,Male,Urban,28.1,93,220,1
2,3,89,Female,Urban,22.7,113,295,0
3,4,78,Male,Urban,21.8,137,255,1
4,5,38,Male,Urban,24.8,155,156,0


### Part A — Operationalizing Ethics in Development

In [2]:
# Task 1 – Ethics by Design: Bias Detection
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Prepare data
X = pd.get_dummies(data.drop(columns=["Patient_ID", "Heart_Disease"]), drop_first=True)
y = data["Heart_Disease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train baseline model
model = RandomForestClassifier(n_estimators=50, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("Baseline Accuracy:", accuracy_score(y_test, y_pred))

# Bias check by Gender
df_test = X_test.copy()
df_test["y_true"] = y_test
df_test["y_pred"] = y_pred
df_test["Gender"] = data.loc[y_test.index, "Gender"]

bias_gender = df_test.groupby("Gender")["y_pred"].mean()
print("\nPositive prediction rate by Gender:\n", bias_gender)

Baseline Accuracy: 0.62

Positive prediction rate by Gender:
 Gender
Female    0.397163
Male      0.383562
Other     1.000000
Name: y_pred, dtype: float64


In [3]:
# Task 2 – Ethical Test Cases in Development Pipeline
def fairness_check(df, sensitive_attr, threshold=0.1):
    rates = df.groupby(sensitive_attr)["y_pred"].mean()
    disparity = rates.max() - rates.min()
    print(f"{sensitive_attr} Disparity: {disparity:.3f}")
    return disparity <= threshold

# Run check
if not fairness_check(df_test, "Gender"):
    print("❌ Fairness check failed. Mitigation required before deployment.")
else:
    print("✅ Fairness check passed.")

Gender Disparity: 0.616
❌ Fairness check failed. Mitigation required before deployment.


In [4]:
# Task 3 – Ethics-Aware Feature Review Gate
importances = pd.Series(model.feature_importances_, index=X.columns)
importances.sort_values(ascending=False).head(10)

Unnamed: 0,0
Cholesterol,0.250548
Blood_Pressure,0.231488
BMI,0.223515
Age,0.218556
Region_Urban,0.036144
Gender_Male,0.028678
Gender_Other,0.011071


### Part B — Operationalizing Ethics in Deployment

In [5]:
# Task 4 – Logging & Accountability in Predictions
import datetime
prediction_log = []

def predict_and_log(patient_data):
    pred = model.predict(patient_data)[0]
    log_entry = {
        "timestamp": datetime.datetime.now().isoformat(),
        "input": patient_data.to_dict(orient='records')[0],
        "prediction": int(pred)
    }
    prediction_log.append(log_entry)
    return pred

# Simulate one prediction
sample_patient = X_test.iloc[[0]]
predict_and_log(sample_patient)

# Inspect logs
prediction_log[:3]

[{'timestamp': '2025-08-13T09:51:21.951063',
  'input': {'Age': 73,
   'BMI': 35.0,
   'Blood_Pressure': 161,
   'Cholesterol': 205,
   'Gender_Male': False,
   'Gender_Other': False,
   'Region_Urban': True},
  'prediction': 0}]

In [6]:
# Task 5 – Real-Time Ethical Monitoring
# Simulate monitoring loop
latest_preds = y_pred
dp_diff = bias_gender.max() - bias_gender.min()

if dp_diff > 0.1:
    print("⚠️ Alert: Fairness threshold exceeded post-deployment.")
else:
    print("✅ Fairness levels acceptable.")

⚠️ Alert: Fairness threshold exceeded post-deployment.


In [7]:
# Task 6 – Privacy in Deployment
import hashlib
data["Patient_ID"] = data["Patient_ID"].apply(lambda x: hashlib.sha256(str(x).encode()).hexdigest())
data.head()

Unnamed: 0,Patient_ID,Age,Gender,Region,BMI,Blood_Pressure,Cholesterol,Heart_Disease
0,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d...,69,Female,Rural,19.9,167,216,0
1,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f...,32,Male,Urban,28.1,93,220,1
2,4e07408562bedb8b60ce05c1decfe3ad16b72230967de0...,89,Female,Urban,22.7,113,295,0
3,4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328c...,78,Male,Urban,21.8,137,255,1
4,ef2d127de37b942baad06145e54b0c619a1f22327b2ebb...,38,Male,Urban,24.8,155,156,0


In [8]:
# Task 7 – Inclusivity Feedback Loop
# Fake feedback data from underrepresented group
feedback = pd.DataFrame({
    "Gender": ["Other", "Other"],
    "Region": ["Rural", "Rural"],
    "BMI": [25, 28],
    "Blood_Pressure": [130, 150],
    "Cholesterol": [210, 240],
    "Heart_Disease": [1, 1]
})

# Append to dataset for future retraining
data_updated = pd.concat([data, feedback], ignore_index=True)
print("Updated dataset size:", data_updated.shape)

Updated dataset size: (1002, 8)


In [10]:
# === Cell 1 (fixed): Automated Fairness & Feature Review ===
def automated_bias_and_feature_review(model, X_test_model, X_test_labels, y_test, sensitive_attrs, disparity_threshold=0.1):
    """
    Runs fairness checks for multiple sensitive attributes and flags high-importance socio-demographic features.
    """
    import pandas as pd

    # Predictions on model-compatible test data
    y_pred = model.predict(X_test_model)

    # For fairness analysis, attach labels but keep predictions
    df_test = X_test_labels.copy()
    df_test["y_pred"] = y_pred
    df_test["y_true"] = y_test.values

    issues_found = False

    # --- Fairness check ---
    for attr in sensitive_attrs:
        rates = df_test.groupby(attr)["y_pred"].mean()
        disparity = rates.max() - rates.min()
        print(f"{attr} Disparity: {disparity:.3f}")
        if disparity > disparity_threshold:
            print(f"❌ Fairness check failed for {attr}")
            issues_found = True
        else:
            print(f"✅ Fairness check passed for {attr}")

    # --- Feature importance review ---
    importances = pd.Series(model.feature_importances_, index=X_test_model.columns).sort_values(ascending=False)
    socio_features = [f for f in importances.index if "Gender" in f or "Region" in f]
    flagged_features = [f for f in socio_features if importances[f] > 0.05]

    if flagged_features:
        print(f"⚠️ High importance socio-demographic features: {flagged_features}")
        issues_found = True
    else:
        print("✅ No high-importance socio-demographic features detected.")

    return not issues_found


# --- Prepare both datasets ---
# 1. Model-ready features (already one-hot encoded, used for training/prediction)
X_test_model = X_test.copy()

# 2. Label-based dataset for fairness grouping (use original columns from data)
X_test_labels = data.loc[y_test.index, ["Gender", "Region"]].reset_index(drop=True)

# Run automated check
ci_cd_pass = automated_bias_and_feature_review(
    model,
    X_test_model,
    X_test_labels,
    y_test,
    sensitive_attrs=["Gender", "Region"]
)

print("CI/CD Gate:", "✅ Passed" if ci_cd_pass else "❌ Blocked until issues are resolved")


Gender Disparity: 0.616
❌ Fairness check failed for Gender
Region Disparity: 0.394
❌ Fairness check failed for Region
✅ No high-importance socio-demographic features detected.
CI/CD Gate: ❌ Blocked until issues are resolved


In [11]:
# ===  2: Differential Privacy Logging ===
import random

def add_laplace_noise(value, sensitivity=1.0, epsilon=1.0):
    """
    Adds Laplace noise for differential privacy.
    """
    scale = sensitivity / epsilon
    noise = random.uniform(-scale, scale)
    return value + noise

dp_prediction_log = []

def dp_predict_and_log(patient_data):
    pred = model.predict(patient_data)[0]

    # Add noise to numeric fields to protect privacy
    noisy_input = {}
    for k, v in patient_data.to_dict(orient='records')[0].items():
        if isinstance(v, (int, float)):
            noisy_input[k] = round(add_laplace_noise(v, sensitivity=5, epsilon=0.5), 2)
        else:
            noisy_input[k] = v

    log_entry = {
        "timestamp": datetime.datetime.now().isoformat(),
        "input": noisy_input,
        "prediction": int(pred)
    }
    dp_prediction_log.append(log_entry)
    return pred

# Example usage
sample_patient = X_test.iloc[[1]]
dp_predict_and_log(sample_patient)

# Show DP log
dp_prediction_log[:2]


[{'timestamp': '2025-08-13T09:55:13.803819',
  'input': {'Age': 66.67,
   'BMI': 14.08,
   'Blood_Pressure': 164.53,
   'Cholesterol': 151.99,
   'Gender_Male': -3.06,
   'Gender_Other': -6.29,
   'Region_Urban': 8.16},
  'prediction': 0}]