<a href="https://colab.research.google.com/github/appliedcode/mthree-c422/blob/mthree-c422-Likhitha/Production_Ready.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import hashlib
import datetime
import csv
import os
import warnings
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

warnings.filterwarnings("ignore")
np.random.seed(42)

In [2]:
n = 1000
genders = ["Male", "Female", "Other"]
regions = ["Urban", "Rural"]

data = pd.DataFrame({
    "Patient_ID": range(1, n+1),
    "Age": np.random.randint(18, 90, n),
    "Gender": np.random.choice(genders, n, p=[0.48, 0.48, 0.04]),
    "Region": np.random.choice(regions, n, p=[0.65, 0.35]),
    "BMI": np.round(np.random.normal(27, 5, n), 1),
    "Blood_Pressure": np.random.randint(80, 180, n),
    "Cholesterol": np.random.randint(150, 300, n),
})

# baseline label logic
data["Heart_Disease"] = np.where(
    (data["Cholesterol"] > 220) & (data["Blood_Pressure"] > 140),
    np.random.choice([1, 0], n, p=[0.7, 0.3]),
    np.random.choice([1, 0], n, p=[0.3, 0.7])
)

# introduce small bias: slightly higher positive rate for urban patients
mask_urban = data["Region"] == "Urban"
rand_for_all = np.random.choice([1, 0], n, p=[0.55, 0.45])
data.loc[mask_urban, "Heart_Disease"] = rand_for_all[mask_urban]

print("Sample rows:")
print(data.head(), "\n")


Sample rows:
   Patient_ID  Age  Gender Region   BMI  Blood_Pressure  Cholesterol  \
0           1   69  Female  Rural  19.9             167          216   
1           2   32    Male  Urban  28.1              93          220   
2           3   89  Female  Urban  22.7             113          295   
3           4   78    Male  Urban  21.8             137          255   
4           5   38    Male  Urban  24.8             155          156   

   Heart_Disease  
0              0  
1              1  
2              0  
3              1  
4              0   



In [3]:
def pseudonymize_ids(series):
    return series.astype(str).apply(lambda x: hashlib.sha256(x.encode()).hexdigest())

def prepare_features(df, drop_sensitive=False):
    df_proc = df.copy()
    # optionally drop sensitive columns
    sensitive_cols = ["Gender", "Region"]
    if drop_sensitive:
        df_proc = df_proc.drop(columns=sensitive_cols)
    X = pd.get_dummies(df_proc.drop(columns=["Patient_ID","Heart_Disease"]), drop_first=True)
    return X

def train_random_forest(X_train, y_train, n_estimators=100, random_state=42, sample_weight=None):
    clf = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    clf.fit(X_train, y_train, sample_weight)
    return clf

def evaluate_by_group(df_features, y_true, y_pred, original_df, group_col):
    # original_df must align with df_features index
    df_eval = original_df.loc[y_true.index].copy()
    df_eval = df_eval.assign(y_true=y_true.values, y_pred=y_pred)
    rates = df_eval.groupby(group_col)["y_pred"].mean()
    return rates, df_eval

def fairness_check(df_eval, sensitive_attr, threshold=0.1):
    rates = df_eval.groupby(sensitive_attr)["y_pred"].mean()
    disparity = float(rates.max() - rates.min())
    print(f"{sensitive_attr} disparity = {disparity:.4f} (threshold {threshold})")
    return disparity <= threshold, disparity, rates

In [4]:
X = prepare_features(data, drop_sensitive=False)
y = data["Heart_Disease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42, stratify=y)
print("Train/test sizes:", X_train.shape, X_test.shape)

model = train_random_forest(X_train, y_train, n_estimators=100)
y_pred = model.predict(X_test)
print("\nBaseline accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, digits=3))

# Map group columns for evaluation (we need original data rows corresponding to test index)
# Construct df_test_original aligned with X_test.index
df_test_original = data.loc[X_test.index]

Train/test sizes: (700, 7) (300, 7)

Baseline accuracy: 0.5466666666666666
              precision    recall  f1-score   support

           0      0.548     0.563     0.556       151
           1      0.545     0.530     0.537       149

    accuracy                          0.547       300
   macro avg      0.547     0.547     0.546       300
weighted avg      0.547     0.547     0.547       300



In [5]:
print("\n-- Bias detection on baseline model --")
rates_gender, df_eval_gender = evaluate_by_group(X_test, y_test, y_pred, data, "Gender")
print("Positive prediction rate by Gender:\n", rates_gender, "\n")
ok_gender, disp_gender, rates_gender = fairness_check(df_eval_gender, "Gender", threshold=0.1)

rates_region, df_eval_region = evaluate_by_group(X_test, y_test, y_pred, data, "Region")
print("Positive prediction rate by Region:\n", rates_region, "\n")
ok_region, disp_region, rates_region = fairness_check(df_eval_region, "Region", threshold=0.1)

if not ok_gender or not ok_region:
    print("\n❌ Fairness check failed — disparity above threshold. Mitigation required before deployment.")
else:
    print("\n✅ Fairness check passed.")


-- Bias detection on baseline model --
Positive prediction rate by Gender:
 Gender
Female    0.490196
Male      0.455224
Other     0.692308
Name: y_pred, dtype: float64 

Gender disparity = 0.2371 (threshold 0.1)
Positive prediction rate by Region:
 Region
Rural    0.203252
Urban    0.677966
Name: y_pred, dtype: float64 

Region disparity = 0.4747 (threshold 0.1)

❌ Fairness check failed — disparity above threshold. Mitigation required before deployment.


In [6]:
print("\n-- Feature importance (top 10) --")
feat_imp = pd.Series(model.feature_importances_, index=X_train.columns).sort_values(ascending=False)
print(feat_imp.head(10).to_string())

# Flag if socio-demographic features are large contributors
sensitive_influence = 0.0
for col in X_train.columns:
    if col.startswith("Gender_") or col.startswith("Region_"):
        sensitive_influence += feat_imp.get(col, 0.0)
print(f"\nTotal importance attributed to Gender/Region columns: {sensitive_influence:.4f}")
if sensitive_influence > 0.05:
    print("Note: Sensitive attributes contribute non-trivially; consider mitigation (remove/reweight/regularize).")
else:
    print("Sensitive attributes contribution low.")


-- Feature importance (top 10) --
BMI               0.231879
Blood_Pressure    0.229189
Cholesterol       0.225865
Age               0.216455
Region_Urban      0.059267
Gender_Male       0.027622
Gender_Other      0.009722

Total importance attributed to Gender/Region columns: 0.0966
Note: Sensitive attributes contribute non-trivially; consider mitigation (remove/reweight/regularize).


In [7]:
print("\n-- Mitigation A: Retrain without sensitive features (Gender, Region) --")
X_nosens = prepare_features(data, drop_sensitive=True)
Xn_train, Xn_test, yn_train, yn_test = train_test_split(X_nosens, y, test_size=0.30, random_state=42, stratify=y)

model_nosens = train_random_forest(Xn_train, yn_train)
yn_pred = model_nosens.predict(Xn_test)
print("Accuracy (no sensitive features):", accuracy_score(yn_test, yn_pred))

# Evaluate disparity
df_eval_nosens = data.loc[Xn_test.index].assign(y_true=yn_test.values, y_pred=yn_pred)
ok_nosens, disp_nosens, rates_nosens = fairness_check(df_eval_nosens, "Gender", threshold=0.1)
print("Rates without sensitive features (by Gender):\n", rates_nosens, "\n")
if ok_nosens:
    print("Removing sensitive features reduced disparity to acceptable level.")
else:
    print("Disparity still beyond threshold. Try alternative mitigation.")


-- Mitigation A: Retrain without sensitive features (Gender, Region) --
Accuracy (no sensitive features): 0.5033333333333333
Gender disparity = 0.0483 (threshold 0.1)
Rates without sensitive features (by Gender):
 Gender
Female    0.509804
Male      0.492537
Other     0.461538
Name: y_pred, dtype: float64 

Removing sensitive features reduced disparity to acceptable level.


In [8]:
print("\n-- Mitigation B: Re-weighting training samples by group (Gender) --")
# compute group-wise label distribution in training set
train_idx = X_train.index
train_original = data.loc[train_idx]
group_pos_rate = train_original.groupby("Gender")["Heart_Disease"].mean()
group_counts = train_original["Gender"].value_counts()

# create weights to upweight underrepresented group's positives/negatives — simple heuristic
weights = np.ones(len(train_original))
gender_map = {"Male":0, "Female":1, "Other":2}  # not used directly but nice to have

# target: equalize overall positive rates across genders by scaling each group's samples
target_rate = train_original["Heart_Disease"].mean()
for g in train_original["Gender"].unique():
    g_mask = (train_original["Gender"] == g).values
    # scale weight for this group's samples
    # if group's positive rate < target, upweight group's positive samples
    grp_pos_rate = train_original.loc[train_original["Gender"]==g, "Heart_Disease"].mean()
    # avoid division by zero
    scale = target_rate / (grp_pos_rate + 1e-6)
    # cap scale to avoid exploding weights
    scale = min(scale, 3.0)
    weights[g_mask] = weights[g_mask] * scale

# Fit model with sample weights
model_rw = RandomForestClassifier(n_estimators=100, random_state=42)
model_rw.fit(X_train, y_train, sample_weight=weights)
y_rw_pred = model_rw.predict(X_test)
print("Accuracy (re-weighted):", accuracy_score(y_test, y_rw_pred))
# Evaluate disparity after reweighting
rates_gender_rw, df_eval_gender_rw = evaluate_by_group(X_test, y_test, y_rw_pred, data, "Gender")
ok_rw, disp_rw, rates_gender_rw = fairness_check(df_eval_gender_rw, "Gender", threshold=0.1)
print("Rates after re-weighting (by Gender):\n", rates_gender_rw, "\n")
if ok_rw:
    print("Re-weighting reduced disparity.")
else:
    print("Re-weighting did not reduce disparity sufficiently. Consider more advanced fairness algorithms (e.g., adversarial debiasing, constraints).")



-- Mitigation B: Re-weighting training samples by group (Gender) --
Accuracy (re-weighted): 0.5233333333333333
Gender disparity = 0.1751 (threshold 0.1)
Rates after re-weighting (by Gender):
 Gender
Female    0.503268
Male      0.440299
Other     0.615385
Name: y_pred, dtype: float64 

Re-weighting did not reduce disparity sufficiently. Consider more advanced fairness algorithms (e.g., adversarial debiasing, constraints).


In [9]:
print("\n-- Deployment: Logging & Pseudonymization --")
# Prepare logging CSV
LOGFILE = "prediction_log.csv"
if os.path.exists(LOGFILE):
    os.remove(LOGFILE)
with open(LOGFILE, "w", newline='') as f:
    writer = csv.DictWriter(f, fieldnames=["timestamp","patient_pseudo_id","input_features","prediction","model_version"])
    writer.writeheader()

# Pseudonymize Patient_IDs in the main dataset for deployment
data_deploy = data.copy()
data_deploy["Patient_PseudoID"] = pseudonymize_ids(data_deploy["Patient_ID"])

# Logging function
def predict_and_log_deploy(model, X_row, patient_id_pseudo, model_version="v1"):
    pred = int(model.predict(X_row)[0])
    entry = {
        "timestamp": datetime.datetime.now().isoformat(),
        "patient_pseudo_id": patient_id_pseudo,
        "input_features": X_row.to_dict(orient='records')[0],
        "prediction": pred,
        "model_version": model_version
    }
    # append to CSV
    with open(LOGFILE, "a", newline='') as f:
        writer = csv.DictWriter(f, fieldnames=entry.keys())
        writer.writerow(entry)
    return pred

# simulate a deployment prediction on first test sample
sample_idx = X_test.index[0]
X_row = X_test.loc[[sample_idx]]
pseudo = data_deploy.loc[sample_idx, "Patient_PseudoID"]
pred_sim = predict_and_log_deploy(model, X_row, pseudo, model_version="baseline-v1")
print("Simulated deploy prediction logged for pseudo id:", pseudo, "prediction:", pred_sim)
print("Check", LOGFILE, "for logs.\n")


-- Deployment: Logging & Pseudonymization --
Simulated deploy prediction logged for pseudo id: 8a8b2d66735ed03d0841027e42d38806eedd8e5bd5da54270f958a55d509091f prediction: 1
Check prediction_log.csv for logs.



In [10]:
print("-- Monitoring: Simulate post-deployment monitoring --")
# for simplicity, compute latest batch predictions (use y_pred from baseline) and group rates
latest_preds = y_pred  # from baseline earlier
# compute group positive rates used earlier: rates_gender
dp_diff = disp_gender
if dp_diff > 0.1:
    print("⚠️ Alert: Fairness threshold exceeded post-deployment. Disparity:", disp_gender)
else:
    print("✅ Fairness levels acceptable post-deployment. Disparity:", disp_gender)

-- Monitoring: Simulate post-deployment monitoring --
⚠️ Alert: Fairness threshold exceeded post-deployment. Disparity: 0.23708381171067738


In [11]:
print("\n-- Privacy note --")
print("Patient IDs pseudonymized; mapping stored separately if needed under strict access controls.\n")


-- Privacy note --
Patient IDs pseudonymized; mapping stored separately if needed under strict access controls.



In [12]:
print("-- Inclusivity feedback loop: appending targeted examples for 'Other' gender in Rural region --")
feedback = pd.DataFrame({
    "Patient_ID": [1001, 1002],
    "Age": [55, 60],
    "Gender": ["Other", "Other"],
    "Region": ["Rural", "Rural"],
    "BMI": [26.5, 29.2],
    "Blood_Pressure": [150, 142],
    "Cholesterol": [230, 245],
    "Heart_Disease": [1, 1]
})
data_updated = pd.concat([data, feedback], ignore_index=True)
print("Updated dataset size:", data_updated.shape)
# prepare and retrain quickly on small combined data (demonstration only)
X_up = prepare_features(data_updated, drop_sensitive=False)
y_up = data_updated["Heart_Disease"]
Xu_train, Xu_test, yu_train, yu_test = train_test_split(X_up, y_up, test_size=0.3, random_state=42, stratify=y_up)
model_updated = train_random_forest(Xu_train, yu_train)
print("Retrained model on dataset with feedback — ready to re-evaluate fairness in next CI run.\n")


-- Inclusivity feedback loop: appending targeted examples for 'Other' gender in Rural region --
Updated dataset size: (1002, 8)
Retrained model on dataset with feedback — ready to re-evaluate fairness in next CI run.



In [13]:
print("=== FINAL SUMMARY ===")
print("- Baseline accuracy and fairness measured.")
print("- Fairness gate implemented and triggered for Gender/Region in baseline.")
print("- Feature importance shown; sensitive features had small but non-zero contribution.")
print("- Mitigations tried: remove sensitive features and re-weighting; neither is guaranteed—use advanced fairness methods if required.")
print("- Deployment logging, pseudonymization, monitoring, and inclusivity feedback loop demonstrated.")
print("\nSuggested next steps:")
print("1) Integrate automated fairness checks into CI/CD to block models failing thresholds.")
print("2) Evaluate advanced fairness algorithms (in-processing or post-processing) and threshold calibration.")
print("3) Use explainability (SHAP/LIME) to provide per-prediction explanations in deployment.")
print("4) Protect logs with encryption and access control; keep pseudonym mapping in secure vault.")

=== FINAL SUMMARY ===
- Baseline accuracy and fairness measured.
- Fairness gate implemented and triggered for Gender/Region in baseline.
- Feature importance shown; sensitive features had small but non-zero contribution.
- Mitigations tried: remove sensitive features and re-weighting; neither is guaranteed—use advanced fairness methods if required.
- Deployment logging, pseudonymization, monitoring, and inclusivity feedback loop demonstrated.

Suggested next steps:
1) Integrate automated fairness checks into CI/CD to block models failing thresholds.
2) Evaluate advanced fairness algorithms (in-processing or post-processing) and threshold calibration.
3) Use explainability (SHAP/LIME) to provide per-prediction explanations in deployment.
4) Protect logs with encryption and access control; keep pseudonym mapping in secure vault.
