<a href="https://colab.research.google.com/github/appliedcode/mthree-c422/blob/mthree-c422-Likhitha/Production_Ready_Practice_2_md.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import datetime
import hashlib
import hmac
import json

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Seed NumPy's legacy global RNG; every np.random.* call below draws from this
# stream, so the synthetic data depends on the order in which the cells run.
np.random.seed(77)

In [None]:
# Number of synthetic drivers and the category vocabularies for the two
# sensitive attributes.
n = 1500
genders = ["Male", "Female", "Other"]
cities = ["Metro", "Urban", "Town"]

# Synthetic driver table. The dict entries are evaluated top to bottom, so each
# column consumes the seeded global RNG in this exact order. The upper bound of
# np.random.randint is exclusive (e.g. Age is drawn from 21..64).
data = pd.DataFrame({
    "Driver_ID": range(1, n+1),
    "Age": np.random.randint(21, 65, n),
    "Gender": np.random.choice(genders, n, p=[0.6, 0.35, 0.05]),
    "City_Type": np.random.choice(cities, n, p=[0.4, 0.45, 0.15]),
    "Trips_Completed": np.random.randint(50, 2000, n),
    "Avg_Rating": np.round(np.random.uniform(3.0, 5.0, n), 2),
    "Complaints": np.random.randint(0, 20, n),
    "Accidents": np.random.randint(0, 5, n)
})

# Label generation (logic + small bias for Metro).
# Both candidate label arrays are drawn for all n rows; np.where then picks,
# per row, the 70%-positive draw when the driver has more than 5 complaints or
# at least one accident, and the 15%-positive draw otherwise.
data["Flagged_For_Review"] = np.where(
    (data["Complaints"] > 5) | (data["Accidents"] > 0),
    np.random.choice([1, 0], n, p=[0.7, 0.3]),
    np.random.choice([1, 0], n, p=[0.15, 0.85])
)

mask_metro = data["City_Type"] == "Metro"
# Overwrite the labels of Metro rows only with a fresh 40%-positive draw that
# ignores complaints/accidents. The draw is sized to the number of Metro rows
# so it lines up with the masked assignment (avoids a broadcasting bug).
data.loc[mask_metro, "Flagged_For_Review"] = np.random.choice([1, 0], mask_metro.sum(), p=[0.4, 0.6])

print("Dataset created. Sample:")
print(data.head(), "\n")

Dataset created. Sample:
   Driver_ID  Age  Gender City_Type  Trips_Completed  Avg_Rating  Complaints  \
0          1   44   Other     Metro              324        3.88          12   
1          2   52    Male     Urban              323        3.96          17   
2          3   41  Female     Urban              448        3.53           4   
3          4   41  Female      Town             1485        4.83          19   
4          5   64    Male     Urban              501        3.66          15   

   Accidents  Flagged_For_Review  
0          3                   1  
1          3                   0  
2          4                   1  
3          1                   0  
4          2                   0   



In [None]:
# Work on a copy so the raw `data` frame stays untouched.
model_df = data.copy()

# Target vector and the original ids (ids travel through the split alongside X/y).
y = model_df["Flagged_For_Review"]
driver_ids = model_df["Driver_ID"].copy()

# Feature matrix: drop the identifier and the label, then one-hot encode the two
# categorical columns (first level of each is dropped and acts as the baseline).
X = pd.get_dummies(
    model_df.drop(columns=["Driver_ID", "Flagged_For_Review"]),
    columns=["Gender", "City_Type"],
    drop_first=True,
)

# 70/30 split, stratified on the label, with a fixed seed.
split_parts = train_test_split(
    X, y, driver_ids, test_size=0.30, random_state=42, stratify=y
)
X_train, X_test, y_train, y_test, ids_train, ids_test = split_parts

print(f"Train size: {X_train.shape}, Test size: {X_test.shape}\n")

Train size: (1050, 9), Test size: (450, 9)



In [None]:
# Baseline classifier: 100-tree random forest with a fixed seed.
# `fit` returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Hard-label predictions on the held-out rows, then the per-class report.
y_pred = model.predict(X_test)
baseline_report = classification_report(y_test, y_pred, zero_division=0)
print("Baseline evaluation (classification report):")
print(baseline_report)


Baseline evaluation (classification report):
              precision    recall  f1-score   support

           0       0.54      0.54      0.54       202
           1       0.63      0.62      0.63       248

    accuracy                           0.59       450
   macro avg       0.58      0.58      0.58       450
weighted avg       0.59      0.59      0.59       450



In [None]:
# Build the evaluation frame: encoded test features, true labels, predictions,
# and the human-readable sensitive attributes (the encoded columns dropped the
# first level of each category, so the originals are taken from `data`).
#
# X_test still carries the row labels it had in `data`, so they select the
# matching un-encoded rows directly.
orig_test_rows = data.loc[X_test.index]

# Guard against silent misalignment: the ids returned by the split must match
# the ids of the raw rows we just looked up, position by position.
assert (orig_test_rows["Driver_ID"].to_numpy() == ids_test.to_numpy()).all(), \
    "test rows are not aligned with their Driver_IDs"

# reset_index returns a new frame, so X_test itself is left unchanged.
test_df = X_test.reset_index(drop=True)
# Positional assignment (to_numpy) keeps every column in X_test row order.
test_df["y_true"] = y_test.to_numpy()
test_df["y_pred"] = y_pred
test_df["Gender"] = orig_test_rows["Gender"].to_numpy()
test_df["City_Type"] = orig_test_rows["City_Type"].to_numpy()

def positive_rate_by(group_col, df=None):
    """Return the share of positive predictions per group.

    Parameters
    ----------
    group_col : str
        Column to group by (e.g. "Gender" or "City_Type").
    df : pandas.DataFrame, optional
        Frame holding `group_col` and a binary "y_pred" column. When omitted,
        the notebook-level `test_df` is used, as before.

    Returns
    -------
    pandas.Series
        Mean of "y_pred" for each group, indexed by group value.
    """
    frame = test_df if df is None else df
    return frame.groupby(group_col)["y_pred"].mean()

# Report the positive-prediction rate for each sensitive attribute in turn.
for heading, column in (
    ("\nPositive prediction rates by Gender:", "Gender"),
    ("\nPositive prediction rates by City_Type:", "City_Type"),
):
    print(heading)
    print(positive_rate_by(column))

# Fairness disparity function
def fairness_gate(df, sensitive_col, threshold=0.08, pred_col="y_pred"):
    """Check whether positive-prediction rates are close enough across groups.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing `sensitive_col` and `pred_col`.
    sensitive_col : str
        Column whose groups are compared.
    threshold : float, default 0.08
        Largest allowed gap between the highest and lowest group rate.
    pred_col : str, default "y_pred"
        Column of binary predictions whose mean is the group's positive rate.

    Returns
    -------
    tuple[float, bool]
        (disparity, passed) as plain Python values, where disparity is
        max(rate) - min(rate) and passed is ``disparity <= threshold``.
    """
    rates = df.groupby(sensitive_col)[pred_col].mean()
    disparity = float(rates.max() - rates.min())
    # A NaN disparity (e.g. no groups to compare) compares False, so the gate
    # fails closed. bool() converts the numpy comparison result to a real bool.
    return disparity, bool(disparity <= threshold)

# Evaluate the gate once per sensitive attribute, using the same 0.08 tolerance.
disp_gender, pass_gender = fairness_gate(test_df, "Gender", threshold=0.08)
disp_city, pass_city = fairness_gate(test_df, "City_Type", threshold=0.08)

print(f"\nGender disparity = {disp_gender:.3f} -> pass={pass_gender}")
print(f"City_Type disparity = {disp_city:.3f} -> pass={pass_city}")

# Deployment is only cleared when every attribute passes.
if pass_gender and pass_city:
    print("\n✅ Fairness gate passed.\n")
else:
    print("\n⚠️ Fairness gate failed for at least one sensitive attribute. Mitigation required before deployment.\n")



Positive prediction rates by Gender:
Gender
Female    0.537931
Male      0.551237
Other     0.590909
Name: y_pred, dtype: float64

Positive prediction rates by City_Type:
City_Type
Metro    0.212435
Town     0.771930
Urban    0.810000
Name: y_pred, dtype: float64

Gender disparity = 0.053 -> pass=True
City_Type disparity = 0.598 -> pass=False

⚠️ Fairness gate failed for at least one sensitive attribute. Mitigation required before deployment.



In [None]:
# Maximum number of features to list and the upper bound for the rank cutoff.
TOP_K = 10

# Impurity-based importances from the fitted forest, most important first.
feat_imp = pd.Series(model.feature_importances_, index=X_train.columns).sort_values(ascending=False)
print(f"Top {TOP_K} feature importances:")
print(feat_imp.head(TOP_K))

# One-hot columns derived from the sensitive attributes.
sensitive_features = [c for c in X_train.columns if c.startswith(("Gender_", "City_Type_"))]

# Flag socio-demographic features only if they are genuinely high ranked.
# A fixed "top 10" is vacuous when the model has 10 or fewer features (every
# column qualifies), so the cutoff is the top half of the features, capped at
# TOP_K. For 20+ features this is the same top-10 rule as before.
rank_cutoff = min(TOP_K, max(1, len(feat_imp) // 2))
top_k = feat_imp.head(rank_cutoff).index.tolist()
flagged = [f for f in sensitive_features if f in top_k]
if flagged:
    print("\n⚠️ Sensitive features appearing among top importances:", flagged)
    print("Suggested mitigations: drop or mask sensitive features, reweight samples, add fairness-aware training (e.g., re-sampling), or use post-processing adjustments.")
else:
    # NOTE(review): a low rank here does not by itself show the model is fair;
    # importance is split across the one-hot columns of each attribute, so the
    # prediction-rate checks remain the authoritative gate.
    print(f"\nSensitive features not in top {rank_cutoff} importances (good).")

Top 10 feature importances:
Avg_Rating         0.221579
Trips_Completed    0.218803
Age                0.188983
Complaints         0.158824
Accidents          0.089914
City_Type_Urban    0.050205
Gender_Male        0.030451
City_Type_Town     0.029744
Gender_Other       0.011497
dtype: float64

⚠️ Sensitive features appearing among top importances: ['Gender_Male', 'Gender_Other', 'City_Type_Town', 'City_Type_Urban']
Suggested mitigations: drop or mask sensitive features, reweight samples, add fairness-aware training (e.g., re-sampling), or use post-processing adjustments.


In [None]:
# Ethics requirements captured as user stories; the last one carries the
# trailing blank line that separates this section from the next.
ethics_user_stories = (
    "1) As a product manager, I want the model to avoid unfairly flagging drivers based on city type or gender, so flagged lists are equitable.",
    "2) As an ops engineer, I want prediction logs (including model version) to be stored securely so investigators can audit decisions.\n",
)

print("\nEthics user stories:")
for story in ethics_user_stories:
    print(story)


Ethics user stories:
1) As a product manager, I want the model to avoid unfairly flagging drivers based on city type or gender, so flagged lists are equitable.
2) As an ops engineer, I want prediction logs (including model version) to be stored securely so investigators can audit decisions.



In [None]:
# Identifier recorded with every logged prediction.
MODEL_VERSION = "rf_v1.0"
# In-memory audit trail; one dict is appended per logged prediction.
prediction_log = list()

def hash_id(x, key=None):
    """Return a 64-character hex pseudonym for an identifier.

    Parameters
    ----------
    x : Any
        Identifier to pseudonymise; it is converted with ``str(x)`` first.
    key : bytes or str, optional
        Secret key. When given, the digest is HMAC-SHA256 keyed with it.
        When omitted, a plain SHA-256 digest is returned (original behaviour).

    Returns
    -------
    str
        Hex digest of the identifier.

    Notes
    -----
    An unkeyed hash of a small, sequential id can be reversed by hashing every
    candidate id, so pass a secret `key` whenever the log must not be linkable
    back to drivers by someone who only holds the log.
    """
    message = str(x).encode()
    if key is None:
        return hashlib.sha256(message).hexdigest()
    if isinstance(key, str):
        key = key.encode()
    return hmac.new(key, message, hashlib.sha256).hexdigest()

def predict_and_log_single(row_features: pd.Series, driver_id: int):
    """Predict for one driver and append an audit record to `prediction_log`.

    Parameters
    ----------
    row_features : pandas.Series
        Feature values keyed by the model's column names (without Driver_ID).
        Columns missing from the series are filled with 0.
    driver_id : int
        Original numeric id; only its hash is written to the log.

    Returns
    -------
    int
        The predicted class for this row.

    Side effects
    ------------
    Appends a dict with the timestamp, model version, hashed driver id, the
    aligned input values and the prediction to the global `prediction_log`.
    """
    # Put the features in training column order; absent columns become 0.
    aligned = pd.DataFrame([row_features]).reindex(columns=X_train.columns, fill_value=0)
    pred = int(model.predict(aligned)[0])
    log_entry = {
        # Timezone-aware UTC timestamp (datetime.utcnow() is deprecated and
        # returns a naive value); the ISO string carries a "+00:00" offset.
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "model_version": MODEL_VERSION,
        "driver_id_hash": hash_id(driver_id),
        "input": aligned.iloc[0].to_dict(),
        "prediction": pred
    }
    prediction_log.append(log_entry)
    return pred

# Demonstration: run the first held-out row through the logging wrapper.
sample_idx = X_test.index[0]
sample_features = X_test.loc[sample_idx]
sample_driver_id = data.loc[sample_idx, "Driver_ID"]
sample_pred = predict_and_log_single(
    row_features=sample_features,
    driver_id=sample_driver_id,
)

# Show the newest log record as pretty-printed JSON, capped at 800 characters.
latest_entry_json = json.dumps(prediction_log[-1], indent=2)
print("Logged one prediction. Sample log entry:")
print(latest_entry_json[:800], "\n")

Logged one prediction. Sample log entry:
{
  "timestamp": "2025-08-13T08:18:09.225868",
  "model_version": "rf_v1.0",
  "driver_id_hash": "480f5a496560ae4228bb7977ecf29b2c589d7a7aa6b609534566af8cbc229a9e",
  "input": {
    "Age": 40,
    "Trips_Completed": 304,
    "Avg_Rating": 4.09,
    "Complaints": 19,
    "Accidents": 0,
    "Gender_Male": true,
    "Gender_Other": false,
    "City_Type_Town": false,
    "City_Type_Urban": false
  },
  "prediction": 1
} 



In [None]:
def monitor_fairness(log_df_entries, threshold=0.08):
    """Report positive-rate gaps for Gender and City_Type against a threshold.

    Note: `log_df_entries` is accepted but not read. For this simulation the
    rates are computed from the notebook-level `test_df`, which already holds
    the group labels next to the predictions.

    Returns a tuple ``(alerts, gender_gap, city_gap)`` where `alerts` is a list
    of messages, one for each attribute whose gap is above `threshold`.
    """
    def _rate_gap(column):
        # Spread between the highest and lowest per-group mean prediction.
        per_group = test_df.groupby(column)["y_pred"].mean()
        return per_group.max() - per_group.min()

    gender_gap = _rate_gap("Gender")
    city_gap = _rate_gap("City_Type")

    alerts = []
    if gender_gap > threshold:
        alerts.append(f"Gender disparity {gender_gap:.3f} exceeds threshold {threshold}.")
    if city_gap > threshold:
        alerts.append(f"City_Type disparity {city_gap:.3f} exceeds threshold {threshold}.")
    return alerts, gender_gap, city_gap

# Run the monitor and keep both gaps for the summary at the end of the notebook.
alerts, d_g, d_c = monitor_fairness(prediction_log)

if not alerts:
    print("Monitoring: no alerts.")
else:
    print("Monitoring Alerts:")
    for alert_message in alerts:
        print(" -", alert_message)

print(f"\nCurrent (simulated) gender disparity: {d_g:.3f}")
print(f"Current (simulated) city disparity: {d_c:.3f}\n")

Monitoring Alerts:
 - City_Type disparity 0.598 exceeds threshold 0.08.

Current (simulated) gender disparity: 0.053
Current (simulated) city disparity: 0.598



In [None]:
print("Simulating feedback from underrepresented group 'Other' gender in Town city...")

# Create 10 synthetic feedback examples representing underrepresented drivers.
# All rows are fixed to Gender "Other" / City_Type "Town" and labelled 0.
# The numeric columns are drawn from the global NumPy RNG in dict order, so the
# values depend on every np.random call made earlier in the notebook.
# randint upper bounds are exclusive: trips 50..499, complaints 0..4,
# accidents 0..1. Driver_IDs continue after the original n rows.
feedback_examples = pd.DataFrame({
    "Driver_ID": range(n+1, n+11),
    "Age": np.random.randint(21, 65, 10),
    "Gender": ["Other"]*10,
    "City_Type": ["Town"]*10,
    "Trips_Completed": np.random.randint(50, 500, 10),
    "Avg_Rating": np.round(np.random.uniform(3.0, 5.0, 10), 2),
    "Complaints": np.random.randint(0, 5, 10),
    "Accidents": np.random.randint(0, 2, 10),
    "Flagged_For_Review": [0]*10  # assume feedback says they should not be flagged
})

# Stack the feedback rows under the original data with a fresh 0..N-1 index.
data_aug = pd.concat([data, feedback_examples], ignore_index=True)
print("New dataset size (after feedback):", data_aug.shape)

# Labels for the augmented set.
y_aug = data_aug["Flagged_For_Review"]

# Same preparation as the first model: drop id and label, one-hot encode the
# categoricals, then force the exact training column layout (absent columns
# are created and filled with 0).
X_aug = pd.get_dummies(
    data_aug.drop(columns=["Driver_ID", "Flagged_For_Review"]),
    columns=["Gender", "City_Type"],
    drop_first=True,
).reindex(columns=X_train.columns, fill_value=0)

# Quick retrain on everything, for demonstration; `fit` returns the estimator.
model2 = RandomForestClassifier(random_state=0, n_estimators=100).fit(X_aug, y_aug)
print("Retrained model on augmented dataset. (Demonstration only)\n")

Simulating feedback from underrepresented group 'Other' gender in Town city...
New dataset size (after feedback): (1510, 9)
Retrained model on augmented dataset. (Demonstration only)



In [None]:
# Mock incident-response playbook: one entry per phase, in handling order.
incident_plan = dict(
    detection="Automated monitoring flagged unusually high flag-rate for 'Town' drivers.",
    investigation="Collect logs, run SHAP/LIME explanations for representative flagged drivers, review recent data changes and model version.",
    communication="Notify stakeholders, publish transparency report with summary statistics, inform affected driver support team.",
    remediation="Temporarily disable automated flagging for group, retrain model with inclusive examples, deploy corrected model, and monitor metrics.",
)

incident_plan_json = json.dumps(incident_plan, indent=2)
print("Mock Incident Response Plan:")
print(incident_plan_json)

Mock Incident Response Plan:
{
  "detection": "Automated monitoring flagged unusually high flag-rate for 'Town' drivers.",
  "investigation": "Collect logs, run SHAP/LIME explanations for representative flagged drivers, review recent data changes and model version.",
  "communication": "Notify stakeholders, publish transparency report with summary statistics, inform affected driver support team.",
  "remediation": "Temporarily disable automated flagging for group, retrain model with inclusive examples, deploy corrected model, and monitor metrics."
}


In [None]:
# Closing recap: model version, log size and the two monitored disparities.
print("\n--- Summary ---")
for label, value in (
    ("Model version:", MODEL_VERSION),
    ("Number of logged predictions:", len(prediction_log)),
):
    print(label, value)

disparity_pair = (d_g, d_c)
print("Fairness checks: gender disparity=%.3f, city disparity=%.3f" % disparity_pair)
print("\nYou can inspect 'prediction_log' list variable for stored log entries (with hashed driver IDs).")


--- Summary ---
Model version: rf_v1.0
Number of logged predictions: 1
Fairness checks: gender disparity=0.053, city disparity=0.598

You can inspect 'prediction_log' list variable for stored log entries (with hashed driver IDs).
