In [8]:
!pip install kagglehub pandas scikit-learn fairlearn aif360 --quiet

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from datetime import datetime

# Load dataset
df = pd.read_csv(r"C:\Users\Arhamsoft\Desktop\Talha Talib Thesis\compas-scores-raw.csv")
print("Original shape:", df.shape)

# ✅ Filtering
df = df[
    (df['IsCompleted'] == 1) &
    (df['ScoreText'].isin(['Low', 'Medium', 'High']))
]

# 🎯 Target variable
df['two_year_recid'] = df['ScoreText'].map({'Low': 0, 'Medium': 1, 'High': 1})

# ✅ Filter to African-American and Caucasian only
df = df[df['Ethnic_Code_Text'].isin(['African-American', 'Caucasian'])]
df['race_binary'] = df['Ethnic_Code_Text'].map({'Caucasian': 0, 'African-American': 1})
df['sex_binary'] = df['Sex_Code_Text'].map({'Male': 1, 'Female': 0})

# 🎂 Derive age
df['DateOfBirth'] = pd.to_datetime(df['DateOfBirth'], errors='coerce')
df['Screening_Date'] = pd.to_datetime(df['Screening_Date'], errors='coerce')
df['age'] = (df['Screening_Date'] - df['DateOfBirth']).dt.days // 365

# Drop rows with missing values in key fields
df = df.dropna(subset=['age', 'LegalStatus', 'CustodyStatus', 'RecSupervisionLevel'])

# One-hot encode categorical variables
df = pd.get_dummies(df, columns=['LegalStatus', 'CustodyStatus', 'RecSupervisionLevel'], drop_first=True)

# Select features
features = ['sex_binary', 'age'] + [col for col in df.columns if col.startswith('LegalStatus_') or col.startswith('CustodyStatus_') or col.startswith('RecSupervisionLevel_')]
X = df[features]
y = df['two_year_recid']

# Normalize features
scaler = MinMaxScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)

# Train Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict
y_pred_rf = rf_model.predict(X_test)

# Evaluate
acc = accuracy_score(y_test, y_pred_rf)
prec = precision_score(y_test, y_pred_rf)
rec = recall_score(y_test, y_pred_rf)
f1 = f1_score(y_test, y_pred_rf)

print("Random Forest Performance on COMPAS:")
print(f"Accuracy : {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall   : {rec:.4f}")
print(f"F1 Score : {f1:.4f}")


Original shape: (60843, 28)


  df['DateOfBirth'] = pd.to_datetime(df['DateOfBirth'], errors='coerce')
  df['Screening_Date'] = pd.to_datetime(df['Screening_Date'], errors='coerce')


Random Forest Performance on COMPAS:
Accuracy : 0.7929
Precision: 0.6849
Recall   : 0.7477
F1 Score : 0.7150


In [12]:
from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing import Reweighing
from fairlearn.metrics import demographic_parity_difference, equalized_odds_difference, selection_rate, MetricFrame

# Define protected attribute
protected = df['race_binary']

# Align protected attribute using label-based indexing
protected_train = protected.loc[y_train.index].reset_index(drop=True)
protected_test = protected.loc[y_test.index].reset_index(drop=True)

# Reset index
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

# Combine into DataFrames
Xy_train = X_train.copy()
Xy_train['target'] = y_train
Xy_train['race'] = protected_train

Xy_test = X_test.copy()
Xy_test['target'] = y_test
Xy_test['race'] = protected_test

# Convert to BinaryLabelDataset
train_bld = BinaryLabelDataset(df=Xy_train, label_names=['target'], protected_attribute_names=['race'], favorable_label=0, unfavorable_label=1)
test_bld = BinaryLabelDataset(df=Xy_test, label_names=['target'], protected_attribute_names=['race'], favorable_label=0, unfavorable_label=1)

# Apply Reweighing
rw = Reweighing(unprivileged_groups=[{'race': 0}], privileged_groups=[{'race': 1}])
rw.fit(train_bld)
train_rw = rw.transform(train_bld)

# Train Random Forest with reweighing
rf_rw = RandomForestClassifier(n_estimators=100, random_state=42)
rf_rw.fit(X_train, y_train, sample_weight=train_rw.instance_weights)

# Predict
y_pred_rw = rf_rw.predict(X_test)

# Evaluate
acc = accuracy_score(y_test, y_pred_rw)
prec = precision_score(y_test, y_pred_rw)
rec = recall_score(y_test, y_pred_rw)
f1 = f1_score(y_test, y_pred_rw)

print("Reweighed Random Forest Performance:")
print(f"Accuracy : {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall   : {rec:.4f}")
print(f"F1 Score : {f1:.4f}")

# Fairness Metrics
fair_metrics = MetricFrame(
    metrics={
        "accuracy": accuracy_score,
        "precision": precision_score,
        "recall": recall_score,
        "f1": f1_score,
        "selection_rate": selection_rate
    },
    y_true=y_test,
    y_pred=y_pred_rw,
    sensitive_features=protected_test
)

spd = demographic_parity_difference(y_test, y_pred_rw, sensitive_features=protected_test)
eod = equalized_odds_difference(y_test, y_pred_rw, sensitive_features=protected_test)
di_ratio = fair_metrics.by_group['selection_rate'].max() / fair_metrics.by_group['selection_rate'].min()

print("\nFairness Metrics (Reweighing):")
print(f"Statistical Parity Difference : {spd:.4f}")
print(f"Equal Opportunity Difference : {eod:.4f}")
print(f"Disparate Impact Ratio       : {di_ratio:.4f}")

Reweighed Random Forest Performance:
Accuracy : 0.7904
Precision: 0.6881
Recall   : 0.7257
F1 Score : 0.7064

Fairness Metrics (Reweighing):
Statistical Parity Difference : 0.2502
Equal Opportunity Difference : 0.1760
Disparate Impact Ratio       : 2.0942


In [15]:
from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.postprocessing import EqOddsPostprocessing
from sklearn.calibration import CalibratedClassifierCV
from fairlearn.metrics import MetricFrame, demographic_parity_difference, equalized_odds_difference, selection_rate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Split X, y, and protected attribute together to preserve alignment
protected = df['race_binary']
X_train, X_test, y_train, y_test, prot_train, prot_test = train_test_split(
    X_scaled, y, protected, test_size=0.3, random_state=42, stratify=y
)

# Reset indices
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)
prot_train = prot_train.reset_index(drop=True)
prot_test = prot_test.reset_index(drop=True)

# Wrap test data for aif360
test_df = X_test.copy()
test_df['two_year_recid'] = y_test
test_df['race'] = prot_test

# Predict probabilities using calibrated classifier
calibrated_rf = CalibratedClassifierCV(estimator=rf_model, method='sigmoid', cv='prefit')
calibrated_rf.fit(X_train, y_train)
y_probs = calibrated_rf.predict_proba(X_test)[:, 1]

# Create BinaryLabelDataset with scores
test_bld = BinaryLabelDataset(
    df=test_df,
    label_names=['two_year_recid'],
    protected_attribute_names=['race'],
    favorable_label=0,
    unfavorable_label=1
)
test_bld_pred = test_bld.copy()
test_bld_pred.scores = y_probs.reshape(-1, 1)

# Apply Equalized Odds Postprocessing
eopp = EqOddsPostprocessing(unprivileged_groups=[{'race': 0}], privileged_groups=[{'race': 1}])
eopp = eopp.fit(test_bld, test_bld_pred)
test_eq = eopp.predict(test_bld_pred)
y_pred_eq = test_eq.labels.ravel()

# Evaluate performance
acc = accuracy_score(y_test, y_pred_eq)
prec = precision_score(y_test, y_pred_eq)
rec = recall_score(y_test, y_pred_eq)
f1 = f1_score(y_test, y_pred_eq)

print("Equalized Odds Postprocessing Random Forest Performance:")
print(f"Accuracy : {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall   : {rec:.4f}")
print(f"F1 Score : {f1:.4f}")

# Fairness metrics
fair_metrics = MetricFrame(
    metrics={"accuracy": accuracy_score, "precision": precision_score, "recall": recall_score,
             "f1": f1_score, "selection_rate": selection_rate},
    y_true=y_test,
    y_pred=y_pred_eq,
    sensitive_features=prot_test
)
spd = demographic_parity_difference(y_test, y_pred_eq, sensitive_features=prot_test)
eod = equalized_odds_difference(y_test, y_pred_eq, sensitive_features=prot_test)
di_ratio = fair_metrics.by_group['selection_rate'].max() / fair_metrics.by_group['selection_rate'].min()

print("\nFairness Metrics (Equalized Odds Postprocessing):")
print(f"Statistical Parity Difference : {spd:.4f}")
print(f"Equal Opportunity Difference : {eod:.4f}")
print(f"Disparate Impact Ratio       : {di_ratio:.4f}")




Equalized Odds Postprocessing Random Forest Performance:
Accuracy : 1.0000
Precision: 1.0000
Recall   : 1.0000
F1 Score : 1.0000

Fairness Metrics (Equalized Odds Postprocessing):
Statistical Parity Difference : 0.1784
Equal Opportunity Difference : 0.0000
Disparate Impact Ratio       : 1.7159
