In [51]:
# Data Processing
import pandas as pd

# Modelling
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from aif360.algorithms.preprocessing import Reweighing
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric


In [10]:
%pip install aif360[inFairness]

Collecting skorch (from aif360[inFairness])
  Downloading skorch-1.1.0-py3-none-any.whl.metadata (11 kB)
Collecting inFairness>=0.2.2 (from aif360[inFairness])
  Downloading inFairness-0.2.3-py3-none-any.whl.metadata (8.1 kB)
Collecting POT>=0.8.0 (from inFairness>=0.2.2->aif360[inFairness])
  Downloading pot-0.9.5.tar.gz (440 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hINFO: pip is looking at multiple versions of infairness to determine which version is compatible with other requirements. This could take a while.
Collecting inFairness>=0.2.2 (from aif360[inFairness])
  Downloading inFairness-0.2.2-py3-none-any.whl.metadata (7.9 kB)
Collecting aif360[inFairness]
  Using cached aif360-0.6.1-py3-none-any.whl.metadata (5.0 kB)
  Downloading aif360-0.6.0-py3-none-any.whl.metadata (4.7 kB)
  Downloading aif360-0.5.0-py3-none-any.whl.metadata (3.8 kB)
Downloading

In [62]:

# Load the dataset
df = pd.read_csv("compas-scores-two-years.csv", index_col=0)
y = y = df['two_year_recid']
keep_cols = [
    'age',             # defendant age
    'sex',             # male/female
    'race',            # categorical
    'priors_count',    # number of prior offenses
    'c_charge_degree'  # felony vs misdemeanor
]

X_safe = df[keep_cols].copy()

# One‐hot encode the categoricals
X= pd.get_dummies(X_safe, drop_first=True)



In [73]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

train_df = pd.concat([
    X_train.reset_index(drop=True), 
    y_train.rename("two_year_recid").reset_index(drop=True),
    df.loc[X_train.index, "race"].rename("race").reset_index(drop=True)
], axis=1)

# after you build train_df …
# Handle missing values before mapping and converting to integer
train_df['race_code'] = (
    train_df['race']
      .map({'African-American': 0, 'Caucasian': 1})
      .fillna(-1)  # Replace NaN values with a default value (e.g., -1)
      .astype(int)
)
# now drop the original string column
train_df = train_df.drop(columns=['race'])


dataset_orig_train = BinaryLabelDataset(
    df=train_df,
    label_names=['two_year_recid'],
    protected_attribute_names=['race_code'],    # numeric now
    favorable_label=1,
    unfavorable_label=0
)

privileged_groups   = [{"race_code": "Caucasian"}]
unprivileged_groups = [{"race_code": "African-American"}]

rw = Reweighing(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups
)
rw.fit(dataset_orig_train)
dataset_transf_train = rw.transform(dataset_orig_train)

X_tr = dataset_transf_train.features
y_tr = dataset_transf_train.labels.ravel().astype(int)
w_tr = dataset_transf_train.instance_weights

from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_tr, y_tr, sample_weight=w_tr)

# Align the features of X_test with X_tr
X_test_aligned = X_test.reindex(columns=dataset_transf_train.feature_names, fill_value=0)

# Make predictions
y_pred = clf.predict(X_test_aligned)
from sklearn.metrics import accuracy_score, precision_score, recall_score
print("Post-Reweighing Accuracy:",  accuracy_score(y_test,  y_pred))
print("Post-Reweighing Precision:", precision_score(y_test, y_pred))
print("Post-Reweighing Recall:",    recall_score(y_test,    y_pred))


  self.w_p_fav = n_fav*n_p / (n*n_p_fav)
  self.w_p_unfav = n_unfav*n_p / (n*n_p_unfav)
  self.w_up_fav = n_fav*n_up / (n*n_up_fav)
  self.w_up_unfav = n_unfav*n_up / (n*n_up_unfav)


Post-Reweighing Accuracy: 0.8345626975763962
Post-Reweighing Precision: 0.5393258426966292
Post-Reweighing Recall: 0.2926829268292683




In [74]:
# rebuild your results DataFrame as before:
test_df = X_test.copy()
test_df['race_code'] = df.loc[X_test.index,'race'].map({'African-American':0,'Caucasian':1})
y_pred = clf.predict(test_df)

results = pd.DataFrame({
    'true': y_test,
    'pred': y_pred,
    'race_code': test_df['race_code']
})

# compute FPR/FNR by race_code
def group_stats(g):
    tp = ((g.true==1)&(g.pred==1)).sum()
    tn = ((g.true==0)&(g.pred==0)).sum()
    fp = ((g.true==0)&(g.pred==1)).sum()
    fn = ((g.true==1)&(g.pred==0)).sum()
    return {
        'FPR': fp/(fp+tn) if (fp+tn)>0 else 0,
        'FNR': fn/(fn+tp) if (fn+tp)>0 else 0
    }

print("Post-RW FPR/FNR by Race:")
print(results.groupby('race_code').apply(lambda g: pd.Series(group_stats(g))))




Post-RW FPR/FNR by Race:
                FPR       FNR
race_code                    
0.0        0.082192  0.659341
1.0        0.053571  0.787234


  print(results.groupby('race_code').apply(lambda g: pd.Series(group_stats(g))))
