In [6]:
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import pandas as pd
import numpy as np
from tqdm import tqdm
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)
import sys


sys.path.append('../scripts')
import data
import models
import utils

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
param_space_fair = {
    "min_child_weight" : {"type" : "float", "low" : 0.01, "high" : 100, "log" : True},
    "n_estimators" : {"type" : "int", "low" : 10, "high" : 100},
    "eta" : {"type" : "float", "low" : 0.1, "high" : 0.5},
    "max_depth" : {"type" : "int", "low" : 2, "high" : 10},
    "l2_weight" : {"type" : "float", "low" : 0.001, "high" : 1000, "log" : True},
    "fair_weight" : {"type" : "float", "low" : 0.01, "high" : 10, "log" : True}
}
param_space_xgb = param_space_fair.copy()
param_space_xgb["fair_weight"] = {"type" : "float", "low" : 0, "high" : 0}

In [19]:
def run_trial(
        trial,
        X_train,
        Y_train,
        X_val,
        Y_val,
        model_class,
        param_space,
        random_state = None
):  
    params = {}
    for name, values in param_space.items():
        if values["type"] == "int":
            values_cp = {n: v for n, v in values.items() if n != "type"}
            params[name] = trial.suggest_int(name, **values_cp)
        elif values["type"] == "categorical":
            values_cp = {n: v for n, v in values.items() if n != "type"}
            params[name] = trial.suggest_categorical(name, **values_cp)
        elif values["type"] == "float":  # corrected this line
            values_cp = {n: v for n, v in values.items() if n != "type"}
            params[name] = trial.suggest_float(name, **values_cp)

    params["seed"] = random_state
    model = model_class(**params)
    model.fit(X_train, Y_train)
    return model.score(X_val, Y_val)

In [20]:
results = []
cat_features = [
    "CheckingAccount",
    "CreditHistory",
    "Purpose",
    "SavingsAccount",
    "EmploymentSince",
    "Gender",
    "OtherDebtors",
    "Property",
    "OtherInstallmentPlans",
    "Housing",
    "Job",
    "Telephone",
    "ForeignWorker",
]
num_features = [
    'Age',
    'CreditAmount',
    'Dependents',
    'Duration',
    'ExistingCredits',
    'InstallmentRate',
    'ResidenceSince'
]
col_trans = ColumnTransformer(
    [
    ("numeric", StandardScaler(), num_features),
    ("categorical", OneHotEncoder(drop="if_binary", sparse_output=False, handle_unknown="ignore"), cat_features)
    ], 
    verbose_feature_names_out=False,
)
col_trans.set_output(transform="pandas")


for i in tqdm(range(10)):
    # Load and prepare data
    X_train, Y_train, X_val, Y_val, X_test, Y_test = data.get_fold("german2", i, 0)
    
    num_features = X_train.columns.difference(cat_features).tolist()
    # Define sensitive attribute from gender and age
    A_train = X_train.Gender + "_" + (X_train.Age > 50).astype(str)
    A_val = X_val.Gender + "_" + (X_val.Age > 50).astype(str)
    A_test = X_test.Gender + "_" + (X_test.Age > 50).astype(str)
    # create dict of sensitive attribute names and values
    sensitive_map = dict([
        (attr, i)
        for i, attr in enumerate(A_train.unique())
    ])
    A_train = A_train.map(sensitive_map)
    A_val = A_val.map(sensitive_map)
    A_test = A_test.map(sensitive_map)

    preprocess = Pipeline([
      ("preprocess", col_trans)
    ])
    preprocess.fit(X_train)
    X_train_preprocessed = preprocess.transform(X_train)
    X_val_preprocessed = preprocess.transform(X_val)
    X_test_preprocessed = preprocess.transform(X_test)
    X_train_preprocessed.insert(0, "Sensitive", A_train)
    X_val_preprocessed.insert(0, "Sensitive", A_val)
    X_test_preprocessed.insert(0, "Sensitive", A_test)

    study = optuna.create_study(direction="maximize")
    objective = lambda trial : run_trial(trial, X_train_preprocessed, Y_train, X_val_preprocessed, Y_val, models.XtremeFair, param_space_fair)
    study.optimize(objective, n_trials=100)

    model = models.XtremeFair(**study.best_params)
    model.fit(X_train_preprocessed, Y_train)
    roc_fair = model.score(X_test_preprocessed, Y_test)
    loss_fair = utils.equalized_loss_score(Y_test, model.predict_proba(X_test_preprocessed)[:, 1], A_test)
    eop_fair = utils.equal_opportunity_score(Y_test, model.predict(X_test_preprocessed), A_test)


    study = optuna.create_study(direction="maximize")
    objective = lambda trial : run_trial(trial, X_train_preprocessed, Y_train, X_val_preprocessed, Y_val, models.XtremeFair, param_space_xgb)
    study.optimize(objective, n_trials=100)

    model = models.XtremeFair(**study.best_params)
    model.fit(X_train_preprocessed, Y_train)
    roc_xgb = model.score(X_test_preprocessed, Y_test)
    loss_xgb = utils.equalized_loss_score(Y_test, model.predict_proba(X_test_preprocessed)[:, 1], A_test)
    eop_xgb = utils.equal_opportunity_score(Y_test, model.predict(X_test_preprocessed), A_test)
    results.append([i, roc_fair, loss_fair, eop_fair, roc_xgb, loss_xgb, eop_xgb])

results = pd.DataFrame(results, columns=["Fold", "ROC Fair", "Loss Fair", "EOP Fair", "ROC XGB", "Loss XGB", "EOP XGB"])

  0%|          | 0/10 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 10/10 [10:00<00:00, 60.10s/it]


In [24]:
results

Unnamed: 0,Fold,ROC Fair,Loss Fair,EOP Fair,ROC XGB,Loss XGB,EOP XGB
0,0,0.66,0.217202,0.315789,0.71,0.966164,0.122807
1,1,0.77,0.952611,0.131579,0.74,0.542583,0.210526
2,2,0.8,0.446129,0.058824,0.75,0.551169,0.235294
3,3,0.77,0.18466,0.142857,0.71,0.231417,0.285714
4,4,0.8,0.755911,0.145833,0.75,0.52161,0.145833
5,5,0.78,0.292255,0.25,0.8,0.475488,0.132353
6,6,0.76,0.166205,0.083333,0.68,0.325211,0.25
7,7,0.72,0.309327,0.179592,0.74,0.3657,0.179592
8,8,0.75,0.347658,0.5,0.74,0.206123,0.230769
9,9,0.8,0.333854,0.142857,0.78,0.400378,0.142857


In [23]:
results.mean(), results.std()

(Fold         4.500000
 ROC Fair     0.761000
 Loss Fair    0.400581
 EOP Fair     0.195066
 ROC XGB      0.740000
 Loss XGB     0.458584
 EOP XGB      0.193575
 dtype: float64,
 Fold         3.027650
 ROC Fair     0.043576
 Loss Fair    0.257243
 EOP Fair     0.130711
 ROC XGB      0.034641
 Loss XGB     0.216710
 EOP XGB      0.056655
 dtype: float64)