In [1]:
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import pandas as pd
import numpy as np
from tqdm import tqdm
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)
import sys


sys.path.append('../scripts')
import data
import models
import utils

%load_ext autoreload
%autoreload 2

In [23]:
# Hyperparameter search space for the fairness-aware model. Each entry maps a
# hyperparameter name to the keyword arguments of the matching Optuna
# ``trial.suggest_*`` call; the "type" key selects which suggest method is used.
param_space_fair = dict(
    min_child_weight=dict(type="float", low=0.01, high=100, log=True),
    n_estimators=dict(type="int", low=10, high=100),
    eta=dict(type="float", low=0.1, high=0.5),
    max_depth=dict(type="int", low=2, high=10),
    l2_weight=dict(type="float", low=0.001, high=1000, log=True),
    fair_weight=dict(type="float", low=0.01, high=10, log=True),
)

# Baseline search space: identical ranges, except that the fairness weight is
# pinned to zero (low == high == 0). The other entries are shared with
# ``param_space_fair`` (shallow copy); only the "fair_weight" key is replaced.
param_space_xgb = {
    **param_space_fair,
    "fair_weight": dict(type="float", low=0, high=0),
}

In [11]:
def run_trial(
        trial,
        X_train,
        Y_train,
        A_train,
        X_val,
        Y_val,
        model_class,
        param_space,
        random_state = None
):
    """Optuna objective: sample hyperparameters, fit a model, score it on validation data.

    Parameters
    ----------
    trial
        Object exposing ``suggest_int``, ``suggest_float`` and
        ``suggest_categorical`` (an Optuna trial).
    X_train, Y_train, A_train
        Training features, targets and sensitive-group labels; passed
        positionally to ``model.fit``.
    X_val, Y_val
        Validation features and targets; passed to ``model.score``.
    model_class
        Callable that builds the model from the sampled hyperparameters given
        as keyword arguments, plus a ``seed`` keyword.
    param_space
        Mapping ``name -> spec``. ``spec["type"]`` is one of ``"int"``,
        ``"float"`` or ``"categorical"``; every other key of the spec is
        forwarded as a keyword argument to the matching ``trial.suggest_*``
        call. The mapping is not modified.
    random_state
        Value forwarded to the model as ``seed``.

    Returns
    -------
    Whatever ``model.score(X_val, Y_val)`` returns.

    Raises
    ------
    ValueError
        If a spec declares a ``"type"`` other than the three supported ones.
        Such entries used to be skipped silently, which removed the
        hyperparameter from the search without any warning.
    """
    suggest_method_by_type = {
        "int": "suggest_int",
        "float": "suggest_float",
        "categorical": "suggest_categorical",
    }

    params = {}
    for name, spec in param_space.items():
        kind = spec["type"]
        if kind not in suggest_method_by_type:
            raise ValueError(
                f"Unsupported parameter type {kind!r} for {name!r}; "
                f"expected one of {sorted(suggest_method_by_type)}"
            )
        # Everything except the "type" tag is a keyword argument of suggest_*.
        suggest_kwargs = {key: value for key, value in spec.items() if key != "type"}
        suggest = getattr(trial, suggest_method_by_type[kind])
        params[name] = suggest(name, **suggest_kwargs)

    params["seed"] = random_state
    model = model_class(**params)
    model.fit(X_train, Y_train, A_train)
    return model.score(X_val, Y_val)

In [25]:
# Columns of the credit data that are one-hot encoded.
cat_features = [
    "CheckingAccount",
    "CreditHistory",
    "Purpose",
    "SavingsAccount",
    "EmploymentSince",
    "Gender",
    "OtherDebtors",
    "Property",
    "OtherInstallmentPlans",
    "Housing",
    "Job",
    "Telephone",
    "ForeignWorker",
]
# Columns that are standardised. The transformer below is built from exactly
# this list, so it must not be rebound later (the old in-loop rebinding had no
# effect on ``col_trans`` and has been removed).
num_features = [
    'Age',
    'CreditAmount',
    'Dependents',
    'Duration',
    'ExistingCredits',
    'InstallmentRate',
    'ResidenceSince'
]
col_trans = ColumnTransformer(
    [
    ("numeric", StandardScaler(), num_features),
    ("categorical", OneHotEncoder(drop="if_binary", sparse_output=False, handle_unknown="ignore"), cat_features)
    ],
    verbose_feature_names_out=False,
)
col_trans.set_output(transform="pandas")

DATASET = "german2"
N_FOLDS = 10          # number of folds requested from data.get_fold
N_TRIALS = 100        # Optuna trials per study
AGE_THRESHOLD = 50    # rows with Age above this form the "older" sensitive group


def sensitive_group(X):
    """Return one string label per row: ``<Gender>_<Age > AGE_THRESHOLD>``."""
    return X.Gender + "_" + (X.Age > AGE_THRESHOLD).astype(str)


def tune_and_fit(param_space, X_tr, Y_tr, A_tr, X_va, Y_va, fit_with_groups, n_trials=N_TRIALS):
    """Tune ``models.XtremeFair`` on the validation split and refit the best configuration.

    The Optuna study maximises the value returned by ``run_trial``. The model
    is then rebuilt from ``study.best_params`` and fitted on the training
    split, passing the sensitive groups ``A_tr`` only when ``fit_with_groups``
    is true. Returns ``(study, fitted_model)``.
    """
    study = optuna.create_study(direction="maximize")
    study.optimize(
        lambda trial: run_trial(trial, X_tr, Y_tr, A_tr, X_va, Y_va, models.XtremeFair, param_space),
        n_trials=n_trials,
    )
    model = models.XtremeFair(**study.best_params)
    if fit_with_groups:
        model.fit(X_tr, Y_tr, A_tr)
    else:
        model.fit(X_tr, Y_tr)
    return study, model


def evaluate_on_test(model, X_te, Y_te, A_te):
    """Return ``(model.score, equalized_loss_score, equal_opportunity_score)`` on the test split."""
    score = model.score(X_te, Y_te)
    loss_score = utils.equalized_loss_score(Y_te, model.predict_proba(X_te)[:, 1], A_te)
    eop_score = utils.equal_opportunity_score(Y_te, model.predict(X_te), A_te)
    return score, loss_score, eop_score


fold_rows = []
for fold in tqdm(range(N_FOLDS)):
    # Load and prepare data
    # NOTE(review): the meaning of the trailing 0 is defined by data.get_fold — confirm.
    X_train, Y_train, X_val, Y_val, X_test, Y_test = data.get_fold(DATASET, fold, 0)

    # Define sensitive attribute from gender and age
    A_train = sensitive_group(X_train)
    A_val = sensitive_group(X_val)
    A_test = sensitive_group(X_test)
    # Integer-code the groups. Training groups come first, so their codes are
    # the same as when the map was built from the training fold alone; a group
    # that only occurs in validation/test now gets the next free code instead
    # of being turned into NaN by ``Series.map``.
    sensitive_map = {
        label: code
        for code, label in enumerate(pd.concat([A_train, A_val, A_test]).unique())
    }
    A_train = A_train.map(sensitive_map)
    A_val = A_val.map(sensitive_map)
    A_test = A_test.map(sensitive_map)

    # Fit the preprocessing on the training split only, then apply it everywhere.
    preprocess = Pipeline([
      ("preprocess", col_trans)
    ])
    preprocess.fit(X_train)
    X_train_preprocessed = preprocess.transform(X_train)
    X_val_preprocessed = preprocess.transform(X_val)
    X_test_preprocessed = preprocess.transform(X_test)

    # Fairness-aware model: fair_weight is searched and the groups are used in the final fit.
    study, model = tune_and_fit(
        param_space_fair,
        X_train_preprocessed, Y_train, A_train,
        X_val_preprocessed, Y_val,
        fit_with_groups=True,
    )
    print(study.best_params)
    roc_fair, loss_fair, eop_fair = evaluate_on_test(model, X_test_preprocessed, Y_test, A_test)

    # Baseline: same model class with fair_weight pinned to 0 by param_space_xgb;
    # the final fit is done without the sensitive groups.
    study, model = tune_and_fit(
        param_space_xgb,
        X_train_preprocessed, Y_train, A_train,
        X_val_preprocessed, Y_val,
        fit_with_groups=False,
    )
    roc_xgb, loss_xgb, eop_xgb = evaluate_on_test(model, X_test_preprocessed, Y_test, A_test)

    fold_rows.append([fold, roc_fair, loss_fair, eop_fair, roc_xgb, loss_xgb, eop_xgb])

results = pd.DataFrame(fold_rows, columns=["Fold", "ROC Fair", "Loss Fair", "EOP Fair", "ROC XGB", "Loss XGB", "EOP XGB"])

  0%|          | 0/10 [00:00<?, ?it/s]

{'min_child_weight': 0.014823084306325065, 'n_estimators': 84, 'eta': 0.3115799331349146, 'max_depth': 10, 'l2_weight': 0.0016077409311701905, 'fair_weight': 0.06193002739744062}


 10%|█         | 1/10 [00:51<07:44, 51.65s/it]

{'min_child_weight': 0.01923484645141687, 'n_estimators': 32, 'eta': 0.35080556040733, 'max_depth': 8, 'l2_weight': 240.93481057257003, 'fair_weight': 0.11083869922125109}


 20%|██        | 2/10 [01:23<05:21, 40.20s/it]

{'min_child_weight': 0.8243609961356084, 'n_estimators': 46, 'eta': 0.4201642155551818, 'max_depth': 10, 'l2_weight': 137.39977464825606, 'fair_weight': 1.3783340603874408}


 30%|███       | 3/10 [01:52<04:02, 34.71s/it]

{'min_child_weight': 0.1132862182539291, 'n_estimators': 63, 'eta': 0.24060245744204573, 'max_depth': 2, 'l2_weight': 1.7319367521309919, 'fair_weight': 0.11458373820856957}


 40%|████      | 4/10 [02:17<03:05, 30.94s/it]

{'min_child_weight': 2.3129626469943227, 'n_estimators': 52, 'eta': 0.19827032599848257, 'max_depth': 7, 'l2_weight': 0.003456879838933227, 'fair_weight': 0.48497376255171826}


 50%|█████     | 5/10 [02:44<02:27, 29.49s/it]

{'min_child_weight': 0.01121649855969746, 'n_estimators': 92, 'eta': 0.17910332970752632, 'max_depth': 6, 'l2_weight': 54.13933905478327, 'fair_weight': 0.8943145128364184}


 60%|██████    | 6/10 [03:22<02:09, 32.46s/it]

{'min_child_weight': 1.9972757218021935, 'n_estimators': 30, 'eta': 0.22330534870232194, 'max_depth': 5, 'l2_weight': 583.1252471625709, 'fair_weight': 0.5600514232979784}


 70%|███████   | 7/10 [03:45<01:28, 29.43s/it]

{'min_child_weight': 0.11891149158981625, 'n_estimators': 31, 'eta': 0.10012818975944325, 'max_depth': 7, 'l2_weight': 229.7181515353769, 'fair_weight': 3.7073852896972745}


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 80%|████████  | 8/10 [04:10<00:55, 27.92s/it]

{'min_child_weight': 0.39201636511804194, 'n_estimators': 81, 'eta': 0.19169280694125002, 'max_depth': 6, 'l2_weight': 16.188491965847252, 'fair_weight': 0.025463192879419727}


 90%|█████████ | 9/10 [04:49<00:31, 31.39s/it]

{'min_child_weight': 3.560840116729864, 'n_estimators': 90, 'eta': 0.35133091540745387, 'max_depth': 10, 'l2_weight': 358.8032350865685, 'fair_weight': 0.018531350639609392}


100%|██████████| 10/10 [05:21<00:00, 32.20s/it]


In [None]:
# Display the per-fold metrics table (one row per fold, one column per metric).
results

Unnamed: 0,Fold,ROC Fair,Loss Fair,EOP Fair
0,0,0.73,0.741306,0.315789
1,1,0.71,1.158094,0.184211
2,2,0.82,0.393015,0.12
3,3,0.68,0.134085,0.153846
4,4,0.72,1.065163,0.125
5,5,0.68,0.511463,0.178571
6,6,0.8,0.247018,0.125
7,7,0.68,0.758559,0.263158
8,8,0.57,0.56285,0.384615
9,9,0.75,0.231534,0.190476


In [26]:
# Summarise the folds: per-column mean and standard deviation of the metrics
# table, shown together as a (means, stds) tuple. The "Fold" index column is
# aggregated as well, so its two values carry no meaning.
fold_means = results.mean()
fold_stds = results.std()
fold_means, fold_stds

(Fold         4.500000
 ROC Fair     0.747000
 Loss Fair    0.390157
 EOP Fair     0.172552
 ROC XGB      0.734000
 Loss XGB     0.481428
 EOP XGB      0.296874
 dtype: float64,
 Fold         3.027650
 ROC Fair     0.032335
 Loss Fair    0.364987
 EOP Fair     0.097983
 ROC XGB      0.051683
 Loss XGB     0.250222
 EOP XGB      0.253438
 dtype: float64)

In [10]:
# Scratch check: build a model with colsample_bytree=0.5 and fit it without
# sensitive-group labels.
# NOTE(review): X_train_preprocessed and Y_train are globals assigned inside the
# fold loop, so this cell uses whichever fold that loop processed last and only
# works after the loop has been run.
# NOTE(review): colsample_bytree is presumably per-tree column subsampling as in
# XGBoost — confirm against models.XtremeFair.
model = models.XtremeFair(colsample_bytree=0.5)
model.fit(X_train_preprocessed, Y_train)

what
(800, 57)
what
(800, 57)
what
(800, 57)
what
(800, 57)
what
(800, 57)
what
(800, 57)
what
(800, 57)
what
(800, 57)
what
(800, 57)
what
(800, 57)
