# CV Routine

In [1]:
%load_ext autoreload
%autoreload 2

In [3]:
%matplotlib inline
# Load all necessary packages
import sys

import pandas as pd

sys.path.append("../")
import matplotlib.pyplot as plt
import tensorflow.compat.v1 as tf
from aif360.algorithms.inprocessing.adversarial_debiasing import AdversarialDebiasing
from aif360.algorithms.inprocessing.fairness_adjuster import FairnessAdjuster
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import (
    load_preproc_data_adult,
    load_preproc_data_compas,
    load_preproc_data_german,
)
from aif360.datasets import (
    AdultDataset,
    BinaryLabelDataset,
    CompasDataset,
    GermanDataset,
)
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector
from IPython.display import Markdown, display
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MaxAbsScaler, StandardScaler

tf.disable_eager_execution()

In [4]:
dataset_orig = load_preproc_data_german(["age", "sex"])
privileged_groups = [{"age": 1}]
unprivileged_groups = [{"age": 0}]

  df['sex'] = df['personal_status'].replace(status_map)


In [5]:
import numpy as np

protected_attribute_name = list(unprivileged_groups[0].keys())[0]

In [6]:
from sklearn.model_selection import KFold, cross_val_score, cross_validate

min_max_scaler = MaxAbsScaler()

Prep

In [7]:
dataset_orig.features = min_max_scaler.fit_transform(dataset_orig.features)

CV

In [8]:
def get_metrics(
    dataset_orig_test, dataset_pred_test, unprivileged_groups, privileged_groups
):
    metric_dataset_debiasing_test = BinaryLabelDatasetMetric(
        dataset_orig_test,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )
    classified_metric_debiasing_test = ClassificationMetric(
        dataset_orig_test,
        dataset_pred_test,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )
    metrics_dict = {
        "Test Set: mean outcomes difference": metric_dataset_debiasing_test.mean_difference(),
        "Test Set: Classification accuracy": classified_metric_debiasing_test.accuracy(),
        "Test Set: Disparate impact": classified_metric_debiasing_test.disparate_impact(),
        "Test Set: Average odds difference": classified_metric_debiasing_test.average_odds_difference(),
    }
    return metrics_dict

In [27]:
from copy import copy, deepcopy

import numpy as np
from sklearn.metrics import accuracy_score  # Replace with desired metric
from sklearn.model_selection import KFold

protected_attribute_name = "age"


def cross_validate_seeds(
    model_class,
    model_kwargs,
    dataset,
    seeds=5,
    n_folds=5,
):
    """
    Runs cross-validation with multiple seeds.

    Parameters:
        model: The Scikit-learn model to validate.
        X: Feature matrix.
        y: Target vector.
        seeds: Number of seeds for cross-validation.
        folds: Number of folds for each cross-validation.

    Returns:
        results: Dictionary containing accuracy scores for each seed and fold.
    """
    results = {}
    original_df = dataset.convert_to_dataframe()[0]
    for seed in range(seeds):
        print(f"\nSeed {seed + 1}/{seeds}")
        kf = KFold(n_splits=n_folds, shuffle=True, random_state=seed)
        seed_results = {}
        for to_debias in [True, False]:
            model_kwargs["debias"] = to_debias
            fold_results = []
            for fold, (train_idx, test_idx) in enumerate(kf.split(original_df)):
                print(f"{seed=} || {fold=}")
                # Splitting data
                dataset_X_train, dataset_X_test = dataset.subset(
                    train_idx
                ), dataset.subset(test_idx)

                # Training model
                Z = dataset_X_train.protected_attributes[
                    :,
                    dataset_X_train.protected_attribute_names.index(
                        protected_attribute_name
                    ),
                ]
                model_kwargs["protected_group_vector"] = Z
                model = model_class(**model_kwargs)
                model.fit(dataset_X_train)

                # Making predictions
                dataset_preds = model.predict(dataset_X_test)

                # Evaluating model
                metrics = get_metrics(
                    dataset_X_test,
                    dataset_preds,
                    model_kwargs["unprivileged_groups"],
                    model_kwargs["privileged_groups"],
                )
                fold_results.append(metrics)

                # end session
            seed_results[f"{to_debias=}"] = fold_results
        results[f"Seed {seed}"] = seed_results

    return results

Fairness adjuster

In [23]:
from _utils import _XGBAdversarialDebiasing, _XGBFairnessAdjuster

model_class = _XGBFairnessAdjuster
model_kwargs = {
    "privileged_groups": privileged_groups,
    "unprivileged_groups": unprivileged_groups,
    "adversary_loss_weight": 10,
    "seed": 1234,
}

In [24]:
results = cross_validate_seeds(
    model_class,
    model_kwargs,
    dataset_orig,
    seeds=3,
    n_folds=10,
)


Seed 1/3
seed=0 || fold=0
seed=0 || fold=1
seed=0 || fold=2
seed=0 || fold=3
seed=0 || fold=4
seed=0 || fold=5
seed=0 || fold=6
seed=0 || fold=7
seed=0 || fold=8
seed=0 || fold=9
seed=0 || fold=0
seed=0 || fold=1
seed=0 || fold=2
seed=0 || fold=3
seed=0 || fold=4
seed=0 || fold=5
seed=0 || fold=6
seed=0 || fold=7
seed=0 || fold=8
seed=0 || fold=9

Seed 2/3
seed=1 || fold=0
seed=1 || fold=1
seed=1 || fold=2
seed=1 || fold=3
seed=1 || fold=4
seed=1 || fold=5
seed=1 || fold=6
seed=1 || fold=7
seed=1 || fold=8
seed=1 || fold=9
seed=1 || fold=0
seed=1 || fold=1
seed=1 || fold=2
seed=1 || fold=3
seed=1 || fold=4
seed=1 || fold=5
seed=1 || fold=6
seed=1 || fold=7
seed=1 || fold=8
seed=1 || fold=9

Seed 3/3
seed=2 || fold=0
seed=2 || fold=1
seed=2 || fold=2
seed=2 || fold=3
seed=2 || fold=4
seed=2 || fold=5
seed=2 || fold=6
seed=2 || fold=7
seed=2 || fold=8
seed=2 || fold=9
seed=2 || fold=0
seed=2 || fold=1
seed=2 || fold=2
seed=2 || fold=3
seed=2 || fold=4
seed=2 || fold=5
seed=2 || fold=6
s

In [25]:
import pickle

with open("./data/xgb_adjuster_group_cv_results.pickle", "wb") as path:
    pickle.dump(results, path, protocol=pickle.HIGHEST_PROTOCOL)

In [26]:
aggs = {}
for seed, res_dict in results.items():
    aggs[seed] = {}
    for to_debias, fold_res_dict in res_dict.items():
        res = pd.DataFrame(fold_res_dict).quantile([0.025, 0.975]).T
        res["means"] = pd.DataFrame(fold_res_dict).mean()
        aggs[seed][to_debias] = res.apply(
            lambda x: f"{x.means:.4f} ({x[0.025]:.4f},{x[0.975]:.4f})", axis=1
        )

reform = {
    (outerKey, innerKey): values
    for outerKey, innerDict in aggs.items()
    for innerKey, values in innerDict.items()
}
pd.DataFrame(reform).T

Unnamed: 0,Unnamed: 1,Test Set: mean outcomes difference,Test Set: Classification accuracy,Test Set: Disparate impact,Test Set: Average odds difference
Seed 0,to_debias=True,"-0.1388 (-0.2802,0.0504)","0.6870 (0.6322,0.7478)","0.6951 (0.5072,0.8638)","-0.3178 (-0.5917,-0.1021)"
Seed 0,to_debias=False,"-0.1388 (-0.2802,0.0504)","0.6870 (0.6245,0.7578)","0.6423 (0.4947,0.8182)","-0.3770 (-0.6058,-0.1918)"
Seed 1,to_debias=True,"-0.1554 (-0.3321,-0.0244)","0.6780 (0.5725,0.7355)","0.7261 (0.5624,0.9194)","-0.2905 (-0.5152,-0.0834)"
Seed 1,to_debias=False,"-0.1554 (-0.3321,-0.0244)","0.6880 (0.6135,0.7355)","0.6353 (0.4026,0.8333)","-0.3800 (-0.6328,-0.1922)"
Seed 2,to_debias=True,"-0.1534 (-0.3167,-0.0148)","0.6830 (0.6168,0.7810)","0.7871 (0.5835,1.0607)","-0.2267 (-0.4368,0.0691)"
Seed 2,to_debias=False,"-0.1534 (-0.3167,-0.0148)","0.6990 (0.6445,0.7878)","0.6545 (0.4548,0.7738)","-0.3685 (-0.5950,-0.2118)"


AD

In [28]:
model_class = _XGBAdversarialDebiasing
model_kwargs = {
    "privileged_groups": privileged_groups,
    "unprivileged_groups": unprivileged_groups,
    "adversary_loss_weight": 10,
    "seed": 1234,
}

In [29]:
results = cross_validate_seeds(
    model_class,
    model_kwargs,
    dataset_orig,
    seeds=3,
    n_folds=10,
)


Seed 1/3
seed=0 || fold=0
debug=False
seed=0 || fold=1
debug=False
seed=0 || fold=2
debug=False
seed=0 || fold=3
debug=False
seed=0 || fold=4
debug=False
seed=0 || fold=5
debug=False
seed=0 || fold=6
debug=False
seed=0 || fold=7
debug=False
seed=0 || fold=8
debug=False
seed=0 || fold=9
debug=False
seed=0 || fold=0
seed=0 || fold=1
seed=0 || fold=2
seed=0 || fold=3
seed=0 || fold=4
seed=0 || fold=5
seed=0 || fold=6
seed=0 || fold=7
seed=0 || fold=8
seed=0 || fold=9

Seed 2/3
seed=1 || fold=0
debug=False
seed=1 || fold=1
debug=False
seed=1 || fold=2
debug=False
seed=1 || fold=3
debug=False
seed=1 || fold=4
debug=False
seed=1 || fold=5
debug=False
seed=1 || fold=6
debug=False
seed=1 || fold=7
debug=False
seed=1 || fold=8
debug=False
seed=1 || fold=9
debug=False
seed=1 || fold=0
seed=1 || fold=1
seed=1 || fold=2
seed=1 || fold=3
seed=1 || fold=4
seed=1 || fold=5
seed=1 || fold=6
seed=1 || fold=7
seed=1 || fold=8
seed=1 || fold=9

Seed 3/3
seed=2 || fold=0
debug=False
seed=2 || fold=1
debu

In [30]:
import pickle

with open("./data/xgb_ad_group_cv_results.pickle", "wb") as path:
    pickle.dump(results, path, protocol=pickle.HIGHEST_PROTOCOL)

In [31]:
aggs = {}
for seed, res_dict in results.items():
    aggs[seed] = {}
    for to_debias, fold_res_dict in res_dict.items():
        res = pd.DataFrame(fold_res_dict).quantile([0.025, 0.975]).T
        res["means"] = pd.DataFrame(fold_res_dict).mean()
        aggs[seed][to_debias] = res.apply(
            lambda x: f"{x.means:.4f} ({x[0.025]:.4f},{x[0.975]:.4f})", axis=1
        )

reform = {
    (outerKey, innerKey): values
    for outerKey, innerDict in aggs.items()
    for innerKey, values in innerDict.items()
}
pd.DataFrame(reform).T

Unnamed: 0,Unnamed: 1,Test Set: mean outcomes difference,Test Set: Classification accuracy,Test Set: Disparate impact,Test Set: Average odds difference
Seed 0,to_debias=True,"-0.1388 (-0.2802,0.0504)","0.6660 (0.6200,0.7000)","1.0351 (0.9553,1.1110)","0.0275 (-0.0356,0.0991)"
Seed 0,to_debias=False,"-0.1388 (-0.2802,0.0504)","0.6870 (0.6245,0.7578)","0.6423 (0.4947,0.8182)","-0.3770 (-0.6058,-0.1918)"
Seed 1,to_debias=True,"-0.1554 (-0.3321,-0.0244)","0.6650 (0.5658,0.7077)","1.0473 (0.9592,1.1031)","0.0358 (-0.0398,0.0781)"
Seed 1,to_debias=False,"-0.1554 (-0.3321,-0.0244)","0.6880 (0.6135,0.7355)","0.6353 (0.4026,0.8333)","-0.3800 (-0.6328,-0.1922)"
Seed 2,to_debias=True,"-0.1534 (-0.3167,-0.0148)","0.6730 (0.6190,0.7588)","1.0334 (0.9040,1.0948)","0.0319 (-0.1010,0.0939)"
Seed 2,to_debias=False,"-0.1534 (-0.3167,-0.0148)","0.6990 (0.6445,0.7878)","0.6545 (0.4548,0.7738)","-0.3685 (-0.5950,-0.2118)"
