In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the project's *_utils helpers) are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
import warnings

warnings.filterwarnings('ignore')

In [3]:
from tqdm import tqdm
import os
import data_utils
import model_utils
from attack_utils import get_CSMIA_case_by_case_results, CSMIA_attack, LOMIA_attack
from data_utils import oneHotCatVars, filter_random_data_by_conf_score
from experiment_utils import MIAExperiment
from disparity_inference_utils import get_confidence_array, draw_confidence_array_scatter, get_indices_by_group_condition, get_corr_btn_sens_and_out_per_subgroup, get_slopes, get_angular_difference, calculate_stds, get_mutual_info_btn_sens_and_out_per_subgroup
from bcorr_utils import bcorr_sampling, evaluate, MLPClassifierFC
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network._base import ACTIVATIONS
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics import roc_curve, auc, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.decomposition import PCA
from sklearn.inspection import permutation_importance
from fairlearn.metrics import equalized_odds_difference, demographic_parity_difference
import matplotlib.pyplot as plt
import seaborn as sns
import tabulate
import pickle
import copy

import matplotlib as mpl

# Load Dataset

In [4]:
# Registry of every experiment used in this notebook. Binary-subgroup runs are
# keyed by the experiment's `name`; multi-valued runs get a "_multi_valued"
# suffix so they do not overwrite the binary run on the same dataset.
experiments = {}

# ---- Binary subgroup experiments ------------------------------------------
# i and j are forwarded as `correlation_by_subgroup_values` and embedded in the
# shortname (labelled there as the male / female correlations).
i = -0.4
j = -0.1

# No `name` / `sensitive_column` given here, so MIAExperiment's own defaults
# apply (the run output further down reports this experiment as Census19).
experiment = MIAExperiment(
    sampling_condition_dict=dict(
        subgroup_col_name='SEX',
        n=25000,
        correlation_by_subgroup_values=[i, j],
    ),
    shortname=f"Corr_btn_sens_and_output_for_male_({i})_for_female_({j})",
    random_state=0,
)
experiments[experiment.name] = experiment

# Same sampling setup on Texas100, with ETHNICITY as the sensitive column.
experiment_texas = MIAExperiment(
    sampling_condition_dict=dict(
        subgroup_col_name='SEX_CODE',
        n=25000,
        correlation_by_subgroup_values=[i, j],
    ),
    shortname=f"Corr_btn_sens_and_output_for_male_({i})_for_female_({j})",
    random_state=0,
    name="Texas100",
    sensitive_column='ETHNICITY',
)
experiments[experiment_texas.name] = experiment_texas

# ---- Multi-valued subgroup experiments ------------------------------------
# NOTE: `i` is re-purposed from here on as the random state.
i = 0
experiment_multi_valued = MIAExperiment(
    sampling_condition_dict=dict(
        subgroup_col_name='ST',
        n=1000,
    ),
    random_state=i,
    shortname=f"Corr_btn_sens_and_output_for_ST_ranging_from_0_to_-0.5_random_state_{i}",
)
experiments[f"{experiment_multi_valued.name}_multi_valued"] = experiment_multi_valued

# Explicit list of PAT_STATUS codes to use as subgroups. No random_state is
# passed for this experiment, so MIAExperiment's default is used (the run
# output below shows it as rs42).
subgroup_vals = [1, 2, 3, 4, 6, 20, 50, 51, 62, 63]
experiment_multi_valued_texas = MIAExperiment(
    sampling_condition_dict=dict(
        subgroup_col_name='PAT_STATUS',
        subgroup_values=subgroup_vals,
        n=5000,
    ),
    shortname="Corr_btn_sens_and_output_for_PAT_STATUS_ranging_from_0_to_-0.5",
    name='Texas100',
    sensitive_column='SEX_CODE',
)
experiments[f"{experiment_multi_valued_texas.name}_multi_valued"] = experiment_multi_valued_texas

# Train/Load Model

In [5]:
# Load each experiment's cached target model, or train (and optionally cache)
# a fresh one when the cached file cannot be loaded.
# Set save_model to False to train without writing the result to disk.
save_model=True
for experiment_key in experiments:
    experiment = experiments[experiment_key]
    print(f"Training classifier for experiment: {experiment}")
    # Single source of truth for the cache location (used by both load and save).
    model_path = f'<PATH_TO_MODEL>/{experiment.ds.ds.filenameroot}_target_model.pkl'
    try:
        experiment.clf = model_utils.load_model(model_path)
        print(f"Loaded classifier for experiment from file: {experiment}")
    except Exception as load_error:
        # `Exception` (not a bare `except:`) keeps the train-on-any-load-failure
        # fallback but lets KeyboardInterrupt / SystemExit propagate, so
        # interrupting the kernel during a load no longer starts a training run.
        print(f"Could not load {model_path} ({load_error!r}); training a new classifier.")
        base_model = model_utils.get_model(max_iter=500)
        experiment.clf = copy.deepcopy(base_model)
        # The target model is fitted on the one-hot encoded labels.
        experiment.clf.fit(experiment.X_train, experiment.y_tr_onehot)

        if save_model:
            model_utils.save_model(experiment.clf, model_path)

Training classifier for experiment: Census19_subgroup_col_name_SEX_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0
Loaded classifier for experiment from file: Census19_subgroup_col_name_SEX_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0
Training classifier for experiment: Texas100_subgroup_col_name_SEX_CODE_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0
Loaded classifier for experiment from file: Texas100_subgroup_col_name_SEX_CODE_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0
Training classifier for experiment: Census19_subgroup_col_name_ST_n_1000_rs0
Loaded classifier for experiment from file: Census19_subgroup_col_name_ST_n_1000_rs0
Training classifier for experiment: Texas100_subgroup_col_name_PAT_STATUS_subgroup_values_[1, 2, 3, 4, 6, 20, 50, 51, 62, 63]_n_5000_rs42
Loaded classifier for experiment from file: Texas100_subgroup_col_name_PAT_STATUS_subgroup_values_[1, 2, 3, 4, 6, 20, 50, 51, 62, 63]_n_5000_rs42


# Balancing Correlation

In [6]:
# Build a correlation-balanced copy of every experiment's training set and
# attach it (features, labels, one-hot labels) to the experiment object.
for experiment_key, experiment in experiments.items():
    experiment.subgroup_col_name = experiment.sampling_condition_dict['subgroup_col_name']

    # Subgroup values are recovered from the training column names: the text
    # after the last '_' of every column whose name starts with the subgroup
    # column name.
    experiment.subgroup_vals = [
        col.split('_')[-1]
        for col in experiment.X_train.columns
        if col.startswith(experiment.subgroup_col_name)
    ]

    # p is handed to bcorr_sampling: -0.1 for two-valued subgroups, 0 otherwise
    # (presumably the target correlation -- the binary runs print -0.1 after
    # balancing; confirm against bcorr_utils).
    if len(experiment.subgroup_vals) == 2:
        p = -0.1
    else:
        p = 0

    balanced = bcorr_sampling(
        experiment,
        experiment.X_train,
        experiment.y_tr,
        experiment.y_tr_onehot,
        subgroup_col_name=experiment.subgroup_col_name,
        p=p,
    )
    experiment.X_train_balanced_corr, experiment.y_tr_balanced_corr, experiment.y_tr_onehot_balanced_corr = balanced

[15000, 22500]
{0: {(0, 1): 4125, (0, 0): 3375, (1, 1): 3375, (1, 0): 4125}, 1: {(0, 1): 6187, (0, 0): 5062, (1, 1): 5063, (1, 0): 6188}}


100%|██████████| 2/2 [00:00<00:00, 22.05it/s]


[15000, 22500]
{0: {(0, 1): 4125, (0, 0): 3375, (1, 1): 3375, (1, 0): 4125}, 1: {(0, 1): 6187, (0, 0): 5062, (1, 1): 5063, (1, 0): 6188}}


100%|██████████| 2/2 [00:00<00:00, 23.57it/s]


[1000, 500, 980, 968, 960, 948, 940, 876, 920, 908, 900, 892, 656, 872, 860, 852, 840, 832, 820, 652, 800, 792, 784, 772, 764, 752, 504, 732, 724, 712, 704, 696, 684, 676, 480, 656, 644, 636, 624, 616, 604, 304, 588, 576, 568, 404, 548, 536, 528, 516, 348]
{0: {(0, 1): 250, (0, 0): 250, (1, 1): 250, (1, 0): 250}, 1: {(0, 1): 125, (0, 0): 125, (1, 1): 125, (1, 0): 125}, 2: {(0, 1): 245, (0, 0): 245, (1, 1): 245, (1, 0): 245}, 3: {(0, 1): 242, (0, 0): 242, (1, 1): 242, (1, 0): 242}, 4: {(0, 1): 240, (0, 0): 240, (1, 1): 240, (1, 0): 240}, 5: {(0, 1): 237, (0, 0): 237, (1, 1): 237, (1, 0): 237}, 6: {(0, 1): 235, (0, 0): 235, (1, 1): 235, (1, 0): 235}, 7: {(0, 1): 219, (0, 0): 219, (1, 1): 219, (1, 0): 219}, 8: {(0, 1): 230, (0, 0): 230, (1, 1): 230, (1, 0): 230}, 9: {(0, 1): 227, (0, 0): 227, (1, 1): 227, (1, 0): 227}, 10: {(0, 1): 225, (0, 0): 225, (1, 1): 225, (1, 0): 225}, 11: {(0, 1): 223, (0, 0): 223, (1, 1): 223, (1, 0): 223}, 12: {(0, 1): 164, (0, 0): 164, (1, 1): 164, (1, 0): 164}

100%|██████████| 51/51 [00:02<00:00, 22.43it/s]


[5000, 4228, 4500, 3260, 4000, 3748, 1988, 1852, 3000, 2748]
{0: {(0, 1): 1250, (0, 0): 1250, (1, 1): 1250, (1, 0): 1250}, 1: {(0, 1): 1057, (0, 0): 1057, (1, 1): 1057, (1, 0): 1057}, 2: {(0, 1): 1125, (0, 0): 1125, (1, 1): 1125, (1, 0): 1125}, 3: {(0, 1): 815, (0, 0): 815, (1, 1): 815, (1, 0): 815}, 4: {(0, 1): 1000, (0, 0): 1000, (1, 1): 1000, (1, 0): 1000}, 5: {(0, 1): 937, (0, 0): 937, (1, 1): 937, (1, 0): 937}, 6: {(0, 1): 497, (0, 0): 497, (1, 1): 497, (1, 0): 497}, 7: {(0, 1): 463, (0, 0): 463, (1, 1): 463, (1, 0): 463}, 8: {(0, 1): 750, (0, 0): 750, (1, 1): 750, (1, 0): 750}, 9: {(0, 1): 687, (0, 0): 687, (1, 1): 687, (1, 0): 687}}


100%|██████████| 10/10 [00:00<00:00, 27.41it/s]


In [7]:
def _corr_by_subgroup(experiment, X, y):
    """Map each subgroup value of `experiment` to the value returned by
    get_corr_btn_sens_and_out_per_subgroup for (X, y), rounded to 2 decimals."""
    rounded = {}
    for val in experiment.subgroup_vals:
        corr = get_corr_btn_sens_and_out_per_subgroup(experiment, X, y, {experiment.subgroup_col_name: val})
        rounded[val] = round(corr, 2)
    return rounded


# Print the per-subgroup correlations of the original and the balanced
# training sets side by side for every experiment.
for experiment_key, experiment in experiments.items():
    print(f"\nDataset: {experiment.name}, Subgroup: {experiment.subgroup_col_name}")

    correlations_dict_before = _corr_by_subgroup(experiment, experiment.X_train, experiment.y_tr)
    print(f"Correlations before balancing: {correlations_dict_before}")

    correlations_dict_after = _corr_by_subgroup(experiment, experiment.X_train_balanced_corr, experiment.y_tr_balanced_corr)
    print(f"Correlations after balancing: {correlations_dict_after}")


Dataset: Census19, Subgroup: SEX
Correlations before balancing: {'0': -0.4, '1': -0.1}
Correlations after balancing: {'0': -0.1, '1': -0.1}

Dataset: Texas100, Subgroup: SEX_CODE
Correlations before balancing: {'0': -0.4, '1': -0.1}
Correlations after balancing: {'0': -0.1, '1': -0.1}

Dataset: Census19, Subgroup: ST
Correlations before balancing: {'0': 0.0, '1': -0.01, '2': -0.02, '3': -0.03, '4': -0.04, '5': -0.05, '6': -0.06, '7': -0.07, '8': -0.08, '9': -0.09, '10': -0.1, '11': -0.11, '12': -0.12, '13': -0.13, '14': -0.14, '15': -0.15, '16': -0.16, '17': -0.17, '18': -0.18, '19': -0.18, '20': -0.2, '21': -0.21, '22': -0.21, '23': -0.23, '24': -0.23, '25': -0.25, '26': -0.25, '27': -0.27, '28': -0.27, '29': -0.29, '30': -0.29, '31': -0.3, '32': -0.31, '33': -0.32, '34': -0.33, '35': -0.34, '36': -0.35, '37': -0.36, '38': -0.37, '39': -0.38, '40': -0.39, '41': -0.38, '42': -0.41, '43': -0.42, '44': -0.43, '45': -0.44, '46': -0.45, '47': -0.46, '48': -0.47, '49': -0.48, '50': -0.49}


In [8]:
# Load each experiment's cached balanced-correlation model, or train (and
# optionally cache) a fresh one on the balanced training set when the cached
# file cannot be loaded.
# Set save_model to False to train without writing the result to disk.
save_model=True
for experiment_key in experiments:
    experiment = experiments[experiment_key]
    print(f"Training classifier for experiment: {experiment}")
    # Single source of truth for the cache location (used by both load and save).
    model_path = f'<PATH_TO_MODEL>/{experiment.ds.ds.filenameroot}_target_model_bcorr.pkl'
    try:
        experiment.clf_balanced_corr = model_utils.load_model(model_path)
        print(f"Loaded classifier for experiment from file: {experiment}")
    except Exception as load_error:
        # `Exception` (not a bare `except:`) keeps the train-on-any-load-failure
        # fallback but lets KeyboardInterrupt / SystemExit propagate, so
        # interrupting the kernel during a load no longer starts a training run.
        print(f"Could not load {model_path} ({load_error!r}); training a new classifier.")
        base_model = model_utils.get_model(max_iter=500)
        experiment.clf_balanced_corr = copy.deepcopy(base_model)
        # NOTE(review): this fits on y_tr_balanced_corr, while the unbalanced
        # target model is fitted on the one-hot labels (y_tr_onehot) and
        # y_tr_onehot_balanced_corr is computed but not used here -- confirm
        # that the difference is intentional.
        experiment.clf_balanced_corr.fit(experiment.X_train_balanced_corr, experiment.y_tr_balanced_corr)

        if save_model:
            model_utils.save_model(experiment.clf_balanced_corr, model_path)

Training classifier for experiment: Census19_subgroup_col_name_SEX_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0
Loaded classifier for experiment from file: Census19_subgroup_col_name_SEX_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0
Training classifier for experiment: Texas100_subgroup_col_name_SEX_CODE_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0
Loaded classifier for experiment from file: Texas100_subgroup_col_name_SEX_CODE_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0
Training classifier for experiment: Census19_subgroup_col_name_ST_n_1000_rs0
Loaded classifier for experiment from file: Census19_subgroup_col_name_ST_n_1000_rs0
Training classifier for experiment: Texas100_subgroup_col_name_PAT_STATUS_subgroup_values_[1, 2, 3, 4, 6, 20, 50, 51, 62, 63]_n_5000_rs42


# Fairness Constraint Baseline

In [9]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds, ErrorRate

# Fairness-constraint baseline: for every experiment with at most two subgroup
# values, load a cached ExponentiatedGradient mitigator or train one with an
# EqualizedOdds constraint and cache it.
for experiment_key in experiments:
    experiment = experiments[experiment_key]

    # Experiments with more than two subgroup values get no mitigator.
    if len(experiment.subgroup_vals) > 2:
        continue

    # Single source of truth for the cache location (used by both load and save).
    mitigator_path = f'<PATH_TO_MODEL>/{experiment.ds.ds.filenameroot}_target_model_fairness_constraints.pkl'
    try:
        print(f"Loading mitigator for experiment: {experiment}")
        experiment.mitigator = model_utils.load_model(mitigator_path)
    except Exception as load_error:
        # `Exception` (not a bare `except:`) keeps the train-on-any-load-failure
        # fallback but lets KeyboardInterrupt / SystemExit propagate.
        print(f"Could not load {mitigator_path} ({load_error!r}).")
        print(f"Training mitigator for experiment: {experiment}")
        clf2 = MLPClassifierFC(max_iter=500)
        # Seed the base estimator with the target model's weights. Copies are
        # used so that nothing done to clf2 can modify experiment.clf through
        # shared list/array objects.
        clf2.coefs_ = copy.deepcopy(experiment.clf.coefs_)
        clf2.intercepts_ = copy.deepcopy(experiment.clf.intercepts_)
        constraint = EqualizedOdds()
        experiment.mitigator = ExponentiatedGradient(clf2, constraint)

        # The sensitive feature is the '<subgroup_col_name>_0' training column
        # (presumably the one-hot indicator of the first subgroup value).
        experiment.mitigator.fit(experiment.X_train, experiment.y_tr, sensitive_features=experiment.X_train[f'{experiment.subgroup_col_name}_0'])

        model_utils.save_model(experiment.mitigator, mitigator_path)

Loading mitigator for experiment: Census19_subgroup_col_name_SEX_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0
Loading mitigator for experiment: Texas100_subgroup_col_name_SEX_CODE_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0


# Evaluation

In [10]:
# Evaluate every trained variant of each experiment on the shared test split
# and print one table per experiment (one row per variant).
for experiment_key, experiment in experiments.items():
    res_dict = {}

    # Target model trained on the original training set.
    res_dict['w/o BCorr'] = evaluate(
        experiment,
        experiment.clf,
        experiment.X_train,
        experiment.y_tr,
        experiment.X_test,
        experiment.y_te,
        subgroup_col_name=experiment.subgroup_col_name,
    )

    # Model trained on the correlation-balanced training set.
    res_dict['w Bcorr'] = evaluate(
        experiment,
        experiment.clf_balanced_corr,
        experiment.X_train_balanced_corr,
        experiment.y_tr_balanced_corr,
        experiment.X_test,
        experiment.y_te,
        subgroup_col_name=experiment.subgroup_col_name,
    )

    # The fairness-constraint mitigator is only evaluated for two-valued subgroups.
    if len(experiment.subgroup_vals) == 2:
        res_dict['FC'] = evaluate(
            experiment,
            experiment.mitigator,
            experiment.X_train,
            experiment.y_tr,
            experiment.X_test,
            experiment.y_te,
            subgroup_col_name=experiment.subgroup_col_name,
        )

    res_dict_df = pd.DataFrame.from_dict(res_dict, orient='index')
    print(f"Dataset: {experiment.name}, Subgroup: {experiment.subgroup_col_name}")
    print(res_dict_df)

Dataset: Census19, Subgroup: SEX
           ASRD_CSMIA  ASRD_LOMIA     EOD     DPD      MA
w/o BCorr       11.80       14.65  0.0726  0.1284  73.904
w Bcorr          0.42        1.01  0.0438  0.0914  73.770
FC               8.94       13.97  0.0566  0.1059  70.598
Dataset: Texas100, Subgroup: SEX_CODE
           ASRD_CSMIA  ASRD_LOMIA     EOD     DPD      MA
w/o BCorr       12.91       15.45  0.1768  0.1007  72.080
w Bcorr          1.34        0.87  0.0120  0.0701  74.624
FC              11.65       11.65  0.1012  0.0288  70.578
Dataset: Census19, Subgroup: ST
           ASRD_CSMIA  ASRD_LOMIA  EOD     DPD         MA
w/o BCorr       23.22       25.78  1.0  0.1757  73.576448
w Bcorr         14.12       10.89  1.0  0.1471  71.908570
Dataset: Texas100, Subgroup: PAT_STATUS
           ASRD_CSMIA  ASRD_LOMIA     EOD     DPD         MA
w/o BCorr       17.06       19.00  0.8184  0.2681  74.544221
w Bcorr          8.68        4.74  0.7886  0.1906  74.290342
