## Pre-processing Mitigation Methods: AIF360



In [1]:
import numpy as np
from numpy.random import choice as np_choice
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import seaborn as sns
from IPython.display import Markdown, display
# from sklearn.decomposition import PCA

import fairness_helpers as fh
import global_variables as gv
from xgboost import XGBClassifier
import utilities
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, QuantileTransformer, RobustScaler, StandardScaler,MinMaxScaler

from aif360.algorithms.preprocessing import DisparateImpactRemover
from aif360.algorithms.preprocessing.reweighing import Reweighing
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric

#### get data

In [2]:
X, X1 = fh.get_aif360_data()

In [7]:
X

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,1319-0.0,1408-0.0,1329-0.0,1448-0.0,1538-0.0,6142-0.0,2050-0.0,1508-0.0,1339-0.0,30710-0.0,1349-0.0,30750-0.0,1468-0.0,20117-0.0,30740-0.0,1160-0.0,2090-0.0,31-0.0,1488-0.0,30850-0.0,4080-0.0,1369-0.0,21000-0.0,1200-0.0,1289-0.0,30790-0.0,845-0.0,48-0.0,30630-0.0,1299-0.0,1220-0.0,1548-0.0,1528-0.0,23099-0.0,49-0.0,30690-0.0,1389-0.0,2654-0.0,1249-0.0,1309-0.0,1379-0.0,1239-0.0,21003-0.0,30780-0.0,1438-0.0,30870-0.0,1359-0.0,30770-0.0,21001-0.0,1458-0.0,23100-0.0,6138-0.0,1418-0.0,1478-0.0,4079-0.0,30760-0.0,23101-0.0,2100-0.0,1428-0.0,30640-0.0,hypertension,CVD,sex-binary,race-binary,race-grouped,age-binary
sex-binary,race-binary,race-grouped,age-binary,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1
0,1,0,1,0.0,1.0,2.0,3.0,2.0,1.0,2.0,3.0,2.0,0.3400,1.0,34.937,3.0,2.0,5.62200,7.0,1.0,0.0,6.00,0.50800,110.0,1.0,1001.0,3.0,6.0,54.4035,20.90,74.0,1.5930,10.0,0.0,2.0,2.00,35.6,102.0,6.47700,1.0,6.0,1.0,2.0,1.0,0.0,54.0,3.88800,10.0,0.97700,2.0,26.33900,24.5790,3.86,25.0,1.0,3.0,1.0,77.0,1.70600,45.2,1.0,0.0,1.21100,0,1,0,1,0,1
1,1,0,1,0.0,3.0,2.0,1.0,0.0,1.0,1.0,2.0,2.0,3.9400,4.0,40.900,5.0,2.0,5.05200,9.0,0.0,1.0,2.00,13.08800,166.0,2.0,1001.0,2.0,2.0,15.4000,16.00,120.0,1.3900,2.0,0.0,2.0,2.47,36.5,113.0,5.51200,1.0,7.0,1.0,1.0,2.0,0.0,65.0,3.52000,12.0,2.35800,3.0,10.70100,35.0861,7.00,42.9,3.0,2.0,1.0,91.0,1.17300,74.6,0.0,1.0,1.01900,1,0,1,1,0,1
0,1,0,1,0.0,3.0,3.0,2.0,1.0,2.0,1.0,2.0,2.0,0.5500,1.0,40.000,1.0,0.0,5.31000,5.0,0.0,0.0,0.00,0.51500,132.0,1.0,1001.0,3.0,2.0,32.1000,16.00,66.0,2.0050,4.0,0.0,1.0,1.00,29.5,88.0,7.07900,1.0,7.0,3.0,4.0,2.0,0.0,69.0,4.22700,8.0,0.65500,2.0,10.69300,19.3835,7.00,15.2,3.0,2.0,1.0,67.0,2.49000,36.3,0.0,1.0,1.09700,0,0,0,1,0,1
1,1,0,1,3.0,3.0,3.0,3.0,0.0,2.0,1.0,2.0,2.0,0.4500,2.0,37.300,4.0,2.0,4.44900,7.0,0.0,1.0,5.00,4.67500,178.0,2.0,1001.0,1.0,3.0,43.5620,18.00,110.0,1.4740,2.0,0.0,1.0,2.00,28.5,117.0,5.02800,0.0,7.0,1.0,1.0,2.0,1.0,66.0,3.04100,10.0,3.10800,2.0,25.31700,35.1281,7.00,31.7,3.0,2.0,1.0,84.0,1.16900,79.6,0.0,3.0,0.92300,0,0,1,1,0,1
1,1,0,0,0.0,3.0,2.0,1.0,0.0,5.0,2.0,2.0,2.0,0.7500,2.0,32.200,1.0,2.0,4.61600,6.0,0.0,1.0,3.04,20.16200,178.0,1.0,1001.0,3.0,1.0,71.1100,22.38,94.0,2.1490,1.0,0.0,2.0,2.00,24.8,100.0,7.95800,1.0,7.0,2.0,1.0,1.0,0.0,48.0,4.98300,8.0,1.17300,1.0,26.52300,25.8866,1.00,20.1,1.0,2.0,1.0,88.0,2.05300,61.0,0.0,3.0,1.44300,0,0,1,1,0,0
1,1,0,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,1,0,1,0.0,3.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.4500,3.0,28.000,1.0,2.0,4.50900,7.0,0.0,1.0,0.00,17.13700,147.0,1.0,1001.0,2.0,2.0,5.3000,16.00,110.0,1.3450,0.0,0.0,2.0,0.00,27.0,113.0,5.75400,1.0,4.0,1.0,2.0,1.0,2.0,63.0,3.84300,4.0,3.82000,2.0,20.77700,30.6094,7.00,29.8,3.0,1.0,3.0,91.0,1.02600,80.7,0.0,3.0,1.13700,0,0,1,1,0,1
0,1,0,1,0.0,1.0,1.0,3.0,2.0,2.0,1.0,1.0,1.0,2.7500,1.0,33.200,1.0,1.0,5.67300,6.0,1.0,0.0,0.00,0.72500,148.0,2.0,1001.0,3.0,4.0,5.0900,16.00,102.0,1.3650,3.0,0.0,3.0,3.00,46.3,110.0,4.66400,2.0,7.0,1.0,6.0,2.0,0.0,64.0,2.86300,4.0,2.13400,3.0,23.64700,31.4652,7.00,38.7,3.0,3.0,2.0,73.0,1.15900,44.9,0.0,1.0,0.89800,1,0,0,1,0,1
1,1,0,0,2.0,1.0,1.0,2.0,2.0,1.0,4.0,2.0,2.0,0.8500,3.0,31.500,3.0,2.0,4.85100,7.0,1.0,1.0,20.00,12.04000,133.0,2.0,1001.0,2.0,2.0,37.5575,22.08,99.0,1.2410,2.0,1.0,3.0,1.00,25.7,98.0,5.18400,1.0,7.0,1.0,2.0,2.0,0.0,43.0,3.35800,20.0,2.54700,3.0,22.50700,28.6196,10.00,23.6,1.0,1.0,1.0,81.0,1.04300,68.1,1.0,1.0,0.98500,1,1,1,1,0,0
0,1,0,1,3.0,3.0,1.0,3.0,0.0,1.0,1.0,2.0,2.0,3.5700,2.0,45.700,1.0,2.0,5.50600,6.0,0.0,0.0,4.00,1.18648,138.0,1.0,1001.0,3.0,4.0,27.4800,19.64,86.0,1.6690,2.0,0.0,2.0,2.00,41.3,111.0,6.82700,1.0,7.0,4.0,1.0,1.0,0.0,56.0,4.20000,12.0,2.09000,3.0,30.49800,27.3702,7.00,32.7,1.0,2.0,1.0,83.0,1.58900,46.4,0.0,0.0,1.06300,0,0,0,1,0,1


#### Step 1. Convert Pandas DataFrame into [BinaryLabelDataset datatype](https://aif360.readthedocs.io/en/latest/modules/generated/aif360.datasets.BinaryLabelDataset.html#aif360.datasets.BinaryLabelDataset)

In [3]:
dataset1b = BinaryLabelDataset(df=X1, 
                          label_names=['CVD'], 
                          protected_attribute_names=['sex-binary'])

dataset2b = BinaryLabelDataset(df=X1, 
                          label_names=['CVD'], 
                          protected_attribute_names=['race-binary'])

dataset3b = BinaryLabelDataset(df=X1, 
                          label_names=['CVD'], 
                          protected_attribute_names=['age-binary'])

#### XGBoost Model

In [5]:
df = pd.read_csv('data/entire_imputed.csv')
pd.set_option('display.max_columns', None)
df.drop('Unnamed: 0', axis=1, inplace=True)

X_train, X_val, X_test, y_train, y_val, y_test = utilities.process_features(df, 'CVD', QuantileTransformer(output_distribution='uniform'), one_hot=True, val=True)
X_train, y_train= utilities.resample_data(X_train, y_train, 'under')

n_estimators = 1000

clf_xgb = XGBClassifier(max_depth=8,
    learning_rate=0.01,
    n_estimators=n_estimators,
    verbosity=0,
    silent=None,
    objective="binary:logistic",
    booster='gbtree',
    n_jobs=-1,
    nthread=None,
    gamma=0,
    min_child_weight=1,
    max_delta_step=0,
    subsample=0.7,
    colsample_bytree=1,
    colsample_bylevel=1,
    colsample_bynode=1,
    reg_alpha=0,
    reg_lambda=1,
    scale_pos_weight=1,
    base_score=0.5,
    random_state=0,
    seed=None,)

clf_xgb.fit(X_train, y_train,
            eval_set=[(X_val, y_val)],
            early_stopping_rounds=40,
            verbose=20)

`early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or`set_params` instead.


[0]	validation_0-logloss:0.69122
[20]	validation_0-logloss:0.65926
[40]	validation_0-logloss:0.63727
[60]	validation_0-logloss:0.62186
[80]	validation_0-logloss:0.61093
[100]	validation_0-logloss:0.60308
[120]	validation_0-logloss:0.59756
[140]	validation_0-logloss:0.59357
[160]	validation_0-logloss:0.59082
[180]	validation_0-logloss:0.58874
[200]	validation_0-logloss:0.58703
[220]	validation_0-logloss:0.58590
[240]	validation_0-logloss:0.58501
[260]	validation_0-logloss:0.58439
[280]	validation_0-logloss:0.58393
[300]	validation_0-logloss:0.58340
[320]	validation_0-logloss:0.58312
[340]	validation_0-logloss:0.58289
[360]	validation_0-logloss:0.58262
[380]	validation_0-logloss:0.58243
[400]	validation_0-logloss:0.58231
[420]	validation_0-logloss:0.58213
[440]	validation_0-logloss:0.58211
[460]	validation_0-logloss:0.58206
[480]	validation_0-logloss:0.58182
[500]	validation_0-logloss:0.58167
[520]	validation_0-logloss:0.58170
[540]	validation_0-logloss:0.58160
[560]	validation_0-logloss

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.01, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=8, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=1000,
              n_jobs=-1, nthread=-1, num_parallel_tree=1, predictor='auto',
              random_state=0, reg_alpha=0, ...)

### Method 1. Disparate Impact Remover

Edits feature values to increase group fairness while preserving rank-ordering. Must specify the <font color='blue'>repair_level</font> variable to indicate how much group distributions should overlap.

We want the individual rankings within their group to be preserved after repair ie if an individual has the highest ranking in group Q, it will still have the highest ranking in group Q after repair

In [None]:
# build DisparateRemoverObject

# binaryLabelDataset = aif360.datasets.BinaryLabelDataset(
#     favorable_label=1,
#     unfavorable_label=0,
#     df=encoded_df,
#     label_names=['Loan_Status'],
#     protected_attribute_names=['Gender'])

In [6]:
# transform original dataset

di = DisparateImpactRemover(repair_level = 1.0)
dataset_transf_train = di.fit_transform(dataset1b)
transformed = dataset_transf_train.convert_to_dataframe()[0]

In [8]:
transformed

Unnamed: 0,1319-0.0,1408-0.0,1329-0.0,2050-0.0,1339-0.0,30710-0.0,1349-0.0,30750-0.0,30740-0.0,1160-0.0,1488-0.0,30850-0.0,4080-0.0,1369-0.0,1200-0.0,1289-0.0,30790-0.0,845-0.0,48-0.0,30630-0.0,1299-0.0,1220-0.0,1548-0.0,1528-0.0,23099-0.0,49-0.0,30690-0.0,1389-0.0,1249-0.0,1309-0.0,1379-0.0,21003-0.0,30780-0.0,1438-0.0,30870-0.0,1359-0.0,30770-0.0,21001-0.0,1458-0.0,23100-0.0,1418-0.0,1478-0.0,4079-0.0,30760-0.0,23101-0.0,30640-0.0,1428-0.0_1.0,1428-0.0_2.0,1428-0.0_3.0,20117-0.0_1.0,20117-0.0_2.0,2100-0.0_1.0,2654-0.0_4.0,2654-0.0_5.0,2654-0.0_6.0,2654-0.0_7.0,2654-0.0_8.0,2654-0.0_9.0,21000-0.0_2.0,21000-0.0_3.0,21000-0.0_4.0,21000-0.0_5.0,21000-0.0_6.0,21000-0.0_1001.0,21000-0.0_1002.0,21000-0.0_1003.0,21000-0.0_2001.0,21000-0.0_2002.0,21000-0.0_2003.0,21000-0.0_2004.0,21000-0.0_3001.0,21000-0.0_3002.0,21000-0.0_3003.0,21000-0.0_3004.0,21000-0.0_4001.0,21000-0.0_4002.0,21000-0.0_4003.0,1538-0.0_1.0,1538-0.0_2.0,31-0.0_1.0,6138-0.0_2.0,6138-0.0_3.0,6138-0.0_4.0,6138-0.0_5.0,6138-0.0_6.0,2090-0.0_1.0,1508-0.0_2.0,1508-0.0_3.0,1508-0.0_4.0,6142-0.0_2.0,6142-0.0_3.0,6142-0.0_4.0,6142-0.0_5.0,6142-0.0_6.0,6142-0.0_7.0,1468-0.0_2.0,1468-0.0_3.0,1468-0.0_4.0,1468-0.0_5.0,1239-0.0_1.0,1239-0.0_2.0,1448-0.0_2.0,1448-0.0_3.0,1448-0.0_4.0,hypertension_1,sex-binary,race-binary,race-grouped,age-binary,CVD
0,0.00,1.0,2.0,2.0,2.0,-0.498860,1.0,-0.053673,1.096931,-0.02,1.000000,-0.121472,-1.2000,1.0,3.0,3.86,0.407447,0.659091,-0.855615,-0.340867,4.000,0.0,2.0,-0.090000,-0.187619,-0.239273,0.505548,1.0,1.0,-0.075,1.0,-0.307692,0.273567,0.000000,-0.478302,2.0,0.758646,-0.376206,-0.279412,-0.036814,3.0,1.0,-0.357143,0.136137,-0.306878,0.603390,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0
1,0.00,3.0,2.0,1.0,2.0,1.215453,4.0,0.772041,0.087948,2.00,-0.413333,-0.059231,0.9276,2.0,2.0,-0.26,-0.458336,-0.472727,1.342246,-0.420054,0.000,0.0,2.0,0.156667,0.452381,1.000000,-0.163662,1.0,1.0,-0.500,2.0,0.461538,-0.066543,-0.343333,0.062906,3.0,-1.602060,1.453725,0.394608,1.743363,2.0,1.0,0.642857,-0.525476,0.289894,-0.075458,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0
2,0.00,3.0,3.0,1.0,2.0,-0.394387,1.0,0.979592,0.592892,-2.00,-1.000000,-0.120790,-0.2400,1.0,3.0,-0.14,-0.095674,-0.454545,-1.283422,0.784959,0.945,0.0,1.0,-0.426667,-0.196190,-1.272727,0.873786,1.0,3.0,0.995,2.0,0.846154,0.586876,-0.222222,-0.814151,2.0,-1.543929,-1.281062,0.490196,-1.090973,2.0,1.0,-1.357143,1.385699,-0.777778,0.216949,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
3,2.91,3.0,3.0,1.0,2.0,-0.463292,2.0,0.325510,-0.798061,0.00,0.556667,-0.084841,1.0200,2.0,1.0,0.59,0.182055,-0.031818,0.775401,-0.192412,0.000,0.0,1.0,0.000000,-0.182540,1.363636,-0.499307,0.0,1.0,-0.500,2.0,0.538462,-0.509242,-0.521111,0.401274,2.0,0.516703,1.461040,0.394608,0.752212,2.0,1.0,0.092857,-0.533261,0.775079,-0.372881,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
4,0.00,3.0,2.0,2.0,2.0,-0.332368,2.0,-0.738163,-0.533118,-1.05,-0.076667,-0.035390,1.0200,1.0,3.0,-1.15,0.808483,0.936364,-0.295027,1.636856,-0.500,0.0,2.0,0.000000,-0.476190,-0.181818,1.532594,1.0,2.0,-0.500,1.0,-0.769231,1.285582,-0.674444,-0.448953,1.0,0.666519,-0.271518,-0.980392,-0.274336,2.0,1.0,0.426429,1.187186,0.085503,1.389831,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
502476,0.00,3.0,1.0,1.0,2.0,-0.016621,3.0,-1.469388,-0.701131,0.00,-1.000000,-0.046770,0.1884,1.0,2.0,-0.26,-0.688005,-0.472727,0.775401,-0.542005,-1.000,0.0,2.0,-0.666667,-0.301587,1.000000,0.004161,1.0,1.0,0.000,1.0,0.307692,0.120545,-0.895556,0.680189,2.0,-0.217224,0.674057,0.394608,0.584071,1.0,3.0,0.642857,-0.811568,0.953810,0.179593,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
502477,0.00,1.0,1.0,1.0,1.0,0.617248,1.0,-0.408163,1.179321,-1.00,-1.000000,-0.100350,0.4000,2.0,3.0,1.86,-0.696873,-0.454545,0.641711,-0.858618,0.435,0.0,3.0,0.260000,0.191825,0.322364,-1.284327,2.0,1.0,2.000,2.0,0.461538,-1.065619,-0.666667,0.613208,3.0,0.362472,0.749068,0.490196,0.491504,3.0,2.0,-0.642857,-0.941458,-0.322751,-1.021356,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
502478,1.94,1.0,1.0,4.0,2.0,-0.291338,3.0,-0.859184,-0.208433,0.00,3.666667,-0.064753,-0.4760,2.0,2.0,-0.26,0.045515,0.868182,-0.208289,-0.823848,0.000,1.0,3.0,-0.333333,-0.404762,-0.518182,-0.391123,1.0,1.0,0.000,2.0,-1.153846,-0.216266,0.388889,0.144623,3.0,0.077263,0.295186,1.225490,0.035398,1.0,1.0,-0.154286,-0.778483,0.162275,-0.162712,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0
502479,3.00,3.0,1.0,1.0,2.0,0.927723,2.0,2.142857,0.909532,-1.00,0.333333,-0.055433,0.0000,1.0,3.0,1.86,-0.211657,0.372727,-0.213904,-0.093008,-0.075,0.0,2.0,-0.090000,-0.174524,0.350000,0.747573,1.0,4.0,-0.610,1.0,-0.153846,0.561922,0.222222,0.571698,3.0,1.370714,0.109913,0.490196,0.456637,2.0,1.0,0.071429,-0.204624,-0.243386,0.101695,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0


In [None]:
# retrain model after processing

DIs = []
for level in np.linspace(0., 1., 11):
    di = DisparateImpactRemover(repair_level=level)
    
    # transform original dataset
    dataset_transf_train = di.fit_transform(dataset1b)
    transformed = dataset_transf_train.convert_to_dataframe()[0]
    tranformed_df = transformed.drop(['sex-binary', 'race-binary', 'race-grouped', 'age-binary'], axis=1)
    X_train, X_val, X_test, y_train, y_val, y_test = utilities.process_features(transformed_df, 'CVD', QuantileTransformer(output_distribution='uniform'), one_hot=True, val=True)
    X_train, y_train= utilities.resample_data(X_train, y_train, 'under')

    clf_xgb.fit(X_train, y_train,
                eval_set=[(X_val, y_val)],
                early_stopping_rounds=40,
                verbose=20)
    
    y_pred_proba = clf_xgb.predict_proba(X_test)[:,1]
    y_pred = np.where(y_pred_proba > threshold, 1,0)
    
    p, u =  get_att_privilege_groups('sex-binary')

    dataset_pred = transformed.copy()
    dataset_pred.labels = y_pred

    cm = BinaryLabelDatasetMetric(dataset_pred, privileged_groups=p, unprivileged_groups=u)
    DIs.append(cm.disparate_impact())

  0%|                                                                                           | 0/11 [00:00<?, ?it/s]

In [None]:
# bias evaluation
%matplotlib notebook

plt.plot(np.linspace(0, 1, 11), DIs, marker='o')
plt.plot([0, 1], [1, 1], 'g')
plt.plot([0, 1], [0.8, 0.8], 'r')
plt.ylim([0.4, 1.2])
plt.ylabel('Disparate Impact (DI)')
plt.xlabel('repair level')
plt.show()

### Method 2. Learning Fair Representations

Finds the latent representation which encodes the data well but obfuscated information about protected attributes

### Method 3. Optimized Preprocessing

Learns a probabilistic transformation that edits the features and labels in the data with group fairness, individual distortion, and dad fidelity constraints and objectives

### Method 4. Reweighing 

Weights the examples in each (group, label) combination differently to ensure fairness before classification

In [None]:
RW = Reweighing(unprivileged_groups=unprivileged_groups,
               privileged_groups=privileged_groups)
RW.fit(dataset_orig_train)
dataset_transf_train = RW.transform(dataset1b)