In [1]:
import numpy as np
import pandas as pd
import os
import sys

from measures.fairness_measures import *
from data.process_data import WrapData

In [2]:
def print_score_df(score_df):
    """Print one ' & '-separated row per record of ``score_df``.

    The five score columns ('acc', 'proc_acc', 'dp', 'eo', 'eos_neg') are
    multiplied by 100 and shown with two decimals, followed by the value of
    the 'method' column.

    Parameters
    ----------
    score_df : pandas.DataFrame
        Must contain the columns 'acc', 'proc_acc', 'dp', 'eo', 'eos_neg'
        and 'method'.

    Returns
    -------
    None
        Rows are written to stdout.
    """
    score_columns = ['acc', 'proc_acc', 'dp', 'eo', 'eos_neg']
    # Scale every score column to percentage points up front.
    percentages = [score_df[name].to_numpy() * 100.0 for name in score_columns]
    labels = score_df['method'].to_numpy()

    row_template = '{:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {}'
    for record in zip(*percentages, labels):
        print(row_template.format(*record))

## Dataset

In [3]:
# Data wrapper rooted at ./data; its wrap_data() method is used in the next cell to load each dataset.
wrap = WrapData('./data')

In [4]:
# Load both datasets. Each wrap_data() call is unpacked into six values, named here as
# (X_train, y_train, X_test, y_test, protected_train, protected_test).
X_train_adult, y_train_adult, X_test_adult, y_test_adult, protected_train_adult, protected_test_adult = wrap.wrap_data('adult')
X_train_compas, y_train_compas, X_test_compas, y_test_compas, protected_train_compas, protected_test_compas = wrap.wrap_data('compas')

In [5]:
# Sanity check: dimensions of the Adult training features.
X_train_adult.shape

(32561, 122)

In [6]:
# Sanity check: dimensions of the Adult test features.
X_test_adult.shape

(16281, 122)

In [7]:
# Sanity check: dimensions of the COMPAS training features.
X_train_compas.shape

(3483, 27)

In [8]:
# Sanity check: dimensions of the COMPAS test features.
X_test_compas.shape

(1795, 27)

In [9]:
# Take the first entry of each `protected_*` container as the group indicator.
# Names without a `_train` suffix hold the test-split indicator.
# NOTE(review): presumably index 0 selects the primary protected attribute — confirm against WrapData.
proc_indicator_adult_train = protected_train_adult[0]
proc_indicator_adult = protected_test_adult[0]
proc_indicator_compas_train = protected_train_compas[0]
proc_indicator_compas = protected_test_compas[0]

In [10]:
# Peek at the first ten entries of the Adult test-split indicator.
proc_indicator_adult[0:10]

array([False, False, False, False,  True, False, False, False,  True,
       False])

In [11]:
# Peek at the first ten entries of the COMPAS test-split indicator.
proc_indicator_compas[0:10]

array([ True,  True, False,  True,  True, False,  True, False,  True,
       False])

In [12]:
# Test-split lookups keyed by dataset name; the Thresholding section indexes these
# with the entries of its `data_lst` to obtain `indicator` and `y_true`.
indicator_dicn = {'Adult': proc_indicator_adult,
                  'COMPAS': proc_indicator_compas}
label_dicn = {'Adult': y_test_adult,
              'COMPAS': y_test_compas}

In [13]:
# Adult: demographic_parity evaluated directly on the labels (second argument left as None),
# shown as [train, test].
[demographic_parity(y_train_adult, None, proc_indicator_adult_train), demographic_parity(y_test_adult, None, proc_indicator_adult)]

[0.13134896882844013, 0.12739026791068742]

In [14]:
# COMPAS: demographic_parity evaluated directly on the labels (second argument left as None),
# shown as [train, test].
[demographic_parity(y_train_compas, None, proc_indicator_compas_train), demographic_parity(y_test_compas, None, proc_indicator_compas)]

[0.054274682437339794, 0.049765301686998664]

## Thresholding

In [15]:
# Accumulator for the scores of every fairness method; each section below appends its rows.
# Re-run this cell before re-running any section, otherwise that section's rows are appended twice.
all_score_df = pd.DataFrame()

In [16]:
def get_thresholding_df(data_folder, data_name, model_name):
    """Load the thresholding predictions for one (dataset, model) pair.

    Reads ``<data_folder>/<data_name>_thresholding_<model_name>.csv`` and
    prints a progress line containing the bare file name.

    NOTE: this name is redefined in later sections of the notebook; the
    definition that was executed most recently is the one that is bound.

    Parameters
    ----------
    data_folder : str
        Directory holding the result CSV files.
    data_name : str
        Dataset part of the file name (e.g. 'Adult').
    model_name : str
        Model part of the file name (e.g. 'LogReg').

    Returns
    -------
    pandas.DataFrame
        Contents of the CSV file.
    """
    base_name = ''.join([data_name, '_thresholding_', model_name, '.csv'])
    print('Processing {} =====>'.format(base_name))
    csv_path = os.path.join(data_folder, base_name)
    return pd.read_csv(csv_path)

def get_thresholding_score(res_df, method_name, y_true, indicator):
    """Score one prediction column of a thresholding result frame.

    Parameters
    ----------
    res_df : pandas.DataFrame
        Result frame containing a column named ``method_name``.
    method_name : str
        Name of the prediction column to evaluate.
    y_true, indicator
        Passed unchanged as the second and third arguments of
        ``get_all_scores``.

    Returns
    -------
    Whatever ``get_all_scores(y_pred, y_true, indicator)`` returns; callers
    in this notebook unpack it as ``(acc, proc_acc, dp, eo, eos_neg)``.
    """
    predictions = res_df[method_name].to_numpy()
    return get_all_scores(predictions, y_true, indicator)

def process_thresholding_measures(data_folder, data_name, model_name,
                                  y_true, indicator):
    """Score every thresholding variant for one (dataset, model) pair.

    Loads the result file via ``get_thresholding_df`` and evaluates the four
    prediction columns 'y_pred', 'y_pred_even', 'y_pred_protected' and
    'y_pred_unprotected' with ``get_thresholding_score``.

    Returns
    -------
    pandas.DataFrame
        One row per prediction column, with columns
        'acc', 'proc_acc', 'dp', 'eo', 'eos_neg', 'method' (in that order).
    """
    score_keys = ('acc', 'proc_acc', 'dp', 'eo', 'eos_neg')
    collected = {key: [] for key in score_keys}
    collected['method'] = []

    res_df = get_thresholding_df(data_folder, data_name, model_name)

    prediction_columns = ('y_pred', 'y_pred_even', 'y_pred_protected', 'y_pred_unprotected')
    for column in prediction_columns:
        acc, proc_acc, dp, eo, eos_neg = get_thresholding_score(res_df, column, y_true, indicator)
        for key, value in zip(score_keys, (acc, proc_acc, dp, eo, eos_neg)):
            collected[key].append(value)
        collected['method'].append(column)

    return pd.DataFrame(collected)
        

    
# Thresholding: score every (dataset, model) pair, print each table, and append
# the combined result to `all_score_df` tagged with fair_method='thresholding'.
data_folder = './thresholding/results'
data_lst = ['Adult', 'COMPAS']
model_lst = ['LogReg', 'MLP', 'SVM']

# Collect the per-pair frames in a list and concatenate once, instead of
# re-copying a growing DataFrame on every iteration.
score_frames = []
for data_name in data_lst:
    for model_name in model_lst:
        score_df = process_thresholding_measures(data_folder, data_name, model_name,
                                                 label_dicn[data_name], indicator_dicn[data_name])
        print(score_df.columns.to_numpy())
        print_score_df(score_df)

        # Tag the rows with lower-cased dataset/model names (used as filter keys later on).
        score_df['data'] = data_name.lower()
        score_df['model'] = model_name.lower()
        score_frames.append(score_df)

curr_score_df = pd.concat(score_frames, ignore_index=True)
curr_score_df['fair_method'] = 'thresholding'
# NOTE: re-running this cell appends the same rows to all_score_df again;
# reset all_score_df first when re-executing.
all_score_df = pd.concat([all_score_df, curr_score_df], ignore_index=True)

Processing Adult_thresholding_LogReg.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
85.84 & 92.73 & 11.72 & 8.57 & 4.25 & y_pred
85.02 & 91.03 & 5.14 & -11.73 & -0.01 & y_pred_even
84.59 & 88.99 & 5.39 & -13.81 & -0.29 & y_pred_protected
84.54 & 92.73 & 4.92 & -8.56 & 0.65 & y_pred_unprotected
Processing Adult_thresholding_MLP.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
82.78 & 90.19 & 11.85 & 5.66 & 5.23 & y_pred
82.18 & 88.01 & 5.07 & -13.14 & 0.46 & y_pred_even
80.79 & 84.21 & 5.09 & -10.55 & -0.30 & y_pred_protected
82.30 & 90.19 & 5.00 & -9.86 & 1.06 & y_pred_unprotected
Processing Adult_thresholding_SVM.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
75.16 & 85.10 & 15.27 & 10.83 & 9.95 & y_pred
74.93 & 81.61 & 8.69 & -0.86 & 4.34 & y_pred_even
72.84 & 78.14 & 8.25 & -3.09 & 3.95 & y_pred_protected
76.80 & 85.10 & 8.74 & 0.48 & 4.61 & y_pred_unprotected
Processing COMPAS_thresholding_LogReg.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'me

## Label Debias

In [17]:
def get_thresholding_df(data_folder, data_name, model_name, measure):
    """Load the label-debias test predictions for one configuration.

    Reads ``<data_folder>/<model_name>/<model_name>_<measure>_<data_name>_test.csv``
    and prints a progress line containing the bare file name.

    NOTE: this redefinition shadows the same-named function from the
    Thresholding section and takes an extra ``measure`` argument.

    Parameters
    ----------
    data_folder : str
        Root directory of the label-debias results.
    data_name : str
        Dataset part of the file name (e.g. 'Adult').
    model_name : str
        Model name; used both as sub-directory and file-name prefix.
    measure : str
        Measure part of the file name (e.g. 'DP').

    Returns
    -------
    pandas.DataFrame
        Contents of the CSV file.
    """
    base_name = ''.join([model_name, '_', measure, '_', data_name, '_test.csv'])
    print('Processing {} =====>'.format(base_name))
    csv_path = os.path.join(data_folder, model_name, base_name)
    return pd.read_csv(csv_path)

def get_thresholding_score(res_df):
    """Score a label-debias result frame.

    Reads the 'y_pred', 'label' and 'protected' columns of ``res_df`` and
    forwards them, in that order, to ``get_all_scores``.

    Returns
    -------
    Whatever ``get_all_scores`` returns; callers in this notebook unpack it
    as ``(acc, proc_acc, dp, eo, eos_neg)``.
    """
    y_pred, y_true, indicator = (
        res_df[column].to_numpy() for column in ('y_pred', 'label', 'protected')
    )
    return get_all_scores(y_pred, y_true, indicator)

def process_thresholding_measures(data_folder, data_name, model_name):
    """Score the label-debias variants 'DP', 'EO' and 'EOs' for one (dataset, model) pair.

    Each variant has its own result file, loaded with ``get_thresholding_df``
    and evaluated with ``get_thresholding_score``.

    Returns
    -------
    pandas.DataFrame
        One row per variant, with columns
        'acc', 'proc_acc', 'dp', 'eo', 'eos_neg', 'method' (in that order).
    """
    score_keys = ('acc', 'proc_acc', 'dp', 'eo', 'eos_neg')
    collected = {key: [] for key in score_keys}
    collected['method'] = []

    for variant in ('DP', 'EO', 'EOs'):
        variant_df = get_thresholding_df(data_folder, data_name, model_name, variant)
        acc, proc_acc, dp, eo, eos_neg = get_thresholding_score(variant_df)
        for key, value in zip(score_keys, (acc, proc_acc, dp, eo, eos_neg)):
            collected[key].append(value)
        collected['method'].append(variant)

    return pd.DataFrame(collected)

# Label debias: score every (dataset, model) pair, print each table, and append
# the combined result to `all_score_df` tagged with fair_method='labelDebias'.
data_folder = './label_debias/results'
data_lst = ['Adult', 'COMPAS']
model_lst = ['LogReg', 'MLP', 'SVM']

# Collect the per-pair frames in a list and concatenate once, instead of
# re-copying a growing DataFrame on every iteration.
score_frames = []
for data_name in data_lst:
    for model_name in model_lst:
        score_df = process_thresholding_measures(data_folder, data_name, model_name)
        print(score_df.columns.to_numpy())
        print_score_df(score_df)

        # Tag the rows with lower-cased dataset/model names (used as filter keys later on).
        score_df['data'] = data_name.lower()
        score_df['model'] = model_name.lower()
        score_frames.append(score_df)

curr_score_df = pd.concat(score_frames, ignore_index=True)
curr_score_df['fair_method'] = 'labelDebias'
# NOTE: re-running this cell appends the same rows to all_score_df again;
# reset all_score_df first when re-executing.
all_score_df = pd.concat([all_score_df, curr_score_df], ignore_index=True)

Processing LogReg_DP_Adult_test.csv =====>
Processing LogReg_EO_Adult_test.csv =====>
Processing LogReg_EOs_Adult_test.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
84.45 & 90.78 & 2.35 & -19.83 & -1.52 & DP
85.55 & 92.34 & 9.37 & -0.40 & 2.87 & EO
85.65 & 92.44 & 8.20 & -3.60 & 2.12 & EOs
Processing MLP_DP_Adult_test.csv =====>
Processing MLP_EO_Adult_test.csv =====>
Processing MLP_EOs_Adult_test.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
83.24 & 90.41 & 11.08 & 6.65 & 4.44 & DP
82.80 & 90.09 & 12.64 & 9.76 & 5.61 & EO
83.33 & 90.32 & 9.97 & 4.49 & 3.62 & EOs
Processing SVM_DP_Adult_test.csv =====>
Processing SVM_EO_Adult_test.csv =====>
Processing SVM_EOs_Adult_test.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
68.72 & 78.01 & 10.70 & 5.55 & 7.75 & DP
54.86 & 42.63 & -16.14 & -5.98 & -17.28 & EO
77.46 & 86.90 & 14.62 & 17.99 & 8.58 & EOs
Processing LogReg_DP_COMPAS_test.csv =====>
Processing LogReg_EO_COMPAS_test.csv =====>
Processing LogReg_

## LAFTR Representations

In [18]:
def get_thresholding_df(data_folder, data_name, model_name):
    """Load the LAFTR transfer test predictions for one (dataset, model) pair.

    Reads ``<data_folder>/<data_name lower-cased>/transfer/<data_name>_transfer_<model_name>_test.csv``
    and prints a progress line containing the bare file name.

    NOTE: this redefinition shadows the same-named functions from earlier
    sections of the notebook.

    Parameters
    ----------
    data_folder : str
        Root directory of the LAFTR experiments.
    data_name : str
        Dataset name; lower-cased for the sub-directory, used as-is in the file name.
    model_name : str
        Model part of the file name (e.g. 'logreg').

    Returns
    -------
    pandas.DataFrame
        Contents of the CSV file.
    """
    base_name = ''.join([data_name, '_transfer_', model_name, '_test.csv'])
    print('Processing {} =====>'.format(base_name))
    csv_path = os.path.join(data_folder, data_name.lower(), 'transfer', base_name)
    return pd.read_csv(csv_path)

def get_thresholding_score(res_df):
    """Score a LAFTR result frame.

    Reads the 'y_pred', 'label' and 'protected' columns of ``res_df`` and
    forwards them, in that order, to ``get_all_scores``.

    Returns
    -------
    Whatever ``get_all_scores`` returns; callers in this notebook unpack it
    as ``(acc, proc_acc, dp, eo, eos_neg)``.
    """
    arrays = [res_df[column].to_numpy() for column in ('y_pred', 'label', 'protected')]
    return get_all_scores(*arrays)

def process_thresholding_measures(data_folder, data_name, model_name):
    """Score the LAFTR 'transfer' predictions for one (dataset, model) pair.

    The result file is loaded with ``get_thresholding_df`` and evaluated
    with ``get_thresholding_score``.

    Returns
    -------
    pandas.DataFrame
        A single row labelled 'transfer', with columns
        'acc', 'proc_acc', 'dp', 'eo', 'eos_neg', 'method' (in that order).
    """
    score_keys = ('acc', 'proc_acc', 'dp', 'eo', 'eos_neg')
    collected = {key: [] for key in score_keys}
    collected['method'] = []

    # Only one variant exists for LAFTR; the loop mirrors the other sections.
    for variant in ('transfer',):
        transfer_df = get_thresholding_df(data_folder, data_name, model_name)
        acc, proc_acc, dp, eo, eos_neg = get_thresholding_score(transfer_df)
        for key, value in zip(score_keys, (acc, proc_acc, dp, eo, eos_neg)):
            collected[key].append(value)
        collected['method'].append(variant)

    return pd.DataFrame(collected)

# LAFTR: score every (dataset, model) pair, print each table, and append
# the combined result to `all_score_df` tagged with fair_method='laftr'.
data_folder = './laftr/experiments/laftr_new'
data_lst = ['Adult', 'COMPAS']
# 'logreg' is lower-case here because the model name is used verbatim in the file name.
model_lst = ['logreg', 'MLP', 'SVM']

# Collect the per-pair frames in a list and concatenate once, instead of
# re-copying a growing DataFrame on every iteration.
score_frames = []
for data_name in data_lst:
    for model_name in model_lst:
        score_df = process_thresholding_measures(data_folder, data_name, model_name)
        print(score_df.columns.to_numpy())
        print_score_df(score_df)

        # Tag the rows with lower-cased dataset/model names (used as filter keys later on).
        score_df['data'] = data_name.lower()
        score_df['model'] = model_name.lower()
        score_frames.append(score_df)

curr_score_df = pd.concat(score_frames, ignore_index=True)
curr_score_df['fair_method'] = 'laftr'
# NOTE: re-running this cell appends the same rows to all_score_df again;
# reset all_score_df first when re-executing.
all_score_df = pd.concat([all_score_df, curr_score_df], ignore_index=True)

Processing Adult_transfer_logreg_test.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
82.66 & 90.28 & 3.09 & -15.40 & -0.31 & transfer
Processing Adult_transfer_MLP_test.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
84.56 & 90.97 & 9.52 & -0.07 & 3.00 & transfer
Processing Adult_transfer_SVM_test.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
42.66 & 42.18 & 9.94 & 12.57 & 8.02 & transfer
Processing COMPAS_transfer_logreg_test.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
67.63 & 66.42 & 4.82 & 3.57 & 2.75 & transfer
Processing COMPAS_transfer_MLP_test.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
65.68 & 65.22 & 6.42 & 4.46 & 5.48 & transfer
Processing COMPAS_transfer_SVM_test.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
68.02 & 67.53 & 4.89 & 2.59 & 3.87 & transfer


## Fair Constraints

In [19]:
def get_thresholding_df(data_folder, data_name, model_name):
    """Load the fair-constraints predictions for one (dataset, model) pair.

    Reads ``<data_folder>/constraints_<model_name>_<data_name>.csv`` and
    prints a progress line containing the bare file name.

    NOTE: this redefinition shadows the same-named functions from earlier
    sections of the notebook.

    Parameters
    ----------
    data_folder : str
        Directory holding the result CSV files.
    data_name : str
        Dataset part of the file name (e.g. 'Adult').
    model_name : str
        Model part of the file name (e.g. 'LogReg').

    Returns
    -------
    pandas.DataFrame
        Contents of the CSV file.
    """
    base_name = ''.join(['constraints_', model_name, '_', data_name, '.csv'])
    print('Processing {} =====>'.format(base_name))
    csv_path = os.path.join(data_folder, base_name)
    return pd.read_csv(csv_path)

def get_thresholding_score(res_df, method_name):
    """Score one prediction column of a fair-constraints result frame.

    The predictions come from column ``method_name``; the reference labels
    and the group indicator come from the 'label' and 'protected' columns
    of the same frame. All three are forwarded, in that order, to
    ``get_all_scores``.

    Returns
    -------
    Whatever ``get_all_scores`` returns; callers in this notebook unpack it
    as ``(acc, proc_acc, dp, eo, eos_neg)``.
    """
    y_pred, y_true, indicator = (
        res_df[column].to_numpy() for column in (method_name, 'label', 'protected')
    )
    return get_all_scores(y_pred, y_true, indicator)

def process_thresholding_measures(data_folder, data_name, model_name):
    """Score every fair-constraints variant for one (dataset, model) pair.

    Loads the result file via ``get_thresholding_df`` and evaluates the four
    prediction columns 'unconstrained_pred', 'pure_fair_pred',
    'part_fair_pred' and 'fine_part_fair_pred' with ``get_thresholding_score``.

    Returns
    -------
    pandas.DataFrame
        One row per prediction column, with columns
        'acc', 'proc_acc', 'dp', 'eo', 'eos_neg', 'method' (in that order).
    """
    score_keys = ('acc', 'proc_acc', 'dp', 'eo', 'eos_neg')
    collected = {key: [] for key in score_keys}
    collected['method'] = []

    res_df = get_thresholding_df(data_folder, data_name, model_name)

    prediction_columns = ('unconstrained_pred', 'pure_fair_pred', 'part_fair_pred', 'fine_part_fair_pred')
    for column in prediction_columns:
        acc, proc_acc, dp, eo, eos_neg = get_thresholding_score(res_df, column)
        for key, value in zip(score_keys, (acc, proc_acc, dp, eo, eos_neg)):
            collected[key].append(value)
        collected['method'].append(column)

    return pd.DataFrame(collected)

# Fair constraints: score every (dataset, model) pair, print each table, and append
# the combined result to `all_score_df` tagged with fair_method='fairCons'.
data_folder = './fair_constraints/results'
data_lst = ['Adult', 'COMPAS']
# No MLP entry: only LogReg and SVM are evaluated for this method.
model_lst = ['LogReg', 'SVM']

# Collect the per-pair frames in a list and concatenate once, instead of
# re-copying a growing DataFrame on every iteration.
score_frames = []
for data_name in data_lst:
    for model_name in model_lst:
        score_df = process_thresholding_measures(data_folder, data_name, model_name)
        print(score_df.columns.to_numpy())
        print_score_df(score_df)

        # Tag the rows with lower-cased dataset/model names (used as filter keys later on).
        score_df['data'] = data_name.lower()
        score_df['model'] = model_name.lower()
        score_frames.append(score_df)

curr_score_df = pd.concat(score_frames, ignore_index=True)
curr_score_df['fair_method'] = 'fairCons'
# NOTE: re-running this cell appends the same rows to all_score_df again;
# reset all_score_df first when re-executing.
all_score_df = pd.concat([all_score_df, curr_score_df], ignore_index=True)

Processing constraints_LogReg_Adult.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
85.80 & 92.75 & 11.04 & 6.10 & 3.87 & unconstrained_pred
85.80 & 92.75 & 11.04 & 6.10 & 3.87 & pure_fair_pred
80.92 & 91.09 & -2.74 & -37.76 & -2.43 & part_fair_pred
59.57 & 46.71 & -6.38 & -12.53 & -10.99 & fine_part_fair_pred
Processing constraints_SVM_Adult.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
85.63 & 92.64 & 11.02 & 7.59 & 3.86 & unconstrained_pred
85.61 & 92.60 & 11.03 & 7.64 & 3.86 & pure_fair_pred
80.92 & 91.50 & -2.05 & -32.77 & -1.96 & part_fair_pred
85.64 & 92.64 & 10.93 & 7.41 & 3.79 & fine_part_fair_pred
Processing constraints_LogReg_COMPAS.csv =====>
['acc' 'proc_acc' 'dp' 'eo' 'eos_neg' 'method']
67.74 & 67.77 & 11.59 & 9.55 & 10.14 & unconstrained_pred
54.60 & 54.76 & -0.14 & -1.75 & 0.78 & pure_fair_pred
58.22 & 55.96 & 1.16 & -0.25 & 1.55 & part_fair_pred
54.93 & 49.95 & 0.00 & 0.00 & 0.00 & fine_part_fair_pred
Processing constraints_SVM_COMPAS.csv =====

## Print summary tables

In [20]:
def print_score_df_selected(score_df):
    """Print one ' & '-separated row per record, led by the fairness method.

    Each row starts with the value of the 'fair_method' column, followed by
    the 'acc', 'proc_acc', 'dp', 'eo' and 'eos_neg' columns multiplied by
    100 and shown with two decimals.

    Parameters
    ----------
    score_df : pandas.DataFrame
        Must contain the columns 'acc', 'proc_acc', 'dp', 'eo', 'eos_neg'
        and 'fair_method'.

    Returns
    -------
    None
        Rows are written to stdout.
    """
    score_columns = ['acc', 'proc_acc', 'dp', 'eo', 'eos_neg']
    # Scale every score column to percentage points up front.
    percentages = [score_df[name].to_numpy() * 100.0 for name in score_columns]
    labels = score_df['fair_method'].to_numpy()

    row_template = '{} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f}'
    for record in zip(labels, *percentages):
        print(row_template.format(*record))

In [51]:
def generate_latex_line(adult_score_df, compas_df, method_column):
    """Print side-by-side Adult/COMPAS table rows, one per row of ``adult_score_df``.

    Each printed line holds the label taken from ``method_column`` of the
    Adult frame, then the five Adult scores, then the five COMPAS scores,
    all multiplied by 100 and shown with two decimals, separated by ' & '
    (with a trailing space).

    Rows of the two frames are paired purely by position, and the number of
    printed lines is the row count of ``adult_score_df``; callers must pass
    frames whose rows are in the same order.

    Parameters
    ----------
    adult_score_df : pandas.DataFrame
        Scores for the first dataset; must contain 'acc', 'proc_acc', 'dp',
        'eo', 'eos_neg' and ``method_column``.
    compas_df : pandas.DataFrame
        Scores for the second dataset; must contain 'acc', 'proc_acc', 'dp',
        'eo', 'eos_neg'.
    method_column : str
        Column of ``adult_score_df`` used as the row label.

    Returns
    -------
    None
        Rows are written to stdout.
    """
    score_columns = ('acc', 'proc_acc', 'dp', 'eo', 'eos_neg')

    adult_pct = [adult_score_df[name].to_numpy() * 100.0 for name in score_columns]
    labels = adult_score_df[method_column].to_numpy()
    compas_pct = [compas_df[name].to_numpy() * 100.0 for name in score_columns]

    adult_template = '{} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} '
    compas_template = '& {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} '

    for row_idx in range(adult_score_df.shape[0]):
        left_half = adult_template.format(labels[row_idx], *[column[row_idx] for column in adult_pct])
        right_half = compas_template.format(*[column[row_idx] for column in compas_pct])
        print(left_half + right_half)

In [21]:
# Persist the scores of every method/sub-method collected above (index column omitted).
all_score_df.to_csv('./all_fairness_score.csv', index=False)

In [22]:
# Sub-method whose scores represent each fairness-enhancing technique in the main table.
print_lst = ['fine_part_fair_pred', 'transfer', 'DP', 'y_pred_even']
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of all_score_df (which triggers SettingWithCopyWarning).
all_score_df_print = all_score_df[all_score_df['method'].isin(print_lst)].copy()

# Ordered categorical so that sorting follows the presentation order of the
# techniques rather than alphabetical order.
custom_order = ['laftr', 'labelDebias', 'fairCons', 'thresholding']
all_score_df_print['fair_method'] = pd.Categorical(all_score_df_print['fair_method'],
                                                   categories=custom_order, ordered=True)
all_score_df_print = all_score_df_print.sort_values('fair_method')

In [24]:
# Persist the main-table subset (one selected sub-method per technique).
all_score_df_print.to_csv('./all_fairness_score_main.csv', index=False)

In [25]:
# Main table: for every (model, dataset) pair, print one row per technique.
for model in ['logreg', 'svm', 'mlp']:
    model_mask = all_score_df_print['model'] == model
    for dataset in ['adult', 'compas']:
        dataset_mask = all_score_df_print['data'] == dataset
        sub_df = all_score_df_print[model_mask & dataset_mask]
        print('scores for {} on set {}'.format(model, dataset))
        print_score_df_selected(sub_df)
        # Two blank lines between consecutive tables.
        print('\n')

scores for logreg on set adult
laftr & 82.66 & 90.28 & 3.09 & -15.40 & -0.31
labelDebias & 84.45 & 90.78 & 2.35 & -19.83 & -1.52
fairCons & 59.57 & 46.71 & -6.38 & -12.53 & -10.99
thresholding & 85.02 & 91.03 & 5.14 & -11.73 & -0.01


scores for logreg on set compas
laftr & 67.63 & 66.42 & 4.82 & 3.57 & 2.75
labelDebias & 67.80 & 68.33 & 1.48 & -1.02 & 0.48
fairCons & 54.93 & 49.95 & 0.00 & 0.00 & 0.00
thresholding & 67.80 & 67.77 & 6.74 & 4.61 & 5.41


scores for svm on set adult
laftr & 42.66 & 42.18 & 9.94 & 12.57 & 8.02
labelDebias & 68.72 & 78.01 & 10.70 & 5.55 & 7.75
fairCons & 85.64 & 92.64 & 10.93 & 7.41 & 3.79
thresholding & 74.93 & 81.61 & 8.69 & -0.86 & 4.34


scores for svm on set compas
laftr & 68.02 & 67.53 & 4.89 & 2.59 & 3.87
labelDebias & 66.57 & 67.22 & -0.25 & -2.87 & -0.86
fairCons & 65.52 & 64.17 & 10.09 & 8.96 & 8.34
thresholding & 66.24 & 65.19 & 5.30 & 3.68 & 3.92


scores for mlp on set adult
laftr & 84.56 & 90.97 & 9.52 & -0.07 & 3.00
labelDebias & 83.24 & 90.

## Experiments

TODO:
* Wait for the SVM results from FairConstr
* For each technique, choose just one method to display in the main text and put the others in the appendix
* Put detailed hyperparameters in the appendix

### LabelDebias

In [31]:
# Scores of every LabelDebias strategy (DP / EO / EOs).
# drop() + copy() produce an independent frame, so removing 'fair_method' and
# re-typing 'method' below never operate on a slice of all_score_df
# (the original pop()/assignment on the slice triggers SettingWithCopyWarning).
labelDebias_score_df = (
    all_score_df[all_score_df['fair_method'] == 'labelDebias']
    .drop(columns='fair_method')
    .copy()
)

# Ordered categorical fixes the presentation order of the strategies.
custom_order = ['DP', 'EO', 'EOs']
labelDebias_score_df['method'] = pd.Categorical(labelDebias_score_df['method'],
                                                categories=custom_order, ordered=True)

In [33]:
# Persist the LabelDebias per-strategy scores (index column omitted).
labelDebias_score_df.to_csv('./LabelDebias_fairness_score.csv', index=False)

In [34]:
# LabelDebias: for every (model, dataset) pair, print one row per strategy.
for model in ['logreg', 'svm', 'mlp']:
    model_mask = labelDebias_score_df['model'] == model
    for dataset in ['adult', 'compas']:
        dataset_mask = labelDebias_score_df['data'] == dataset
        sub_df = labelDebias_score_df[model_mask & dataset_mask]
        print('scores for {} on set {}'.format(model, dataset))
        print_score_df(sub_df)
        # Two blank lines between consecutive tables.
        print('\n')

scores for logreg on set adult
84.45 & 90.78 & 2.35 & -19.83 & -1.52 & DP
85.55 & 92.34 & 9.37 & -0.40 & 2.87 & EO
85.65 & 92.44 & 8.20 & -3.60 & 2.12 & EOs


scores for logreg on set compas
67.80 & 68.33 & 1.48 & -1.02 & 0.48 & DP
67.97 & 68.14 & 4.65 & 2.30 & 3.50 & EO
67.69 & 68.24 & 3.06 & 0.45 & 2.21 & EOs


scores for svm on set adult
68.72 & 78.01 & 10.70 & 5.55 & 7.75 & DP
54.86 & 42.63 & -16.14 & -5.98 & -17.28 & EO
77.46 & 86.90 & 14.62 & 17.99 & 8.58 & EOs


scores for svm on set compas
66.57 & 67.22 & -0.25 & -2.87 & -0.86 & DP
65.52 & 64.17 & 10.09 & 8.96 & 8.34 & EO
65.52 & 64.17 & 10.09 & 8.96 & 8.34 & EOs


scores for mlp on set adult
83.24 & 90.41 & 11.08 & 6.65 & 4.44 & DP
82.80 & 90.09 & 12.64 & 9.76 & 5.61 & EO
83.33 & 90.32 & 9.97 & 4.49 & 3.62 & EOs


scores for mlp on set compas
65.63 & 64.36 & 10.85 & 10.51 & 8.11 & DP
64.51 & 62.88 & 10.82 & 10.91 & 7.87 & EO
66.07 & 64.73 & 11.01 & 10.61 & 8.23 & EOs




In [52]:
# LabelDebias: for every model, print combined Adult/COMPAS rows, one per strategy.
for model in ['logreg', 'svm', 'mlp']:
    print('Evaluation on model {}'.format(model))
    model_rows = labelDebias_score_df[labelDebias_score_df['model'] == model]
    adult_subset = model_rows[model_rows['data'] == 'adult']
    compas_subset = model_rows[model_rows['data'] == 'compas']
    generate_latex_line(adult_subset, compas_subset, 'method')
    # Two blank lines between consecutive models.
    print('\n')

Evaluation on model logreg
DP & 84.45 & 90.78 & 2.35 & -19.83 & -1.52 & 67.80 & 68.33 & 1.48 & -1.02 & 0.48 
EO & 85.55 & 92.34 & 9.37 & -0.40 & 2.87 & 67.97 & 68.14 & 4.65 & 2.30 & 3.50 
EOs & 85.65 & 92.44 & 8.20 & -3.60 & 2.12 & 67.69 & 68.24 & 3.06 & 0.45 & 2.21 


Evaluation on model svm
DP & 68.72 & 78.01 & 10.70 & 5.55 & 7.75 & 66.57 & 67.22 & -0.25 & -2.87 & -0.86 
EO & 54.86 & 42.63 & -16.14 & -5.98 & -17.28 & 65.52 & 64.17 & 10.09 & 8.96 & 8.34 
EOs & 77.46 & 86.90 & 14.62 & 17.99 & 8.58 & 65.52 & 64.17 & 10.09 & 8.96 & 8.34 


Evaluation on model mlp
DP & 83.24 & 90.41 & 11.08 & 6.65 & 4.44 & 65.63 & 64.36 & 10.85 & 10.51 & 8.11 
EO & 82.80 & 90.09 & 12.64 & 9.76 & 5.61 & 64.51 & 62.88 & 10.82 & 10.91 & 7.87 
EOs & 83.33 & 90.32 & 9.97 & 4.49 & 3.62 & 66.07 & 64.73 & 11.01 & 10.61 & 8.23 




### FairConstr

In [42]:
# Scores of every fair-constraints (FairConstr) variant.
# drop() + copy() produce an independent frame, so removing 'fair_method' and
# re-typing 'method' below never operate on a slice of all_score_df
# (the original pop()/assignment on the slice triggers SettingWithCopyWarning).
fairCons_score_df = (
    all_score_df[all_score_df['fair_method'] == 'fairCons']
    .drop(columns='fair_method')
    .copy()
)

# Ordered categorical fixes the presentation order of the variants.
custom_order = ['unconstrained_pred', 'pure_fair_pred', 'part_fair_pred', 'fine_part_fair_pred']
fairCons_score_df['method'] = pd.Categorical(fairCons_score_df['method'],
                                             categories=custom_order, ordered=True)

In [43]:
# Persist the fair-constraints per-variant scores (index column omitted).
fairCons_score_df.to_csv('./FairConstr_fairness_score.csv', index=False)

In [44]:
# FairConstr: for every (model, dataset) pair, print one row per variant.
# Only logreg and svm are listed; no MLP results exist for this technique.
for model in ['logreg', 'svm']:
    model_mask = fairCons_score_df['model'] == model
    for dataset in ['adult', 'compas']:
        dataset_mask = fairCons_score_df['data'] == dataset
        sub_df = fairCons_score_df[model_mask & dataset_mask]
        print('scores for {} on set {}'.format(model, dataset))
        print_score_df(sub_df)
        # Two blank lines between consecutive tables.
        print('\n')

scores for logreg on set adult
85.80 & 92.75 & 11.04 & 6.10 & 3.87 & unconstrained_pred
85.80 & 92.75 & 11.04 & 6.10 & 3.87 & pure_fair_pred
80.92 & 91.09 & -2.74 & -37.76 & -2.43 & part_fair_pred
59.57 & 46.71 & -6.38 & -12.53 & -10.99 & fine_part_fair_pred


scores for logreg on set compas
67.74 & 67.77 & 11.59 & 9.55 & 10.14 & unconstrained_pred
54.60 & 54.76 & -0.14 & -1.75 & 0.78 & pure_fair_pred
58.22 & 55.96 & 1.16 & -0.25 & 1.55 & part_fair_pred
54.93 & 49.95 & 0.00 & 0.00 & 0.00 & fine_part_fair_pred


scores for svm on set adult
85.63 & 92.64 & 11.02 & 7.59 & 3.86 & unconstrained_pred
85.61 & 92.60 & 11.03 & 7.64 & 3.86 & pure_fair_pred
80.92 & 91.50 & -2.05 & -32.77 & -1.96 & part_fair_pred
85.64 & 92.64 & 10.93 & 7.41 & 3.79 & fine_part_fair_pred


scores for svm on set compas
65.52 & 64.17 & 10.09 & 8.96 & 8.34 & unconstrained_pred
55.04 & 54.39 & -0.04 & -1.37 & 0.62 & pure_fair_pred
55.60 & 51.25 & 2.33 & 4.08 & -0.09 & part_fair_pred
65.52 & 64.17 & 10.09 & 8.96 & 8.34 

In [53]:
# FairConstr: for every model, print combined Adult/COMPAS rows, one per variant.
# Only logreg and svm are iterated: the fair-constraints results contain no MLP
# model, so including 'mlp' only printed an empty "Evaluation on model mlp" section.
for model in ['logreg', 'svm']:
    print('Evaluation on model {}'.format(model))
    adult_subset = fairCons_score_df[(fairCons_score_df['model']==model) & (fairCons_score_df['data']=='adult')]
    compas_subset = fairCons_score_df[(fairCons_score_df['model']==model) & (fairCons_score_df['data']=='compas')]
    generate_latex_line(adult_subset, compas_subset, 'method')
    print()
    print()

Evaluation on model logreg
unconstrained_pred & 85.80 & 92.75 & 11.04 & 6.10 & 3.87 & 67.74 & 67.77 & 11.59 & 9.55 & 10.14 
pure_fair_pred & 85.80 & 92.75 & 11.04 & 6.10 & 3.87 & 54.60 & 54.76 & -0.14 & -1.75 & 0.78 
part_fair_pred & 80.92 & 91.09 & -2.74 & -37.76 & -2.43 & 58.22 & 55.96 & 1.16 & -0.25 & 1.55 
fine_part_fair_pred & 59.57 & 46.71 & -6.38 & -12.53 & -10.99 & 54.93 & 49.95 & 0.00 & 0.00 & 0.00 


Evaluation on model svm
unconstrained_pred & 85.63 & 92.64 & 11.02 & 7.59 & 3.86 & 65.52 & 64.17 & 10.09 & 8.96 & 8.34 
pure_fair_pred & 85.61 & 92.60 & 11.03 & 7.64 & 3.86 & 55.04 & 54.39 & -0.04 & -1.37 & 0.62 
part_fair_pred & 80.92 & 91.50 & -2.05 & -32.77 & -1.96 & 55.60 & 51.25 & 2.33 & 4.08 & -0.09 
fine_part_fair_pred & 85.64 & 92.64 & 10.93 & 7.41 & 3.79 & 65.52 & 64.17 & 10.09 & 8.96 & 8.34 


Evaluation on model mlp




In [45]:
# Scores of every thresholding variant.
# drop() + copy() produce an independent frame, so removing 'fair_method' and
# re-typing 'method' below never operate on a slice of all_score_df
# (the original pop()/assignment on the slice triggers SettingWithCopyWarning).
thresh_score_df = (
    all_score_df[all_score_df['fair_method'] == 'thresholding']
    .drop(columns='fair_method')
    .copy()
)

# Ordered categorical fixes the presentation order of the variants.
custom_order = ['y_pred', 'y_pred_even', 'y_pred_protected', 'y_pred_unprotected']
thresh_score_df['method'] = pd.Categorical(thresh_score_df['method'],
                                           categories=custom_order, ordered=True)

In [46]:
# Persist the thresholding per-variant scores (index column omitted).
# NOTE(review): the file name is spelled 'Threshod' — probably a typo for 'Threshold';
# left unchanged in case downstream consumers rely on this exact path.
thresh_score_df.to_csv('./Threshod_fairness_score.csv', index=False)

In [47]:
# Thresholding: for every (model, dataset) pair, print one row per variant.
for model in ['logreg', 'svm', 'mlp']:
    model_mask = thresh_score_df['model'] == model
    for dataset in ['adult', 'compas']:
        dataset_mask = thresh_score_df['data'] == dataset
        sub_df = thresh_score_df[model_mask & dataset_mask]
        print('scores for {} on set {}'.format(model, dataset))
        print_score_df(sub_df)
        # Two blank lines between consecutive tables.
        print('\n')

scores for logreg on set adult
85.84 & 92.73 & 11.72 & 8.57 & 4.25 & y_pred
85.02 & 91.03 & 5.14 & -11.73 & -0.01 & y_pred_even
84.59 & 88.99 & 5.39 & -13.81 & -0.29 & y_pred_protected
84.54 & 92.73 & 4.92 & -8.56 & 0.65 & y_pred_unprotected


scores for logreg on set compas
67.69 & 67.50 & 11.36 & 9.55 & 9.71 & y_pred
67.80 & 67.77 & 6.74 & 4.61 & 5.41 & y_pred_even
68.30 & 68.61 & 7.43 & 4.53 & 6.84 & y_pred_protected
67.69 & 67.50 & 7.69 & 6.21 & 5.63 & y_pred_unprotected


scores for svm on set adult
75.16 & 85.10 & 15.27 & 10.83 & 9.95 & y_pred
74.93 & 81.61 & 8.69 & -0.86 & 4.34 & y_pred_even
72.84 & 78.14 & 8.25 & -3.09 & 3.95 & y_pred_protected
76.80 & 85.10 & 8.74 & 0.48 & 4.61 & y_pred_unprotected


scores for svm on set compas
65.52 & 64.17 & 10.09 & 8.96 & 8.34 & y_pred
66.24 & 65.19 & 5.30 & 3.68 & 3.92 & y_pred_even
66.24 & 65.37 & 5.59 & 3.29 & 5.00 & y_pred_protected
65.57 & 64.17 & 6.03 & 5.31 & 3.77 & y_pred_unprotected


scores for mlp on set adult
82.78 & 90.19 & 11

In [54]:
# Thresholding: for every model, print combined Adult/COMPAS rows, one per variant.
for model in ['logreg', 'svm', 'mlp']:
    print('Evaluation on model {}'.format(model))
    model_rows = thresh_score_df[thresh_score_df['model'] == model]
    adult_subset = model_rows[model_rows['data'] == 'adult']
    compas_subset = model_rows[model_rows['data'] == 'compas']
    generate_latex_line(adult_subset, compas_subset, 'method')
    # Two blank lines between consecutive models.
    print('\n')

Evaluation on model logreg
y_pred & 85.84 & 92.73 & 11.72 & 8.57 & 4.25 & 67.69 & 67.50 & 11.36 & 9.55 & 9.71 
y_pred_even & 85.02 & 91.03 & 5.14 & -11.73 & -0.01 & 67.80 & 67.77 & 6.74 & 4.61 & 5.41 
y_pred_protected & 84.59 & 88.99 & 5.39 & -13.81 & -0.29 & 68.30 & 68.61 & 7.43 & 4.53 & 6.84 
y_pred_unprotected & 84.54 & 92.73 & 4.92 & -8.56 & 0.65 & 67.69 & 67.50 & 7.69 & 6.21 & 5.63 


Evaluation on model svm
y_pred & 75.16 & 85.10 & 15.27 & 10.83 & 9.95 & 65.52 & 64.17 & 10.09 & 8.96 & 8.34 
y_pred_even & 74.93 & 81.61 & 8.69 & -0.86 & 4.34 & 66.24 & 65.19 & 5.30 & 3.68 & 3.92 
y_pred_protected & 72.84 & 78.14 & 8.25 & -3.09 & 3.95 & 66.24 & 65.37 & 5.59 & 3.29 & 5.00 
y_pred_unprotected & 76.80 & 85.10 & 8.74 & 0.48 & 4.61 & 65.57 & 64.17 & 6.03 & 5.31 & 3.77 


Evaluation on model mlp
y_pred & 82.78 & 90.19 & 11.85 & 5.66 & 5.23 & 64.18 & 62.60 & 9.21 & 9.29 & 6.33 
y_pred_even & 82.18 & 88.01 & 5.07 & -13.14 & 0.46 & 64.01 & 63.80 & 5.08 & 3.76 & 3.63 
y_pred_protected & 80.79 