In [1]:
import pandas as pd
import numpy as np
import time
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
from fairlearn.reductions import ExponentiatedGradient, EqualizedOdds, TruePositiveRateParity, ErrorRateParity

In [3]:
#Helper functions for computing fairness/accuracy
def compute_TPR_GAP(preds, Y, group):
    """Compute the true-positive rate overall and per group, plus the gap.

    Parameters
    ----------
    preds : array-like
        Predicted labels; truthy entries are predicted positives.
    Y : array-like
        Ground-truth labels; truthy entries are actual positives.
    group : array-like
        Group membership; truthy entries form group 1, all others group 2.

    Returns
    -------
    dict
        Keys 'TPR', 'TPR_1', 'TPR_2' and 'TPR_GAP' (absolute difference of
        the two group rates). A rate with no actual positives in its
        population is NaN instead of raising ZeroDivisionError, so one
        degenerate split does not abort a long experiment sweep.
    """
    # Coerce to boolean arrays so 0/1 integer inputs behave as masks rather
    # than being interpreted as positional (fancy) indices.
    preds = np.asarray(preds, dtype=bool)
    Y = np.asarray(Y, dtype=bool)
    group = np.asarray(group, dtype=bool)

    def _tpr(positives):
        # Share of the selected actual positives that were predicted positive.
        n = np.count_nonzero(positives)
        return np.count_nonzero(preds & positives) / n if n else float('nan')

    res = {
        'TPR': _tpr(Y),
        'TPR_1': _tpr(Y & group),
        'TPR_2': _tpr(Y & ~group),
    }
    res['TPR_GAP'] = abs(res['TPR_1'] - res['TPR_2'])
    return res

def compute_TNR_GAP(preds, Y, group):
    """Compute the true-negative rate overall and per group, plus the gap.

    Parameters
    ----------
    preds : array-like
        Predicted labels; truthy entries are predicted positives.
    Y : array-like
        Ground-truth labels; truthy entries are actual positives.
    group : array-like
        Group membership; truthy entries form group 1, all others group 2.

    Returns
    -------
    dict
        Keys 'TNR', 'TNR_1', 'TNR_2' and 'TNR_GAP' (absolute difference of
        the two group rates). A rate with no actual negatives in its
        population is NaN instead of raising ZeroDivisionError.
    """
    # Coerce to boolean arrays: `~` on integer labels is a bitwise NOT
    # (giving -1/-2), and integer masks would be used as fancy indices.
    preds = np.asarray(preds, dtype=bool)
    Y = np.asarray(Y, dtype=bool)
    group = np.asarray(group, dtype=bool)

    predicted_negative = ~preds
    actual_negative = ~Y

    def _tnr(negatives):
        # Share of the selected actual negatives that were predicted negative.
        n = np.count_nonzero(negatives)
        if not n:
            return float('nan')
        return np.count_nonzero(predicted_negative & negatives) / n

    res = {
        'TNR': _tnr(actual_negative),
        'TNR_1': _tnr(actual_negative & group),
        'TNR_2': _tnr(actual_negative & ~group),
    }
    res['TNR_GAP'] = abs(res['TNR_1'] - res['TNR_2'])
    return res

def compute_ACC_GAP(preds, Y, group):
    """Compute accuracy overall and per group, plus the gap between groups.

    Parameters
    ----------
    preds : array-like
        Predicted labels; truthy entries are predicted positives.
    Y : array-like
        Ground-truth labels; truthy entries are actual positives.
    group : array-like
        Group membership; truthy entries form group 1, all others group 2.

    Returns
    -------
    dict
        Keys 'ACC', 'ACC_1', 'ACC_2' and 'ACC_GAP' (absolute difference of
        the two group accuracies). An accuracy over an empty population is
        NaN instead of raising ZeroDivisionError.
    """
    # Coerce to boolean arrays so an integer `group` acts as a mask rather
    # than as positional (fancy) indices.
    preds = np.asarray(preds, dtype=bool)
    Y = np.asarray(Y, dtype=bool)
    group = np.asarray(group, dtype=bool)

    correct = preds == Y

    def _accuracy(members):
        # Share of the selected rows whose prediction matches the label.
        n = np.count_nonzero(members)
        return np.count_nonzero(correct & members) / n if n else float('nan')

    everyone = np.ones(correct.shape, dtype=bool)
    res = {
        'ACC': _accuracy(everyone),
        'ACC_1': _accuracy(group),
        'ACC_2': _accuracy(~group),
    }
    res['ACC_GAP'] = abs(res['ACC_1'] - res['ACC_2'])
    return res
    
def compute_EqOpp(preds, Y, group):
    """Return the statistics produced by compute_TPR_GAP for these inputs.

    This is a naming alias: the arguments are forwarded unchanged and the
    resulting dictionary is handed back as-is.
    """
    tpr_stats = compute_TPR_GAP(preds, Y, group)
    return tpr_stats

def compute_EqOd(preds, Y, group):
    """Return the combined TPR and TNR statistics for these inputs.

    The result is the dictionary from compute_TPR_GAP extended with every
    entry from compute_TNR_GAP; the arguments are forwarded unchanged to both.
    """
    res = compute_TPR_GAP(preds, Y, group)
    # dict.update() mutates in place and returns None, so it must not be
    # used as the return expression; update first, then return the dict.
    res.update(compute_TNR_GAP(preds, Y, group))
    return res

def compute_AccDisp(preds, Y, group):
    """Return the statistics produced by compute_ACC_GAP for these inputs.

    This is a naming alias: the arguments are forwarded unchanged and the
    resulting dictionary is handed back as-is.
    """
    acc_stats = compute_ACC_GAP(preds, Y, group)
    return acc_stats

def compute_fairness(predss,Y,group):
    """Collect the TPR, TNR and accuracy statistics into one dictionary.

    Parameters
    ----------
    predss : array-like
        Predicted labels. The spelling is kept so existing callers that pass
        it by keyword keep working.
    Y : array-like
        Ground-truth labels.
    group : array-like
        Group membership indicator.

    Returns
    -------
    dict
        The merged outputs of compute_TPR_GAP, compute_TNR_GAP and
        compute_ACC_GAP, in that order.
    """
    # Use the argument itself: the previous body read a name `preds` that is
    # not a parameter here, so it picked up whatever global happened to
    # exist in the notebook (or raised NameError on a fresh kernel).
    res = compute_TPR_GAP(predss, Y, group)
    res.update(compute_TNR_GAP(predss, Y, group))
    res.update(compute_ACC_GAP(predss, Y, group))
    return res


In [4]:
# Exponentiated-gradient sweep with a logistic-regression base learner.
# For every dataset, fold, fairness criterion, epsilon and value of C, fit an
# ExponentiatedGradient mitigator on the training split, score it on the test
# split with compute_fairness and collect one result record per fit.
results = []
datasets = ['default','adult','compas']
# Column passed as `sensitive_features` for each dataset.
protected_features = {'compas': 'race',
                      'adult': 'gender',
                      'default': 'X2'
                     }
fairness_metrics = ['EqualizedOdds', 'EqOp','AccDisp']
# Constraint class used for each fairness criterion name.
constraint_classes = {'EqualizedOdds': EqualizedOdds,
                      'EqOp': TruePositiveRateParity,
                      'AccDisp': ErrorRateParity
                     }

for dataset in datasets:
    print('**** DATASET %s ******'%dataset)
    group_var = protected_features[dataset]
    for i in range(1):
        print('***** FOLD %d ******'%i)
        train  = pd.read_csv('split_data/'+dataset+'_train_%d.csv'%i).assign(train = True)
        test = pd.read_csv('split_data/'+dataset+'_test_%d.csv'%i).assign(train = False)

        # One-hot encode train and test together so both end up with the same
        # dummy columns. DataFrame.append was removed in pandas 2.0; pd.concat
        # is the supported equivalent.
        # NOTE(review): assumes the protected column is not a string column,
        # otherwise get_dummies replaces it and `group_var` no longer exists
        # — TODO confirm against the CSVs.
        full = pd.get_dummies(pd.concat([train, test]))

        train = full.query('train').drop('train',axis=1)
        test = full.query('train == False').drop('train',axis=1)

        # These slices do not depend on the criterion, epsilon or C, so they
        # are built once per fold instead of once per fit.
        X_train = train.drop('Y',axis=1)
        X_test = test.drop('Y',axis=1)
        # NOTE(review): the compute_* helpers index with `y` and `group`;
        # confirm the CSVs store them as booleans (or 0/1).
        y = test['Y'].to_numpy()
        group = test[group_var].to_numpy()

        for fairMet in fairness_metrics:
            for eps in np.linspace(0,0.4,20):
                for hp in [0.01,0.1,1,10]:
                    print('Epsilon: %f'%eps)

                    constraint = constraint_classes[fairMet](difference_bound = eps)

                    # max_iter must be an integer; recent scikit-learn
                    # releases reject the float 1e4.
                    classifier = LogisticRegression(max_iter=10000, C=hp)
                    mitigator = ExponentiatedGradient(classifier, constraint)

                    # perf_counter is a monotonic clock meant for measuring
                    # elapsed intervals.
                    start_time = time.perf_counter()
                    mitigator.fit(X_train, train['Y'], sensitive_features=train[group_var])
                    end_time = time.perf_counter() - start_time  # elapsed seconds

                    # np.bool was removed in NumPy 1.24; the builtin bool is
                    # the same dtype.
                    preds = mitigator.predict(X_test).astype(bool)

                    res = compute_fairness(preds, y, group)
                    res['epsilon'] = eps
                    res['fold'] = i
                    res['fairnessCriteria'] = fairMet
                    res['algo'] = 'exp_gradient_log_reg'
                    res['hp'] = hp
                    res['data_set'] = dataset
                    res['train_time'] = end_time
                    results.append(res)
pd.DataFrame.from_records(results).to_csv('expgradient_logreg_results.csv', index=False)

**** DATASET default ******
***** FOLD 0 ******
Epsilon: 0.000000


KeyboardInterrupt: 

In [None]:
# Exponentiated-gradient sweep with a decision-tree base learner.
# For every dataset, fold, fairness criterion, epsilon and tree depth, fit an
# ExponentiatedGradient mitigator on the training split, score it on the test
# split with compute_fairness and collect one result record per fit.
results = []
datasets = ['default','adult','compas']
# Column passed as `sensitive_features` for each dataset.
protected_features = {'compas': 'race',
                      'adult': 'gender',
                      'default': 'X2'
                     }
fairness_metrics = ['EqualizedOdds', 'EqOp','AccDisp']
# Constraint class used for each fairness criterion name.
constraint_classes = {'EqualizedOdds': EqualizedOdds,
                      'EqOp': TruePositiveRateParity,
                      'AccDisp': ErrorRateParity
                     }

for dataset in datasets:
    print('**** DATASET %s ******'%dataset)
    group_var = protected_features[dataset]
    for i in range(1):
        print('***** FOLD %d ******'%i)
        train  = pd.read_csv('split_data/'+dataset+'_train_%d.csv'%i).assign(train = True)
        test = pd.read_csv('split_data/'+dataset+'_test_%d.csv'%i).assign(train = False)

        # One-hot encode train and test together so both end up with the same
        # dummy columns. DataFrame.append was removed in pandas 2.0; pd.concat
        # is the supported equivalent.
        # NOTE(review): assumes the protected column is not a string column,
        # otherwise get_dummies replaces it and `group_var` no longer exists
        # — TODO confirm against the CSVs.
        full = pd.get_dummies(pd.concat([train, test]))

        train = full.query('train').drop('train',axis=1)
        test = full.query('train == False').drop('train',axis=1)

        # These slices do not depend on the criterion, epsilon or depth, so
        # they are built once per fold instead of once per fit.
        X_train = train.drop('Y',axis=1)
        X_test = test.drop('Y',axis=1)
        # NOTE(review): the compute_* helpers index with `y` and `group`;
        # confirm the CSVs store them as booleans (or 0/1).
        y = test['Y'].to_numpy()
        group = test[group_var].to_numpy()

        for fairMet in fairness_metrics:
            for eps in np.linspace(0,0.4,20):
                for hp in np.linspace(1, 21, 11):
                    print('Epsilon: %f'%eps)

                    constraint = constraint_classes[fairMet](difference_bound = eps)

                    # np.linspace yields floats (1.0, 3.0, ..., 21.0) but
                    # max_depth must be an integer, so cast for the model
                    # while the recorded `hp` keeps its original value.
                    classifier = DecisionTreeClassifier(max_depth=int(hp))
                    mitigator = ExponentiatedGradient(classifier, constraint)

                    # perf_counter is a monotonic clock meant for measuring
                    # elapsed intervals.
                    start_time = time.perf_counter()
                    mitigator.fit(X_train, train['Y'], sensitive_features=train[group_var])
                    end_time = time.perf_counter() - start_time  # elapsed seconds

                    # np.bool was removed in NumPy 1.24; the builtin bool is
                    # the same dtype.
                    preds = mitigator.predict(X_test).astype(bool)

                    res = compute_fairness(preds, y, group)
                    res['epsilon'] = eps
                    res['fold'] = i
                    res['fairnessCriteria'] = fairMet
                    res['algo'] = 'exp_gradient_decision_tree'
                    res['hp'] = hp
                    res['data_set'] = dataset
                    res['train_time'] = end_time
                    results.append(res)
pd.DataFrame.from_records(results).to_csv('expgradient_dt_results.csv', index=False)