In [None]:
import pandas as pd
import numpy as np
import time

In [None]:
import os,sys
sys.path.insert(0, '../fair-classification/fair_classification/') # the code for fair classification is in this directory
import utils as ut
import funcs_disp_mist as fdm

In [None]:
#Helper functions for computing fairness/accuracy
def compute_TPR_GAP(preds, Y, group):
    """Compute the true-positive rate overall and per group, plus the gap.

    Parameters
    ----------
    preds : array-like of bool or 0/1
        Predicted labels.
    Y : array-like of bool or 0/1
        Ground-truth labels.
    group : array-like of bool or 0/1
        Group-membership indicator; truthy entries form group 1, the rest
        form group 2.

    Returns
    -------
    dict
        Keys ``'TPR'``, ``'TPR_1'`` (members of ``group``), ``'TPR_2'``
        (non-members) and ``'TPR_GAP'`` (absolute difference of the two).
        A rate is NaN when there are no positive examples to average over.
    """
    # Coerce everything to boolean masks. With integer 0/1 arrays, `preds[Y]`
    # would be positional (fancy) indexing and `~group` a bitwise NOT
    # (~0 == -1, ~1 == -2), both of which silently give wrong rates.
    preds = np.asarray(preds).astype(bool)
    Y = np.asarray(Y).astype(bool)
    group = np.asarray(group).astype(bool)

    def _positive_rate(mask):
        # Fraction of positive predictions among the selected examples.
        selected = preds[mask]
        return float(selected.mean()) if selected.size else float('nan')

    res = {}
    res['TPR'] = _positive_rate(Y)
    res['TPR_1'] = _positive_rate(Y & group)
    res['TPR_2'] = _positive_rate(Y & ~group)
    res['TPR_GAP'] = abs(res['TPR_1'] - res['TPR_2'])
    return res

def compute_TNR_GAP(preds, Y, group):
    """Compute the true-negative rate overall and per group, plus the gap.

    Parameters
    ----------
    preds : array-like of bool or 0/1
        Predicted labels.
    Y : array-like of bool or 0/1
        Ground-truth labels.
    group : array-like of bool or 0/1
        Group-membership indicator; truthy entries form group 1, the rest
        form group 2.

    Returns
    -------
    dict
        Keys ``'TNR'``, ``'TNR_1'`` (members of ``group``), ``'TNR_2'``
        (non-members) and ``'TNR_GAP'`` (absolute difference of the two).
        A rate is NaN when there are no negative examples to average over.
    """
    # Coerce everything to boolean masks. On integer 0/1 arrays `~` is a
    # bitwise NOT (~0 == -1, ~1 == -2) and indexing with the result is
    # positional, so the original expressions were only right for bool input.
    preds = np.asarray(preds).astype(bool)
    Y = np.asarray(Y).astype(bool)
    group = np.asarray(group).astype(bool)

    predicted_negative = ~preds
    actual_negative = ~Y

    def _negative_rate(mask):
        # Fraction of negative predictions among the selected examples.
        selected = predicted_negative[mask]
        return float(selected.mean()) if selected.size else float('nan')

    res = {}
    res['TNR'] = _negative_rate(actual_negative)
    res['TNR_1'] = _negative_rate(actual_negative & group)
    res['TNR_2'] = _negative_rate(actual_negative & ~group)
    res['TNR_GAP'] = abs(res['TNR_1'] - res['TNR_2'])
    return res

def compute_ACC_GAP(preds, Y, group):
    """Compute accuracy overall and per group, plus the gap.

    Parameters
    ----------
    preds : array-like
        Predicted labels.
    Y : array-like
        Ground-truth labels, compared element-wise with ``preds``.
    group : array-like of bool or 0/1
        Group-membership indicator; truthy entries form group 1, the rest
        form group 2.

    Returns
    -------
    dict
        Keys ``'ACC'``, ``'ACC_1'`` (members of ``group``), ``'ACC_2'``
        (non-members) and ``'ACC_GAP'`` (absolute difference of the two).
        An accuracy is NaN when there are no examples to average over.
    """
    preds = np.asarray(preds)
    Y = np.asarray(Y)
    # Only the group indicator is used as a mask, so only it is coerced. An
    # integer 0/1 `group` would otherwise index positionally, and `~group`
    # would be a bitwise NOT rather than the complement of the group.
    group = np.asarray(group).astype(bool)

    correct = preds == Y

    def _accuracy(hits):
        # Fraction of correct predictions among the selected examples.
        return float(hits.mean()) if hits.size else float('nan')

    res = {}
    res['ACC'] = _accuracy(correct)
    res['ACC_1'] = _accuracy(correct[group])
    res['ACC_2'] = _accuracy(correct[~group])
    res['ACC_GAP'] = abs(res['ACC_1'] - res['ACC_2'])
    return res
    
def compute_EqOpp(preds, Y, group):
    """Equal-opportunity metrics: the dict produced by ``compute_TPR_GAP``."""
    tpr_metrics = compute_TPR_GAP(preds, Y, group)
    return tpr_metrics

def compute_EqOd(preds, Y, group):
    """Equalized-odds metrics: TPR and TNR rates and gaps merged into one dict.

    Returns the entries of ``compute_TPR_GAP`` together with those of
    ``compute_TNR_GAP`` for the same inputs.
    """
    # dict.update() mutates in place and returns None, so the merged dict has
    # to be returned explicitly rather than the result of the update() call.
    res = compute_TPR_GAP(preds, Y, group)
    res.update(compute_TNR_GAP(preds, Y, group))
    return res

def compute_AccDisp(preds, Y, group):
    """Accuracy-disparity metrics: the dict produced by ``compute_ACC_GAP``."""
    acc_metrics = compute_ACC_GAP(preds, Y, group)
    return acc_metrics

def compute_fairness(predss,Y,group):
    """Collect the TPR, TNR and accuracy metrics for one set of predictions.

    Parameters
    ----------
    predss : array-like
        Predicted labels. The spelling of the parameter name is kept as-is so
        existing keyword callers keep working.
    Y : array-like
        Ground-truth labels.
    group : array-like
        Group-membership indicator.

    Returns
    -------
    dict
        The union of the dicts returned by ``compute_TPR_GAP``,
        ``compute_TNR_GAP`` and ``compute_ACC_GAP``.
    """
    # Use the argument itself. The body previously read `preds`, which is not
    # a parameter here, so it picked up whatever global of that name existed
    # in the notebook (or raised NameError on a fresh kernel).
    res = compute_TPR_GAP(predss, Y, group)
    res.update(compute_TNR_GAP(predss, Y, group))
    res.update(compute_ACC_GAP(predss, Y, group))
    return res


In [None]:
# Equalized Odds / Equal Opportunity sweep.
# For every dataset, fold, fairness criterion and eps value: train a model with
# fdm.train_model_disp_mist, score it on the held-out fold with
# compute_fairness, and collect one record per run. All records are written to
# a single CSV at the end.
results = []
datasets = ['adult','default','compas']
protected_features = {'compas': 'race', 
                      'adult': 'gender',
                      'default': 'X2'
                     }

tau_dict = {'compas': 5,
       'adult': 20,
       'default': 0.05
      }

fairness_metrics = ['EqualizedOdds', 'EqOp']

# "cons_type" value passed to the solver for each fairness criterion.
# NOTE(review): presumably these are fdm's codes for the constrained error
# rates -- confirm against funcs_disp_mist.
cons_type_by_metric = {'EqualizedOdds': 4, 'EqOp': 2}

for dataset in datasets:
    print('**** DATASET %s ******'%dataset)
    group_var = protected_features[dataset]
    tau = tau_dict[dataset]
    for i in range(10):
        print('***** FOLD %d ******'%i)
        train  = pd.read_csv('split_data/'+dataset+'_train_%d.csv'%i).assign(train = True)
        test = pd.read_csv('split_data/'+dataset+'_test_%d.csv'%i).assign(train = False)

        # One-hot encode train and test together so both end up with the same
        # columns. DataFrame.append was removed in pandas 2.0, hence pd.concat.
        # dtype=np.uint8 pins the dummy columns to the historical default so
        # the feature matrix does not pick up bool columns on newer pandas.
        full = pd.get_dummies(pd.concat([train, test]), dtype=np.uint8)

        train = full.query('train').drop('train',axis=1)
        test = full.query('train == False').drop('train',axis=1)

        for fairMet in fairness_metrics:
            cons_type = cons_type_by_metric[fairMet]
            # NOTE(review): np.linspace(0, 1, 10) already ends at 1, so eps=1
            # is trained and recorded twice per criterion. Kept to preserve the
            # rows of the output file -- confirm whether the repeat is wanted.
            for eps in np.concatenate([np.linspace(0,1,10),[1]]):
                # Report the tau actually handed to the solver.
                print('Tau: %f'%tau)

                mu = 1.2
                # Threshold 0 under inner key 0 and eps under inner key 1, for
                # each of the outer keys 0/1/2.
                # NOTE(review): the meaning of the outer/inner keys is defined
                # by fdm -- confirm before changing.
                sensitive_attrs_to_cov_thresh = {group_var: {0:{0:0, 1:eps}, 1:{0:0, 1:eps}, 2:{0:0, 1:eps}}}
                cons_params = {"cons_type": cons_type, 
                               "tau": tau, 
                               "mu": mu, 
                               "sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh}

                try:
                    start_time = time.perf_counter()
                    # Labels are mapped from {0, 1} to {-1, +1} for the solver.
                    # np.int / np.bool were removed in NumPy 1.24; the builtins
                    # are the exact types they aliased.
                    w= fdm.train_model_disp_mist(train.drop('Y',axis=1).to_numpy(), 
                      train['Y'].to_numpy()*2 - 1, 
                      {group_var: train[group_var].astype(int).to_numpy()}, 
                      "logreg", 1e-6, cons_params=cons_params)

                    # Despite the name, this holds the elapsed training time in seconds.
                    end_time = time.perf_counter() - start_time

                    preds = (np.sign(fdm.get_distance_boundary(w, 
                                                              test.drop('Y',axis=1).to_numpy(), 
                                                              test[group_var].astype(int).to_numpy())))
                    # Map the sign {-1, 0, +1} to {False, True, True}.
                    preds = (preds/2+0.5).astype(bool)

                    # Boolean masks are required by the metric helpers: integer
                    # 0/1 arrays would be treated as positional indices there.
                    y = test['Y'].to_numpy().astype(bool)
                    group = test[group_var].to_numpy().astype(bool)

                    res = compute_fairness(preds, y, group)
                    res['fold'] = i
                    res['fairnessCriteria'] = fairMet
                    res['algo'] = 'zafar17'
                    res['eps'] = eps
                    res['data_set'] = dataset
                    res['train_time'] = end_time
                    results.append(res)
                except Exception as exc:
                    # Best-effort sweep: a failed configuration is skipped, but
                    # the reason is reported, and KeyboardInterrupt/SystemExit
                    # are no longer swallowed as they were by a bare `except:`.
                    print('failed: %r' % (exc,))
                    continue
pd.DataFrame.from_records(results).to_csv('zafar_results_jan21.csv', index=False)