# Accuracy vs. Hamming Loss Trials

This notebook goes over sample code for testing the difference between optimizing for accuracy vs. Hamming loss. Note that this notebook uses pre-mined rules (included in the rules folder) and our pre-split and binarized data (included in the data_split folder).

In [None]:
import pandas as pd
import numpy as np
import time
from master_model import *
from DNFRuleModel import DNFRuleModel
from scipy.stats import bernoulli
from fairness_modules import *
import seaborn as sns

### Experiments

In [None]:
#Helper functions for computing fairness/accuracy
def compute_TPR_GAP(preds, Y, group):
    """Compute the true-positive rate overall, per group, and the gap between groups.

    Args:
        preds: Binary predictions, one entry per sample.
        Y: Binary ground-truth labels, one entry per sample.
        group: Binary group-membership indicator, one entry per sample.

    Returns:
        dict with keys 'TPR' (all samples), 'TPR_1' (samples where ``group``
        is true), 'TPR_2' (samples where ``group`` is false) and 'TPR_GAP'
        (absolute difference of the two group rates). A rate is NaN when
        its subset contains no positive samples.
    """
    # Coerce to boolean arrays so that 0/1 integer inputs act as masks (not as
    # integer indices) and `~` is a logical, not bitwise, negation.
    preds = np.asarray(preds, dtype=bool)
    Y = np.asarray(Y, dtype=bool)
    group = np.asarray(group, dtype=bool)

    def _rate(positives):
        # Fraction of the selected positive samples that were predicted positive.
        n_pos = np.count_nonzero(positives)
        if n_pos == 0:
            return float('nan')
        return np.count_nonzero(preds & positives) / n_pos

    res = {}
    res['TPR'] = _rate(Y)
    res['TPR_1'] = _rate(Y & group)
    res['TPR_2'] = _rate(Y & ~group)
    res['TPR_GAP'] = abs(res['TPR_1'] - res['TPR_2'])
    return res

def compute_TNR_GAP(preds, Y, group):
    """Compute the true-negative rate overall, per group, and the gap between groups.

    Args:
        preds: Binary predictions, one entry per sample.
        Y: Binary ground-truth labels, one entry per sample.
        group: Binary group-membership indicator, one entry per sample.

    Returns:
        dict with keys 'TNR' (all samples), 'TNR_1' (samples where ``group``
        is true), 'TNR_2' (samples where ``group`` is false) and 'TNR_GAP'
        (absolute difference of the two group rates). A rate is NaN when
        its subset contains no negative samples.
    """
    # Coerce to boolean arrays so that 0/1 integer inputs act as masks (not as
    # integer indices) and `~` is a logical, not bitwise, negation.
    preds = np.asarray(preds, dtype=bool)
    Y = np.asarray(Y, dtype=bool)
    group = np.asarray(group, dtype=bool)

    def _rate(negatives):
        # Fraction of the selected negative samples that were predicted negative.
        n_neg = np.count_nonzero(negatives)
        if n_neg == 0:
            return float('nan')
        return np.count_nonzero(~preds & negatives) / n_neg

    res = {}
    res['TNR'] = _rate(~Y)
    res['TNR_1'] = _rate(~Y & group)
    res['TNR_2'] = _rate(~Y & ~group)
    res['TNR_GAP'] = abs(res['TNR_1'] - res['TNR_2'])
    return res

def compute_ACC_GAP(preds, Y, group):
    """Compute accuracy overall, per group, and the gap between groups.

    Args:
        preds: Predictions, one entry per sample.
        Y: Ground-truth labels, one entry per sample.
        group: Binary group-membership indicator, one entry per sample.

    Returns:
        dict with keys 'ACC' (all samples), 'ACC_1' (samples where ``group``
        is true), 'ACC_2' (samples where ``group`` is false) and 'ACC_GAP'
        (absolute difference of the two group accuracies). An accuracy is
        NaN when its subset is empty.
    """
    # Element-wise correctness; plain arrays avoid pandas index alignment.
    correct = np.asarray(preds) == np.asarray(Y)
    # Coerce to a boolean mask so a 0/1 integer group column is not used as
    # integer indices and `~` is a logical, not bitwise, negation.
    group = np.asarray(group, dtype=bool)

    def _accuracy(hits):
        # Share of correct predictions among the selected samples.
        if hits.size == 0:
            return float('nan')
        return np.count_nonzero(hits) / hits.size

    res = {}
    res['ACC'] = _accuracy(correct)
    res['ACC_1'] = _accuracy(correct[group])
    res['ACC_2'] = _accuracy(correct[~group])
    res['ACC_GAP'] = abs(res['ACC_1'] - res['ACC_2'])
    return res
    
def compute_EqOpp(preds, Y, group):
    """Equality-of-opportunity metrics: the result of ``compute_TPR_GAP``."""
    tpr_metrics = compute_TPR_GAP(preds, Y, group)
    return tpr_metrics

def compute_EqOd(preds, Y, group):
    """Equalized-odds metrics: the TPR-gap and TNR-gap results merged into one dict.

    Returns:
        dict holding every key produced by ``compute_TPR_GAP`` followed by
        every key produced by ``compute_TNR_GAP``.
    """
    # dict.update() mutates in place and returns None, so the merged dict must
    # be built first and returned explicitly.
    res = compute_TPR_GAP(preds, Y, group)
    res.update(compute_TNR_GAP(preds, Y, group))
    return res

def compute_AccDisp(preds, Y, group):
    """Accuracy-disparity metrics: the result of ``compute_ACC_GAP``."""
    acc_metrics = compute_ACC_GAP(preds, Y, group)
    return acc_metrics

def compute_fairness(preds,Y,group):
    """Collect the TPR, TNR and accuracy gap metrics into a single dict."""
    metrics = {}
    # Same merge order as before: TPR keys, then TNR keys, then ACC keys.
    for gap_fn in (compute_TPR_GAP, compute_TNR_GAP, compute_ACC_GAP):
        metrics.update(gap_fn(preds, Y, group))
    return metrics

In [None]:
#Helper function for evaluating different IP models
def eval_model(model, ruleMod, rules, X_tr, Y_tr, X_tst, Y_tst):
    """Solve a master model over a candidate rule pool and score the resulting rule set.

    Args:
        model: Master model exposing ``addRule(rules)`` and
            ``solve(verbose=..., relax=...)``; the solve result must provide
            the keys 'ruleSet' and 'obj'.
        ruleMod: Rule model exposing ``predict(X, rules)``.
        rules: Candidate rules handed to ``model.addRule``.
        X_tr, Y_tr: Training features and labels.
        X_tst, Y_tst: Test features and labels.

    Returns:
        Tuple ``(res, preds)`` where ``res`` is a dict with keys 'time'
        (solve wall time in seconds), 'tr_acc', 'tst_acc', 'complexity' and
        'obj', and ``preds`` are the predictions on the test data.
    """
    #Initialize model with rule set
    model.addRule(rules)

    #Solve the model
    start = time.perf_counter()
    results = model.solve(verbose = True, relax = False)
    elapsed = time.perf_counter() - start

    #Extract predictions for test and train data
    fitRules = results['ruleSet']

    if len(fitRules) > 0:
        preds_tr = ruleMod.predict(X_tr, fitRules)
        preds = ruleMod.predict(X_tst, fitRules)
    else:
        # No rule selected: predict the negative class everywhere.
        # Use the builtin bool; the np.bool alias was removed in NumPy 1.24.
        preds_tr = np.zeros(Y_tr.shape, dtype=bool)
        preds = np.zeros(Y_tst.shape, dtype=bool)

    #Compute metrics on performance of final rule set
    tr_acc = np.mean(preds_tr == Y_tr)
    tst_acc = np.mean(preds == Y_tst)
    # Complexity = number of selected rules plus the sum of their entries
    complexity = len(fitRules) + np.sum(fitRules)

    res = {'time': elapsed,
           'tr_acc': tr_acc,
           'tst_acc': tst_acc,
           'complexity': complexity,
           'obj': results['obj']
          }
    return res, preds

In [None]:
# Run Experiment
#
# For every fold, load the pre-split data and the pre-mined rules, then solve
# both master formulations (Hamming loss and 0-1 accuracy) over a grid of
# fairness tolerances (eps) and complexity limits (C). One record per
# (fold, eps, C, method) is collected and written to '01_vs_Hamming.csv'.
results = []

#Model Parameters
test_params = {
            'price_limit': 45,
            'train_limit': 300,
            'fixed_model_params': {
                'ruleGenerator': 'Hybrid',
                'masterSolver':'barrierCrossover',
                'numRulesToReturn': 100,
                'fairness_module': 'EqOfOp',
                'IP_time_limit': 600
            },
        }

protected_features = {'compas': 'race', 
                      'adult': 'gender',
                      'default': 'X2'
                     }
data_set = 'compas'
# Derive the protected column from the dataset so the two cannot drift apart.
group_var = protected_features[data_set]

# Master formulations to compare, labelled as they appear in the 'method' column.
master_models = [
    ('hamming loss', CompactDoubleSidedMaster.CompactDoubleSidedMaster),
    ('accuracy', ZeroOneDoubleSidedMaster.ZeroOneDoubleSidedMaster),
]

#Loop over folds
for i in range(10):
    
    #Extract data
    print('***** FOLD %d ******'%i)
    train = pd.read_csv('data_split/bin_%s_train_%d.csv'%(data_set, i))
    test = pd.read_csv('data_split/bin_%s_test_%d.csv'%(data_set, i))
    X_tr = train.drop('Y',axis=1).to_numpy()
    Y_tr = train['Y'].to_numpy()
    X_tst = test.drop('Y',axis=1).to_numpy()
    Y_tst = test['Y'].to_numpy()
    group_tr = train[group_var].to_numpy()

    # The fairness helpers use labels and group as boolean masks.
    # NOTE(review): assumes 'Y' and the group column are binary (bool or 0/1)
    # in the binarized CSVs -- confirm against the data files.
    Y_tst_mask = Y_tst.astype(bool)
    group_tst = test[group_var].to_numpy().astype(bool)
    
    #Load in rule sets - for these experiments we use rules from rfs and our faircg process
    rule_chunks = []
    for ruleset in ['faircg','rf']:
        print('***** Ruleset %s ******'%ruleset)
        # Use data_set (not a hard-coded name) so rules match the loaded data.
        rule_chunks.append(np.load('rules/%s_%s_fold_%d.npy'%(ruleset, data_set, i)).astype(int))
    rules = np.concatenate(rule_chunks)
    
    #For range of epsilon and complexities run the models
    for eps in [0.01, 0.05, 0.1, 0.2, 1]:
        for C in [10, 20, 30]:
            print('***** EPS %f ******'%eps)
            
            #Set model parameters
            # Build a fresh dict per configuration: dict.copy() on test_params
            # is shallow and would keep mutating the shared nested dict.
            model_params = dict(test_params['fixed_model_params'],
                                epsilon=eps,
                                ruleComplexity=C,
                                group=group_tr)
            
            for method, master_cls in master_models:
                #Create Rule + fairness Modules (fresh instances for every solve)
                ruleMod = DNFRuleModel(X_tr, Y_tr)
                fairMod = EqualityOfOpportunity.EqualityOfOpportunity(model_params)
                
                #Evaluate the master model
                res,preds = eval_model(master_cls(ruleMod, fairMod, model_params),
                                       ruleMod, 
                                       rules, 
                                       X_tr, Y_tr,
                                       X_tst, Y_tst)

                res['dataset'] = data_set
                res['fold'] = i
                res['eps'] = eps
                res['fairMet'] = 'EqOp'
                res['C'] = C
                res['method'] = method
                res.update(compute_fairness(preds, Y_tst_mask, group_tst))
                results.append(res)
            
results = pd.DataFrame.from_records(results)
results.to_csv('01_vs_Hamming.csv')