# Fair CG Rule Generation
This notebook contains the code that generates candidate rule sets for each dataset and fold using our column generation (CG) approach.

In [None]:
import os

import numpy as np
import pandas as pd

from test_helpers import *

In [None]:
#Tests for Equality of Opportunity
#
# For every dataset and every fold, run column generation (CG) over a grid of
# fairness tolerances (eps) and rule-complexity limits (C). The rules produced
# by each run are merged into one candidate rule set per fold, which is written
# to disk once the fold's grid has been exhausted.


#Experiment Parameters
results = []  # NOTE(review): never appended to in this cell -- confirm whether it is still needed
datasets = ['adult','compas','default']
# Column of each dataset that is passed to runSingleTest as the group variable.
protected_features = {'compas': 'race', 
                      'adult': 'gender',
                      'default': 'X2'
                     }
fair_met = 'EqOfOp'
# Base settings shared by every run; epsilon and ruleComplexity are added per run below.
test_params = {
            'price_limit': 45,
            'train_limit': 300,
            'fixed_model_params': {
                'ruleGenerator': 'Hybrid',
                'masterSolver':'barrierCrossover',
                'numRulesToReturn': 100,
                'fairness_module': 'EqOfOp'
            },
        }

#Range of fairness constraint and complexities to test
# The lists are iterated in the order written; since rules accumulate across
# runs within a fold, the order is preserved exactly as in the original.
eps_range = {'adult': [0, 0.01, 0.05, 0.1,0.2, 1],
    'compas':[0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.5, 1],
     'default': [0, 0.01, 0.05, 0.1,0.2, 1]
    }
C_range = {'adult': [40, 5, 15, 20, 30],
    'compas': [5, 10, 15, 20, 30],
     'default': [5, 10, 15, 20, 30]
    }

# Create the output directory up front so a missing folder cannot make np.save
# fail after a whole fold's worth of runs has already been computed.
os.makedirs('rules', exist_ok=True)

for dataset in datasets:
    print('**** DATASET %s ******'%dataset)
    group_var = protected_features[dataset]
    name = dataset+'_'+str(fair_met)

    for i in range(10):
        print('***** FOLD %d ******'%i)
        train  = pd.read_csv('data_split/bin_'+dataset+'_train_%d.csv'%i)
        test = pd.read_csv('data_split/bin_'+dataset+'_test_%d.csv'%i)
        
        # Rule pool for this fold; it starts empty and grows after every run.
        saved_rules = None
        for eps in eps_range[dataset]:
            for C in C_range[dataset]:
                #Set up reporting
                # %g (not %d) for eps: %d truncates floats, so 0.01, 0.05, 0.1, ...
                # would all be labelled "0" and the run names would collide.
                res = TestResults(name+' '+'(%g,%d)'%(eps, C)+'-'+str(i))
                res.res['eps'] = eps
                res.res['C'] = C
                
                #Set hyperparameters
                # Build a fresh parameter dict for this run. dict.copy() is shallow,
                # so writing into the nested 'fixed_model_params' of a copy would
                # silently modify the shared base configuration as well.
                run_params = dict(
                    test_params,
                    fixed_model_params=dict(
                        test_params['fixed_model_params'],
                        epsilon=eps,
                        ruleComplexity=C,
                    ),
                )
                
                #Run CG
                # The rule pool gathered so far in this fold is handed to the run.
                res, classif = runSingleTest(train.drop('Y',axis=1).to_numpy(), train['Y'].to_numpy(), 
                                             train[group_var].to_numpy(), 
                                             test.drop('Y',axis=1).to_numpy(), test['Y'].to_numpy(), 
                                             test[group_var].to_numpy(), 
                                             run_params, 
                                             saved_rules, res, colGen = True, rule_filter = False)
                
                #Save rules
                rules = classif.ruleMod.rules
                saved_rules = updateRuleSet(saved_rules,rules)
        #Write rule set
        # np.save appends '.npy' when the name does not already end with it, so the
        # file on disk is '<name>_fold_<i>_rules.txt.npy'. The name is left as-is
        # for compatibility with whatever reads these files.
        np.save('rules/'+name+'_fold_'+str(i)+'_rules.txt', saved_rules)

In [None]:
#Tests for Equalized Odds
#
# Same procedure as a standard grid run: for every dataset and every fold, run
# column generation (CG) for each (eps, C) pair, merge the rules of every run
# into one candidate rule set per fold, and write that set to disk.

results = []  # NOTE(review): never appended to in this cell -- confirm whether it is still needed
datasets = ['compas','default','adult']
# Column of each dataset that is passed to runSingleTest as the group variable.
protected_features = {'compas': 'race', 
                      'adult': 'gender',
                      'default': 'X2'
                     }
fair_met = 'HammingEqOdd'
# Base settings shared by every run; epsilon and ruleComplexity are added per run below.
test_params = {
            'price_limit': 45,
            'train_limit': 300,
            'fixed_model_params': {
                'ruleGenerator': 'Hybrid',
                'masterSolver':'barrierCrossover',
                'numRulesToReturn': 100,
                'fairness_module': 'HammingEqOdd'
            },
        }

#Range of fairness constraint and complexities to test
# The lists are iterated in the order written; since rules accumulate across
# runs within a fold, the order is preserved exactly as in the original.
# NOTE(review): the 0.7 in the 'adult' list breaks the otherwise ascending
# pattern -- possibly meant to be 0.07; confirm before changing.
eps_range = {'adult': [0.01, 0.7,0.15, 1],
    'compas': [1, 0.01, 0.2, ],
     'default': [0.01, 0.05,0.1, 1]
    }
C_range = {'adult': [5, 15, 20, 30, 40],
    'compas': [30, 5, 10, 15, 20],
     'default': [5, 10, 15, 20, 30]
    }

# Create the output directory up front so a missing folder cannot make np.save
# fail after a whole fold's worth of runs has already been computed.
os.makedirs('rules', exist_ok=True)

#Run tests
for dataset in datasets:
    print('**** DATASET %s ******'%dataset)
    group_var = protected_features[dataset]
    name = dataset+'_'+str(fair_met)

    for i in range(10):
        print('***** FOLD %d ******'%i)
        
        #Load in data
        train  = pd.read_csv('data_split/bin_'+dataset+'_train_%d.csv'%i)
        test = pd.read_csv('data_split/bin_'+dataset+'_test_%d.csv'%i)
        
        # Rule pool for this fold; it starts empty and grows after every run.
        saved_rules = None
        for eps in eps_range[dataset]:
            for C in C_range[dataset]:
                #Set up reporting
                # %g (not %d) for eps: %d truncates floats, so 0.01, 0.05, 0.1, ...
                # would all be labelled "0" and the run names would collide.
                res = TestResults(name+' '+'(%g,%d)'%(eps, C)+'-'+str(i))
                res.res['eps'] = eps
                res.res['C'] = C
                
                #Adjust testing parameters
                # Build a fresh parameter dict for this run. dict.copy() is shallow,
                # so writing into the nested 'fixed_model_params' of a copy would
                # silently modify the shared base configuration as well.
                run_params = dict(
                    test_params,
                    fixed_model_params=dict(
                        test_params['fixed_model_params'],
                        epsilon=eps,
                        ruleComplexity=C,
                    ),
                )
                
                #Run CG
                # The rule pool gathered so far in this fold is handed to the run.
                res, classif = runSingleTest(train.drop('Y',axis=1).to_numpy(), train['Y'].to_numpy(), 
                                             train[group_var].to_numpy(), 
                                             test.drop('Y',axis=1).to_numpy(), test['Y'].to_numpy(), 
                                             test[group_var].to_numpy(), 
                                             run_params, 
                                             saved_rules, res, colGen = True, rule_filter = False)
                
                rules = classif.ruleMod.rules
                #Save rules generated
                saved_rules = updateRuleSet(saved_rules,rules)
        #Write rulesets
        # np.save appends '.npy' when the name does not already end with it, so the
        # file on disk is '<name>_fold_<i>_rules.txt.npy'. The name is left as-is
        # for compatibility with whatever reads these files.
        np.save('rules/'+name+'_fold_'+str(i)+'_rules.txt', saved_rules)