In [1]:
import sys
import numpy as np
import pandas as pd

In [2]:
from fairsearchcore.models import FairScoreDoc
import fairsearchcore as fsc

# Shared paths for the baseline cells.
# CSV that the load_data helpers read.
file_name = "compas_clean_2.csv"
# CSV written by the FairTopK cell.
output_file = "./data/Baseline/COMPAS_FairTopK.csv"
# Name of a .npy file; not referenced by the cells shown in this section
# (presumably consumed elsewhere -- TODO confirm).
adj_name = "adj_matrix_2.npy"

### Generate results for FairTopk

In [None]:
def load_data(file_name, class_label, score_label, prt_group):
    """
    Read a CSV and build the inputs used by the fairsearchcore fairtopk run.

    Parameters
    ----------
    file_name : path of the CSV file to read.
    class_label : column whose sum (cast to int) becomes ``k``.
    score_label : column used both to order the rows and as each document's score.
    prt_group : group column; a value of 0 marks the document as protected,
        a value of 1 as non-protected. Rows with any other value are left out
        of the ranking.

    Returns
    -------
    rankings : list of FairScoreDoc in descending score order. The document id
        is the value produced by ``argsort`` (a row position), which is also
        used as the row label for the ``.loc`` lookups.
    k : int, sum of the ``class_label`` column.
    p : 1 minus the sum of ``prt_group`` divided by the number of rows.
    data : the DataFrame as read from ``file_name``.
    """
    data = pd.read_csv(file_name)
    n_rows = data.shape[0]
    k = int(np.sum(data[class_label]))
    p = 1 - np.sum(data[prt_group]) / n_rows

    # argsort is ascending, so reverse it to visit the highest score first.
    descending = data[score_label].argsort()[::-1]

    rankings = []
    for row in descending:
        group = data.loc[row, prt_group]
        if group == 1:
            is_protected = False
        elif group == 0:
            is_protected = True
        else:
            # Neither 0 nor 1: this row contributes no document.
            continue
        rankings.append(FairScoreDoc(row, data.loc[row, score_label], is_protected))
    return rankings, k, p, data
# alpha is forwarded to fsc.Fair together with k and p from load_data.
alpha = 0.1
rankings, k, p, data = load_data(file_name, 'h_c', 's', 'race')

fair = fsc.Fair(k, p, alpha)
re_ranked = fair.re_rank(rankings)

# Convert positions in the re-ranked list into scores: the document at
# position i gets 1 - i * eps, written at index `id` of h. Ids that do not
# appear in re_ranked keep the initial value 0.
eps = 1 / len(rankings)
h = np.zeros(len(rankings))
ranked_ids = np.fromiter((doc.id for doc in re_ranked), dtype=np.intp)
h[ranked_ids] = 1 - np.arange(ranked_ids.size) * eps

out_df = pd.DataFrame({'h': h, 'race': data['race']})
out_df.to_csv(output_file, index=False)

### Generate results for ROC(EOD)

In [8]:
 
import aif360
from aif360.algorithms.postprocessing.calibrated_eq_odds_postprocessing import CalibratedEqOddsPostprocessing
from aif360.algorithms.postprocessing.reject_option_classification import RejectOptionClassification
from codes.utils import power_mean, binarize

def load_data(file_name, class_label, score_label, prt_group):
    """
    Read a CSV and wrap it in an aif360 BinaryLabelDataset.

    The ``score_label`` column is divided by 10 and stored in column 's'
    (the divisor presumably matches the score's maximum -- TODO confirm).
    Column 's_hat' is then produced by ``binarize`` from 's' and the integer
    sum of ``class_label``; it serves as the dataset's label column, with 1 as
    the favorable and 0 as the unfavorable label. ``prt_group`` is registered
    as the protected attribute.

    Returns
    -------
    dataset : BinaryLabelDataset built from the full frame, with ``scores``
        set to column 's' reshaped to (number of rows, 1).
    data : the DataFrame, including the added 's' and 's_hat' columns.
    """
    data = pd.read_csv(file_name)
    n_rows = data.shape[0]

    # Keep this order: 's' is (re)written before the class column is summed.
    data['s'] = data[score_label] / 10
    n_selected = int(np.sum(data[class_label]))
    data['s_hat'] = binarize(data['s'], n_selected)

    dataset = aif360.datasets.BinaryLabelDataset(
        df=data,
        label_names=['s_hat'],
        protected_attribute_names=[prt_group],
        favorable_label=1,
        unfavorable_label=0,
    )
    dataset.scores = data['s'].to_numpy().reshape((n_rows, 1))
    return dataset, data

In [9]:
# Lower/upper bounds handed to RejectOptionClassification for the chosen metric.
metric_lb, metric_ub = -0.02, 0.02

dataset, df = load_data(file_name, 'h_c', 's', 'race')
# The "predicted" dataset is a deep copy of the input, so fit() receives the
# same labels and scores on both sides.
dataset_pred = dataset.copy(deepcopy=True)

ROC = RejectOptionClassification(
    unprivileged_groups=[{'race': 0}],
    privileged_groups=[{'race': 1}],
    low_class_thresh=0.01,
    high_class_thresh=0.99,
    num_class_thresh=100,
    num_ROC_margin=50,
    metric_name="Equal opportunity difference",
    metric_ub=metric_ub,
    metric_lb=metric_lb,
)
ROC = ROC.fit(dataset, dataset_pred)

# Keep the first (only requested) label column of the post-processed output.
dataset_transf_pred = ROC.predict(dataset)
h = dataset_transf_pred.labels[:, 0]

out_df = pd.DataFrame({'h': h, 'race': df['race']})
out_df.to_csv('./data/Baseline/Compas_ROC(EOD).csv', index=False)

### Generate results for ROC(SPD)

In [10]:
# Lower/upper bounds handed to RejectOptionClassification for the chosen metric.
metric_lb, metric_ub = -0.02, 0.02

dataset, df = load_data(file_name, 'h_c', 's', 'race')
# The "predicted" dataset is a deep copy of the input, so fit() receives the
# same labels and scores on both sides.
dataset_pred = dataset.copy(deepcopy=True)

ROC = RejectOptionClassification(
    unprivileged_groups=[{'race': 0}],
    privileged_groups=[{'race': 1}],
    low_class_thresh=0.01,
    high_class_thresh=0.99,
    num_class_thresh=100,
    num_ROC_margin=50,
    metric_name="Statistical parity difference",
    metric_ub=metric_ub,
    metric_lb=metric_lb,
)
ROC = ROC.fit(dataset, dataset_pred)

# Keep the first (only requested) label column of the post-processed output.
dataset_transf_pred = ROC.predict(dataset)
h = dataset_transf_pred.labels[:, 0]

out_df = pd.DataFrame({'h': h, 'race': df['race']})
out_df.to_csv('./data/Baseline/Compas_ROC(SPD).csv', index=False)

### Generate results for CPP (FPR) — Calibrated Equalized Odds Postprocessing

In [11]:
dataset, df = load_data(file_name, 'h_c', 's', 'race')
# The "predicted" dataset is a deep copy of the input, so fit() receives the
# same labels and scores on both sides.
dataset_pred = dataset.copy(deepcopy=True)

# Calibrated equalized-odds post-processing with the 'fpr' cost constraint;
# the seed is fixed at 1.
cpp = CalibratedEqOddsPostprocessing(
    unprivileged_groups=[{'race': 0}],
    privileged_groups=[{'race': 1}],
    cost_constraint='fpr',
    seed=1,
)
cpp = cpp.fit(dataset, dataset_pred)

# Keep the first (only requested) label column of the post-processed output.
dataset_transf_pred = cpp.predict(dataset)
h = dataset_transf_pred.labels[:, 0]

out_df = pd.DataFrame({'h': h, 'race': df['race']})
out_df.to_csv('./data/Baseline/Compas_CPP(FPR).csv', index=False)