In [1]:
import sys
import numpy as np
import pandas as pd

### Generate results for FairTopK

In [4]:
from fairsearchcore.models import FairScoreDoc
import fairsearchcore as fsc


In [3]:
def load_data(year, prt_group):
    """
    Read one year of ICLR data and package it for the fairsearchcore FairTopK re-ranker.

    Parameters
    ----------
    year : value appended (through ``str``) to the file stem ``./data/ICLR``.
    prt_group : name of the 0/1 group-indicator column in the file.

    Returns
    -------
    rankings : list of FairScoreDoc built in reversed ``argsort`` order of
        column ``s`` (largest score first). Rows whose indicator equals 0 get
        ``True`` as the third constructor argument, rows whose indicator
        equals 1 get ``False``; rows with any other value are left out.
    k : int, the sum of column ``h_c``.
    p : one minus the mean of the indicator column.
    data : the DataFrame exactly as read from disk.
    """
    csv_path = './data/ICLR' + str(year) + '.data'
    data = pd.read_csv(csv_path)

    k = int(np.sum(data.h_c))
    p = 1 - np.sum(data[prt_group]) / data.shape[0]

    rankings = []
    for idx in data.s.argsort()[::-1]:
        group_flag = data.loc[idx, prt_group]
        if group_flag == 1:
            is_protected = False
        elif group_flag == 0:
            is_protected = True
        else:
            # neither 0 nor 1: the row contributes no document
            continue
        rankings.append(FairScoreDoc(idx, data.loc[idx, 's'], is_protected))
    return rankings, k, p, data


# Third argument handed to fsc.Fair for every (year, group) run.
alpha = 0.1
for year in [2017,2018,2019,2020]:
    for prt_group in ['top', 'famous']:
        rankings, k, p, data = load_data(year,prt_group)
        fair = fsc.Fair(k, p, alpha)
        re_ranked = fair.re_rank(rankings)
        # Score step between consecutive positions of the re-ranked list.
        eps = 1/len(rankings)
        # h is addressed by DataFrame row id (item.id) and written next to
        # data[prt_group], so it needs one slot per row of `data`. Sizing it by
        # len(rankings) breaks as soon as load_data leaves any row out.
        h = np.zeros(len(data))
        for i,item in enumerate(re_ranked):
            # Position 0 gets 1, later positions drop by eps; rows that are
            # not in re_ranked keep 0.
            h[item.id] = 1 - i*eps
        out_df = pd.DataFrame({'h': h, prt_group : data[prt_group]})
        out_df.to_csv('./data/Baseline/ICLR'+str(year)+str(prt_group)+'FairTopK.csv', index = False)



### Generate results for ROC(EOD)

In [5]:
 
import aif360
from aif360.algorithms.postprocessing.calibrated_eq_odds_postprocessing import CalibratedEqOddsPostprocessing
from aif360.algorithms.postprocessing.reject_option_classification import RejectOptionClassification
from codes.utils import power_mean, binarize

def load_data(year, prt_group):
    """
    Read one year of ICLR data and wrap it in an aif360 BinaryLabelDataset.

    Column ``s`` is divided by 10 in place, and a new column ``s_hat`` is
    filled with ``binarize(s, k)`` where ``k`` is the integer sum of column
    ``h_c`` (``binarize`` comes from ``codes.utils``; its exact rule is not
    visible here).

    Parameters
    ----------
    year : value appended (through ``str``) to the file stem ``./data/ICLR``.
    prt_group : column name registered as the protected attribute.

    Returns
    -------
    dataset : BinaryLabelDataset labelled by ``s_hat`` (favorable 1,
        unfavorable 0) whose ``scores`` attribute is overwritten with the
        rescaled ``s`` column as an (n_rows, 1) array.
    data : the DataFrame after the two column updates above.
    """
    data = pd.read_csv('./data/ICLR' + str(year) + '.data')

    n_positive = int(np.sum(data['h_c']))
    data['s'] = data['s'] / 10
    data['s_hat'] = binarize(data['s'], n_positive)

    dataset = aif360.datasets.BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=data,
        label_names=['s_hat'],
        protected_attribute_names=[prt_group],
    )
    score_column = data['s'].to_numpy()
    dataset.scores = score_column.reshape((len(data), 1))
    return dataset, data

In [6]:
# Upper / lower bounds on the fairness metric passed to RejectOptionClassification.
metric_ub = 0.02
metric_lb = -0.02
for year in [2017, 2018, 2019, 2020]:
    print(year)
    for prt_group in ['top', 'famous']:
        dataset, df = load_data(year, prt_group)
        # Group value 0 is treated as unprivileged, 1 as privileged.
        ROC = RejectOptionClassification(
            unprivileged_groups=[{prt_group: 0}],
            privileged_groups=[{prt_group: 1}],
            low_class_thresh=0.01,
            high_class_thresh=0.99,
            num_class_thresh=100,
            num_ROC_margin=50,
            metric_name="Equal opportunity difference",
            metric_ub=metric_ub,
            metric_lb=metric_lb,
        )
        # fit() is given the dataset together with a deep copy of itself.
        dataset_pred = dataset.copy(deepcopy=True)
        ROC = ROC.fit(dataset, dataset_pred)
        dataset_transf_pred = ROC.predict(dataset)
        # First label column of the post-processed prediction.
        h = dataset_transf_pred.labels[:, 0]
        out_path = './data/Baseline/ICLR' + str(year) + str(prt_group) + 'ROC(EOD).csv'
        out_df = pd.DataFrame({'h': h, prt_group: df[prt_group]})
        out_df.to_csv(out_path, index=False)

2017
2018
2019
2020


### Generate results for ROC(SPD)

In [9]:
# Upper / lower bounds on the fairness metric passed to RejectOptionClassification.
metric_ub = 0.02
metric_lb = -0.02
for year in [2017, 2018, 2019, 2020]:
    print(year)
    for prt_group in ['top', 'famous']:
        dataset, df = load_data(year, prt_group)
        # Group value 0 is treated as unprivileged, 1 as privileged.
        ROC = RejectOptionClassification(
            unprivileged_groups=[{prt_group: 0}],
            privileged_groups=[{prt_group: 1}],
            low_class_thresh=0.01,
            high_class_thresh=0.99,
            num_class_thresh=100,
            num_ROC_margin=50,
            metric_name="Statistical parity difference",
            metric_ub=metric_ub,
            metric_lb=metric_lb,
        )
        # fit() is given the dataset together with a deep copy of itself.
        dataset_pred = dataset.copy(deepcopy=True)
        ROC = ROC.fit(dataset, dataset_pred)
        dataset_transf_pred = ROC.predict(dataset)
        # First label column of the post-processed prediction.
        h = dataset_transf_pred.labels[:, 0]
        out_path = './data/Baseline/ICLR' + str(year) + str(prt_group) + 'ROC(SPD).csv'
        out_df = pd.DataFrame({'h': h, prt_group: df[prt_group]})
        out_df.to_csv(out_path, index=False)

2017
2018
2019
2020


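### Generate results for CEP (FPR): calibrated equalized-odds postprocessing with an FPR cost constraint (output files are tagged `CPP(FPR)`)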

In [8]:
# One CalibratedEqOddsPostprocessing run per (year, group) pair, each written to its own CSV.
for year in [2017, 2018, 2019, 2020]:
    print(year)
    for prt_group in ['top', 'famous']:
        dataset, df = load_data(year, prt_group)
        # Group value 0 is treated as unprivileged, 1 as privileged; the seed is pinned to 1.
        cpp = CalibratedEqOddsPostprocessing(
            unprivileged_groups=[{prt_group: 0}],
            privileged_groups=[{prt_group: 1}],
            cost_constraint='fpr',
            seed=1,
        )
        # fit() is given the dataset together with a deep copy of itself.
        dataset_pred = dataset.copy(deepcopy=True)
        cpp = cpp.fit(dataset, dataset_pred)
        dataset_transf_pred = cpp.predict(dataset)
        # First label column of the post-processed prediction.
        h = dataset_transf_pred.labels[:, 0]
        out_path = './data/Baseline/ICLR' + str(year) + str(prt_group) + 'CPP(FPR).csv'
        out_df = pd.DataFrame({'h': h, prt_group: df[prt_group]})
        out_df.to_csv(out_path, index=False)

2017
2018
2019
2020
