In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from fairaudit import Auditor
from fairaudit.groups import get_intersections
from fairaudit.metrics import Metric

In [3]:
# Load the COMPAS two-year scores and keep only the columns this notebook uses.
df_deciles = pd.read_csv('../data/compas-scores-two-years.csv')
keep_columns = ["age", "c_charge_degree", "race", "age_cat", "score_text", "sex", "priors_count", 
                "days_b_screening_arrest", "decile_score", "is_recid", "two_year_recid", "c_jail_in", "c_jail_out"]

df_deciles = df_deciles[keep_columns]

# Keep rows screened within 30 days of arrest (inclusive on both ends; missing values drop out).
row_filter = df_deciles["days_b_screening_arrest"].between(-30, 30)
row_filter &= (df_deciles["is_recid"] != -1) & (df_deciles["c_charge_degree"] != "O")
# read_csv parses the strings "NA" / "N/A" as NaN by default, and NaN != "NA" evaluates to
# True, so a string comparison alone keeps exactly the rows it is meant to remove.
# Test for missingness explicitly; the string test is kept in case NA parsing is disabled.
row_filter &= df_deciles["score_text"].notna() & (df_deciles["score_text"] != "NA")
df_deciles = df_deciles[row_filter]

df_deciles

Unnamed: 0,age,c_charge_degree,race,age_cat,score_text,sex,priors_count,days_b_screening_arrest,decile_score,is_recid,two_year_recid,c_jail_in,c_jail_out
0,69,F,Other,Greater than 45,Low,Male,0,-1.0,1,0,0,2013-08-13 06:03:42,2013-08-14 05:41:20
1,34,F,African-American,25 - 45,Low,Male,0,-1.0,3,1,1,2013-01-26 03:45:27,2013-02-05 05:36:53
2,24,F,African-American,Less than 25,Low,Male,4,-1.0,4,1,1,2013-04-13 04:58:34,2013-04-14 07:02:04
5,44,M,Other,25 - 45,Low,Male,0,0.0,1,0,0,2013-11-30 04:50:18,2013-12-01 12:28:56
6,41,F,Caucasian,25 - 45,Medium,Male,14,-1.0,6,1,1,2014-02-18 05:08:24,2014-02-24 12:18:30
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7209,23,F,African-American,Less than 25,Medium,Male,0,-1.0,7,0,0,2013-11-22 05:18:27,2013-11-24 02:59:20
7210,23,F,African-American,Less than 25,Low,Male,0,-1.0,3,0,0,2014-01-31 07:13:54,2014-02-02 04:03:52
7211,57,F,Other,Greater than 45,Low,Male,0,-1.0,1,0,0,2014-01-13 05:48:01,2014-01-14 07:49:46
7212,33,M,African-American,25 - 45,Low,Female,3,-1.0,2,0,0,2014-03-08 08:06:02,2014-03-09 12:18:04


In [4]:
# Integer-encode each protected attribute: np.unique returns the sorted distinct labels
# and, for every row, the position of that row's label among them.
race_labels = df_deciles['race'].to_numpy()
age_labels = df_deciles['age_cat'].to_numpy()
sex_labels = df_deciles['sex'].to_numpy()

unique_races, race_inds = np.unique(race_labels, return_inverse=True)
unique_ages, age_inds = np.unique(age_labels, return_inverse=True)
unique_sexes, sex_inds = np.unique(sex_labels, return_inverse=True)

# Start with race only: a one-column feature matrix handed to get_intersections.
group_features = race_inds[:, np.newaxis]
groups = get_intersections(group_features)

## Lower bounding the predictive parity bias for Black defendants (Northpointe analysis)

In [5]:
# Turn every floating-point error into an exception (global numpy setting).
np.seterr(all='raise')

# Restrict to defendants scored as high risk.
# Angwin et al. choose 5, Northpointe says 8
high_risk_filter = df_deciles['decile_score'].ge(5)
df_ppv = df_deciles.loc[high_risk_filter]

# Audit only columns 0 and 2 of the race groups, restricted to the high-risk rows.
groups_ppv = groups[:, [0, 2]][high_risk_filter]

y = df_ppv['two_year_recid'].to_numpy()
z = groups[:, 2][high_risk_filter]  # white indicators

def eval_ppv(Z, Y):
    """Return an elementwise indicator of where Y is numerically equal to 1 (Z is not used)."""
    is_one = np.isclose(Y, 1)
    return is_one

def threshold_white(Z, Y):
    """Return the mean of Y over the entries where Z equals True."""
    reference_mask = (Z == True)
    reference_outcomes = Y[reference_mask]
    return np.mean(reference_outcomes)

# Metric: indicator Y == 1 per sample, thresholded at the mean of Y where Z is True.
metric = Metric("ppv_COMPAS", eval_ppv, threshold_white)

auditor = Auditor(group_features[high_risk_filter], y, z, metric)

boot_params = dict(seed=0, B=2000)

auditor.calibrate_groups(
    alpha=0.05,
    type='upper',
    epsilon=None,
    groups=groups_ppv,
    bootstrap_params=boot_params,
)

# Show the result for the first audited group (column 0 of groups_ppv).
auditor.query_group(0)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:01<00:00, 1718.30it/s]


([0.01874145481608773], [0.6495352651722253], [0.5948275862068966])

In [6]:
# Turn every floating-point error into an exception (global numpy setting).
np.seterr(all='raise')

# Restrict to defendants with a decile score of 5 or more, this time keeping every race group.
high_risk_filter = df_deciles['decile_score'].ge(5)
df_ppv = df_deciles.loc[high_risk_filter]
groups_ppv = groups[high_risk_filter]

y = df_ppv['two_year_recid'].to_numpy()
z = groups[:, 2][high_risk_filter]  # white indicators

def eval_ppv(Z, Y):
    """Elementwise test of whether Y is numerically equal to 1; Z is ignored."""
    positive = np.isclose(Y, 1)
    return positive

# Alternative reference level: the population average instead of the white average.
# NOTE(review): the Metric constructed further down in this cell is passed `threshold_white`,
# not this function, so `threshold` is unused there. Confirm which reference is intended.
def threshold(Z, Y):
    """Return the mean of Y over all samples; Z is ignored."""
    population_mean = np.mean(Y)
    return population_mean

# The threshold passed here is `threshold_white` (mean of Y where Z is True).
metric = Metric("ppv_COMPAS", eval_ppv, threshold_white)

auditor = Auditor(group_features[high_risk_filter], y, z, metric)

# Studentization threshold derived from 25 / (number of high-risk samples).
prob_threshold = 25 / len(y)
boot_params = dict(
    seed=0,
    B=2000,
    student='prob_bound',
    student_threshold=prob_threshold**(3/2),
)

auditor.calibrate_groups(
    alpha=0.05,
    type='interval',
    epsilon=None,
    groups=groups_ppv,
    bootstrap_params=boot_params,
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:01<00:00, 1678.80it/s]


In [7]:
# For every race group print its label (the race value shared by its members) and its bound.
for col in range(groups_ppv.shape[1]):
    in_group = groups[:, col]
    race_label = df_deciles.loc[in_group, 'race'].unique()[0]
    print(race_label)
    print(auditor.query_group(col)[0])

African-American
[[0.012142244095936311, 0.09794498464245843]]
Asian
[[-4.5243884202045965, 4.836605112290518]]
Caucasian
[[-0.0690016232655122, 0.0700907740400982]]
Hispanic
[[-0.18784821050121417, 0.12118023400888106]]
Native American
[[-3.525272599736387, 3.6417380735801874]]
Other
[[-0.21240554968008743, 0.22618471976774254]]


In [8]:
# Widen the audit beyond race: one column each for race, age category and sex,
# then every intersection of those attributes.
group_features = np.column_stack((race_inds, age_inds, sex_inds))

groups_all = get_intersections(group_features)

In [9]:
# Turn every floating-point error into an exception (global numpy setting).
np.seterr(all='raise')

# Restrict to decile score >= 5 and take the matching rows of the intersection groups.
high_risk_filter = df_deciles['decile_score'].ge(5)
df_ppv = df_deciles.loc[high_risk_filter]
groups_ppv = groups_all[high_risk_filter]

y = df_ppv['two_year_recid'].to_numpy()
# The reference indicator still comes from the race-only `groups` matrix.
z = groups[:, 2][high_risk_filter]  # white indicators

def eval_ppv(Z, Y):
    """Boolean array marking the entries of Y that are numerically equal to 1 (Z unused)."""
    hits = np.isclose(Y, 1)
    return hits

# Alternative reference level: the population average instead of the white average.
# NOTE(review): the Metric constructed further down in this cell is passed `threshold_white`,
# not this function, so `threshold` is unused there. Confirm which reference is intended.
def threshold(Z, Y):
    """Return the mean of Y across every sample, regardless of Z."""
    overall = np.mean(Y)
    return overall

# The threshold passed here is `threshold_white` (mean of Y where Z is True).
metric = Metric("ppv_COMPAS", eval_ppv, threshold_white)

# group_features now holds the race, age and sex columns.
auditor = Auditor(group_features[high_risk_filter], y, z, metric)

# Studentization threshold derived from 25 / (number of high-risk samples).
prob_threshold = 25 / len(y)
boot_params = dict(
    seed=0,
    B=2000,
    student='prob_bound',
    student_threshold=prob_threshold**(3/2),
)

auditor.calibrate_groups(
    alpha=0.05,
    type='interval',
    epsilon=None,
    groups=groups_ppv,
    bootstrap_params=boot_params,
)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:02<00:00, 941.12it/s]


In [10]:
group_feats = ['race', 'age_cat', 'sex']
n_groups = groups_ppv.shape[1]
print("number of groups", n_groups)
for group_ind in range(n_groups):
    bound, metrics, thresholds = auditor.query_group(group_ind)
    # Report only groups whose smallest bound entry is at least -0.015.
    if not (np.asarray(bound).min() >= -0.015):
        continue
    members = groups_ppv[:, group_ind]
    # Label the group by the attributes on which all of its members agree.
    is_constant = df_ppv[group_feats][members].nunique() == 1
    feats_filter = is_constant.index[is_constant]
    print(df_ppv[feats_filter][members].drop_duplicates())
    print(bound)


number of groups 81
                race
11  African-American
[[0.00990378402911389, 0.10050821508763508]]
   age_cat
6  25 - 45
[[-0.008980577264748935, 0.08789098394610374]]
    sex
6  Male
[[0.018626004091819477, 0.09986519507335179]]
                race  age_cat
17  African-American  25 - 45
[[-0.0036723503908867375, 0.11374772380408638]]
                race       age_cat
11  African-American  Less than 25
[[-0.01397456868430079, 0.14991465059899822]]
                race   sex
11  African-American  Male
[[0.029059467696587647, 0.12725957990995806]]
   age_cat   sex
6  25 - 45  Male
[[0.0006506144026803135, 0.10702102334819014]]
         age_cat   sex
11  Less than 25  Male
[[0.012240315998093512, 0.1588010739990503]]
                race  age_cat   sex
17  African-American  25 - 45  Male
[[0.010569282254261958, 0.13735865468650088]]
                race       age_cat   sex
11  African-American  Less than 25  Male
[[0.015082013230284921, 0.19594510372662777]]


In [None]:
# Outcome (two-year recidivism) and score (decile) for the full filtered sample.
y, z = (df_deciles[column].to_numpy() for column in ('two_year_recid', 'decile_score'))

def eval_test_fairness(Z, Y):
    """Elementwise indicator of Y being numerically equal to 1; Z is not read."""
    outcome_is_one = np.isclose(Y, 1)
    return outcome_is_one

def threshold_test_fairness(Z, Y):
    """Fraction of entries of Y that are numerically equal to 1; Z is not read."""
    outcome_is_one = np.isclose(Y, 1)
    return np.mean(outcome_is_one)

# One calibration bin per distinct value of z.
metric_params = {'calibration_bins' : np.unique(z)}

metric = Metric("test_fairness", eval_test_fairness, threshold_test_fairness, metric_params)

# Both audits below share this value; y does not change between them.
prob_threshold = 25 / len(y)

# First audit: epsilon = 0.15 with 'prob_bool' studentization.
auditor = Auditor(group_features, y, z, metric)
boot_params = dict(
    seed=0,
    B=2000,
    student='prob_bool',
    student_threshold=prob_threshold**(1/2),
)
auditor.calibrate_groups(
    alpha=0.1/10,
    type='interval',
    epsilon=0.15,
    groups=groups,
    bootstrap_params=boot_params,
)

# Second audit: no epsilon, 'prob_bound' studentization.
auditor_bd = Auditor(group_features, y, z, metric)
boot_params = dict(
    seed=0,
    B=2000,
    student='prob_bound',
    student_threshold=prob_threshold**(3/2),
)
auditor_bd.calibrate_groups(
    alpha=0.1/10,
    type='interval',
    epsilon=None,
    groups=groups,
    bootstrap_params=boot_params,
)

In [136]:
# Query both auditors for each group; print the interval and the member profiles
# only when every interval endpoint is below 0.11 in absolute value.
for group_ind in range(groups.shape[1]):
    member_mask = groups[:, group_ind]
    bound, metrics, thresholds = auditor.query_group(member_mask)
    bound_bd, metrics, thresholds = auditor_bd.query_group(member_mask)

    within_tolerance = np.abs(bound_bd).max() < 0.11
    print(np.all(bound))
    print(within_tolerance)
    if within_tolerance:
        print(bound_bd)
        print(df_deciles[group_feats][member_mask].drop_duplicates())

True
True
[[-0.03982872313242021, 0.07435009569929951], [-0.08596871187462736, 0.04875442679019057], [-0.03871810895963246, 0.10963588425611465], [-0.039069919471408934, 0.09796636586108168], [-0.06182329793072372, 0.07709324250524387], [-0.060680474231902246, 0.07847676448890888], [-0.06109783290391682, 0.07763060171402328], [-0.06103603305482951, 0.053361548973905486], [-0.04766418628718098, 0.06823546790544954], [-0.03200106933765108, 0.10589902639445559]]
                race          age_cat     sex
1   African-American          25 - 45    Male
2   African-American     Less than 25    Male
20  African-American  Greater than 45    Male
40  African-American     Less than 25  Female
43  African-American  Greater than 45  Female
78  African-American          25 - 45  Female
False
False
False
False
False
False
False
False
False
False


## Flagging unfairness

In [10]:
# Restrict to rows with two_year_recid == 0.
false_filter = df_deciles['two_year_recid'].eq(0)

df_fpr = df_deciles.loc[false_filter]
groups_fpr = groups_all[false_filter]
# Drop group columns that have no member left after the restriction.
nonempty_columns = groups_fpr.any(axis=0)
groups_fpr = groups_fpr[:, nonempty_columns]

def eval_fpr(Z, Y):
    """Elementwise indicator of Z being numerically equal to 1; Y is not read."""
    flagged = np.isclose(Z, 1)
    return flagged

def threshold_fpr(Z, Y):
    """Fraction of entries of Z that are numerically equal to 1; Y is not read."""
    flagged = np.isclose(Z, 1)
    return np.mean(flagged)

x = group_features[false_filter]
y = df_fpr['two_year_recid'].to_numpy()
z = df_fpr['decile_score'].ge(5).to_numpy()  # high-risk flag

metric = Metric("fpr", eval_fpr, threshold_fpr)

auditor = Auditor(x, y, z, metric)

boot_params = dict(
    seed=0,
    student="mad",
    student_threshold=1e-8,
    prob_threshold=25 / len(x),
)
flags, metric_values = auditor.flag_groups(groups_fpr, "lower", 0.1, 0, boot_params)




In [11]:
group_feats = ['race', 'age_cat', 'sex']

for group_ind in range(groups_fpr.shape[1]):
    if not flags[group_ind]:
        continue
    members = groups_fpr[:, group_ind]
    # The label is built from the attributes on which all members agree, so a group whose
    # members happen to share further attributes is printed with those attributes too.
    is_constant = df_fpr[group_feats][members].nunique() == 1
    feats_filter = is_constant.index[is_constant]
    group_label = df_fpr[feats_filter][members].drop_duplicates().values
    print(metric_values[:, group_ind], group_label)

[0.12067585] [['African-American']]
[0.19729408] [['Native American' '25 - 45' 'Male']]
[0.00718303] [['25 - 45']]
[0.23186407] [['Less than 25']]
[0.00025448] [['Male']]
[0.11251147] [['African-American' '25 - 45']]
[0.29163371] [['African-American' 'Less than 25']]
[0.18044015] [['Caucasian' 'Less than 25']]
[0.17943694] [['Hispanic' 'Less than 25']]
[0.19729408] [['Native American' '25 - 45' 'Male']]
[0.10269949] [['Other' 'Less than 25']]
[0.0759068] [['African-American' 'Female']]
[0.13393792] [['African-American' 'Male']]
[0.19729408] [['Native American' '25 - 45' 'Male']]
[0.01650119] [['25 - 45' 'Male']]
[0.30513722] [['Less than 25' 'Female']]
[0.20638499] [['Less than 25' 'Male']]
[0.03691672] [['African-American' '25 - 45' 'Female']]
[0.13514719] [['African-American' '25 - 45' 'Male']]
[0.32692371] [['African-American' 'Less than 25' 'Female']]
[0.27957256] [['African-American' 'Less than 25' 'Male']]
[0.39729408] [['Caucasian' 'Less than 25' 'Female']]
[0.09573158] [['Cauca

In [12]:
# Restrict to rows with two_year_recid == 1.
true_filter = df_deciles['two_year_recid'].eq(1)

df_ppv = df_deciles.loc[true_filter]
groups_ppv = groups_all[true_filter]
# Drop group columns that have no member left after the restriction.
nonempty_columns = groups_ppv.any(axis=0)
groups_ppv = groups_ppv[:, nonempty_columns]

def eval_ppv(Z, Y):
    """Elementwise indicator of Z being numerically equal to 1; Y is not read here."""
    flagged = np.isclose(Z, 1)
    return flagged

def threshold_ppv(Z, Y):
    """Fraction of entries with Z equal to 1 among the entries where Y equals 1."""
    flagged = np.isclose(Z, 1)
    in_reference = np.isclose(Y, 1)
    return np.mean(flagged[in_reference])

x = group_features[true_filter]
# Reference-group indicator: column 2 of the intersection matrix. Read it from
# `groups_all` rather than `groups_ppv`: the latter has had its all-empty columns removed,
# so a fixed position in it can silently refer to a different group whenever an earlier
# column was dropped. When no earlier column is dropped the two are identical.
y = groups_all[true_filter][:, 2]
z = (df_ppv['decile_score'] >= 5).to_numpy() # high-risk flag

# NOTE(review): the metric is labelled "fpr" although this cell is restricted to
# two_year_recid == 1 and uses eval_ppv / threshold_ppv. Confirm the intended label.
metric = Metric("fpr", eval_ppv, threshold_ppv)

auditor = Auditor(x, y, z, metric)

boot_params = {'seed': 0, "student": "mad", "student_threshold": 1e-8}
flags, metric_values = auditor.flag_groups(groups_ppv, "upper", 0.1, 0, boot_params)




In [13]:
group_feats = ['race', 'age_cat', 'sex']

for group_ind in range(groups_ppv.shape[1]):
    if not flags[group_ind]:
        continue
    members = groups_ppv[:, group_ind]
    # The label is built from the attributes on which all members agree, so a group whose
    # members happen to share further attributes is printed with those attributes too.
    is_constant = df_ppv[group_feats][members].nunique() == 1
    feats_filter = is_constant.index[is_constant]
    group_label = df_ppv[feats_filter][members].drop_duplicates().values
    print(metric_values[:, group_ind], group_label)

[-0.08566022] [['Hispanic']]
[-0.16493996] [['Other']]
[-0.09302162] [['Greater than 45']]
[-0.22717905] [['Caucasian' 'Greater than 45']]
[-0.11476075] [['Hispanic' '25 - 45']]
[-0.32507821] [['Hispanic' 'Greater than 45']]
[-0.22828732] [['Other' '25 - 45']]
[-0.33698297] [['Other' 'Greater than 45']]
[-0.50364964] [['Asian' 'Greater than 45' 'Female']]
[-0.01285209] [['Caucasian' 'Male']]
[-0.34980348] [['Hispanic' 'Female']]
[-0.04352694] [['Hispanic' 'Male']]
[-0.04910418] [['Other' 'Female']]
[-0.17621601] [['Other' 'Male']]
[-0.19856489] [['Greater than 45' 'Female']]
[-0.07548062] [['Greater than 45' 'Male']]
[-0.50364964] [['Asian' 'Greater than 45' 'Female']]
[-0.01847443] [['Caucasian' '25 - 45' 'Male']]
[-0.27507821] [['Caucasian' 'Greater than 45' 'Female']]
[-0.21476075] [['Caucasian' 'Greater than 45' 'Male']]
[-0.38600258] [['Hispanic' '25 - 45' 'Female']]
[-0.0640892] [['Hispanic' '25 - 45' 'Male']]
[-0.50364964] [['Hispanic' 'Greater than 45' 'Female']]
[-0.28625833] 