In [46]:
!pip install aequitas
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:75% !important; }</style>"))
import yaml
import os
import pandas as pd
import numpy as np
import seaborn as sns
from aequitas.group import Group
from aequitas.bias import Bias
from aequitas.fairness import Fairness
import aequitas.plot as ap
DATAPATH = 'https://github.com/dssg/fairness_tutorial/raw/master/data/'

# Auditing the Model with the Highest Precision at Top 1000

In [30]:
# Load the gzip-compressed audit CSV from the tutorial data location.
df = pd.read_csv(
    f'{DATAPATH}single_audit_df.csv.gz',
    compression='gzip',
)

Aequitas needs the predictions (a binary score), the label value, and the attributes used to break down the metric results by group.

In [31]:
# Preview the first 10 rows of the audit frame.
df.head(10)

Unnamed: 0,score,label_value,poverty_level,metro_type,teacher_sex
0,0.0,0.0,lower,suburban_rural,female
1,0.0,0.0,highest,unknown,female
2,0.0,1.0,lower,suburban_rural,female
3,0.0,0.0,lower,unknown,female
4,0.0,0.0,lower,urban,female
5,0.0,1.0,lower,unknown,female
6,0.0,0.0,highest,urban,female
7,0.0,0.0,highest,unknown,female
8,0.0,1.0,lower,unknown,female
9,0.0,0.0,highest,unknown,female


In [34]:
# The score has been binarized (0/1); count how many rows carry each value.
df['score'].value_counts()

0.0    16677
1.0     1000
Name: score, dtype: int64

## Aequitas

In [35]:
# Compute the group-wise crosstab of the audit frame.
# get_crosstabs returns two values; only the first (the crosstab) is kept and
# the second is discarded.
g = Group()
xtab, _ = g.get_crosstabs(df)

model_id, score_thresholds 0 {'rank_abs': [1000]}


In [36]:
# Column names that Group reports as absolute metrics for this crosstab; used
# below to split xtab into its non-metric columns and its metric columns.
absolute_metrics = g.list_absolute_metrics(xtab)

In [37]:
# Show every crosstab column that is NOT one of the absolute metrics.
xtab.loc[:, ~xtab.columns.isin(absolute_metrics)]

Unnamed: 0,model_id,score_threshold,k,attribute_name,attribute_value,pp,pn,fp,fn,tn,tp,group_label_pos,group_label_neg,group_size,total_entities
0,0,binary 0/1,1000,poverty_level,highest,247,9726,89,2954,6772,158,3112,6861,9973,17677
1,0,binary 0/1,1000,poverty_level,lower,753,6951,359,2464,4487,394,2858,4846,7704,17677
2,0,binary 0/1,1000,metro_type,suburban_rural,626,5880,293,2060,3820,333,2393,4113,6506,17677
3,0,binary 0/1,1000,metro_type,unknown,182,1874,83,666,1208,99,765,1291,2056,17677
4,0,binary 0/1,1000,metro_type,urban,192,8923,72,2692,6231,120,2812,6303,9115,17677
5,0,binary 0/1,1000,teacher_sex,female,951,14142,424,4620,9522,527,5147,9946,15093,17677
6,0,binary 0/1,1000,teacher_sex,male,49,2535,24,798,1737,25,823,1761,2584,17677


In [38]:
# Show the absolute metrics next to the attribute name/value that identify each group.
xtab.loc[:, ['attribute_name', 'attribute_value', *absolute_metrics]]

Unnamed: 0,attribute_name,attribute_value,tpr,tnr,for,fdr,fpr,fnr,npv,precision,ppr,pprev,prev
0,poverty_level,highest,0.050771,0.987028,0.303722,0.360324,0.012972,0.949229,0.696278,0.639676,0.247,0.024767,0.312043
1,poverty_level,lower,0.137859,0.925918,0.354481,0.47676,0.074082,0.862141,0.645519,0.52324,0.753,0.097741,0.370976
2,metro_type,suburban_rural,0.139156,0.928762,0.35034,0.468051,0.071238,0.860844,0.64966,0.531949,0.626,0.096219,0.367814
3,metro_type,unknown,0.129412,0.935709,0.35539,0.456044,0.064291,0.870588,0.64461,0.543956,0.182,0.088521,0.372082
4,metro_type,urban,0.042674,0.988577,0.301692,0.375,0.011423,0.957326,0.698308,0.625,0.192,0.021064,0.308502
5,teacher_sex,female,0.10239,0.95737,0.326686,0.445846,0.04263,0.89761,0.673314,0.554154,0.951,0.063009,0.341019
6,teacher_sex,male,0.030377,0.986371,0.314793,0.489796,0.013629,0.969623,0.685207,0.510204,0.049,0.018963,0.318498


## Defining the reference groups to calculate disparities

In [39]:
# Compute disparities against one explicitly chosen reference group per attribute.
b = Bias()
bdf = b.get_disparity_predefined_groups(
    xtab,
    original_df=df,
    ref_groups_dict={
        'poverty_level': 'lower',
        'metro_type': 'suburban_rural',
        'teacher_sex': 'male',
    },
)

get_disparity_predefined_group()


In [50]:
# Show only the disparity columns, keyed by the attribute name/value of each group.
bdf.loc[:, ['attribute_name', 'attribute_value', *b.list_disparities(bdf)]]

Unnamed: 0,attribute_name,attribute_value,ppr_disparity,pprev_disparity,precision_disparity,fdr_disparity,for_disparity,fpr_disparity,fnr_disparity,tpr_disparity,tnr_disparity,npv_disparity
0,poverty_level,highest,0.328021,0.253392,1.222528,0.755777,0.856807,0.175102,1.101013,0.368285,1.065999,1.078633
1,poverty_level,lower,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,metro_type,suburban_rural,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,metro_type,unknown,0.290735,0.92,1.022572,0.974346,1.014413,0.902491,1.011319,0.929977,1.007479,0.992228
4,metro_type,urban,0.306709,0.218919,1.174925,0.801195,0.861141,0.160353,1.112078,0.306665,1.064402,1.074882
5,teacher_sex,female,19.408163,3.322778,1.086141,0.91027,1.037782,3.127991,0.925731,3.37067,0.970598,0.982642
6,teacher_sex,male,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Now we are going to focus our analysis on the fairness metric(s) of interest in this case study: TPR across different groups. The Aequitas plot module exposes the `disparities_metrics()` function, which plots both the disparities and the group-wise metric results side by side.

In [40]:
# Metrics passed to the disparity plots below; this case study looks at TPR only.
metrics = ['tpr']

In [47]:
# Plot disparities and group-wise values of the selected metrics by poverty level.
ap.disparities_metrics(bdf, metrics, 'poverty_level', fairness_threshold=1.3)

In [48]:
# Plot disparities and group-wise values of the selected metrics by metro type.
ap.disparities_metrics(bdf, metrics, 'metro_type', fairness_threshold=1.3)

In [49]:
# Plot disparities and group-wise values of the selected metrics by teacher sex.
ap.disparities_metrics(bdf, metrics, 'teacher_sex', fairness_threshold=1.3)