In [17]:
import aif360
import pandas as pd
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric

In [18]:
# Table holding the model predictions (Y_PRED), the ground truth (Y_TRUE) and
# the protected-attribute columns that are audited below.
df = pd.read_csv('data/dataframe_final.csv')

# Columns of `df` treated as protected attributes; each one becomes a column
# of the fairness report.
protected_attributes = [
    'GENDER',
    'LANGUAGE',
    'INSURANCE',
    'RELIGION',
    'ETHNICITY',
    'AGE',
]

# Row labels of the fairness report, in the order the values are filled in.
metrics = [
    'Error Rate Difference (ERD)',
    'False Positive Rate Difference (FPRD)',
    'False Discovery Rate Difference (FDRD)',
    'Positive Predicted Value (PPV)',
]

# Last expression of the cell: render the loaded table.
df

Unnamed: 0,SUBJECT_ID,Y_PRED,Y_TRUE,GENDER,LANGUAGE,INSURANCE,RELIGION,ETHNICITY,AGE
0,10011,1,1,0,1,1,1,0,1
1,10026,0,0,0,1,1,0,1,0
2,10030,0,0,1,1,1,1,1,0
3,10042,0,0,1,1,1,1,1,0
4,10094,0,0,1,1,1,1,0,0
...,...,...,...,...,...,...,...,...,...
3231,9970,0,0,1,1,1,1,1,1
3232,9977,0,0,1,1,1,1,1,0
3233,99863,0,0,1,0,1,1,1,1
3234,99883,0,0,1,1,1,1,1,0


In [19]:
# Create the (initially empty) report table: one row per fairness metric and
# one column per protected attribute.
# `metrics` is passed directly as the index. Wrapping it in another list
# (`index=[metrics]`) makes pandas interpret it as a list of arrays and build
# a one-level MultiIndex instead of a plain Index, which complicates
# label-based row lookups on the finished table.
df_print = pd.DataFrame(columns=protected_attributes, index=metrics)

In [20]:
# Fill one column of `df_print` per protected attribute with the fairness
# metrics computed by aif360 from the ground truth and the predictions in `df`.
for protected_attribute in protected_attributes:
    # Frame handed to aif360: only the protected attribute and the
    # ground-truth label. A copy is taken so `df` itself is never passed to
    # the library.
    attribute_df = df[[protected_attribute, 'Y_TRUE']].copy()

    # create the aif360 dataset with the ground truth
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=attribute_df,
        label_names=['Y_TRUE'],
        protected_attribute_names=[protected_attribute],
    )

    # create the aif360 dataset with the predictions: same features and
    # protected attribute, labels replaced by the model output.
    # aif360 documents `labels` as an (n, 1) ndarray, so the predictions are
    # converted explicitly instead of storing the 1-D pandas Series.
    dataset_pred = dataset.copy()
    dataset_pred.labels = df['Y_PRED'].to_numpy(dtype=float).reshape(-1, 1)

    # No privileged/unprivileged values were passed to BinaryLabelDataset, so
    # the ones the dataset exposes for this attribute are used as-is; the
    # first listed value of each side defines the group.
    attr_pos = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_value = dataset_pred.privileged_protected_attributes[attr_pos][0]
    unprivileged_value = dataset_pred.unprivileged_protected_attributes[attr_pos][0]
    privileged_groups = [{protected_attribute: privileged_value}]
    unprivileged_groups = [{protected_attribute: unprivileged_value}]

    # creates an object that computes metrics for binary classification
    classified_metric = ClassificationMetric(
        dataset,
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )

    # fill in the metrics, in the same order as the rows of `df_print`
    df_print[protected_attribute] = [
        classified_metric.error_rate_difference(),
        classified_metric.false_positive_rate_difference(),
        classified_metric.false_discovery_rate_difference(),
        # NOTE(review): called without `privileged`, so this is not a
        # between-group difference like the three rows above; the rendered
        # output shows the same value in every column. Confirm that an
        # overall PPV is what this row is meant to report.
        classified_metric.positive_predictive_value(),
    ]

# Last expression of the cell: render the completed report.
df_print

Unnamed: 0,GENDER,LANGUAGE,INSURANCE,RELIGION,ETHNICITY,AGE
Error Rate Difference (ERD),0.015614,0.008343,-0.017292,0.021871,0.007677,0.04451
False Positive Rate Difference (FPRD),0.005234,-0.002806,0.008616,0.000565,0.003875,-0.005059
False Discovery Rate Difference (FDRD),0.032453,-0.064643,-0.176344,0.099052,0.020293,-0.127575
Positive Predicted Value (PPV),0.628272,0.628272,0.628272,0.628272,0.628272,0.628272
