In [75]:
import aif360
import pandas as pd
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric

In [76]:
# Columns of the table that hold the sensitive attributes examined later on.
protected_attributes = [
    'GENDER',
    'LANGUAGE',
    'INSURANCE',
    'RELIGION',
    'ETHNICITY',
    'AGE',
]

# One row per SUBJECT_ID with the prediction (Y_PRED), the ground truth
# (Y_TRUE) and the protected attribute columns listed above.
df = pd.read_csv('data/dataframe_final.csv')

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,SUBJECT_ID,Y_PRED,Y_TRUE,GENDER,LANGUAGE,INSURANCE,RELIGION,ETHNICITY,AGE
0,10011,1,1,0,1,1,1,0,1
1,10026,0,0,0,1,1,0,1,0
2,10030,0,0,1,1,1,1,1,0
3,10042,0,0,1,1,1,1,1,0
4,10094,0,0,1,1,1,1,0,0
...,...,...,...,...,...,...,...,...,...
3231,9970,0,0,1,1,1,1,1,1
3232,9977,0,0,1,1,1,1,1,0
3233,99863,0,0,1,0,1,1,1,1
3234,99883,0,0,1,1,1,1,1,0


In [77]:
# For every protected attribute, compare the predictions (Y_PRED) with the
# ground truth (Y_TRUE) and print the error rate and false omission rate,
# together with their difference and ratio between the unprivileged and the
# privileged group.
for protected_attribute in protected_attributes:
    # prepare the dataset in the format that aif360 requires: only the
    # protected attribute and the ground-truth label are needed. Selecting the
    # two columns keeps their names, so no renaming is necessary; .copy() hands
    # aif360 an independent frame instead of a slice of df.
    attribute_df = df[[protected_attribute, 'Y_TRUE']].copy()

    # create the aif360 dataset with the ground truth
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=attribute_df,
        label_names=['Y_TRUE'],
        protected_attribute_names=[protected_attribute]
    )

    # create the aif360 dataset with the predictions
    dataset_pred = dataset.copy()
    # Store the predictions the same way aif360 stores labels built from a
    # DataFrame: a 2-D numpy column with one row per instance, not a 1-D
    # pandas Series.
    dataset_pred.labels = df['Y_PRED'].to_numpy(dtype=float).reshape(-1, 1)

    # creates an object that computes metrics for binary classification
    # No privileged values were passed to BinaryLabelDataset, so the groups are
    # whatever aif360 derived from the column's values.
    # NOTE(review): an attribute with a single distinct value would presumably
    # leave the unprivileged list empty and make the [0] lookup fail - confirm.
    attr_pos = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_groups = [
        {protected_attribute: dataset_pred.privileged_protected_attributes[attr_pos][0]}
    ]
    unprivileged_groups = [
        {protected_attribute: dataset_pred.unprivileged_protected_attributes[attr_pos][0]}
    ]
    classified_metric = ClassificationMetric(
        dataset,
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )

    # print the metrics
    print("For ", protected_attribute, " :")
    print("Error rate: ", classified_metric.error_rate())
    print("Error rate difference: ", classified_metric.error_rate_difference())
    print("Error rate ratio: ", classified_metric.error_rate_ratio())
    print("False omission rate: ", classified_metric.false_omission_rate())
    print("False omission rate difference: ", classified_metric.false_omission_rate_difference())
    print("False omission rate ratio: ", classified_metric.false_omission_rate_ratio())
    print("---------------------------------------------")

For  GENDER  :
Error rate:  0.10043263288009885
Error rate difference:  0.015614395546898363
Error rate ratio:  1.1670182666534308
False omission rate:  0.08341543513957307
False omission rate difference:  0.012607172328908914
False omission rate ratio:  1.1619830626502237
---------------------------------------------
For  LANGUAGE  :
Error rate:  0.10043263288009885
Error rate difference:  0.008343244151104434
Error rate ratio:  1.0836290836290838
False omission rate:  0.08341543513957307
False omission rate difference:  0.012209092270952848
False omission rate ratio:  1.1480947036502591
---------------------------------------------
For  INSURANCE  :
Error rate:  0.10043263288009885
Error rate difference:  -0.017291666666666594
Error rate ratio:  0.828157349896481
False omission rate:  0.08341543513957307
False omission rate difference:  -0.019093691803840143
False omission rate ratio:  0.7716333845366103
---------------------------------------------
For  RELIGION  :
Error rate:  0.10