In [1]:
import aif360
import pandas as pd
import numpy as np
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric

pip install 'aif360[LawSchoolGPA]'
pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'


In [2]:
# Columns of the prediction files that are treated as protected attributes;
# each one becomes a column of the results table.
protected_attributes = [
    'Age',
    'Gender',
    'Occupation',
]

# Row labels of the results table, one per fairness metric that is reported.
metrics = [
    'Error Rate Difference (ERD)',
    'False Omission Rate Difference (FORD)',
    'Statistical Parity Difference (SPD)',
    'Negative Predicted Value Difference (NPVD)',
]

In [3]:
# Create the (initially empty) results table: one row per metric, one column
# per protected attribute. The metric names are passed as a flat list; wrapping
# them in another list (index=[metrics]) makes pandas build a one-level
# MultiIndex, so rows could no longer be addressed by a plain label.
df_print = pd.DataFrame(columns=protected_attributes, index=metrics)

In [4]:
from sklearn.preprocessing import LabelEncoder

def preprocess_dataframe(df):
    """Collapse free-text occupations into coarse groups and label-encode them.

    Works on ``df`` in place: the 'Occupation' column is first normalised via
    an exact-match lookup, then both 'Occupation' and 'Gender' are overwritten
    with the integer codes produced by ``LabelEncoder``. Occupation values that
    have no entry in the lookup are kept as they are before encoding.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains an 'Occupation' and a 'Gender' column.

    Returns
    -------
    pandas.DataFrame
        The very same ``df`` object that was passed in (callers may ignore the
        return value and keep using their own reference).
    """
    # raw questionnaire answer -> coarse occupation group
    occupation_groups = {
        # Master students
        "MSc Electrical Engineering": "MSc",
        "Master MKE": "MSc",
        "MSc Technische Informatica": "MSc",
        # students whose answer names a field of study
        "Computer Engineering": "student",
        "Physics": "student",
        "Information Science": "student",
        # everything that fits none of the other groups
        "Icelandic": "other",
        "Photography": "other",
        "Technische bestuurskunde": "other",
        # PhD students
        "PhD informatics": "PhD",
        "student (Phd?)": "PhD",
        "PhD candidate": "PhD",
    }
    for raw_answer, group in occupation_groups.items():
        is_match = df['Occupation'] == raw_answer
        df['Occupation'] = np.where(is_match, group, df['Occupation'])

    # One encoder per column; every fit_transform call learns its classes anew.
    df['Occupation'] = LabelEncoder().fit_transform(df['Occupation'])
    df['Gender'] = LabelEncoder().fit_transform(df['Gender'])

    return df

## Models trained without protected attributes

### Generic Model

In [5]:
# Load the generic-model predictions; preprocess_dataframe encodes the frame in
# place and hands the same object back, so it can be bound directly.
df = preprocess_dataframe(pd.read_csv('Output_Files/SWELL_Generic_Model.csv'))
df

Unnamed: 0,PP,y_true,y_pred,Age,Gender,Occupation
0,PP1,0,0,27,1,1
1,PP1,0,1,27,1,1
2,PP1,0,0,27,1,1
3,PP1,0,1,27,1,1
4,PP1,0,1,27,1,1
...,...,...,...,...,...,...
1013,PP16,1,1,27,0,1
1014,PP16,1,1,27,0,1
1015,PP16,1,1,27,0,1
1016,PP16,1,1,27,0,1


In [13]:
# Compute the fairness metrics of the currently loaded predictions (df) once
# per protected attribute and store them as one column of df_print each.
for protected_attribute in protected_attributes:
    # create the aif360 dataset with the ground truth; aif360 only needs the
    # protected attribute and the true label (in this column order)
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=df[[protected_attribute, 'y_true']],
        label_names=['y_true'],
        protected_attribute_names=[protected_attribute]
    )

    # create the aif360 dataset with the predictions: same data, labels
    # replaced by y_pred. The labels are stored as an (n, 1) ndarray, the shape
    # BinaryLabelDataset validates for, instead of a 1-D pandas Series.
    dataset_pred = dataset.copy()
    dataset_pred.labels = df['y_pred'].to_numpy().reshape(-1, 1)

    # creates an object that computes metrics for binary classification
    # NOTE(review): no privileged values are passed to BinaryLabelDataset, so
    # the ones aif360 derived are used, and only element [0] of each array is
    # compared. For attributes with more than two values (Age, Occupation)
    # this looks like it compares just two of them - confirm this is intended.
    index = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_groups = [{protected_attribute: dataset_pred.privileged_protected_attributes[index][0]}]
    unprivileged_groups = [{protected_attribute: dataset_pred.unprivileged_protected_attributes[index][0]}]
    classified_metric = ClassificationMetric(dataset, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    # fill in the metrics (order: ERD, FORD, SPD, NPVD); NPVD has no ready-made
    # difference method here, so it is computed as unprivileged - privileged
    npv_difference = (classified_metric.negative_predictive_value(privileged=False)
                      - classified_metric.negative_predictive_value(privileged=True))
    df_print[protected_attribute] = [classified_metric.error_rate_difference(),
                                     classified_metric.false_omission_rate_difference(),
                                     classified_metric.statistical_parity_difference(),
                                     npv_difference]
df_print

Unnamed: 0,Age,Gender,Occupation
Error Rate Difference (ERD),-0.155497,-0.113362,-0.109225
False Omission Rate Difference (FORD),-0.288138,-0.299172,-0.137787
Statistical Parity Difference (SPD),0.277696,0.376754,0.031159
Negative Predicted Value Difference (NPVD),0.288138,0.299172,0.137787


### User-Based Splitting

In [5]:
# Load the user-based-splitting predictions; preprocess_dataframe encodes the
# frame in place and hands the same object back, so it can be bound directly.
df = preprocess_dataframe(pd.read_csv('Output_Files/SWELL_User_Based_Splitting.csv'))
df

Unnamed: 0,PP,y_true,y_pred,Age,Gender,Occupation
0,PP1,0,1,27,1,3
1,PP1,1,1,27,1,3
2,PP1,1,1,27,1,3
3,PP1,1,1,27,1,3
4,PP1,1,1,27,1,3
...,...,...,...,...,...,...
792,PP9,0,1,28,1,1
793,PP9,1,0,28,1,1
794,PP9,1,1,28,1,1
795,PP9,1,1,28,1,1


In [6]:
# Compute the fairness metrics of the currently loaded predictions (df) once
# per protected attribute and store them as one column of df_print each.
for protected_attribute in protected_attributes:
    # create the aif360 dataset with the ground truth; aif360 only needs the
    # protected attribute and the true label (in this column order)
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=df[[protected_attribute, 'y_true']],
        label_names=['y_true'],
        protected_attribute_names=[protected_attribute]
    )

    # create the aif360 dataset with the predictions: same data, labels
    # replaced by y_pred. The labels are stored as an (n, 1) ndarray, the shape
    # BinaryLabelDataset validates for, instead of a 1-D pandas Series.
    dataset_pred = dataset.copy()
    dataset_pred.labels = df['y_pred'].to_numpy().reshape(-1, 1)

    # creates an object that computes metrics for binary classification
    # NOTE(review): no privileged values are passed to BinaryLabelDataset, so
    # the ones aif360 derived are used, and only element [0] of each array is
    # compared. For attributes with more than two values (Age, Occupation)
    # this looks like it compares just two of them - confirm this is intended.
    index = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_groups = [{protected_attribute: dataset_pred.privileged_protected_attributes[index][0]}]
    unprivileged_groups = [{protected_attribute: dataset_pred.unprivileged_protected_attributes[index][0]}]
    classified_metric = ClassificationMetric(dataset, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    # fill in the metrics (order: ERD, FORD, SPD, NPVD); NPVD has no ready-made
    # difference method here, so it is computed as unprivileged - privileged
    npv_difference = (classified_metric.negative_predictive_value(privileged=False)
                      - classified_metric.negative_predictive_value(privileged=True))
    df_print[protected_attribute] = [classified_metric.error_rate_difference(),
                                     classified_metric.false_omission_rate_difference(),
                                     classified_metric.statistical_parity_difference(),
                                     npv_difference]
df_print

Unnamed: 0,Age,Gender,Occupation
Error Rate Difference (ERD),0.064336,0.01586,0.054119
False Omission Rate Difference (FORD),-0.15873,0.048062,0.007735
Statistical Parity Difference (SPD),0.147319,-0.094538,0.057318
Negative Predicted Value Difference (NPVD),0.15873,-0.048062,-0.007735


### Single Attribute Splitting

In [14]:
# Load the single-attribute-splitting predictions; preprocess_dataframe encodes
# the frame in place and hands the same object back, so it can be bound directly.
df = preprocess_dataframe(pd.read_csv('Output_Files/SWELL_Single_Attribute_Splitting.csv'))
df

Unnamed: 0,id,y_true,y_pred,Age,Gender,Occupation
0,PP13,1,0,26,1,1
1,PP13,0,0,26,1,1
2,PP13,0,0,26,1,1
3,PP13,0,0,26,1,1
4,PP13,0,1,26,1,1
...,...,...,...,...,...,...
1430,PP17,1,0,38,0,1
1431,PP17,1,1,38,0,1
1432,PP17,0,1,38,0,1
1433,PP17,0,1,38,0,1


In [15]:
# Compute the fairness metrics of the currently loaded predictions (df) once
# per protected attribute and store them as one column of df_print each.
for protected_attribute in protected_attributes:
    # create the aif360 dataset with the ground truth; aif360 only needs the
    # protected attribute and the true label (in this column order)
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=df[[protected_attribute, 'y_true']],
        label_names=['y_true'],
        protected_attribute_names=[protected_attribute]
    )

    # create the aif360 dataset with the predictions: same data, labels
    # replaced by y_pred. The labels are stored as an (n, 1) ndarray, the shape
    # BinaryLabelDataset validates for, instead of a 1-D pandas Series.
    dataset_pred = dataset.copy()
    dataset_pred.labels = df['y_pred'].to_numpy().reshape(-1, 1)

    # creates an object that computes metrics for binary classification
    # NOTE(review): no privileged values are passed to BinaryLabelDataset, so
    # the ones aif360 derived are used, and only element [0] of each array is
    # compared. For attributes with more than two values (Age, Occupation)
    # this looks like it compares just two of them - confirm this is intended.
    index = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_groups = [{protected_attribute: dataset_pred.privileged_protected_attributes[index][0]}]
    unprivileged_groups = [{protected_attribute: dataset_pred.unprivileged_protected_attributes[index][0]}]
    classified_metric = ClassificationMetric(dataset, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    # fill in the metrics (order: ERD, FORD, SPD, NPVD); NPVD has no ready-made
    # difference method here, so it is computed as unprivileged - privileged
    npv_difference = (classified_metric.negative_predictive_value(privileged=False)
                      - classified_metric.negative_predictive_value(privileged=True))
    df_print[protected_attribute] = [classified_metric.error_rate_difference(),
                                     classified_metric.false_omission_rate_difference(),
                                     classified_metric.statistical_parity_difference(),
                                     npv_difference]
df_print

Unnamed: 0,Age,Gender,Occupation
Error Rate Difference (ERD),-0.241836,-0.058484,0.008288
False Omission Rate Difference (FORD),-0.7,-0.068848,0.039934
Statistical Parity Difference (SPD),0.020814,-0.016993,-0.265376
Negative Predicted Value Difference (NPVD),0.7,0.068848,-0.039934


### Multi Attribute Splitting

In [16]:
# Load the multi-attribute-splitting predictions; preprocess_dataframe encodes
# the frame in place and hands the same object back, so it can be bound directly.
df = preprocess_dataframe(pd.read_csv('Output_Files/SWELL_Multi_Attribute_Splitting.csv'))
df

Unnamed: 0,id,y_true,y_pred,Age,Gender,Occupation
0,PP24,0,1,22,1,0
1,PP24,0,1,22,1,0
2,PP24,0,1,22,1,0
3,PP24,0,1,22,1,0
4,PP24,0,1,22,1,0
...,...,...,...,...,...,...
1169,PP9,1,0,28,1,1
1170,PP9,0,0,28,1,1
1171,PP9,1,0,28,1,1
1172,PP9,1,0,28,1,1


In [17]:
# Compute the fairness metrics of the currently loaded predictions (df) once
# per protected attribute and store them as one column of df_print each.
for protected_attribute in protected_attributes:
    # create the aif360 dataset with the ground truth; aif360 only needs the
    # protected attribute and the true label (in this column order)
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=df[[protected_attribute, 'y_true']],
        label_names=['y_true'],
        protected_attribute_names=[protected_attribute]
    )

    # create the aif360 dataset with the predictions: same data, labels
    # replaced by y_pred. The labels are stored as an (n, 1) ndarray, the shape
    # BinaryLabelDataset validates for, instead of a 1-D pandas Series.
    dataset_pred = dataset.copy()
    dataset_pred.labels = df['y_pred'].to_numpy().reshape(-1, 1)

    # creates an object that computes metrics for binary classification
    # NOTE(review): no privileged values are passed to BinaryLabelDataset, so
    # the ones aif360 derived are used, and only element [0] of each array is
    # compared. For attributes with more than two values (Age, Occupation)
    # this looks like it compares just two of them - confirm this is intended.
    index = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_groups = [{protected_attribute: dataset_pred.privileged_protected_attributes[index][0]}]
    unprivileged_groups = [{protected_attribute: dataset_pred.unprivileged_protected_attributes[index][0]}]
    classified_metric = ClassificationMetric(dataset, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    # fill in the metrics (order: ERD, FORD, SPD, NPVD); NPVD has no ready-made
    # difference method here, so it is computed as unprivileged - privileged
    npv_difference = (classified_metric.negative_predictive_value(privileged=False)
                      - classified_metric.negative_predictive_value(privileged=True))
    df_print[protected_attribute] = [classified_metric.error_rate_difference(),
                                     classified_metric.false_omission_rate_difference(),
                                     classified_metric.statistical_parity_difference(),
                                     npv_difference]
df_print

Unnamed: 0,Age,Gender,Occupation
Error Rate Difference (ERD),0.718921,0.005697,0.033792
False Omission Rate Difference (FORD),0.595238,-0.014563,0.317622
Statistical Parity Difference (SPD),-0.237175,-0.116691,0.561864
Negative Predicted Value Difference (NPVD),-0.595238,0.014563,-0.317622


### Fuzzy Splitting

In [18]:
# Load the fuzzy-splitting predictions; preprocess_dataframe encodes the frame
# in place and hands the same object back, so it can be bound directly.
df = preprocess_dataframe(pd.read_csv('Output_Files/SWELL_Fuzzy_Splitting.csv'))
df

Unnamed: 0,id,y_true,y_pred,Age,Gender,Occupation
0,PP1,1,0,27,1,1
1,PP1,1,0,27,1,1
2,PP1,0,0,27,1,1
3,PP13,0,0,26,1,0
4,PP13,1,0,26,1,0
...,...,...,...,...,...,...
661,PP13,1,0,26,1,0
662,PP13,0,1,26,1,0
663,PP13,1,0,26,1,0
664,PP13,1,1,26,1,0


In [19]:
# Compute the fairness metrics of the currently loaded predictions (df) once
# per protected attribute and store them as one column of df_print each.
for protected_attribute in protected_attributes:
    # create the aif360 dataset with the ground truth; aif360 only needs the
    # protected attribute and the true label (in this column order)
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=df[[protected_attribute, 'y_true']],
        label_names=['y_true'],
        protected_attribute_names=[protected_attribute]
    )

    # create the aif360 dataset with the predictions: same data, labels
    # replaced by y_pred. The labels are stored as an (n, 1) ndarray, the shape
    # BinaryLabelDataset validates for, instead of a 1-D pandas Series.
    dataset_pred = dataset.copy()
    dataset_pred.labels = df['y_pred'].to_numpy().reshape(-1, 1)

    # creates an object that computes metrics for binary classification
    # NOTE(review): no privileged values are passed to BinaryLabelDataset, so
    # the ones aif360 derived are used, and only element [0] of each array is
    # compared. For attributes with more than two values (Age, Occupation)
    # this looks like it compares just two of them - confirm this is intended.
    index = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_groups = [{protected_attribute: dataset_pred.privileged_protected_attributes[index][0]}]
    unprivileged_groups = [{protected_attribute: dataset_pred.unprivileged_protected_attributes[index][0]}]
    classified_metric = ClassificationMetric(dataset, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    # fill in the metrics (order: ERD, FORD, SPD, NPVD); NPVD has no ready-made
    # difference method here, so it is computed as unprivileged - privileged
    npv_difference = (classified_metric.negative_predictive_value(privileged=False)
                      - classified_metric.negative_predictive_value(privileged=True))
    df_print[protected_attribute] = [classified_metric.error_rate_difference(),
                                     classified_metric.false_omission_rate_difference(),
                                     classified_metric.statistical_parity_difference(),
                                     npv_difference]
df_print

Unnamed: 0,Age,Gender,Occupation
Error Rate Difference (ERD),0.279528,0.040037,0.024031
False Omission Rate Difference (FORD),0.159148,0.042099,0.073864
Statistical Parity Difference (SPD),0.130249,-0.027545,0.064496
Negative Predicted Value Difference (NPVD),-0.159148,-0.042099,-0.073864


## Models trained including protected attributes

### Generic Model

In [29]:
# Load the predictions of the generic model trained with protected attributes;
# preprocess_dataframe encodes the frame in place and hands the same object back.
df = preprocess_dataframe(pd.read_csv('Output_Files/SWELL_Generic_Model_Bias.csv'))
df

Unnamed: 0,PP,y_true,y_pred,Age,Gender,Occupation
0,PP1,0,1,27,1,1
1,PP1,0,1,27,1,1
2,PP1,0,1,27,1,1
3,PP1,0,1,27,1,1
4,PP1,0,1,27,1,1
...,...,...,...,...,...,...
1013,PP16,1,1,27,0,1
1014,PP16,1,1,27,0,1
1015,PP16,1,1,27,0,1
1016,PP16,1,1,27,0,1


In [30]:
# Compute the fairness metrics of the currently loaded predictions (df) once
# per protected attribute and store them as one column of df_print each.
for protected_attribute in protected_attributes:
    # create the aif360 dataset with the ground truth; aif360 only needs the
    # protected attribute and the true label (in this column order)
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=df[[protected_attribute, 'y_true']],
        label_names=['y_true'],
        protected_attribute_names=[protected_attribute]
    )

    # create the aif360 dataset with the predictions: same data, labels
    # replaced by y_pred. The labels are stored as an (n, 1) ndarray, the shape
    # BinaryLabelDataset validates for, instead of a 1-D pandas Series.
    dataset_pred = dataset.copy()
    dataset_pred.labels = df['y_pred'].to_numpy().reshape(-1, 1)

    # creates an object that computes metrics for binary classification
    # NOTE(review): no privileged values are passed to BinaryLabelDataset, so
    # the ones aif360 derived are used, and only element [0] of each array is
    # compared. For attributes with more than two values (Age, Occupation)
    # this looks like it compares just two of them - confirm this is intended.
    index = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_groups = [{protected_attribute: dataset_pred.privileged_protected_attributes[index][0]}]
    unprivileged_groups = [{protected_attribute: dataset_pred.unprivileged_protected_attributes[index][0]}]
    classified_metric = ClassificationMetric(dataset, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    # fill in the metrics (order: ERD, FORD, SPD, NPVD); NPVD has no ready-made
    # difference method here, so it is computed as unprivileged - privileged
    npv_difference = (classified_metric.negative_predictive_value(privileged=False)
                      - classified_metric.negative_predictive_value(privileged=True))
    df_print[protected_attribute] = [classified_metric.error_rate_difference(),
                                     classified_metric.false_omission_rate_difference(),
                                     classified_metric.statistical_parity_difference(),
                                     npv_difference]
df_print

Unnamed: 0,Age,Gender,Occupation
Error Rate Difference (ERD),-0.000549,-0.007252,0.00646
False Omission Rate Difference (FORD),0.0,0.0,0.0
Statistical Parity Difference (SPD),0.0,0.0,0.0
Negative Predicted Value Difference (NPVD),0.0,0.0,0.0


### User-Based Splitting

In [7]:
# Load the user-based-splitting predictions of the model trained with protected
# attributes; preprocess_dataframe encodes the frame in place and returns it.
df = preprocess_dataframe(pd.read_csv('Output_Files/SWELL_User_Based_Splitting_Bias.csv'))
df

Unnamed: 0,PP,y_true,y_pred,Age,Gender,Occupation
0,PP1,1,0,27,1,3
1,PP1,1,1,27,1,3
2,PP1,1,1,27,1,3
3,PP1,1,1,27,1,3
4,PP1,1,0,27,1,3
...,...,...,...,...,...,...
792,PP9,1,0,28,1,1
793,PP9,1,0,28,1,1
794,PP9,1,1,28,1,1
795,PP9,1,1,28,1,1


In [8]:
# Compute the fairness metrics of the currently loaded predictions (df) once
# per protected attribute and store them as one column of df_print each.
for protected_attribute in protected_attributes:
    # create the aif360 dataset with the ground truth; aif360 only needs the
    # protected attribute and the true label (in this column order)
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=df[[protected_attribute, 'y_true']],
        label_names=['y_true'],
        protected_attribute_names=[protected_attribute]
    )

    # create the aif360 dataset with the predictions: same data, labels
    # replaced by y_pred. The labels are stored as an (n, 1) ndarray, the shape
    # BinaryLabelDataset validates for, instead of a 1-D pandas Series.
    dataset_pred = dataset.copy()
    dataset_pred.labels = df['y_pred'].to_numpy().reshape(-1, 1)

    # creates an object that computes metrics for binary classification
    # NOTE(review): no privileged values are passed to BinaryLabelDataset, so
    # the ones aif360 derived are used, and only element [0] of each array is
    # compared. For attributes with more than two values (Age, Occupation)
    # this looks like it compares just two of them - confirm this is intended.
    index = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_groups = [{protected_attribute: dataset_pred.privileged_protected_attributes[index][0]}]
    unprivileged_groups = [{protected_attribute: dataset_pred.unprivileged_protected_attributes[index][0]}]
    classified_metric = ClassificationMetric(dataset, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    # fill in the metrics (order: ERD, FORD, SPD, NPVD); NPVD has no ready-made
    # difference method here, so it is computed as unprivileged - privileged
    npv_difference = (classified_metric.negative_predictive_value(privileged=False)
                      - classified_metric.negative_predictive_value(privileged=True))
    df_print[protected_attribute] = [classified_metric.error_rate_difference(),
                                     classified_metric.false_omission_rate_difference(),
                                     classified_metric.statistical_parity_difference(),
                                     npv_difference]
df_print

Unnamed: 0,Age,Gender,Occupation
Error Rate Difference (ERD),0.048951,0.033838,-0.008931
False Omission Rate Difference (FORD),0.025253,0.045168,0.024527
Statistical Parity Difference (SPD),0.206993,-0.088442,-0.01073
Negative Predicted Value Difference (NPVD),-0.025253,-0.045168,-0.024527


### Single Attribute Splitting

In [22]:
# Load the single-attribute-splitting predictions of the model trained with
# protected attributes; preprocess_dataframe encodes the frame in place and returns it.
df = preprocess_dataframe(pd.read_csv('Output_Files/SWELL_Single_Attribute_Splitting_Bias.csv'))
df

Unnamed: 0,id,y_true,y_pred,Age,Gender,Occupation
0,PP13,1,0,26,1,1
1,PP13,0,0,26,1,1
2,PP13,0,0,26,1,1
3,PP13,0,0,26,1,1
4,PP13,0,1,26,1,1
...,...,...,...,...,...,...
1430,PP17,1,0,38,0,1
1431,PP17,1,1,38,0,1
1432,PP17,0,1,38,0,1
1433,PP17,0,1,38,0,1


In [23]:
# Compute the fairness metrics of the currently loaded predictions (df) once
# per protected attribute and store them as one column of df_print each.
for protected_attribute in protected_attributes:
    # create the aif360 dataset with the ground truth; aif360 only needs the
    # protected attribute and the true label (in this column order)
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=df[[protected_attribute, 'y_true']],
        label_names=['y_true'],
        protected_attribute_names=[protected_attribute]
    )

    # create the aif360 dataset with the predictions: same data, labels
    # replaced by y_pred. The labels are stored as an (n, 1) ndarray, the shape
    # BinaryLabelDataset validates for, instead of a 1-D pandas Series.
    dataset_pred = dataset.copy()
    dataset_pred.labels = df['y_pred'].to_numpy().reshape(-1, 1)

    # creates an object that computes metrics for binary classification
    # NOTE(review): no privileged values are passed to BinaryLabelDataset, so
    # the ones aif360 derived are used, and only element [0] of each array is
    # compared. For attributes with more than two values (Age, Occupation)
    # this looks like it compares just two of them - confirm this is intended.
    index = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_groups = [{protected_attribute: dataset_pred.privileged_protected_attributes[index][0]}]
    unprivileged_groups = [{protected_attribute: dataset_pred.unprivileged_protected_attributes[index][0]}]
    classified_metric = ClassificationMetric(dataset, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    # fill in the metrics (order: ERD, FORD, SPD, NPVD); NPVD has no ready-made
    # difference method here, so it is computed as unprivileged - privileged
    npv_difference = (classified_metric.negative_predictive_value(privileged=False)
                      - classified_metric.negative_predictive_value(privileged=True))
    df_print[protected_attribute] = [classified_metric.error_rate_difference(),
                                     classified_metric.false_omission_rate_difference(),
                                     classified_metric.statistical_parity_difference(),
                                     npv_difference]
df_print

Unnamed: 0,Age,Gender,Occupation
Error Rate Difference (ERD),-0.148324,0.063463,-0.028413
False Omission Rate Difference (FORD),-0.519737,0.061167,-0.011779
Statistical Parity Difference (SPD),0.021303,0.07715,-0.175248
Negative Predicted Value Difference (NPVD),0.519737,-0.061167,0.011779


### Multi Attribute Splitting

In [5]:
# Load the multi-attribute-splitting predictions of the model trained with
# protected attributes; preprocess_dataframe encodes the frame in place and returns it.
df = preprocess_dataframe(pd.read_csv('Output_Files/SWELL_Multi_Attribute_Splitting_Bias.csv'))
df

Unnamed: 0,id,y_true,y_pred,Age,Gender,Occupation
0,PP24,0,1,22,1,0
1,PP24,0,1,22,1,0
2,PP24,0,1,22,1,0
3,PP24,0,1,22,1,0
4,PP24,0,1,22,1,0
...,...,...,...,...,...,...
1169,PP9,1,0,28,1,1
1170,PP9,1,0,28,1,1
1171,PP9,0,1,28,1,1
1172,PP9,1,1,28,1,1


In [7]:
# Compute the fairness metrics of the currently loaded predictions (df) once
# per protected attribute and store them as one column of df_print each.
for protected_attribute in protected_attributes:
    # create the aif360 dataset with the ground truth; aif360 only needs the
    # protected attribute and the true label (in this column order)
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=df[[protected_attribute, 'y_true']],
        label_names=['y_true'],
        protected_attribute_names=[protected_attribute]
    )

    # create the aif360 dataset with the predictions: same data, labels
    # replaced by y_pred. The labels are stored as an (n, 1) ndarray, the shape
    # BinaryLabelDataset validates for, instead of a 1-D pandas Series.
    dataset_pred = dataset.copy()
    dataset_pred.labels = df['y_pred'].to_numpy().reshape(-1, 1)

    # creates an object that computes metrics for binary classification
    # NOTE(review): no privileged values are passed to BinaryLabelDataset, so
    # the ones aif360 derived are used, and only element [0] of each array is
    # compared. For attributes with more than two values (Age, Occupation)
    # this looks like it compares just two of them - confirm this is intended.
    index = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_groups = [{protected_attribute: dataset_pred.privileged_protected_attributes[index][0]}]
    unprivileged_groups = [{protected_attribute: dataset_pred.unprivileged_protected_attributes[index][0]}]
    classified_metric = ClassificationMetric(dataset, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    # fill in the metrics (order: ERD, FORD, SPD, NPVD); NPVD has no ready-made
    # difference method here, so it is computed as unprivileged - privileged
    npv_difference = (classified_metric.negative_predictive_value(privileged=False)
                      - classified_metric.negative_predictive_value(privileged=True))
    df_print[protected_attribute] = [classified_metric.error_rate_difference(),
                                     classified_metric.false_omission_rate_difference(),
                                     classified_metric.statistical_parity_difference(),
                                     npv_difference]
df_print

Unnamed: 0,Age,Gender,Occupation
Error Rate Difference (ERD),0.434264,-0.264734,0.113599
False Omission Rate Difference (FORD),0.0,-0.256947,0.39491
Statistical Parity Difference (SPD),0.424242,-0.156692,0.398623
Negative Predicted Value Difference (NPVD),-1.0,0.256947,-0.39491


### Fuzzy Splitting

In [26]:
# Load the fuzzy-splitting predictions of the model trained with protected
# attributes; preprocess_dataframe encodes the frame in place and returns it.
df = preprocess_dataframe(pd.read_csv('Output_Files/SWELL_Fuzzy_Splitting_Bias.csv'))
df

Unnamed: 0,id,y_true,y_pred,Age,Gender,Occupation
0,PP1,1,0,27,1,1
1,PP1,1,1,27,1,1
2,PP1,0,0,27,1,1
3,PP13,0,0,26,1,0
4,PP13,1,0,26,1,0
...,...,...,...,...,...,...
661,PP13,1,0,26,1,0
662,PP13,0,1,26,1,0
663,PP13,1,0,26,1,0
664,PP13,1,1,26,1,0


In [28]:
# Compute the fairness metrics of the currently loaded predictions (df) once
# per protected attribute and store them as one column of df_print each.
for protected_attribute in protected_attributes:
    # create the aif360 dataset with the ground truth; aif360 only needs the
    # protected attribute and the true label (in this column order)
    dataset = BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=df[[protected_attribute, 'y_true']],
        label_names=['y_true'],
        protected_attribute_names=[protected_attribute]
    )

    # create the aif360 dataset with the predictions: same data, labels
    # replaced by y_pred. The labels are stored as an (n, 1) ndarray, the shape
    # BinaryLabelDataset validates for, instead of a 1-D pandas Series.
    dataset_pred = dataset.copy()
    dataset_pred.labels = df['y_pred'].to_numpy().reshape(-1, 1)

    # creates an object that computes metrics for binary classification
    # NOTE(review): no privileged values are passed to BinaryLabelDataset, so
    # the ones aif360 derived are used, and only element [0] of each array is
    # compared. For attributes with more than two values (Age, Occupation)
    # this looks like it compares just two of them - confirm this is intended.
    index = dataset_pred.protected_attribute_names.index(protected_attribute)
    privileged_groups = [{protected_attribute: dataset_pred.privileged_protected_attributes[index][0]}]
    unprivileged_groups = [{protected_attribute: dataset_pred.unprivileged_protected_attributes[index][0]}]
    classified_metric = ClassificationMetric(dataset, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    # fill in the metrics (order: ERD, FORD, SPD, NPVD); NPVD has no ready-made
    # difference method here, so it is computed as unprivileged - privileged
    npv_difference = (classified_metric.negative_predictive_value(privileged=False)
                      - classified_metric.negative_predictive_value(privileged=True))
    df_print[protected_attribute] = [classified_metric.error_rate_difference(),
                                     classified_metric.false_omission_rate_difference(),
                                     classified_metric.statistical_parity_difference(),
                                     npv_difference]
df_print

Unnamed: 0,Age,Gender,Occupation
Error Rate Difference (ERD),0.196194,0.024012,-0.034729
False Omission Rate Difference (FORD),0.119048,0.047241,0.007476
Statistical Parity Difference (SPD),0.213583,0.144122,-0.048682
Negative Predicted Value Difference (NPVD),-0.119048,-0.047241,-0.007476
