# Computes Fairness/Bias metrics
Uses the AI Fairness 360 (AIF360) toolkit to compute fairness/bias metrics

In [None]:
!pip3 install scikit-learn==0.24.1 aif360==0.3.0  tensorflow==2.4.0 nodejs==0.1.1 ipywidgets==7.6.3 lime==0.2.0.1 wget==3.2 #aix360==0.2.1 

In [None]:
import wget
# Fetch the shared helper module so that `claimed_utils` can be imported by
# the following cells.
claimed_utils_url = (
    'https://raw.githubusercontent.com/'
    'elyra-ai/component-library/master/claimed_utils.py'
)
wget.download(claimed_utils_url)

In [None]:
import os
from claimed_utils import unzip
import pandas as pd
from aif360.datasets import BinaryLabelDataset
import pickle
from aif360.metrics import BinaryLabelDatasetMetric

In [None]:
# @dependency codait_utils.ipynb
# @dependency metadata
# @param target_column Column name containing the target value
# (the real measured value, i.e. the ground truth)
# @param protected_column Protected column (like sex, race, age, ...)
# Note: column arrays not supported at the moment
# @param prediction_column Column name containing the prediction of the model
# @param unpriviledged_group_key value contained in the protected_column
# indicating an unprivileged group (e.g. female)
# @param priviledged_group_key value contained in the protected_column
# indicating a privileged group (e.g. male)
# @param priviledged_group_key value contained in the protected_column
# indicating a privileged group (e.g. male)
# @param metadata csv file name of the data
# @param metric_output file name of the pickled metric object
# @returns pickled metric object

In [None]:
# Component parameters, read from environment variables.
# A parameter without an explicit fallback is None when its variable is unset.

# Columns of the metadata table.
target_column = os.getenv('target_column', 'target')
protected_column = os.getenv('protected_column')
prediction_column = os.getenv('prediction_column', 'prediction')

# Values of `protected_column` identifying the two groups to compare.
unpriviledged_group_key = os.getenv('unpriviledged_group_key')
priviledged_group_key = os.getenv('priviledged_group_key')

# Input CSV file name and output pickle file name.
metadata = os.getenv('metadata', 'metadata.csv')
metric_output = os.getenv('metric_output', 'metric.pickle')

In [None]:
# !jupyter labextension install @jupyter-widgets/jupyterlab-manager

In [None]:
# Unpack the two input archives using the helper imported from claimed_utils.
# NOTE(review): presumably the first argument is the extraction directory
# (here the current working directory) and the second the archive name -
# confirm against claimed_utils.unzip.
unzip('.', 'model.zip')
unzip('.', 'data.zip')

In [None]:
# Load the table from the CSV file named by the `metadata` parameter.
# The following cells expect it to contain the target, prediction and
# protected columns configured above.
df = pd.read_csv(metadata)

In [None]:
# Encode every distinct value of the protected column as an integer code.
# Codes follow the order of first appearance in the column. The lookup table
# is built once up front instead of recomputing `unique()` for every row.
protected_code_by_value = {
    value: code
    for code, value in enumerate(df[protected_column].unique())
}
df["protected_column_index"] = df[protected_column].map(
    protected_code_by_value
)

# Flag rows whose prediction differs from the target:
# 1 = misclassified, 0 = correctly classified.
# A vectorised comparison replaces the row-by-row apply.
df["missclassified"] = (
    df[target_column] != df[prediction_column]
).astype("int64")

In [None]:
def _protected_index_for(group_key):
    """Return the integer code that encodes ``group_key`` in the protected column.

    The code is read from the ``protected_column_index`` value of the first
    row whose ``protected_column`` value matches ``group_key``.

    Group keys are read from environment variables and are therefore strings,
    while ``pd.read_csv`` may have parsed the protected column as a numeric
    dtype. When the direct comparison matches nothing, the comparison is
    retried on the string representation of the column.

    Raises:
        ValueError: if no row matches ``group_key`` either way.
    """
    column = df[protected_column]
    matches = df.loc[column == group_key]
    if matches.empty:
        # Fallback for numerically parsed columns compared with a string key.
        matches = df.loc[column.astype(str) == str(group_key)]
    if matches.empty:
        raise ValueError(
            f"No rows in column {protected_column!r} match "
            f"group key {group_key!r}"
        )
    return matches.iloc[0]['protected_column_index']


# Group definitions in the form passed to AIF360 below:
# a list of {attribute name: encoded value} dicts.
unprivileged_groups = [
    {'protected_column_index': _protected_index_for(unpriviledged_group_key)}
]
privileged_groups = [
    {'protected_column_index': _protected_index_for(priviledged_group_key)}
]

# The label under analysis is `missclassified`, so the favorable outcome is
# a correct prediction (0) and the unfavorable one a misclassification (1).
favorable_label = 0
unfavorable_label = 1  # missclassified == True

# Keep only the encoded protected attribute and the label.
df_for_aif360 = df[["protected_column_index", "missclassified"]]

# Preview a few rows rather than rendering the whole frame.
df_for_aif360.head()

In [None]:
# Wrap the two-column frame in an AIF360 BinaryLabelDataset: the label is
# `missclassified`, the protected attribute is the encoded protected column.
# NOTE(review): `unprivileged_protected_attributes` receives the list of group
# dicts and no `privileged_protected_attributes` is passed - confirm against
# the AIF360 documentation that this is the intended form for this argument.
aif360_dataset = BinaryLabelDataset(
    df=df_for_aif360,
    label_names=['missclassified'],
    protected_attribute_names=['protected_column_index'],
    favorable_label=favorable_label,
    unfavorable_label=unfavorable_label,
    unprivileged_protected_attributes=unprivileged_groups,
)

In [None]:
# Build the metric object that compares the privileged group with the
# unprivileged group on the dataset created above.
metric_orig_train = BinaryLabelDatasetMetric(
    aif360_dataset,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

In [None]:
# Display the statistical parity difference between the two groups.
# NOTE(review): per the AIF360 docs this is the favorable-outcome rate of the
# unprivileged group minus that of the privileged group; with
# favorable_label = 0 on `missclassified` that would be the difference in
# correct-classification rates - confirm.
metric_orig_train.statistical_parity_difference()

In [None]:
# Display the smoothed empirical differential fairness (EDF) of the dataset.
# NOTE(review): see the AIF360 documentation for the exact definition and the
# default smoothing concentration used when no argument is given.
metric_orig_train.smoothed_empirical_differential_fairness()

In [None]:
# Persist the metric object to the file named by `metric_output`.
# The `with` block guarantees the file handle is flushed and closed, which
# the previous inline `open(...)` did not.
with open(metric_output, "wb") as metric_file:
    pickle.dump(metric_orig_train, metric_file)