In [None]:
# Core numerical stack; the global NumPy RNG is seeded immediately after import.
import numpy as np
np.random.seed(0)
import pandas as pd
from IPython.display import Markdown, display
import sys
# Put the Kaggle input directory on the module search path (at index 1) before
# the aif360 imports below -- presumably that is where the package is provided
# in this environment; TODO confirm.
sys.path.insert(1, "/kaggle/input/aif360/")


# NOTE(review): the star imports hide where names come from. Cells in this
# notebook use BinaryLabelDatasetMetric and Reweighing, which presumably come
# from these two modules -- confirm before replacing with explicit imports.
from aif360.metrics import *
from aif360.algorithms.preprocessing import *
from aif360.datasets import GermanDataset
from aif360.explainers import MetricTextExplainer, MetricJSONExplainer

In [None]:
!pip install inFairness

Collecting inFairness
  Downloading inFairness-0.2.2-py3-none-any.whl (38 kB)
Collecting functorch~=0.1.1
  Downloading functorch-0.1.1-cp37-cp37m-manylinux1_x86_64.whl (21.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.4/21.4 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
[?25hCollecting POT>=0.8.0
  Downloading POT-0.8.2-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (664 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.9/664.9 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
Collecting torch>=1.11.0
  Downloading torch-1.11.0-cp37-cp37m-manylinux1_x86_64.whl (750.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m750.6/750.6 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: torch, POT, functorch, inFairness
  Attempting uninstall: torch
    Found existing installation: torch 1.13.0+cpu
    Uninstalling torch-1.13.0+cpu:
      Successfully uninstal

# Load dataset and specify protected attribute

In [None]:
# Group selectors used by the fairness metrics and by the reweighing step:
# each group is identified by the value of the 'age' attribute.
unprivileged_groups = [dict(age=0)]
privileged_groups = [dict(age=1)]

# Load the German credit data with 'age' as the only protected attribute.
# Ages of 25 and above count as the privileged class; the 'personal_status'
# and 'sex' columns are dropped from the features.
dataset_org = GermanDataset(
    features_to_drop=['personal_status', 'sex'],
    privileged_classes=[lambda age: age >= 25],
    protected_attribute_names=['age'],
)

In [None]:
# Bare expression: the dataset's repr is rendered as this cell's output.
dataset_org

               instance weights features                \
                                                         
                                   month credit_amount   
instance names                                           
0                           1.0      6.0        1169.0   
1                           1.0     48.0        5951.0   
2                           1.0     12.0        2096.0   
3                           1.0     42.0        7882.0   
4                           1.0     24.0        4870.0   
...                         ...      ...           ...   
995                         1.0     12.0        1736.0   
996                         1.0     30.0        3857.0   
997                         1.0     12.0         804.0   
998                         1.0     45.0        1845.0   
999                         1.0     45.0        4576.0   

                                                                \
                                                               

# Computing fairness metrics on the Original dataset

In [None]:
# Dataset-level fairness metrics for the untransformed data, comparing the
# unprivileged group against the privileged group.
metric_otr = BinaryLabelDatasetMetric(
    dataset_org,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

# Two explainers wrap the same metric object: one renders a sentence, the
# other a JSON string. Each metric below is printed in both forms.
text_exp_otr = MetricTextExplainer(metric_otr)
json_exp_otr = MetricJSONExplainer(metric_otr)

display(Markdown("#### Original training dataset"))

display(Markdown("##### The training dataset: Statistical parity difference"))
for explainer in (text_exp_otr, json_exp_otr):
    print(explainer.statistical_parity_difference() + "\n")

display(Markdown("##### The training dataset: Disparate impact"))
for explainer in (text_exp_otr, json_exp_otr):
    print(explainer.disparate_impact() + "\n")

# Average odds difference was left disabled in this notebook.
# NOTE(review): presumably because it needs predictions rather than a bare
# dataset -- confirm before re-enabling.
# print("The training dataset: Average odds difference\n")
# print(text_exp_otr.average_odds_difference() + "\n")
# print(json_exp_otr.average_odds_difference() + "\n")

# The argument 5 is passed straight to the explainers' consistency()
# (presumably the neighbour count -- confirm against the AIF360 docs).
display(Markdown("##### The training dataset: Consistency"))
for explainer in (text_exp_otr, json_exp_otr):
    print(explainer.consistency(5) + "\n")

#### Original training dataset

##### The training dataset: Statistical parity difference

Statistical parity difference (probability of favorable outcome for unprivileged instances - probability of favorable outcome for privileged instances): -0.12854990969960323

{"metric": "Statistical Parity Difference", "message": "Statistical parity difference (probability of favorable outcome for unprivileged instances - probability of favorable outcome for privileged instances): -0.12854990969960323", "numPositivesUnprivileged": 88.0, "numInstancesUnprivileged": 149.0, "numPositivesPrivileged": 612.0, "numInstancesPrivileged": 851.0, "description": "Computed as the difference of the rate of favorable outcomes received by the unprivileged group to the privileged group.", "ideal": " The ideal value of this metric is 0"}



##### The training dataset: Disparate impact

Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.8212484098784929

{"metric": "Disparate Impact", "message": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.8212484098784929", "numPositivePredictionsUnprivileged": 88.0, "numUnprivileged": 149.0, "numPositivePredictionsPrivileged": 612.0, "numPrivileged": 851.0, "description": "Computed as the ratio of rate of favorable outcome for the unprivileged group to that of the privileged group.", "ideal": "The ideal value of this metric is 1.0 A value < 1 implies higher benefit for the privileged group and a value >1 implies a higher benefit for the unprivileged group."}



##### The training dataset: Consistency

Consistency (Zemel, et al. 2013): [0.6814]

{"metric": "Consistency", "message": "Consistency (Zemel, et al. 2013): [0.6814]", "description": "Individual fairness metric from Zemel, Rich, et al. \"Learning fair representations.\", ICML 2013. Measures how similar the labels are for similar instances.", "ideal": "The ideal value of this metric is 1.0"}



# Mitigate bias by transforming the dataset with the Reweighing algorithm

In [None]:
# Reweighing pre-processor configured with the same group definitions that
# the fairness metrics use.
RW = Reweighing(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# Fit on the original dataset, then transform that same dataset; the result
# is kept under a new name so dataset_org stays available for comparison.
dataset_transf = RW.fit(dataset_org).transform(dataset_org)

# Computing fairness metrics on the Transformed dataset

In [None]:
# Dataset-level fairness metrics for the reweighed data.
#
# The metric is built once and kept under `_transf` names. The earlier version
# of this cell built `metric_transf`, never used it, and then rebuilt the same
# metric under the `_otr` names, which silently replaced the original-dataset
# metric and explainers with the transformed ones.
metric_transf = BinaryLabelDatasetMetric(
    dataset_transf,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

# Text and JSON explainers over the same transformed-dataset metric.
text_exp_transf = MetricTextExplainer(metric_transf)
json_exp_transf = MetricJSONExplainer(metric_transf)

display(Markdown("#### Transformed training dataset"))

display(Markdown("##### The training dataset: Statistical parity difference"))
for explainer in (text_exp_transf, json_exp_transf):
    print(explainer.statistical_parity_difference() + "\n")

display(Markdown("##### The training dataset: Disparate impact"))
for explainer in (text_exp_transf, json_exp_transf):
    print(explainer.disparate_impact() + "\n")

# Average odds difference was left disabled in this notebook.
# NOTE(review): presumably because it needs predictions rather than a bare
# dataset -- confirm before re-enabling.
# print("The training dataset: Average odds difference\n")
# print(text_exp_transf.average_odds_difference() + "\n")
# print(json_exp_transf.average_odds_difference() + "\n")

# The argument 5 is passed straight to the explainers' consistency()
# (presumably the neighbour count -- confirm against the AIF360 docs).
display(Markdown("##### The training dataset: Consistency"))
for explainer in (text_exp_transf, json_exp_transf):
    print(explainer.consistency(5) + "\n")

#### Transformed training dataset

##### The training dataset: Statistical parity difference

Statistical parity difference (probability of favorable outcome for unprivileged instances - probability of favorable outcome for privileged instances): 1.1102230246251565e-16

{"metric": "Statistical Parity Difference", "message": "Statistical parity difference (probability of favorable outcome for unprivileged instances - probability of favorable outcome for privileged instances): 1.1102230246251565e-16", "numPositivesUnprivileged": 104.30000000000001, "numInstancesUnprivileged": 149.00000000000003, "numPositivesPrivileged": 595.7, "numInstancesPrivileged": 851.0000000000002, "description": "Computed as the difference of the rate of favorable outcomes received by the unprivileged group to the privileged group.", "ideal": " The ideal value of this metric is 0"}



##### The training dataset: Disparate impact

Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 1.0000000000000002

{"metric": "Disparate Impact", "message": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 1.0000000000000002", "numPositivePredictionsUnprivileged": 104.30000000000001, "numUnprivileged": 149.00000000000003, "numPositivePredictionsPrivileged": 595.7, "numPrivileged": 851.0000000000002, "description": "Computed as the ratio of rate of favorable outcome for the unprivileged group to that of the privileged group.", "ideal": "The ideal value of this metric is 1.0 A value < 1 implies higher benefit for the privileged group and a value >1 implies a higher benefit for the unprivileged group."}



##### The training dataset: Consistency

Consistency (Zemel, et al. 2013): [0.6814]

{"metric": "Consistency", "message": "Consistency (Zemel, et al. 2013): [0.6814]", "description": "Individual fairness metric from Zemel, Rich, et al. \"Learning fair representations.\", ICML 2013. Measures how similar the labels are for similar instances.", "ideal": "The ideal value of this metric is 1.0"}

