In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from aif360.datasets import GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.explainers import MetricTextExplainer
from aif360.algorithms.preprocessing import Reweighing
from aif360.algorithms.preprocessing import DisparateImpactRemover
from IPython.display import Markdown, display

  warn_deprecated('vmap', 'torch.vmap')


In [2]:
# Load the German credit dataset with sex and age as the protected attributes.
dataset_orig = GermanDataset(
    protected_attribute_names=['sex', 'age'],
    features_to_drop=['personal_status']        # ignore the personal-status feature
)

# Split the dataset into a training set and a test set at the 0.7 mark.
# The shuffle is seeded so the partition -- and therefore every fairness
# metric computed from it -- is reproducible after a kernel restart.
SPLIT_SEED = 42
dataset_orig_train, dataset_orig_test = dataset_orig.split(
    [0.7], shuffle=True, seed=SPLIT_SEED
)

# Define the privileged and unprivileged groups for each protected attribute.
privileged_groups_sex = [{'sex': 1}]
unprivileged_groups_sex = [{'sex': 0}]
unprivileged_groups_age = [{'age': 0}]
privileged_groups_age = [{'age': 1}]

# Compute baseline fairness metrics on the untouched training split with
# BinaryLabelDatasetMetric, once per protected attribute.
metric_orig_train_age = BinaryLabelDatasetMetric(dataset_orig_train,
                                             unprivileged_groups=unprivileged_groups_age,
                                             privileged_groups=privileged_groups_age)
metric_orig_train_sex = BinaryLabelDatasetMetric(dataset_orig_train,
                                             unprivileged_groups=unprivileged_groups_sex,
                                             privileged_groups=privileged_groups_sex)
display(Markdown("#### Original Training Dataset"))
print("Disparate Impact - Sex: ", metric_orig_train_sex.disparate_impact())
print("Statistical Parity Difference - Sex: ", metric_orig_train_sex.statistical_parity_difference())
print("Disparate Impact - Age: ", metric_orig_train_age.disparate_impact())
print("Statistical Parity Difference - Age: ", metric_orig_train_age.statistical_parity_difference())

#### Original Training Dataset

Disparate Impact - Sex:  0.9007460182245284
Statistical Parity Difference - Sex:  -0.07246786535489458
Disparate Impact - Age:  0.78020666371108
Statistical Parity Difference - Age:  -0.1626785804432329


In [3]:
# Build one DisparateImpactRemover per protected attribute (full repair level)
# and apply each to the original training split.
di_repairer_sex = DisparateImpactRemover(repair_level=1.0, sensitive_attribute='sex')
di_repairer_age = DisparateImpactRemover(repair_level=1.0, sensitive_attribute='age')

dataset_transf_train_di_sex = di_repairer_sex.fit_transform(dataset_orig_train)
dataset_transf_train_di_age = di_repairer_age.fit_transform(dataset_orig_train)

# Metric naming: metric_transf_train_di_<repaired-by><measured-on>,
# where 'a' = age and 's' = sex.
metric_transf_train_di_aa = BinaryLabelDatasetMetric(dataset_transf_train_di_age,
                                                  unprivileged_groups=unprivileged_groups_age,
                                                  privileged_groups=privileged_groups_age)
metric_transf_train_di_as = BinaryLabelDatasetMetric(dataset_transf_train_di_age,
                                                  unprivileged_groups=unprivileged_groups_sex,
                                                  privileged_groups=privileged_groups_sex)
metric_transf_train_di_sa = BinaryLabelDatasetMetric(dataset_transf_train_di_sex,
                                                  unprivileged_groups=unprivileged_groups_age,
                                                  privileged_groups=privileged_groups_age)
# Fixed: the "sex on sex" metric must read the sex-repaired dataset; it was
# previously built from the age-repaired one, duplicating the "age on sex" metric.
metric_transf_train_di_ss = BinaryLabelDatasetMetric(dataset_transf_train_di_sex,
                                                  unprivileged_groups=unprivileged_groups_sex,
                                                  privileged_groups=privileged_groups_sex)

# NOTE(review): the recorded output for this cell matches the unrepaired
# baseline exactly. DisparateImpactRemover is documented as editing feature
# values, so label-based dataset metrics are presumably unaffected by it --
# confirm whether a classifier-based metric was intended here instead.
display(Markdown("#### DI repaired dataset"))
print("DI repaired by age on age:")
print("Disparate Impact: ", metric_transf_train_di_aa.disparate_impact())
print("Statistical Parity Difference: ", metric_transf_train_di_aa.statistical_parity_difference())
print("DI repaired by age on sex:")
print("Disparate Impact: ", metric_transf_train_di_as.disparate_impact())
print("Statistical Parity Difference: ", metric_transf_train_di_as.statistical_parity_difference())
print("DI repaired by sex on age:")
print("Disparate Impact: ", metric_transf_train_di_sa.disparate_impact())
print("Statistical Parity Difference: ", metric_transf_train_di_sa.statistical_parity_difference())
print("DI repaired by sex on sex:")
print("Disparate Impact: ", metric_transf_train_di_ss.disparate_impact())
print("Statistical Parity Difference: ", metric_transf_train_di_ss.statistical_parity_difference())

#### DI repaired dataset

DI repaired by age on age:
Disparate Impact:  0.78020666371108
Statistical Parity Difference:  -0.1626785804432329
DI repaired by age on sex:
Disparate Impact:  0.9007460182245284
Statistical Parity Difference:  -0.07246786535489458
DI repaired by sex on age:
Disparate Impact:  0.78020666371108
Statistical Parity Difference:  -0.1626785804432329
DI repaired by sex on sex:
Disparate Impact:  0.9007460182245284
Statistical Parity Difference:  -0.07246786535489458


In [4]:
# Group definitions bundled as keyword arguments so each pairing is spelled
# out once and reused for both the reweighers and the metrics.
_age_groups = dict(unprivileged_groups=unprivileged_groups_age,
                   privileged_groups=privileged_groups_age)
_sex_groups = dict(unprivileged_groups=unprivileged_groups_sex,
                   privileged_groups=privileged_groups_sex)

# Reweigh the training split once per protected attribute (age first, then sex).
RW_age = Reweighing(**_age_groups)
dataset_transf_train_age = RW_age.fit_transform(dataset_orig_train)

RW_sex = Reweighing(**_sex_groups)
dataset_transf_train_sex = RW_sex.fit_transform(dataset_orig_train)

# Metric naming: metric_transf_train_<reweighed-by><measured-on>,
# where 'a' = age and 's' = sex.
metric_transf_train_aa = BinaryLabelDatasetMetric(dataset_transf_train_age, **_age_groups)
metric_transf_train_as = BinaryLabelDatasetMetric(dataset_transf_train_age, **_sex_groups)
metric_transf_train_sa = BinaryLabelDatasetMetric(dataset_transf_train_sex, **_age_groups)
metric_transf_train_ss = BinaryLabelDatasetMetric(dataset_transf_train_sex, **_sex_groups)

display(Markdown("#### Reweighed dataset"))

# Report every (reweighed-by, measured-on) combination in a fixed order.
_reweighing_report = (
    ("Reweighted by age on age:", metric_transf_train_aa),
    ("Reweighted by age on sex:", metric_transf_train_as),
    ("Reweighted by sex on age:", metric_transf_train_sa),
    ("Reweighted by sex on sex:", metric_transf_train_ss),
)
for _heading, _metric in _reweighing_report:
    print(_heading)
    print("Disparate Impact: ", _metric.disparate_impact())
    print("Statistical Parity Difference: ", _metric.statistical_parity_difference())

#### Reweighed dataset

Reweighted by age on age:
Disparate Impact:  1.0000000000000002
Statistical Parity Difference:  1.1102230246251565e-16
Reweighted by age on sex:
Disparate Impact:  0.9447281477881262
Statistical Parity Difference:  -0.03978543479618435
Reweighted by sex on age:
Disparate Impact:  0.8074539422665871
Statistical Parity Difference:  -0.1416818480272991
Reweighted by sex on sex:
Disparate Impact:  1.0000000000000002
Statistical Parity Difference:  1.1102230246251565e-16
