In [1]:
import pandas as pd
from aif360.datasets import (
    AdultDataset,
    CompasDataset,
    BankDataset,
    GermanDataset,
    MEPSDataset21,
)

from aif360.metrics import BinaryLabelDatasetMetric


# Row labels of the summary table: one entry per (dataset, protected attribute)
# pair, crossed with the three group conditions reported for each of them.
dataset_names = [
    "adult-sex",
    "adult-race",
    "compas-sex",
    "compas-race",
    "bank-age",
    "german-sex",
    "german-age",
    "meps-race",
]
condition_names = ["None", "Privileged", "Unprivileged"]
indices = [dataset_names, condition_names]

# Empty table (all cells missing) that the cells below fill in row by row.
numbers = pd.DataFrame(
    index=pd.MultiIndex.from_product(indices, names=["dataset", "condition"]),
    columns=["num_instances", "num_positives", "num_negatives"],
)

pip install 'aif360[LawSchoolGPA]'
pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'


# adult-sex

In [2]:
# Adult dataset with "sex" as the protected attribute ("Male" is the
# privileged class); "race" is dropped from the features.
data = AdultDataset(
    features_to_drop=["race"],
    protected_attribute_names=["sex"],
    privileged_classes=[["Male"]],
)
p = [{"sex": 1}]
u = [{"sex": 0}]
metrics = BinaryLabelDatasetMetric(
    dataset=data,
    unprivileged_groups=u,
    privileged_groups=p,
)

# Each column name is also the name of the metric method that produces it.
# `privileged=None` counts the whole dataset, True/False one group only.
for column in ("num_instances", "num_positives", "num_negatives"):
    for condition, privileged in (("None", None), ("Privileged", True), ("Unprivileged", False)):
        numbers.loc[("adult-sex", condition), column] = getattr(metrics, column)(privileged=privileged)



# adult-race

In [3]:
# Adult dataset with "race" as the protected attribute ("White" is the
# privileged class); "sex" is dropped from the features.
data = AdultDataset(
    features_to_drop=["sex"],
    protected_attribute_names=["race"],
    privileged_classes=[["White"]],
)
p = [{"race": 1}]
u = [{"race": 0}]
metrics = BinaryLabelDatasetMetric(
    dataset=data,
    unprivileged_groups=u,
    privileged_groups=p,
)

# Each column name is also the name of the metric method that produces it.
# `privileged=None` counts the whole dataset, True/False one group only.
for column in ("num_instances", "num_positives", "num_negatives"):
    for condition, privileged in (("None", None), ("Privileged", True), ("Unprivileged", False)):
        numbers.loc[("adult-race", condition), column] = getattr(metrics, column)(privileged=privileged)



# compas-sex

In [4]:
# COMPAS dataset with "sex" as the protected attribute ("Female" is the
# privileged class); "race" is dropped from the features.
data = CompasDataset(
    features_to_drop=["race"],
    protected_attribute_names=["sex"],
    privileged_classes=[["Female"]],
)
p = [{"sex": 1}]
u = [{"sex": 0}]
metrics = BinaryLabelDatasetMetric(
    dataset=data,
    unprivileged_groups=u,
    privileged_groups=p,
)

# Each column name is also the name of the metric method that produces it.
# `privileged=None` counts the whole dataset, True/False one group only.
for column in ("num_instances", "num_positives", "num_negatives"):
    for condition, privileged in (("None", None), ("Privileged", True), ("Unprivileged", False)):
        numbers.loc[("compas-sex", condition), column] = getattr(metrics, column)(privileged=privileged)



# compas-race

In [5]:
# COMPAS dataset with "race" as the protected attribute ("Caucasian" is the
# privileged class); "sex" is dropped from the features.
data = CompasDataset(
    features_to_drop=["sex"],
    protected_attribute_names=["race"],
    privileged_classes=[["Caucasian"]],
)
p = [{"race": 1}]
u = [{"race": 0}]
metrics = BinaryLabelDatasetMetric(
    dataset=data,
    unprivileged_groups=u,
    privileged_groups=p,
)

# Each column name is also the name of the metric method that produces it.
# `privileged=None` counts the whole dataset, True/False one group only.
for column in ("num_instances", "num_positives", "num_negatives"):
    for condition, privileged in (("None", None), ("Privileged", True), ("Unprivileged", False)):
        numbers.loc[("compas-race", condition), column] = getattr(metrics, column)(privileged=privileged)



# bank-age

In [6]:
# Bank dataset with "age" as the protected attribute; ages strictly above 25
# form the privileged class.
data = BankDataset(
    privileged_classes=[lambda x: x > 25],
    protected_attribute_names=["age"],
)
p = [{"age": 1}]
u = [{"age": 0}]
metrics = BinaryLabelDatasetMetric(
    dataset=data,
    unprivileged_groups=u,
    privileged_groups=p,
)

# Each column name is also the name of the metric method that produces it.
# `privileged=None` counts the whole dataset, True/False one group only.
for column in ("num_instances", "num_positives", "num_negatives"):
    for condition, privileged in (("None", None), ("Privileged", True), ("Unprivileged", False)):
        numbers.loc[("bank-age", condition), column] = getattr(metrics, column)(privileged=privileged)



# german-sex

In [7]:
# German credit dataset with "sex" as the protected attribute ("male" is the
# privileged class); "age" and "personal_status" are dropped from the features.
data = GermanDataset(
    features_to_drop=["age", "personal_status"],
    protected_attribute_names=["sex"],
    privileged_classes=[["male"]],
)
p = [{"sex": 1}]
u = [{"sex": 0}]
metrics = BinaryLabelDatasetMetric(
    dataset=data,
    unprivileged_groups=u,
    privileged_groups=p,
)

# Each column name is also the name of the metric method that produces it.
# `privileged=None` counts the whole dataset, True/False one group only.
for column in ("num_instances", "num_positives", "num_negatives"):
    for condition, privileged in (("None", None), ("Privileged", True), ("Unprivileged", False)):
        numbers.loc[("german-sex", condition), column] = getattr(metrics, column)(privileged=privileged)

# german-age

In [8]:
# German credit dataset with "age" as the protected attribute; ages strictly
# above 25 form the privileged class. "sex" and "personal_status" are dropped
# from the features.
data = GermanDataset(
    features_to_drop=["sex", "personal_status"],
    protected_attribute_names=["age"],
    privileged_classes=[lambda x: x > 25],
)
p = [{"age": 1}]
u = [{"age": 0}]
metrics = BinaryLabelDatasetMetric(
    dataset=data,
    unprivileged_groups=u,
    privileged_groups=p,
)

# Each column name is also the name of the metric method that produces it.
# `privileged=None` counts the whole dataset, True/False one group only.
for column in ("num_instances", "num_positives", "num_negatives"):
    for condition, privileged in (("None", None), ("Privileged", True), ("Unprivileged", False)):
        numbers.loc[("german-age", condition), column] = getattr(metrics, column)(privileged=privileged)

# meps-race

In [9]:
# MEPS (panel 21) dataset with "RACE" as the protected attribute ("White" is
# the privileged class).
data = MEPSDataset21(
    privileged_classes=[["White"]],
    protected_attribute_names=["RACE"],
)

# Only the first element of the returned pair (the DataFrame) is needed.
meps, _ = data.convert_to_dataframe()

# BinaryLabelDatasetMetric is intentionally not used for this dataset: its
# num_* metrics come back as floats here, so the rows are counted by hand.
# NOTE(review): this is presumably because the metrics sum per-row instance
# weights rather than a library bug — confirm against the AIF360 docs.
everyone = pd.Series(True, index=meps.index)
group_masks = {
    "None": everyone,
    "Privileged": meps.RACE == 1,
    "Unprivileged": meps.RACE == 0,
}
label_masks = {
    "num_instances": everyone,
    "num_positives": meps.UTILIZATION == 1,
    "num_negatives": meps.UTILIZATION == 0,
}

# Plain row counts of every (group, label) combination.
for column, label_mask in label_masks.items():
    for condition, group_mask in group_masks.items():
        numbers.loc[("meps-race", condition), column] = meps[group_mask & label_mask].shape[0]

In [10]:
# Display the assembled summary table (bare last expression -> rich output).
numbers

Unnamed: 0_level_0,Unnamed: 1_level_0,num_instances,num_positives,num_negatives
dataset,condition,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
adult-sex,,45222.0,11208.0,34014.0
adult-sex,Privileged,30527.0,9539.0,20988.0
adult-sex,Unprivileged,14695.0,1669.0,13026.0
adult-race,,45222.0,11208.0,34014.0
adult-race,Privileged,38903.0,10207.0,28696.0
adult-race,Unprivileged,6319.0,1001.0,5318.0
compas-sex,,6167.0,3358.0,2809.0
compas-sex,Privileged,1173.0,760.0,413.0
compas-sex,Unprivileged,4994.0,2598.0,2396.0
compas-race,,6167.0,3358.0,2809.0


In [11]:
# Copy the summary table to the system clipboard so it can be pasted elsewhere.
# NOTE(review): this needs a clipboard backend, so it will likely fail on a
# headless machine — confirm before running non-interactively.
numbers.to_clipboard()