In [1]:
import numpy as np

def get_intersectional_groups(priv_bool1, priv_bool2):
    """Count the members of each intersection of two binary group masks.

    Args:
        priv_bool1: Boolean array-like; True marks membership in the
            privileged group of the first attribute.
        priv_bool2: Boolean array-like; True marks membership in the
            privileged group of the second attribute.

    Returns:
        dict with keys ``"priv_priv"``, ``"priv_dis"``, ``"dis_priv"`` and
        ``"dis_dis"``. In each key the part before the underscore refers to
        the first attribute and the part after it to the second one; the
        value is the number of positions falling into that combination.
    """
    # Everything that is not privileged counts as disadvantaged.
    not_first = np.logical_not(priv_bool1)
    not_second = np.logical_not(priv_bool2)

    def _count(first_mask, second_mask):
        # Number of positions where both masks are True.
        return np.logical_and(first_mask, second_mask).sum()

    return {
        "priv_priv": _count(priv_bool1, priv_bool2),
        "priv_dis": _count(priv_bool1, not_second),
        "dis_priv": _count(not_first, priv_bool2),
        "dis_dis": _count(not_first, not_second),
    }

In [2]:
%pip install prettytable

In [3]:
from prettytable import PrettyTable

def draw_table(dataset, attr1, attr2, counts):
    """Print a 2x2 table of intersectional group counts with marginal totals.

    Columns split on ``attr1`` (priv / dis) and rows split on ``attr2``
    (priv / dis); a trailing column and a trailing row hold the totals.

    Args:
        dataset: Label placed in the top-left header cell.
        attr1: Name of the attribute laid out across the columns.
        attr2: Name of the attribute laid out down the rows.
        counts: Mapping with keys ``"priv_priv"``, ``"priv_dis"``,
            ``"dis_priv"`` and ``"dis_dis"``; the first half of each key is
            the ``attr1`` status and the second half the ``attr2`` status.
    """
    both_priv = counts["priv_priv"]
    attr1_priv_only = counts["priv_dis"]   # attr1 priv, attr2 dis
    attr2_priv_only = counts["dis_priv"]   # attr1 dis,  attr2 priv
    both_dis = counts["dis_dis"]

    rows = [
        [f"{attr2} priv", both_priv, attr2_priv_only, both_priv + attr2_priv_only],
        [f"{attr2} dis", attr1_priv_only, both_dis, attr1_priv_only + both_dis],
        [
            "total",
            both_priv + attr1_priv_only,
            attr2_priv_only + both_dis,
            both_priv + attr1_priv_only + attr2_priv_only + both_dis,
        ],
    ]

    table = PrettyTable()
    table.field_names = [dataset, f"{attr1} priv", f"{attr1} dis", "total"]
    for row in rows:
        table.add_row(row)
    print(table)

# Candidate age thresholds for partitioning the `cardio` dataset

In [4]:
import pandas as pd

In [5]:
# Load the cardio dataset; the CSV uses ";" as its field separator.
cardio_raw = pd.read_csv("data/cardio/cardio.csv", sep=";")

In [6]:
# The gender mask does not depend on the age threshold, so build it once.
# NOTE(review): gender == 2 is treated as the privileged gender group here;
# confirm the encoding against the dataset description.
cardio_gender_priv = cardio_raw["gender"] == 2

for age_in_years in (40, 55, 65):
    # The `age` column is in days, so express the threshold in days as well.
    cardio_age_threshold = 365.25 * age_in_years
    counts = get_intersectional_groups(
        cardio_gender_priv,
        cardio_raw["age"] > cardio_age_threshold,
    )
    draw_table(f"CARDIO (age@{age_in_years})", "gender", "age", counts)

# Intersection of age with sex in the `GermanCredit` dataset

In [7]:
# The data file is read with header=None below, so the column names are
# supplied explicitly here, in file order.
german_columns = ["status", "month", "credit_history", "purpose", "credit_amount", "savings",
                  "employment", "investment_as_income_percentage", "personal_status",
                  "other_debtors", "residence_since", "property", "age",
                  "installment_plans", "housing", "number_of_credits",
                  "skill_level", "people_liable_for", "telephone",
                  "foreign_worker", "credit"]
# Space-separated file; the codes "A65" and "A124" are parsed as missing values.
german_raw = pd.read_csv("data/german/german.data", sep=" ", header=None, names=german_columns,
                         na_values=["A65", "A124"])

In [8]:
# Preview the first rows to sanity-check the parsed columns.
german_raw.head()

In [9]:
# Privileged age group: strictly older than 25.
german_age_priv = german_raw["age"] > 25
# Privileged sex group: these personal_status codes.
# NOTE(review): presumably A91/A93/A94 are the male statuses in the UCI
# attribute description — confirm.
german_sex_priv = german_raw["personal_status"].isin(["A91", "A93", "A94"])

counts = get_intersectional_groups(german_age_priv, german_sex_priv)
draw_table("GERMAN", "age", "sex", counts)

# Intersection of age with `foreign_worker` in the `GermanCredit` dataset

In [10]:
# Privileged age group: strictly older than 25.
german_age_over_25 = german_raw["age"] > 25
# Privileged foreign_worker group: code "A202".
# NOTE(review): presumably A202 means "not a foreign worker" in the UCI
# attribute description — confirm.
german_foreign_worker_priv = german_raw["foreign_worker"] == "A202"

counts = get_intersectional_groups(german_age_over_25, german_foreign_worker_priv)
draw_table("GERMAN", "age", "foreign_worker", counts)