# Adult Dataset, aka Census income

https://archive.ics.uci.edu/ml/datasets/adult

In [28]:
import os

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from matplotlib.ticker import PercentFormatter
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import KNNImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import ShuffleSplit
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier

plt.style.use('default')

In [2]:
features = [
    'age',
    'workclass',
    'fnlwgt',       # removed
    'education',    # sorted later on
    'education-num',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
]
dataset = pd.read_csv('data/adult.data', sep=', ', na_values=['?', ' ?'],
                      header=0, names=features + ['income'])
dataset.drop(columns=['fnlwgt'], inplace=True)
features.remove('fnlwgt')

plots_dir = os.path.join('out', 'plots', 'case_study', 'census_income')
os.makedirs(plots_dir, exist_ok=True)

dataset.head()

  dataset = pd.read_csv('data/adult.data', sep=', ', na_values=['?', ' ?'],


Unnamed: 0,age,workclass,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,50,Self-emp-not-inc,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
1,38,Private,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
2,53,Private,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
3,28,Private,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
4,37,Private,Masters,14,Married-civ-spouse,Exec-managerial,Wife,White,Female,0,0,40,United-States,<=50K


## Group and class sizes

groups: sex
class: income (1: >=50k)

In [3]:
# decide on the split size

print(
    dataset.shape,
    dataset[dataset['sex'] == 'Female'].shape[0],
    dataset[dataset['sex'] != 'Female'].shape[0],
    dataset[dataset['income'] != '>50K'].shape[0],
    dataset[dataset['income'] == '>50K'].shape[0],
    # dataset[(dataset['income'] == '>50K') & (dataset['sex'] == 'Female')].shape,
    sep='\n'
)

print('\n\n')

# check set intersections
conditions_g = [
    (dataset['sex'] == 'Female', 'F'),
    (dataset['sex'] == 'Male', 'M'),
]

conditions_i = [
    (dataset['income'] == '<=50K', '0'),
    (dataset['income'] == '>50K', '1'),
]

for c_g, g in conditions_g:
    for c_i, i in conditions_i:
        print(f'{g} {i}: {dataset[c_g & c_i].shape[0]}')

(32560, 14)
10771
21789
24719
7841



F 0: 9592
F 1: 1179
M 0: 15127
M 1: 6662


## different approaches for creating subsets

### split approach 1: "ladies first"
1. sample the needed number of women
1. calculate the number of rows of each class that we got in this way, and replace if there's too many of one class
1. calculate how many men of each class we need to get the desired ratio
1. sample the men (separately for each class) and join all the selected samples

In [4]:
# [majority, minority]
sex = ['Male', 'Female']
income = ['<=50K', '>50K']

def split_03(dset, n, gr, ir):
    cls_n = [int(n * (1 - ir)), int(n * ir)]
    sex_n = [int(n * (1 - gr)), int(n * gr)]

    # decide ratio of poor/rich women, based on the ratio from original data
    og0 = dset[(dset['sex'] == sex[1]) & (dset['income'] == income[0])].shape[0] / dset[dset['sex'] == sex[1]].shape[0]
    # og1 = dset[(dset['sex'] == sex[1]) & (dset['income'] == income[1])].shape[0] / dset.shape[0]
    print(og0)

    f0 = round(sex_n[1] * og0)
    f1 = sex_n[1] - f0
    print('f: ', f0, f1)

    # sample women
    df = pd.concat([
        dset[(dset['sex'] == sex[1]) & (dset['income'] == income[0])].sample(n=f0, random_state=2137),
        dset[(dset['sex'] == sex[1]) & (dset['income'] == income[1])].sample(n=f1, random_state=2137),
    ])
    print('f: ', f0, f1)
    assert f0 + f1 == sex_n[1]

    # sample men
    m0 = cls_n[0] - f0
    m1 = cls_n[1] - f1
    print("m: ", m0, m1)
    assert m0 + m1 == sex_n[0]

    df_m0 = dset[
        (dset['sex'] == sex[0]) & (dset['income'] == income[0])
        ].sample(n=m0, random_state=2137)
    df_m1 = dset[
        (dset['sex'] == sex[0]) & (dset['income'] == income[1])
        ].sample(n=m1, random_state=2137)

    # print(df_m0.shape, df_m1.shape)
    df = pd.concat([df, df_m0, df_m1])

    return df

### split approach 2: set ratios in advance

1. decide on number of f0, f1, m0, m1
2. sample accordingly

In [5]:
n = dataset.shape[0]
gr = ir = .5

# [majority, minority]
sex = ['Male', 'Female']
income = ['<=50K', '>50K']


def split_02(df, n, gr, ir, group_swap=False, cls_swap=False):
    """
    :param df: original data
    :param n: final size of the sample
    :param cls_swap: if you want to swap the groups (i.e. get `gr` men and not women)
    :param group_swap: if want to swap the decision classes (i.e. get `ir`of the poor and not the rich)
    :return: the sample
    """
    data_len = df.shape[0]
    # set ratios of sex and income
    f0 = round(n * gr * (1 - ir))
    f1 = round(n * gr * ir)
    m0 = round(n * (1 - gr) * (1 - ir))
    m1 = round(n * (1 - gr) * ir)

    if group_swap:
        f0, f1, m0, m1 = m0, m1, f0, f1
    if cls_swap:
        f0, f1, m0, m1 = f1, f0, m1, m0
    print(f0, f1, m0, m1)

    sample = pd.concat([
        df[(df['sex'] == sex[1]) & (df['income'] == income[0])].sample(n=int(f0), random_state=2137),
        df[(df['sex'] == sex[1]) & (df['income'] == income[1])].sample(n=int(f1), random_state=2137),
        df[(df['sex'] == sex[0]) & (df['income'] == income[0])].sample(n=int(m0), random_state=2137),
        df[(df['sex'] == sex[0]) & (df['income'] == income[1])].sample(n=int(m1), random_state=2137),
    ]).reset_index(drop=True)
    return sample


In [6]:
def split_04(dset, n, gr, ir):
    cls_n = [int(n * (1 - ir)), int(n * ir)]
    sex_n = [int(n * (1 - gr)), int(n * gr)]
    print('cls: ', cls_n)
    print('sex: ', sex_n)

    f1_max = min(cls_n[0], sex_n[1], dset[(dset['sex'] == sex[1]) & (dset['income'] == income[1])].shape[0])
    f1 = round(min(
        np.random.normal(f1_max / 2, f1_max / 5),
        f1_max,
        cls_n[1],
        sex_n[1],
    ) * .9)
    f0 = sex_n[1] - f1
    m0 = cls_n[0] - f0
    m1 = sex_n[0] - m0

    print(f0, f1, m0, m1)

    sample = pd.concat([
        dset[(dset['sex'] == sex[1]) & (dset['income'] == income[0])].sample(n=int(f0), random_state=2137),
        dset[(dset['sex'] == sex[1]) & (dset['income'] == income[1])].sample(n=int(f1), random_state=2137),
        dset[(dset['sex'] == sex[0]) & (dset['income'] == income[0])].sample(n=int(m0), random_state=2137),
        dset[(dset['sex'] == sex[0]) & (dset['income'] == income[1])].sample(n=int(m1), random_state=2137),
    ]).reset_index(drop=True)
    return sample

## preprocessing and helpers for classification/evaluation

In [7]:
categorical_fs = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
]

education_order = [
    'Preschool',
    '1st-4th',
    '5th-6th',
    '7th-8th',
    '9th',
    '10th',
    '11th',
    '12th',
    'HS-grad',
    'Some-college',
    'Assoc-acdm',
    'Assoc-voc',
    'Bachelors',
    'Masters',
    'Prof-school',
    'Doctorate',
]

# get the columns in the correct order
cols = np.concatenate([dataset.columns.copy(deep=True).drop(categorical_fs + ['income']), categorical_fs])
cols_d = {c: i for i, c in enumerate(cols)}

classifiers = [
    [RandomForestClassifier, {'random_state': 2137}],
    [DecisionTreeClassifier, {'random_state': 2137}],
    [GaussianNB, {}],
    [LogisticRegression, {}],
    [KNeighborsClassifier, {}],
]

In [8]:
def preprocess(dataset):
    X_all = dataset[features]
    y_all = LabelEncoder().fit_transform(dataset['income'])

    # encode categorical features
    data_encoder = OrdinalEncoder().fit(X_all[categorical_fs])
    X_categorical = data_encoder.transform(X_all[categorical_fs])

    edu_encoder = OrdinalEncoder(categories=[education_order]).fit(X_all[['education']])
    X_categorical[:, categorical_fs.index('education')] = edu_encoder.transform(X_all[['education']])[0]

    # finally, the features
    X_all = np.concatenate([X_all.drop(categorical_fs, axis=1), X_categorical], axis=1)
    # X_all[:5]

    return X_all, y_all

In [9]:
def calculate_fairness(clf, X, y, protected, group=1, cls=1):
    """
    :param protected: id/name of the protected attribute column
    :param group: id of the protected group
    :param cls: id of the positive class
    :return: dictionary of fairness metrics for the given classifier's results
    """
    y_pred = clf.predict(X)
    # columns: protected_value, y_true, y_pred
    labelled = np.concatenate([
        X[:, protected].reshape(-1, 1),
        y.reshape(-1, 1),
        y_pred.reshape(-1, 1)
    ], axis=1)

    # calculate confusion matrices
    cms = [None, None]

    # y true/pred for the protected group
    ys = labelled[labelled[:, 0] == group]
    cms[0] = confusion_matrix(ys[:, 1], ys[:, 2], labels=[0, 1])
    # ... and for the other (unprotected) group
    ys = labelled[labelled[:, 0] != group]
    cms[1] = confusion_matrix(ys[:, 1], ys[:, 2], labels=[0, 1])

    # print(*cms, sep='\n')

    # mj = majority - unprotected
    # mr = minority - protected
    mr, mj = group, 1 - group
    pos, neg = cls, 1 - cls

    # labels for the confusion matrix items
    tn = (neg, neg)
    fp = (neg, pos)
    fn = (pos, neg)
    tp = (pos, pos)

    # calculate fairness metrics
    fairness = dict()

    # Accuracy Equality Difference
    fairness['Accuracy Equality Difference'] = \
        (cms[mj].item(tp) + cms[mj].item(tn)) / cms[mj].sum() - \
        (cms[mr].item(tp) + cms[mr].item(tn)) / cms[mr].sum()

    # Equal Opportunity Difference: j_tpr - i_tpr
    try:
        fairness['Equal Opportunity Difference'] = \
            cms[mj].item(tp) / (cms[mj].item(tp) + cms[mj].item(fn)) - \
            cms[mr].item(tp) / (cms[mr].item(tp) + cms[mr].item(fn))
    except ZeroDivisionError:
        fairness['Equal Opportunity Difference'] = np.nan

    # Predictive Equality Difference: j_fpr - i_fpr
    try:
        fairness['Predictive Equality Difference'] = \
            cms[mj].item(fp) / (cms[mj].item(fp) + cms[mj].item(tn)) - \
            cms[mr].item(fp) / (cms[mr].item(fp) + cms[mr].item(tn))
    except ZeroDivisionError:
        fairness['Predictive Equality Difference'] = np.nan

    # Positive Predictive Parity Difference: j_ppv - i_ppv
    try:
        fairness['Positive Predictive Parity Difference'] = \
            cms[mj].item(tp) / (cms[mj].item(tp) + cms[mj].item(fp)) - \
            cms[mr].item(tp) / (cms[mr].item(tp) + cms[mr].item(fp))
    except ZeroDivisionError:
        fairness['Positive Predictive Parity Difference'] = np.nan

    # Negative Predictive Parity Difference: j_npv - i_npv
    try:
        fairness['Negative Predictive Parity Difference'] = \
            cms[mj].item(tn) / (cms[mj].item(tn) + cms[mj].item(fn)) - \
            cms[mr].item(tn) / (cms[mr].item(tn) + cms[mr].item(fn))
    except ZeroDivisionError:
        fairness['Negative Predictive Parity Difference'] = np.nan

    fairness['Statistical Parity Difference'] = \
        (cms[mj].item(tp) + cms[mj].item(fp)) / cms[mj].sum() - \
        (cms[mr].item(tp) + cms[mr].item(fp)) / cms[mr].sum()

    return fairness

In [21]:
# 'vibrant' scheme from https://personal.sron.nl/~pault/
colours = ['#EE7733', '#0077BB', '#33BBEE', '#EE3377', '#CC3311', '#009988']

# group by metric
def plot_fairness_gb_metric(fairness, gr, ir):
    fig, ax = plt.subplots(figsize=(12, 8))

    ax.set_title(f'Fairness metrics for different classifiers; GR = {gr}, IR = {ir}')
    ax.set_ylabel('Fairness metric value')

    metrics = ['\n'.join([' '.join(f.split(" ")[:2]), ' '.join(f.split(" ")[2:])])
               for f in fairness[list(fairness.keys())[0]].keys()]
    xticks = np.arange(len(metrics))
    width = 1. / (len(fairness.keys()) + 2)

    for i, (clf, f) in enumerate(fairness.items()):
        ax.bar(xticks + i * width, f.values(), width, label=clf.replace('Classifier', ''), color=colours[i])

    ax.set_xticks(xticks + width * len(fairness.keys()) / 2, metrics, rotation=45)
    ax.legend(ncols=1)
    plt.tight_layout()
    return fig

def plot_fairness_gb_clf(fairness, gr, ir):
    fig, ax = plt.subplots(figsize=(12, 8))

    ax.set_title(f'Fairness metrics for different classifiers; GR = {gr}, IR = {ir}')
    ax.set_ylabel('Fairness metric value')

    metrics = fairness[list(fairness.keys())[0]].keys()
    classifiers = [c.replace('Classifier', '') for c in fairness.keys()]
    xticks = np.arange(len(classifiers))
    width = 1. / (len(metrics) + 2)
    shift = np.arange(len(metrics)) * width

    for i, (clf, f) in enumerate(fairness.items()):
        ax.bar(i + shift, f.values(), width, color=colours[:len(metrics)])
    # ax.bar(xticks + shift, fairness, width)
    ax.set_xticks(xticks + width * len(metrics) / 2, classifiers)
    ax.legend(handles=[mpatches.Patch(color=c, label=m) for c, m in zip(colours, metrics)], ncol=1)
    plt.tight_layout()

    return fig

### Line graph: `fairness(ratio)`

In [12]:
# setup

# cv = RepeatedKFold(n_splits=2, n_repeats=15, random_state=2137)    # 30 splits
holdout = ShuffleSplit(n_splits=50, test_size=.33, random_state=2137)
SAMPLE_SIZE = 1100
split = split_02

rs = [.01, .02, .05] + [round(x, 2) for x in np.arange(.1, 1., .1)] + [.95, .98, .99]
# rs = [.02, .05] + [round(x, 2) for x in np.arange(.1, 1., .1)] + [.95, .98]
ratios = [[.5, ir] for ir in rs] + [[gr, .5] for gr in rs]

In [13]:
# calculations
# fairness_results_cv = {}
results = []

for gr, ir in ratios:
# for gr, ir in [[.5, .9], [.5, .01]]:
    print(f'GR: {gr}, IR: {ir}')

    # because we don't have enough rich women in the dataset
    # swap_gr = True if gr > .5 else False
    # swap_ir = True if ir > .5 else False
    swap_gr, swap_ir = False, False

    df = split(dataset, SAMPLE_SIZE, gr, ir, group_swap=swap_gr, cls_swap=swap_ir)
    X_all, y_all = preprocess(df)

    for i, (traini, testi) in enumerate(holdout.split(X_all)):
        X_train, X_test = X_all[traini], X_all[testi]
        y_train, y_test = y_all[traini], y_all[testi]

        for clf, kwargs in classifiers:
            pipe = make_pipeline(
                KNNImputer(),
                StandardScaler(),
                clf(**kwargs)
            ).fit(X_train, y_train)
            f = calculate_fairness(pipe, X_test, y_test, cols_d['sex'], group=1-int(swap_gr), cls=1-int(swap_ir))

            for metric, value in f.items():
                results.append([gr, ir, clf.__name__.replace('Classifier', ''), metric, value])

fairness_results_cv = pd.DataFrame(results, columns=['gr', 'ir', 'clf', 'metric', 'value'])

GR: 0.5, IR: 0.01
544 6 544 6
GR: 0.5, IR: 0.02
539 11 539 11
GR: 0.5, IR: 0.05
522 28 522 28
GR: 0.5, IR: 0.1
495 55 495 55
GR: 0.5, IR: 0.2
440 110 440 110
GR: 0.5, IR: 0.3
385 165 385 165
GR: 0.5, IR: 0.4
330 220 330 220
GR: 0.5, IR: 0.5
275 275 275 275
GR: 0.5, IR: 0.6
220 330 220 330
GR: 0.5, IR: 0.7
165 385 165 385
GR: 0.5, IR: 0.8
110 440 110 440
GR: 0.5, IR: 0.9
55 495 55 495
GR: 0.5, IR: 0.95
28 522 28 522
GR: 0.5, IR: 0.98
11 539 11 539
GR: 0.5, IR: 0.99
6 544 6 544
GR: 0.01, IR: 0.5
6 6 544 544
GR: 0.02, IR: 0.5
11 11 539 539
GR: 0.05, IR: 0.5
28 28 522 522
GR: 0.1, IR: 0.5
55 55 495 495
GR: 0.2, IR: 0.5
110 110 440 440
GR: 0.3, IR: 0.5
165 165 385 385
GR: 0.4, IR: 0.5
220 220 330 330
GR: 0.5, IR: 0.5
275 275 275 275
GR: 0.6, IR: 0.5
330 330 220 220
GR: 0.7, IR: 0.5
385 385 165 165
GR: 0.8, IR: 0.5
440 440 110 110
GR: 0.9, IR: 0.5
495 495 55 55
GR: 0.95, IR: 0.5
522 522 28 28
GR: 0.98, IR: 0.5
539 539 11 11
GR: 0.99, IR: 0.5
544 544 6 6


  (cms[mj].item(tp) + cms[mj].item(tn)) / cms[mj].sum() - \
  (cms[mj].item(tp) + cms[mj].item(fp)) / cms[mj].sum() - \
  (cms[mj].item(tp) + cms[mj].item(tn)) / cms[mj].sum() - \
  (cms[mj].item(tp) + cms[mj].item(fp)) / cms[mj].sum() - \
  (cms[mj].item(tp) + cms[mj].item(tn)) / cms[mj].sum() - \
  (cms[mj].item(tp) + cms[mj].item(fp)) / cms[mj].sum() - \
  (cms[mj].item(tp) + cms[mj].item(tn)) / cms[mj].sum() - \
  (cms[mj].item(tp) + cms[mj].item(fp)) / cms[mj].sum() - \
  (cms[mj].item(tp) + cms[mj].item(tn)) / cms[mj].sum() - \
  (cms[mj].item(tp) + cms[mj].item(fp)) / cms[mj].sum() - \


In [14]:
fairness_results_cv.head()

Unnamed: 0,gr,ir,clf,metric,value
0,0.5,0.01,RandomForest,Accuracy Equality Difference,-0.011846
1,0.5,0.01,RandomForest,Equal Opportunity Difference,0.0
2,0.5,0.01,RandomForest,Predictive Equality Difference,0.0
3,0.5,0.01,RandomForest,Positive Predictive Parity Difference,
4,0.5,0.01,RandomForest,Negative Predictive Parity Difference,-0.011846


In [29]:
def plot_line(fairness: pd.DataFrame, metric: str, ratio_type: str, fill='std', ylim=(-.5, .5)):
    fig, ax = plt.subplots(figsize=(12, 6))

    # ax.set_title(f'Value of *{metric}* for different {ratio_type.upper()}')
    ax.set_ylabel(metric)
    ax.set_xlabel(ratio_type.upper())

    metrics = fairness['metric'].unique()
    clfs = fairness['clf'].unique()
    ratios = sorted(fairness[ratio_type].unique())
    other_ratio = 'gr' if ratio_type == 'ir' else 'ir'
    mean, stdev, err = {}, {}, {}

    for r in ratios:
        for clf in clfs:
            subset = fairness[
                (fairness[ratio_type] == r) &
                (fairness['clf'] == clf) &
                (fairness[other_ratio] == .5) &
                (fairness['metric'] == metric) &
                fairness['value'].notna()
            ]
            mean[(r, clf)] = subset['value'].mean(skipna=True)
            stdev[(r, clf)] = subset['value'].std(skipna=True)
            err[(r, clf)] = scipy.stats.sem(subset['value'], nan_policy='omit')

    ax.axhline(0, color='black', linestyle='--', alpha=.3)

    for i, clf in enumerate(clfs):
        ax.plot(ratios, [mean[(r, clf)] for r in ratios], label=clf, color=colours[i], marker='o')
        if fill == 'err':
            ax.fill_between(ratios,
                            [mean[(r, clf)] - err[(r, clf)] for r in ratios],
                            [mean[(r, clf)] + err[(r, clf)] for r in ratios],
                            alpha=.15, color=colours[i])
        elif fill == 'std':
            ax.fill_between(ratios,
                            [mean[(r, clf)] - stdev[(r, clf)] for r in ratios],
                            [mean[(r, clf)] + stdev[(r, clf)] for r in ratios],
                            alpha=.15, color=colours[i])

    # if (metric == 'Negative Predictive Parity Difference' and ratio_type == 'ir') or \
    #     (metric == 'Statistical Parity Difference' and ratio_type == 'gr') or \
    #     metric == 'Predictive Equality Difference':
    #     ax.legend(loc=2)
    # else:
    #     ax.legend(loc=1)

    ax.legend(loc=9)
    ax.spines[['top', 'right']].set_visible(False)

    ax.set_xticks(ratios, ratios, rotation=90)
    ax.set_xlim(0, 1)
    if ylim:
        ax.set_ylim(*ylim)
    plt.tight_layout()
    return fig

In [30]:
for fill in ('std', 'err'):
    subdir = f'line_{fill}'
    os.makedirs(os.path.join(plots_dir, subdir), exist_ok=True)

    for ratio_type, ylim in [
        ('ir', (-.9, .9)),
        ('gr', (-.9, .9)),
        ]:
        for metric in fairness_results_cv['metric'].unique():
            fig = plot_line(fairness_results_cv, metric, ratio_type, ylim=ylim, fill=fill)
            fig.savefig(os.path.join(plots_dir, subdir, f'fairness_line_{ratio_type}_{metric}_rh.svg'))
            plt.close()


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  arrmean = um.true_divide(arrmean, div, out=arrmean,


### plot absolute value of fairness metrics

In [31]:
def plot_line_abs(fairness: pd.DataFrame, metric: str, ratio_type: str, fill='std', ylim=None):
    fig, ax = plt.subplots(figsize=(12, 6))

    # ax.set_title(f'Value of *{metric}* for different {ratio_type.upper()}')
    ax.set_ylabel(metric)
    ax.set_xlabel(ratio_type.upper())

    metrics = fairness['metric'].unique()
    clfs = fairness['clf'].unique()
    ratios = sorted(fairness[ratio_type].unique())
    other_ratio = 'gr' if ratio_type == 'ir' else 'ir'
    mean, stdev, err = {}, {}, {}

    for r in ratios:
        for clf in clfs:
            subset = fairness[
                (fairness[ratio_type] == r) &
                (fairness['clf'] == clf) &
                (fairness[other_ratio] == .5) &
                (fairness['metric'] == metric) &
                fairness['value'].notna()
                ]
            mean[(r, clf)] = subset['value'].abs().mean(skipna=True)
            stdev[(r, clf)] = subset['value'].abs().std(skipna=True)
            err[(r, clf)] = scipy.stats.sem(subset['value'].abs(), nan_policy='omit')

    # ax.axhline(0, color='black', linestyle='--', alpha=.3)

    for i, clf in enumerate(clfs):
        ax.plot(ratios, [mean[(r, clf)] for r in ratios], label=clf, color=colours[i], marker='o')
        if fill == 'err':
            ax.fill_between(ratios,
                            [mean[(r, clf)] - err[(r, clf)] for r in ratios],
                            [mean[(r, clf)] + err[(r, clf)] for r in ratios],
                            alpha=.15, color=colours[i])
        elif fill == 'std':
            ax.fill_between(ratios,
                            [mean[(r, clf)] - stdev[(r, clf)] for r in ratios],
                            [mean[(r, clf)] + stdev[(r, clf)] for r in ratios],
                            alpha=.15, color=colours[i])

    # if (metric == 'Negative Predictive Parity Difference' and ratio_type == 'ir') or \
    #         (metric == 'Statistical Parity Difference' and ratio_type == 'gr') or \
    #         metric == 'Predictive Equality Difference':
    #     ax.legend(loc=2)
    # else:
    #     ax.legend(loc=1)
    ax.legend(loc=9)
    ax.spines[['top', 'right']].set_visible(False)

    ax.set_xticks(ratios, ratios, rotation=90)
    ax.set_xlim(0, 1)
    if ylim:
        ax.set_ylim(*ylim)
    plt.tight_layout()
    return fig

In [32]:
for fill in ('std', 'err'):
    subdir = f'line_abs_{fill}'
    os.makedirs(os.path.join(plots_dir, subdir), exist_ok=True)

    for ratio_type in ['ir', 'gr']:
        # ratio_type = 'ir'
        for metric in fairness_results_cv['metric'].unique():
            fig = plot_line_abs(fairness_results_cv, metric, ratio_type, ylim=(0, .6), fill=fill)
            fig.savefig(os.path.join(plots_dir, subdir, f'fairness_line_{ratio_type}_{metric}_{fill}_abs_rh.svg'))
            plt.close()

### count NaN

In [70]:
# count nan for each metric
fairness_results_cv.groupby(['metric', 'clf'])['value'].apply(lambda x: x.isna().sum() / x.shape[0])

metric                                 clf               
Accuracy Equality Difference           DecisionTree          0.000667
                                       GaussianNB            0.000667
                                       KNeighbors            0.000667
                                       LogisticRegression    0.000667
                                       RandomForest          0.000667
Equal Opportunity Difference           DecisionTree          0.010667
                                       GaussianNB            0.010667
                                       KNeighbors            0.010667
                                       LogisticRegression    0.010667
                                       RandomForest          0.010667
Negative Predictive Parity Difference  DecisionTree          0.016667
                                       GaussianNB            0.025333
                                       KNeighbors            0.108667
                                

In [73]:
# fairness_results_cv[
#     (fairness_results_cv['metric'] == 'Negative Predictive Parity Difference') |
#     (fairness_results_cv['metric'] == 'Positive Predictive Parity Difference')
# ].groupby(['metric', 'clf', 'gr'])['value'].apply(lambda x: x.isna().sum() / x.shape[0])
fairness_results_cv.groupby(['metric', 'clf', 'gr'])['value'].apply(lambda x: x.isna().sum() / x.shape[0])

metric                        clf           gr  
Accuracy Equality Difference  DecisionTree  0.01    0.00
                                            0.02    0.00
                                            0.05    0.00
                                            0.10    0.00
                                            0.20    0.00
                                                    ... 
Statistical Parity            RandomForest  0.80    0.00
                                            0.90    0.00
                                            0.95    0.00
                                            0.98    0.00
                                            0.99    0.02
Name: value, Length: 450, dtype: float64

In [74]:
# fairness_results_cv[
#     (fairness_results_cv['metric'] == 'Negative Predictive Parity Difference') |
#     (fairness_results_cv['metric'] == 'Positive Predictive Parity Difference')
#     ].groupby(['metric', 'clf', 'ir'])['value'].apply(lambda x: x.isna().sum() / x.shape[0])
fairness_results_cv.groupby(['metric', 'clf', 'ir'])['value'].apply(lambda x: x.isna().sum() / x.shape[0])

metric                        clf           ir  
Accuracy Equality Difference  DecisionTree  0.01    0.0
                                            0.02    0.0
                                            0.05    0.0
                                            0.10    0.0
                                            0.20    0.0
                                                   ... 
Statistical Parity            RandomForest  0.80    0.0
                                            0.90    0.0
                                            0.95    0.0
                                            0.98    0.0
                                            0.99    0.0
Name: value, Length: 450, dtype: float64

### plot nan count

In [33]:
def plot_nan(fairness, ratio_type, clfs=None, metrics=None, ylim=None):
    if clfs is None:
        clfs = fairness['clf'].unique()
    if metrics is None:
        metrics = fairness['metric'].unique()
    ratios = sorted(fairness[ratio_type].unique())
    other_ratio = 'gr' if ratio_type == 'ir' else 'ir'

    fig, ax = plt.subplots(2, (len(metrics) - 1) // 2 + 1,
                           sharex=True, sharey=True,
                           figsize=(16, 9))

    for i, metric in enumerate(metrics):
        ax[i % 2, i // 2].set_title(metric)
        ax[i % 2, i // 2].set_ylabel('NaN probability')
        ax[i % 2, i // 2].set_xlabel(ratio_type.upper())
        ax[i % 2, i // 2].yaxis.set_major_formatter(PercentFormatter(1))
        ax[i % 2, i // 2].spines[['top', 'right']].set_visible(False)

        for j, clf in enumerate(clfs):
            subset = fairness[
                (fairness['clf'] == clf) &
                (fairness[other_ratio] == .5) &
                (fairness['metric'] == metric)
                ]
            counts = subset.groupby(ratio_type)['value'].apply(lambda x: x.isna().sum() / x.shape[0])
            ax[i % 2, i // 2].plot(ratios, counts,
                                   label=clf, color=colours[j], marker='o', alpha=.6)

    if ylim:
        ax[0, 0].set_ylim(*ylim)
    else:
        ax[0, 0].set_ylim(0, ax[0, 0].get_ylim()[1] * 1.1)
    ax[0, 0].set_xlim(0, 1)
    ax[0, 0].legend(loc=0)

    return fig


In [34]:
for ratio_type in ['ir', 'gr']:
    fig = plot_nan(fairness_results_cv, ratio_type,
                   metrics=[
                       'Accuracy Equality Difference',
                       'Statistical Parity Difference',
                       'Equal Opportunity Difference',
                       'Predictive Equality Difference',
                       'Positive Predictive Parity Difference',
                       'Negative Predictive Parity Difference',
                   ])
    fig.savefig(os.path.join(plots_dir, f'fairness_nan_{ratio_type}.svg'))
    plt.close()

In [35]:
# pickle the results
import pickle

with open(os.path.join('out', 'fairness_results_cv.pkl'), 'wb') as f:
    pickle.dump(fairness_results_cv, f)