In [1]:
import yaml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.metrics
from aequitas.group import Group

from autodefer.utils import thresholding as t

In [2]:
# Load the expert config; it supplies the dataset-config path, the split
# boundaries and the output paths that the later cells read.
# NOTE(review): the path is relative, so it resolves against the kernel's working directory.
expert_cfg_path = '../experiments/baf_haic/experts/cfg.yaml'
with open(expert_cfg_path, 'r') as infile:
    expert_cfg = yaml.safe_load(infile)
    
# The dataset config location is taken from the expert config.
with open(expert_cfg['data_cfg_path'], 'r') as infile:
    data_cfg = yaml.safe_load(infile)
    
# Use seaborn's 'whitegrid' theme for the figures drawn after this point.
sns.set_theme(style='whitegrid')

In [3]:
# Read the dataset and pull the column roles out of the dataset config.
data = pd.read_parquet(data_cfg['data_path'])
LABEL_COL = data_cfg['data_cols']['label']
TIMESTAMP_COL = data_cfg['data_cols']['timestamp']
PROTECTED_COL = data_cfg['data_cols']['protected']
CATEGORICAL_COLS = data_cfg['data_cols']['categorical']
# Cast the configured categorical columns to pandas' 'category' dtype.
data[CATEGORICAL_COLS] = data[CATEGORICAL_COLS].astype('category')
# NOTE(review): after this `del`, re-running this cell on its own raises
# NameError; the config-loading cell has to be executed again first.
del data_cfg

def splitter(df, timestamp_col, beginning: int, end: int):
    """Return a copy of the rows whose timestamp lies in ``[beginning, end)``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to slice.
    timestamp_col : str
        Name of the column compared against the window bounds.
    beginning : int
        Inclusive lower bound of the window.
    end : int
        Exclusive upper bound of the window.

    Returns
    -------
    pd.DataFrame
        An independent copy of the selected rows; the original index labels
        are kept.
    """
    timestamps = df[timestamp_col]
    in_window = timestamps.ge(beginning) & timestamps.lt(end)
    return df.loc[in_window].copy()

# Slice the data into the train and deployment windows listed under 'splits'
# in the expert config (each entry is unpacked into splitter's beginning/end),
# then drop the timestamp column from both slices.
train = splitter(data, TIMESTAMP_COL, *expert_cfg['splits']['train']).drop(columns=TIMESTAMP_COL)
deployment = splitter(data, TIMESTAMP_COL, *expert_cfg['splits']['deployment']).drop(columns=TIMESTAMP_COL)

In [4]:
# Load the expert ids from the 'ids' output path of the expert config; the
# displayed mapping holds a 'human_ids' list and a 'model_ids' list.
with open(expert_cfg['output_paths']['ids'], 'r') as infile:
    EXPERT_IDS = yaml.safe_load(infile)
EXPERT_IDS

{'human_ids': ['regular#0',
  'regular#1',
  'regular#2',
  'regular#3',
  'regular#4',
  'regular#5',
  'regular#6',
  'regular#7',
  'regular#8',
  'regular#9',
  'regular#10',
  'regular#11',
  'regular#12',
  'regular#13',
  'regular#14',
  'regular#15',
  'regular#16',
  'regular#17',
  'regular#18',
  'regular#19',
  'feat_dependent#0',
  'feat_dependent#1',
  'feat_dependent#2',
  'feat_dependent#3',
  'feat_dependent#4',
  'feat_dependent#5',
  'feat_dependent#6',
  'feat_dependent#7',
  'feat_dependent#8',
  'feat_dependent#9',
  'model_agreeing#0',
  'model_agreeing#1',
  'model_agreeing#2',
  'model_agreeing#3',
  'model_agreeing#4',
  'model_agreeing#5',
  'model_agreeing#6',
  'model_agreeing#7',
  'model_agreeing#8',
  'model_agreeing#9',
  'unfair#0',
  'unfair#1',
  'unfair#2',
  'unfair#3',
  'unfair#4',
  'unfair#5',
  'unfair#6',
  'unfair#7',
  'unfair#8',
  'unfair#9'],
 'model_ids': ['model#0']}

In [5]:
# Load the stored predictions for both splits; the frame displayed below has
# one column per model/expert id.
train_expert_pred = pd.read_parquet(expert_cfg['output_paths']['train'])
deployment_expert_pred = pd.read_parquet(expert_cfg['output_paths']['deployment'])
deployment_expert_pred

Unnamed: 0,model#0,regular#0,regular#1,regular#2,regular#3,regular#4,regular#5,regular#6,regular#7,regular#8,...,unfair#0,unfair#1,unfair#2,unfair#3,unfair#4,unfair#5,unfair#6,unfair#7,unfair#8,unfair#9
397039,0.005073,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
397040,0.003180,0,0,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
397041,0.007852,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
397042,0.036083,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
397043,0.008888,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,0.003055,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
999996,0.001584,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
999997,0.001894,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,1,0,1,0
999998,0.004934,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Label statistics

In [6]:
# Mean of the label column per split (the positive rate, assuming 0/1 labels — TODO confirm).
print(f'Prevalence (train) = {train[LABEL_COL].mean():.3f}')
print(f'Prevalence (deployment) = {deployment[LABEL_COL].mean():.3f}')

Prevalence (train) = 0.010
Prevalence (deployment) = 0.012


# Expert statistics

In [None]:
def calc_expert_stats(expert_pred, expert_list, y_true, protected_attr):
    """Compute per-expert error rates and the FPR gap between age groups.

    For each expert, the confusion matrix of that expert's column against
    ``y_true`` gives TPR, FNR and FPR, and aequitas crosstabs give the FPR
    inside the 'Older' and 'Younger' groups.

    Parameters
    ----------
    expert_pred : pd.DataFrame
        Indexable by expert id; each selected column holds that expert's
        0/1 decisions.
    expert_list : iterable of str
        Expert ids to evaluate. The text before '#' is reported as the type.
    y_true : pd.Series
        Ground-truth 0/1 labels.
    protected_attr : pd.Series
        Group label per row; both 'Older' and 'Younger' must be present.
        NOTE(review): the aequitas input frame is assembled with the pandas
        constructor, so the three inputs are presumably expected to share an
        index — confirm against the callers.

    Returns
    -------
    pd.DataFrame
        One row per expert with the columns ``expert``, ``type``, ``tpr``,
        ``fnr``, ``fpr``, ``Older_fpr``, ``Younger_fpr``, ``fairness_ratio``
        (Younger FPR / Older FPR) and ``fairness_diff`` (Older FPR minus
        Younger FPR). Metric columns are numeric rather than object dtype.

    Raises
    ------
    ValueError
        If the crosstabs do not hold exactly one row for 'Older' or for
        'Younger' (raised by ``.item()``).
    """
    expert_stats = dict()
    for expert_id in expert_list:
        exp_pred = expert_pred[expert_id]
        tn, fp, fn, tp = sklearn.metrics.confusion_matrix(
            y_true=y_true,
            y_pred=exp_pred,
            labels=[0, 1]
        ).ravel()

        # fairness
        aequitas_df = pd.DataFrame({
            'protected_attr': protected_attr,
            'score': exp_pred,
            'label_value': y_true,
        })
        g = Group()
        aequitas_results = g.get_crosstabs(aequitas_df, attr_cols=['protected_attr'])[0]
        older_fpr = aequitas_results[aequitas_results['attribute_value'] == 'Older']['fpr'].item()
        younger_fpr = aequitas_results[aequitas_results['attribute_value'] == 'Younger']['fpr'].item()

        # All ratios go through NumPy division so that an empty denominator
        # yields nan/inf. The two group FPRs are plain Python floats, and
        # dividing those directly raises ZeroDivisionError when Older FPR is 0.
        with np.errstate(divide='ignore', invalid='ignore'):
            expert_stats[expert_id] = {
                'type': expert_id.split('#')[0],
                'tpr': tp/(tp+fn),
                'fnr': fn/(tp+fn),
                'fpr': fp/(tn+fp),
                'Older_fpr': older_fpr,
                'Younger_fpr': younger_fpr,
                'fairness_ratio': np.float64(younger_fpr) / np.float64(older_fpr),
                'fairness_diff': older_fpr - younger_fpr,
            }

    expert_stats = (
        pd.DataFrame(expert_stats)
        .T
        .reset_index(drop=False)
        .rename(columns={'index': 'expert'})
        # The transpose leaves every column as object dtype because each
        # expert column mixed the 'type' string with floats; restore numerics.
        .infer_objects()
    )

    return expert_stats

# Rows whose protected-attribute value is at or above this threshold form the
# 'Older' group; every other row forms the 'Younger' group.
OLDER_GROUP_THRESHOLD = 50


def to_age_group(protected_values):
    """Label each value 'Older' (>= OLDER_GROUP_THRESHOLD) or 'Younger'."""
    return (protected_values >= OLDER_GROUP_THRESHOLD).map({True: 'Older', False: 'Younger'})


# Same statistics for both splits, restricted to the human experts.
train_expert_stats = calc_expert_stats(
    expert_pred=train_expert_pred,
    expert_list=EXPERT_IDS['human_ids'],
    y_true=train[LABEL_COL],
    protected_attr=to_age_group(train[PROTECTED_COL]),
)
deployment_expert_stats = calc_expert_stats(
    expert_pred=deployment_expert_pred,
    expert_list=EXPERT_IDS['human_ids'],
    y_true=deployment[LABEL_COL],
    protected_attr=to_age_group(deployment[PROTECTED_COL]),
)
deployment_expert_stats.head()

In [None]:
# TPR against FPR for each expert on the deployment split; colour and marker encode the expert type.
sns.scatterplot(data=deployment_expert_stats, x='fpr', y='tpr', hue='type', style='type')
plt.show()

In [None]:
# Per-expert FPR gap (Older FPR minus Younger FPR) on the deployment split.
f, ax = plt.subplots(figsize=(15, 8))
sns.barplot(
    data=deployment_expert_stats,
    x='expert',
    y='fairness_diff',
    color='steelblue',
    ax=ax,  # the axes created above; seaborn would otherwise pick the current axes
)
plt.xticks(rotation=90)
plt.show()

In [None]:
def plot_jaccard_similarity(df, cols, show=True, metric="hamming"):
    """Draw a heatmap of the pairwise similarity between columns of ``df``.

    Similarity is ``1 - distance``, where the distance is computed between the
    selected columns (the frame is transposed so each column is one sample).

    NOTE: despite the function's name, the default metric is Hamming, so the
    default plot shows the fraction of rows on which two columns agree. Pass
    ``metric="jaccard"`` to obtain the Jaccard similarity instead.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding one column per entity to compare.
    cols : list of str
        Columns of ``df`` to compare; also used as heatmap row/column labels.
    show : bool, default True
        If True, call ``plt.show()`` and return None; otherwise return the
        axes without showing the figure.
    metric : str, default "hamming"
        Distance metric forwarded to
        ``sklearn.metrics.pairwise.pairwise_distances``.

    Returns
    -------
    matplotlib.axes.Axes or None
        The heatmap axes when ``show`` is False, otherwise None.
    """
    similarity = 1 - sklearn.metrics.pairwise.pairwise_distances(
        df[cols].T,
        metric=metric
    )
    similarity = pd.DataFrame(similarity, index=cols, columns=cols)
    f, ax = plt.subplots(figsize=(20, 15))
    # Draw on the axes created above instead of relying on the implicit current axes.
    sns.heatmap(similarity, robust=True, cmap='Blues', ax=ax)
    if show:
        plt.show()
    else:
        return ax

# Pairwise similarity of the human experts' predictions over all deployment rows.
plot_jaccard_similarity(deployment_expert_pred, cols=EXPERT_IDS['human_ids'])

In [None]:
# Same heatmap restricted to the rows whose label is 0.
# NOTE(review): the boolean mask is built from `deployment`, so this relies on
# both frames sharing an index — confirm.
label_neg_preds = deployment_expert_pred[deployment[LABEL_COL] == 0]
plot_jaccard_similarity(label_neg_preds, cols=EXPERT_IDS['human_ids'])

In [None]:
# Same heatmap restricted to the rows whose label is 1.
# NOTE(review): the boolean mask is built from `deployment`, so this relies on
# both frames sharing an index — confirm.
label_pos_preds = deployment_expert_pred[deployment[LABEL_COL] == 1]
plot_jaccard_similarity(label_pos_preds, cols=EXPERT_IDS['human_ids'])

## Intended vs. actual properties

In [None]:
# Load the per-expert properties table from the 'properties' output path of the expert config and display it.
properties = pd.read_parquet(expert_cfg['output_paths']['properties'])
properties

In [None]:
# Left-join the measured train statistics onto the properties table by expert
# id. Columns present in both frames get the '_intended' suffix (properties
# side) or the '_actual' suffix (measured side).
train_stats = properties.merge(
    train_expert_stats,
    on='expert', how='left',
    suffixes=('_intended', '_actual')
)
train_stats.head()

In [None]:
# Left-join the measured deployment statistics onto the properties table by
# expert id. Columns present in both frames get the '_intended' suffix
# (properties side) or the '_actual' suffix (measured side).
deployment_stats = properties.merge(
    deployment_expert_stats,
    on='expert', how='left',
    suffixes=('_intended', '_actual')
)
deployment_stats.head()

In [None]:
def plot_stats(stats_df, metric):
    """Scatter the intended against the actual value of ``metric`` per expert.

    Reads the ``{metric}_intended`` and ``{metric}_actual`` columns of
    ``stats_df``, colours and marks the points by the ``type`` column, and
    draws a dashed grey identity line from 0 to 1 as a reference. The axis
    limits run from 5% below the smallest to 5% above the largest value on
    each axis. The figure is shown immediately; nothing is returned.
    """
    intended_col = f'{metric}_intended'
    actual_col = f'{metric}_actual'
    intended = stats_df[intended_col]
    actual = stats_df[actual_col]

    # Identity line: points on it have matching intended and actual values.
    diagonal = np.linspace(0, 1, 100)
    plt.plot(diagonal, diagonal, color='grey', linestyle='dashed')

    sns.scatterplot(
        data=stats_df,
        x=intended_col,
        y=actual_col,
        hue='type',
        style='type',
    )

    plt.xlim(0.95*intended.min(), 1.05*intended.max())
    plt.ylim(0.95*actual.min(), 1.05*actual.max())
    plt.show()
# Intended vs. actual FNR and FPR on the train split, one figure per metric.
for metric_name in ('fnr', 'fpr'):
    plot_stats(train_stats, metric_name)

In [None]:
# Intended vs. actual FNR and FPR on the deployment split, one figure per metric.
for metric_name in ('fnr', 'fpr'):
    plot_stats(deployment_stats, metric_name)