In [None]:
import matplotlib.pyplot as plt
import mpl_lego as mplego
import numpy as np
import pandas as pd
import pickle

from pyprojroot import here
from hate_target import keys
from hate_target import utils
from scipy.stats import iqr
from mpl_lego.labels import bold_text

%matplotlib inline

In [None]:
# Switch on mpl_lego's LaTeX plotting style before any figure is created.
# NOTE(review): presumably this makes matplotlib render text through LaTeX,
# which would be why labels below go through `bold_text` — confirm in mpl_lego.
mplego.style.use_latex_style()

In [None]:
# Results of the race-only subgroup experiment, keyed by metric name.
race = utils.analyze_experiment(
    here('experiments/subgroups/race.pkl'),
    verbose=True,
)
# Sorted so the column order (and hence the label order) is deterministic.
race_cols = sorted(keys.target_race_cols)
# Size of the second axis of a metric array; used below as the number of bars.
n_race_groups = np.shape(race['accuracy_by_chance'])[1]

In [None]:
# Human-readable tick labels: skip the first 12 characters of each column name
# (the length of 'target_race_' — presumably every column starts with that
# prefix; verify against keys.target_race_cols), then capitalise each
# underscore-separated word and join with spaces.
race_labels = [
    ' '.join(map(str.capitalize, col[len('target_race_'):].split('_')))
    for col in race_cols
]

In [None]:
# Results of the gender-only subgroup experiment, keyed by metric name.
gender = utils.analyze_experiment(
    here('experiments/subgroups/gender.pkl'),
    verbose=True,
)
# Gender target columns, listed explicitly in alphabetical order.
target_cols = [
    'target_gender_men',
    'target_gender_non_binary',
    'target_gender_transgender',
    'target_gender_women',
]
# `gender_cols` and `target_cols` are two names for the same list object.
gender_cols = target_cols
# Size of the second axis of a metric array; used below as the number of bars.
n_gender_groups = np.shape(gender['accuracy_by_chance'])[1]

In [None]:
# Human-readable tick labels: strip the 14-character 'target_gender_' prefix,
# then capitalise each underscore-separated word and join with spaces
# (e.g. 'target_gender_non_binary' -> 'Non Binary').
gender_labels = [
    ' '.join(map(str.capitalize, col[len('target_gender_'):].split('_')))
    for col in gender_cols
]

In [None]:
# Results of the joint (race + gender) subgroup experiment, keyed by metric name.
race_gender = utils.analyze_experiment(
    here('experiments/subgroups/race_gender.pkl'),
    verbose=True,
)
# Combined column list: every race column first, then every gender column.
race_gender_cols = [*race_cols, *gender_cols]

In [None]:
# Race-only experiment: 2x2 grid with one bar chart per metric.
# Bar height is the mean over axis 0 of each metric array; `yerr` is the
# inter-quartile range over the same axis, so each whisker extends one IQR
# above and one IQR below the mean.
fig, axes = plt.subplots(2, 2, figsize=(10, 6), sharex=True)
plt.subplots_adjust(hspace=0.3)

race_positions = np.arange(n_race_groups)
# (axes, key into `race`, panel title) for each of the four panels.
race_panels = (
    (axes[0, 0], 'accuracy_by_chance', 'Accuracy over Chance'),
    (axes[0, 1], 'log_odds_difference', 'Log-Odds Difference'),
    (axes[1, 0], 'roc_aucs', 'ROC-AUC'),
    (axes[1, 1], 'pr_aucs', 'PR-AUC'),
)

for panel, metric, heading in race_panels:
    scores = race[metric]
    panel.bar(
        x=race_positions,
        height=scores.mean(axis=0),
        yerr=iqr(scores, axis=0),
        color='lightgray',
        edgecolor='black',
        capsize=4,
        error_kw={'elinewidth': 2})

# Hand-picked y-ranges for the left-hand column of panels.
axes[0, 0].set_ylim([1, 1.13])
axes[1, 0].set_ylim([0.8, 1.0])

# Shared cosmetics: larger tick labels and horizontal grid lines behind the bars.
for ax in axes.ravel():
    ax.tick_params(labelsize=14)
    ax.grid(axis='y')
    ax.set_axisbelow(True)

# Group names go on the bottom row only (the x-axis is shared with the top row).
bottom_row = axes[1]
for panel in bottom_row:
    panel.set_xticks(race_positions)
for panel in bottom_row:
    panel.set_xticklabels(bold_text(race_labels), ha='right', rotation=30, fontsize=13)

for panel, metric, heading in race_panels:
    panel.set_title(bold_text(heading), fontsize=18)

plt.show()

In [None]:
# Gender-only experiment: 2x2 grid with one bar chart per metric.
# Bar height is the mean over axis 0 of each metric array; `yerr` is the
# inter-quartile range over the same axis, so each whisker extends one IQR
# above and one IQR below the mean.
fig, axes = plt.subplots(2, 2, figsize=(8, 6), sharex=True)
plt.subplots_adjust(hspace=0.3)

gender_positions = np.arange(n_gender_groups)
# (axes, key into `gender`, panel title) for each of the four panels.
gender_panels = (
    (axes[0, 0], 'accuracy_by_chance', 'Accuracy over Chance'),
    (axes[0, 1], 'log_odds_difference', 'Log-Odds Difference'),
    (axes[1, 0], 'roc_aucs', 'ROC-AUC'),
    (axes[1, 1], 'pr_aucs', 'PR-AUC'),
)

for panel, metric, heading in gender_panels:
    scores = gender[metric]
    panel.bar(
        x=gender_positions,
        height=scores.mean(axis=0),
        yerr=iqr(scores, axis=0),
        color='lightgray',
        edgecolor='black',
        capsize=4,
        error_kw={'elinewidth': 2})

# Accuracy-over-chance starts at 1 (upper limit left automatic);
# ROC-AUC uses a hand-picked fixed range.
axes[0, 0].set_ylim(bottom=1)
axes[1, 0].set_ylim([0.8, 1.0])

# Shared cosmetics: larger tick labels and horizontal grid lines behind the bars.
for ax in axes.ravel():
    ax.tick_params(labelsize=14)
    ax.grid(axis='y')
    ax.set_axisbelow(True)

# Group names go on the bottom row only (the x-axis is shared with the top row).
bottom_row = axes[1]
for panel in bottom_row:
    panel.set_xticks(gender_positions)
for panel in bottom_row:
    panel.set_xticklabels(bold_text(gender_labels), ha='right', rotation=30, fontsize=13)

for panel, metric, heading in gender_panels:
    panel.set_title(bold_text(heading), fontsize=17)

plt.show()

In [None]:
# Joint race + gender experiment: 2x2 grid with one bar chart per metric.
# Bar height is the mean over axis 0 of each metric array; `yerr` is the
# inter-quartile range over the same axis, so each whisker extends one IQR
# above and one IQR below the mean.
fig, axes = plt.subplots(2, 2, figsize=(12, 6), sharex=True)
plt.subplots_adjust(hspace=0.3)

# One bar per race group followed by one bar per gender group.
joint_positions = np.arange(n_race_groups + n_gender_groups)
# (axes, key into `race_gender`, panel title) for each of the four panels.
joint_panels = (
    (axes[0, 0], 'accuracy_by_chance', 'Accuracy over Chance'),
    (axes[0, 1], 'log_odds_difference', 'Log-Odds Difference'),
    (axes[1, 0], 'roc_aucs', 'ROC-AUC'),
    (axes[1, 1], 'pr_aucs', 'PR-AUC'),
)

for panel, metric, heading in joint_panels:
    scores = race_gender[metric]
    panel.bar(
        x=joint_positions,
        height=scores.mean(axis=0),
        yerr=iqr(scores, axis=0),
        color='lightgray',
        edgecolor='black',
        capsize=4,
        error_kw={'elinewidth': 2})

# Accuracy-over-chance starts at 1 (upper limit left automatic);
# ROC-AUC uses a hand-picked fixed range.
axes[0, 0].set_ylim(bottom=1)
axes[1, 0].set_ylim([0.8, 1.0])

# Shared cosmetics: larger tick labels and horizontal grid lines behind the bars.
for ax in axes.ravel():
    ax.tick_params(labelsize=14)
    ax.grid(axis='y')
    ax.set_axisbelow(True)

# Group names go on the bottom row only (the x-axis is shared with the top row).
bottom_row = axes[1]
for panel in bottom_row:
    panel.set_xticks(joint_positions)
for panel in bottom_row:
    panel.set_xticklabels(bold_text(race_labels + gender_labels), ha='right', rotation=30, fontsize=13)

for panel, metric, heading in joint_panels:
    panel.set_title(bold_text(heading), fontsize=17)

plt.show()

In [None]:
# Multitask vs. single-task comparison: 4 rows (one per metric) x 2 columns
# (race groups on the left, gender groups on the right).
# Each bar is the ratio
#     mean over axis 0 of the joint-experiment metric
#   / mean over axis 0 of the matching single-experiment metric.
# No error bars are drawn here, so the bar calls carry no capsize/error_kw
# arguments (matplotlib only uses them when xerr/yerr is given).
fig, axes = plt.subplots(4, 2, figsize=(7, 14), sharey='row', sharex='col',
                         gridspec_kw={'width_ratios': [1, 0.8]})
plt.subplots_adjust(hspace=0.3)

# One entry per row: (metric key, y-axis label, y-limits for the row).
joint_rows = (
    ('accuracy_by_chance', 'Accuracy over\nChance', [0.995, 1.005]),
    ('log_odds_difference', 'Log-Odds\nDifference', [0.92, 1.02]),
    ('roc_aucs', 'ROC-AUC', [0.95, 1.0]),
    ('pr_aucs', 'PR-AUC', [0.75, 1.0]),
)

for row_axes, (metric, row_label, row_ylim) in zip(axes, joint_rows):
    joint_mean = race_gender[metric].mean(axis=0)
    # The slices treat the first `n_race_groups` columns of the joint results
    # as race groups and the rest as gender groups, i.e. the same order as
    # `race_cols + gender_cols`.
    # NOTE(review): confirm the joint experiment stores its columns in that order.
    columns = (
        (n_race_groups, joint_mean[:n_race_groups] / race[metric].mean(axis=0)),
        (n_gender_groups, joint_mean[n_race_groups:] / gender[metric].mean(axis=0)),
    )
    for panel, (n_groups, ratio) in zip(row_axes, columns):
        panel.bar(
            x=np.arange(n_groups),
            height=ratio,
            color='lightgray',
            edgecolor='black')

# Shared cosmetics: larger tick labels and horizontal grid lines behind the bars.
for ax in axes.ravel():
    ax.tick_params(labelsize=14)
    ax.grid(axis='y')
    ax.set_axisbelow(True)

# Group names go on the bottom row only (x is shared within each column).
axes[3, 0].set_xticks(np.arange(n_race_groups))
axes[3, 0].set_xticklabels(bold_text(race_labels), ha='right', rotation=30, fontsize=13)

axes[3, 1].set_xticks(np.arange(n_gender_groups))
axes[3, 1].set_xticklabels(bold_text(gender_labels), ha='right', rotation=30, fontsize=13)

# Labels and hand-picked limits are set on the left column only; because of
# sharey='row' the limits also apply to the right-hand panel of each row.
for row_axes, (metric, row_label, row_ylim) in zip(axes, joint_rows):
    row_axes[0].set_ylabel(bold_text(row_label), fontsize=18)
for row_axes, (metric, row_label, row_ylim) in zip(axes, joint_rows):
    row_axes[0].set_ylim(row_ylim)

fig.suptitle(bold_text('Fraction of Performance in\nMultitask vs. Single Task'), y=0.93,
             fontsize=20)

# Written relative to the current working directory, not the project root.
plt.savefig('joint.pdf', bbox_inches='tight')
plt.show()