In [None]:
import matplotlib.pyplot as plt
import mpl_lego as mplego
import numpy as np

from hate_target import utils
from mpl_lego.labels import bold_text, apply_subplot_labels
from pyprojroot import here

%matplotlib inline

In [None]:
# Apply mpl_lego's LaTeX plotting style before any figure is created.
# NOTE(review): presumably this enables LaTeX text rendering, which the
# bold_text labels used further down would rely on — confirm against mpl_lego.
mplego.style.use_latex_style()

In [None]:
# Analyze the race experiment results; the returned mapping is read in later
# cells through keys such as 'precision', 'roc_aucs', and 'incidence_rate'.
# NOTE(review): presumably one threshold per race sub-group, in the same
# order as the plot labels (Asian ... White) — confirm against hate_target.utils.
race_results_path = here('experiments/figure2a_results.pkl')
race_thresholds = [0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25, 0.5]
analysis_race = utils.analyze_experiment(
    path=race_results_path,
    thresholds=race_thresholds,
    soft=True,
    verbose=True)

In [None]:
# Analyze the gender experiment results; the returned mapping is read in later
# cells through keys such as 'precision', 'roc_aucs', and 'incidence_rate'.
# NOTE(review): presumably one threshold per gender sub-group, in the same
# order as the plot labels (Men ... Women) — confirm against hate_target.utils.
gender_results_path = here('experiments/figure2b_results.pkl')
gender_thresholds = [0.5, 0.25, 0.5, 0.5]
analysis_gender = utils.analyze_experiment(
    path=gender_results_path,
    thresholds=gender_thresholds,
    soft=True,
    verbose=True)

In [None]:
# Display names for the race sub-groups, bolded for the figure.
# NOTE(review): assumed to follow the column order of the analysis arrays —
# confirm against the experiment that produced the results.
race_group_names = [
    'Asian', 'Black', 'Latinx', 'Middle Eastern',
    'Native American', 'Other', 'Pacific Islander', 'White']
labels_race = bold_text(race_group_names)
# Incidence rates for race, as stored in the analysis results
incidence_race = analysis_race['incidence_rate']
# Group indices ordered from highest to lowest incidence rate
race_idx_ascending = np.argsort(incidence_race)
sorted_race_idx = np.flip(race_idx_ascending)
# The number of identity groups is the second dimension of the ROC AUC array
n_race_groups = analysis_race['roc_aucs'].shape[1]

In [None]:
# `analysis_gender` is already produced by the earlier analysis cell, so the
# results file is not re-analyzed here; this cell only derives plotting inputs.
# Determine number of identity groups
n_gender_groups = analysis_gender['roc_aucs'].shape[1]
# Incidence rates for gender, as stored in the analysis results
incidence_gender = analysis_gender['incidence_rate']
# Sort groups from highest to lowest incidence rate for the figure
sorted_gender_idx = np.flip(np.argsort(incidence_gender))
# Generate labels for plot.
# NOTE(review): assumed to follow the column order of the analysis arrays —
# confirm against the experiment that produced the results.
labels_gender = bold_text([
    'Men',
    'Non-Binary',
    'Transgender',
    'Women'])

In [None]:
def _sorted_mean_std(scores, order):
    """Return (mean, standard deviation) of `scores` taken over axis 0,
    each reindexed by `order`."""
    return scores.mean(axis=0)[order], np.std(scores, axis=0)[order]


# Figure 2a: precision, recall, and F1 score for race sub-groups
precision_race_mean, precision_race_std = _sorted_mean_std(
    analysis_race['precision'], sorted_race_idx)
recall_race_mean, recall_race_std = _sorted_mean_std(
    analysis_race['recall'], sorted_race_idx)
f1_race_mean, f1_race_std = _sorted_mean_std(
    analysis_race['f1_scores'], sorted_race_idx)
# Figure 2b: ROC AUC and PR AUC for race sub-groups
roc_auc_race_mean, roc_auc_race_std = _sorted_mean_std(
    analysis_race['roc_aucs'], sorted_race_idx)
pr_auc_race_mean, pr_auc_race_std = _sorted_mean_std(
    analysis_race['pr_aucs'], sorted_race_idx)
# Figure 2c: precision, recall, and F1 score for gender sub-groups
precision_gender_mean, precision_gender_std = _sorted_mean_std(
    analysis_gender['precision'], sorted_gender_idx)
recall_gender_mean, recall_gender_std = _sorted_mean_std(
    analysis_gender['recall'], sorted_gender_idx)
f1_gender_mean, f1_gender_std = _sorted_mean_std(
    analysis_gender['f1_scores'], sorted_gender_idx)
# Figure 2d: ROC AUC and PR AUC for gender sub-groups
roc_auc_gender_mean, roc_auc_gender_std = _sorted_mean_std(
    analysis_gender['roc_aucs'], sorted_gender_idx)
pr_auc_gender_mean, pr_auc_gender_std = _sorted_mean_std(
    analysis_gender['pr_aucs'], sorted_gender_idx)

In [None]:
"""
Figure 2:
Model performance on target identity sub-groups
"""
# 2 x 2 grid of panels: race sub-groups on the top row (a, b) and gender
# sub-groups on the bottom row (c, d)
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 7))
fig.subplots_adjust(wspace=0.15, hspace=0.8)

# Colors
precision_color, recall_color, f1_color = 'C0', 'C1', 'C2'
roc_auc_color, pr_auc_color = 'C4', 'lightgrey'
bar_edge_color = incidence_color = 'black'
# Bar plot settings: one tick position per sub-group
center_points_race = np.arange(n_race_groups)
center_points_gender = np.arange(n_gender_groups)
# Three-bar panels (a, c) use narrower bars and smaller error-bar caps than
# the two-bar panels (b, d)
fig_2a_bar_width = fig2c_bar_width = 0.3
fig_2b_bar_width = fig2d_bar_width = 0.4
fig_2a_cap_size = fig2c_cap_size = 2
fig_2b_cap_size = fig2d_cap_size = 3
# Size settings
incidence_lw = 2.5
subplot_label_size = 20
legend_size = 12
xtick_rotation = 20
xtick_size = 13
xlabel_size = 16
ylabel_size = 15
tick_label_size = 14

"""
Figure 2a:
Precision, recall, and F1 score across race sub-groups
"""
# One (x offset, mean, std, color, legend label) entry per bar series:
# precision sits left of each tick, recall on it, and F1 score to the right.
ax_2a = axes[0, 0]
series_2a = [
    (-fig_2a_bar_width, precision_race_mean, precision_race_std,
     precision_color, 'Precision'),
    (0, recall_race_mean, recall_race_std,
     recall_color, 'Recall'),
    (fig_2a_bar_width, f1_race_mean, f1_race_std,
     f1_color, 'F1 Score'),
]
for x_offset, bar_mean, bar_std, bar_color, bar_label in series_2a:
    ax_2a.bar(
        x=center_points_race + x_offset,
        height=bar_mean,
        width=fig_2a_bar_width,
        yerr=bar_std,
        color=bar_color,
        edgecolor=bar_edge_color,
        error_kw={'capsize': fig_2a_cap_size},
        label=bar_label)

ax_2a.set_ylim([0, 1])
# Pass the visibility explicitly: grid(axis='y') on its own toggles the
# current grid state instead of switching the grid on.
ax_2a.grid(True, axis='y')
ax_2a.set_axisbelow(True)
# Legend centered just above the panel
ax_2a.legend(
    bbox_to_anchor=(0.5, 1.08),
    loc='center',
    ncol=3,
    prop={'size': legend_size})

"""
Figure 2b:
ROC / PR AUC scores across race sub-groups
"""
# One (x offset, mean, std, color, legend label) entry per bar series:
# ROC AUC sits left of each tick and PR AUC right, meeting at the tick.
ax_2b = axes[0, 1]
series_2b = [
    (-fig_2b_bar_width / 2, roc_auc_race_mean, roc_auc_race_std,
     roc_auc_color, 'ROC AUC'),
    (fig_2b_bar_width / 2, pr_auc_race_mean, pr_auc_race_std,
     pr_auc_color, 'PR AUC'),
]
for x_offset, bar_mean, bar_std, bar_color, bar_label in series_2b:
    ax_2b.bar(
        x=center_points_race + x_offset,
        height=bar_mean,
        width=fig_2b_bar_width,
        yerr=bar_std,
        color=bar_color,
        edgecolor=bar_edge_color,
        error_kw={'capsize': fig_2b_cap_size},
        label=bar_label)

# Draw each group's incidence rate as a horizontal segment spanning the
# width of its PR AUC bar
incidence_sorted_race = analysis_race['incidence_rate'][sorted_race_idx]
for group_pos, group_rate in enumerate(incidence_sorted_race):
    ax_2b.plot(
        [group_pos + fig_2b_bar_width, group_pos],
        [group_rate, group_rate],
        color=incidence_color,
        lw=incidence_lw)

# Legend centered just above the panel
ax_2b.legend(
    bbox_to_anchor=(0.5, 1.08),
    loc='center',
    ncol=2,
    prop={'size': legend_size})

"""
Figure 2c:
Precision, recall, and F1 score across gender sub-groups
"""
# One (x offset, mean, std, color, legend label) entry per bar series:
# precision sits left of each tick, recall on it, and F1 score to the right.
ax_2c = axes[1, 0]
series_2c = [
    (-fig2c_bar_width, precision_gender_mean, precision_gender_std,
     precision_color, 'Precision'),
    (0, recall_gender_mean, recall_gender_std,
     recall_color, 'Recall'),
    (fig2c_bar_width, f1_gender_mean, f1_gender_std,
     f1_color, 'F1 Score'),
]
for x_offset, bar_mean, bar_std, bar_color, bar_label in series_2c:
    ax_2c.bar(
        x=center_points_gender + x_offset,
        height=bar_mean,
        width=fig2c_bar_width,
        yerr=bar_std,
        color=bar_color,
        edgecolor=bar_edge_color,
        error_kw={'capsize': fig2c_cap_size},
        label=bar_label)

ax_2c.set_ylim([0, 1])
# Pass the visibility explicitly: grid(axis='y') on its own toggles the
# current grid state instead of switching the grid on.
ax_2c.grid(True, axis='y')
ax_2c.set_axisbelow(True)
# Legend centered just above the panel
ax_2c.legend(
    bbox_to_anchor=(0.5, 1.08),
    loc='center',
    ncol=3,
    prop={'size': legend_size})

"""
Figure 2d:
ROC / PR AUC scores across gender sub-groups
"""
# One (x offset, mean, std, color, legend label) entry per bar series:
# ROC AUC sits left of each tick and PR AUC right, meeting at the tick.
ax_2d = axes[1, 1]
series_2d = [
    (-fig2d_bar_width / 2, roc_auc_gender_mean, roc_auc_gender_std,
     roc_auc_color, 'ROC AUC'),
    (fig2d_bar_width / 2, pr_auc_gender_mean, pr_auc_gender_std,
     pr_auc_color, 'PR AUC'),
]
for x_offset, bar_mean, bar_std, bar_color, bar_label in series_2d:
    ax_2d.bar(
        x=center_points_gender + x_offset,
        height=bar_mean,
        width=fig2d_bar_width,
        yerr=bar_std,
        color=bar_color,
        edgecolor=bar_edge_color,
        error_kw={'capsize': fig2d_cap_size},
        label=bar_label)

# Legend centered just above the panel
ax_2d.legend(
    bbox_to_anchor=(0.5, 1.08),
    loc='center',
    ncol=2,
    prop={'size': legend_size})

# Draw each group's incidence rate as a horizontal segment spanning the
# width of its PR AUC bar
incidence_sorted_gender = analysis_gender['incidence_rate'][sorted_gender_idx]
for group_pos, group_rate in enumerate(incidence_sorted_gender):
    ax_2d.plot(
        [group_pos + fig2d_bar_width, group_pos],
        [group_rate, group_rate],
        color=incidence_color,
        lw=incidence_lw)

"""
Figure 2 labels and ticks
"""
# X ticks, tick labels, and x-axis titles: race sub-groups on the top row,
# gender sub-groups on the bottom row. Tick labels follow the same
# incidence-sorted order as the bars.
row_settings = [
    (axes[0], n_race_groups, labels_race, sorted_race_idx,
     'Race Sub-Groups'),
    (axes[1], n_gender_groups, labels_gender, sorted_gender_idx,
     'Gender Sub-Groups'),
]
for row_axes, n_groups, group_labels, group_order, row_title in row_settings:
    for ax in row_axes:
        ax.set_xticks(np.arange(n_groups))
        ax.set_xticklabels(
            bold_text(np.array(group_labels)[group_order]),
            ha='right',
            rotation=xtick_rotation,
            fontsize=xtick_size)
        ax.set_xlabel(bold_text(row_title), fontsize=xlabel_size)
# Left column y-axis labels
for ax in axes[:, 0]:
    ax.set_ylabel(bold_text('Metric'), fontsize=ylabel_size)
# General plot ticks and sizes
for ax in axes.ravel():
    ax.set_yticks([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
    # Slight headroom above 1.0 so the top grid line and error bars are visible
    ax.set_ylim([0, 1.03])
    # Turn the horizontal grid on explicitly. Calling grid(axis='y') without a
    # visibility argument toggles the current state, so the result would
    # depend on how many times each panel had been toggled before.
    ax.grid(True, axis='y')
    ax.set_axisbelow(True)
    ax.tick_params(labelsize=tick_label_size)
# Apply subplot labels
apply_subplot_labels(
    axes,
    bold=True,
    x=-0.04,
    y=1.09,
    size=subplot_label_size)

# To export the figure, save before plt.show(): saving after the figure has
# been shown may write an empty canvas.
# fig.savefig('figure2.pdf', bbox_inches='tight')
plt.show()