In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
import mpl_lego as mplego
import pandas as pd

from hatespeech.utils import annotator_race_to_col

%matplotlib inline

In [None]:
# Root of the project checkout. `os.path.expanduser('~')` is used instead of
# `os.environ['HOME']` so that this cell does not raise KeyError on platforms
# where HOME is not defined (e.g. Windows); when HOME is set the result is the same.
base_path = os.path.join(os.path.expanduser('~'), 'projects/hatespeech-2021-fairml')
# Feather file that is loaded into `data`.
data_path = os.path.join(base_path, 'data/clean_qualtrics_irt_rollout.feather')
# Directory containing the exp01 score files.
results_path = os.path.join(base_path, 'scaling/experiments/exp01')

In [None]:
# Read the feather table stored at `data_path` into a DataFrame.
data = pd.read_feather(path=data_path)

In [None]:
# Locations of the two exp01 score files (one per target group), both
# resolved relative to `results_path`.
black_target_annotator_path, white_target_annotator_path = (
    os.path.join(results_path, score_file)
    for score_file in ("exp01_scores_black.2.txt", "exp01_scores_white.2.txt")
)

In [None]:
# Both score files are tab-separated, and the first line of each file is
# skipped before the header row is parsed.
score_file_format = dict(sep='\t', skiprows=1)

# Score tables: one for black-targeting comments, one for white-targeting comments
black_target_annotator = pd.read_csv(black_target_annotator_path, **score_file_format)
white_target_annotator = pd.read_csv(white_target_annotator_path, **score_file_format)

# The severities are taken from the 'Measure' column of each table
black_target_severity = black_target_annotator['Measure']
white_target_severity = white_target_annotator['Measure']

In [None]:
# Report the shape of each severity series (one line per target group).
for target_label, severities in (
        ('Black-targeting', black_target_severity),
        ('White-targeting', white_target_severity)):
    print(f'{target_label}: {severities.shape}')

In [None]:
# Get annotators ('Judges') that looked at both black- and white- targeting comments
in_both_tables = black_target_annotator['Judges'].isin(white_target_annotator['Judges'])
common_annotators = black_target_annotator.loc[in_both_tables, 'Judges'].values

# Restrict each score table to the shared annotators, then put both tables in
# the same 'Judges' order and renumber their rows from 0. The later cells pair
# the two tables row-by-row (hexbin/scatter, `.values` arithmetic) or by index
# label (Series subtraction); without this alignment, row i of one table is not
# guaranteed to describe the same annotator as row i of the other.
# NOTE(review): assumes each annotator appears at most once per score table -
# TODO confirm.
black_common = (
    black_target_annotator[black_target_annotator['Judges'].isin(common_annotators)]
    .sort_values('Judges', kind='stable')
    .reset_index(drop=True)
)
white_common = (
    white_target_annotator[white_target_annotator['Judges'].isin(common_annotators)]
    .sort_values('Judges', kind='stable')
    .reset_index(drop=True)
)
print(f'Common annotators: {common_annotators.shape}')

In [None]:
# Overlaid, density-normalised step histograms of the severities estimated
# from black-targeting and white-targeting comments.
fig, ax = plt.subplots(1, 1, figsize=(10, 5))

bins = np.linspace(-6, 6, 51)

# Styling shared by both outlines; only data, colour and legend label differ.
outline_style = dict(bins=bins, density=True, linewidth=2, histtype='step')
for severities, colour, legend_label in (
        (black_target_severity, 'C0', r'\textbf{Black-Targeted}'),
        (white_target_severity, 'C1', r'\textbf{White-Targeted}')):
    ax.hist(severities, color=colour, label=legend_label, **outline_style)

# The bins span [-6, 6], but only [-3, 3] is displayed.
ax.set_xlim([-3, 3])
ax.set_xlabel(r'\textbf{Severity} $(\alpha_j)$', fontsize=20)
ax.set_ylabel(r'\textbf{Density}', fontsize=20)
ax.legend(loc='best', prop={'size': 15})
plt.savefig('exp01_severities.pdf', bbox_inches='tight')

In [None]:
# Hexagonal-binned comparison (log-scaled counts) of the two severities of
# the annotators present in both score tables.
fig, ax = plt.subplots(1, 1, figsize=(8, 8))

# Same range on both axes so the identity line runs corner to corner.
severity_limits = (-6, 6)

ax.hexbin(
    black_common['Measure'],
    white_common['Measure'],
    gridsize=30,
    bins='log',
    cmap='Greys')
ax.set_xlim(severity_limits)
ax.set_ylim(severity_limits)
# Dashed identity line: points on it have equal severity for both targets.
ax.plot(severity_limits, severity_limits, color='red', linestyle='--')
ax.set_xlabel(r'\textbf{Severity from Black-Targeting Comments}', fontsize=18)
ax.set_ylabel(r'\textbf{Severity from White-Targeting Comments}', fontsize=18)
ax.tick_params(labelsize=15)
plt.savefig('exp01_severities_common_annotators.pdf', bbox_inches='tight')

In [None]:
# Histogram of the per-annotator severity difference (black-targeting minus
# white-targeting) over the annotators present in both score tables.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))

# Pair the two severities of each annotator explicitly on 'Judges'. Subtracting
# the two 'Measure' Series directly would align them on their row index labels
# (row positions in two different files) rather than on the annotator, giving
# NaNs or differences between unrelated annotators.
# `validate` makes the merge fail loudly if an annotator is listed more than
# once in either table instead of silently duplicating pairs.
paired_severities = black_common[['Judges', 'Measure']].merge(
    white_common[['Judges', 'Measure']],
    on='Judges',
    suffixes=('_black', '_white'),
    validate='one_to_one')
severity_diff = (
    paired_severities['Measure_black'] - paired_severities['Measure_white']
).values

ax.hist(
    severity_diff,
    bins=np.linspace(-5, 5, 21),
    histtype='step',
    linewidth=3,
    color='black')
ax.set_xlim([-5, 5])
# Reference line at zero difference.
ax.axvline(0, linestyle='--', color='grey')
ax.tick_params(labelsize=15)
ax.set_xlabel(r'$\alpha_b - \alpha_w$', fontsize=20)
ax.set_ylabel(r'\textbf{Frequency}', fontsize=20)
plt.savefig('exp01_severity_diff.pdf', bbox_inches='tight')

In [None]:
# Get demographic information of annotators: the `labeler_id` of every row of
# `data` whose white / black annotator-race indicator column equals 1.
is_white_annotator = data[annotator_race_to_col['white']] == 1
is_black_annotator = data[annotator_race_to_col['black']] == 1
white_annotators = data.loc[is_white_annotator, 'labeler_id'].values
black_annotators = data.loc[is_black_annotator, 'labeler_id'].values

In [None]:
# Distribution of the per-annotator severity difference (black-targeting minus
# white-targeting), shown separately for Black and White annotators, with an
# inset box plot of the same two samples.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))

bins = np.linspace(-4, 4, 21)

# Pair each annotator's two severities explicitly on 'Judges' rather than
# subtracting two independently filtered arrays position-by-position, which is
# only correct if both score tables list the annotators in the same order.
# `validate` makes the merge fail loudly if an annotator is listed more than
# once in either table.
paired_severities = black_common[['Judges', 'Measure']].merge(
    white_common[['Judges', 'Measure']],
    on='Judges',
    suffixes=('_black', '_white'),
    validate='one_to_one')
severity_diff = paired_severities['Measure_black'] - paired_severities['Measure_white']

# Split the paired differences by annotator group.
black_diffs = severity_diff[paired_severities['Judges'].isin(black_annotators)].values
white_diffs = severity_diff[paired_severities['Judges'].isin(white_annotators)].values

# Same styling for both groups; `white_diffs` is reused here instead of being
# recomputed inline.
for diffs, legend_label, colour in (
        (black_diffs, r'\textbf{Black Annotators}', 'C0'),
        (white_diffs, r'\textbf{White Annotators}', 'C1')):
    ax.hist(
        diffs,
        bins=bins,
        histtype='step',
        linewidth=3,
        density=True,
        label=legend_label,
        color=colour)
ax.set_xlim([-5, 5])
# Reference line at zero difference.
ax.axvline(0, linestyle='--', color='grey')
ax.tick_params(labelsize=15)
ax.set_xlabel(r'$\alpha_b - \alpha_w$', fontsize=20)
# The histograms are drawn with density=True, so the y-axis is a density,
# not a count.
ax.set_ylabel(r'\textbf{Density}', fontsize=20)
ax.legend(loc='best', prop={'size': 14})

# Inset box plot of the same two samples (outliers hidden).
inset = fig.add_axes([0.23, 0.5, 0.15, 0.35])
inset.boxplot(
    x=[black_diffs, white_diffs],
    showfliers=False,
    widths=0.5,
    medianprops={'color': 'black', 'lw': 2},
    boxprops={'lw': 2},)
inset.set_xticklabels(['Black\nAnn.', 'White\nAnn.'])
inset.set_ylabel(r'$\alpha_b - \alpha_w$', fontsize=20)
inset.axhline(0, linestyle='--', color='gray')
inset.tick_params(labelsize=15)
plt.savefig('exp01_white_vs_black_annotators.pdf', bbox_inches='tight')

In [None]:
# Side-by-side scatter plots of severity from black-targeting vs. severity
# from white-targeting comments: left panel restricted to `black_annotators`,
# right panel to `white_annotators`.
fig, axes = plt.subplots(1, 2, figsize=(16, 8))

severity_limits = (-6, 6)

for ax, annotator_ids, point_alpha in (
        (axes[0], black_annotators, 0.3),
        (axes[1], white_annotators, 0.2)):
    in_black_table = black_common['Judges'].isin(annotator_ids)
    in_white_table = white_common['Judges'].isin(annotator_ids)
    ax.scatter(
        black_common[in_black_table]['Measure'],
        white_common[in_white_table]['Measure'],
        alpha=point_alpha,
        color='black')

    # Same range on both axes, plus a dashed identity line for reference.
    ax.set_xlim(severity_limits)
    ax.set_ylim(severity_limits)
    ax.plot(severity_limits, severity_limits, color='red', linestyle='--')
    ax.set_xlabel(r'\textbf{Severity from Black-Targeting Comments}', fontsize=18)
    ax.set_ylabel(r'\textbf{Severity from White-Targeting Comments}', fontsize=18)
    ax.tick_params(labelsize=15)