In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
import mpl_lego as mplego
import pandas as pd

from hatespeech import keys, utils
from mpl_lego.colorbar import append_colorbar_to_axis
%matplotlib inline

In [None]:
# Apply mpl_lego's LaTeX plotting style before any figure is created.
# NOTE(review): presumably this turns on LaTeX text rendering, which the
# r'\textbf{...}' axis labels further down rely on -- confirm against mpl_lego.
mplego.style.use_latex_style()

In [None]:
# Per-annotator severity differences, read from a CSV resolved relative to the
# current working directory. Later cells read its 'labeler_id' and
# 'severity_diffs' columns.
severity_diffs = pd.read_csv('severity_diffs.csv')

In [None]:
# Experiment configuration: the identifier of the scaling run and the locations
# of the ratings, the rater quality table and the experiment output folder.
exp = "22"
# Resolve the home directory with expanduser instead of os.environ['HOME']:
# HOME is not guaranteed to be set (e.g. on Windows), in which case the lookup
# raises KeyError. Where HOME is set, the resulting path is unchanged.
base_path = os.path.join(os.path.expanduser('~'), 'projects/annotator_bias_irt')
# The '~' in the two data paths is left for the readers that consume them.
data_path = "~/data/hatespeech/unfiltered_ratings.feather"
rater_quality_path = "~/data/hatespeech/rater_quality_check.csv"
results_path = os.path.join(base_path, f'scaling/experiments/exp{exp}')

In [None]:
# Load the raw ratings; the 'violence_phys' column is exposed as 'violence'.
data = pd.read_feather(data_path).rename(columns={'violence_phys': 'violence'})
# Remove all rows in which some item is missing
data = utils.filter_missing_items(data)
# Remove all rows in which the rater is not up to sufficient quality
rater_quality = pd.read_csv(rater_quality_path)
data = utils.filter_annotator_quality(data, rater_quality)
# Recode item responses: each mapping sends a raw response code to the
# (possibly collapsed) category code used downstream.
data = utils.recode_responses(
    data,
    insult={1: 0, 2: 1, 3: 2, 4: 3},
    humiliate={1: 0, 2: 0, 3: 1, 4: 2},
    status={1: 0, 2: 0, 3: 1, 4: 1},
    dehumanize={1: 0, 2: 0, 3: 1, 4: 1},
    violence={1: 0, 2: 0, 3: 1, 4: 1},
    genocide={1: 0, 2: 0, 3: 1, 4: 1},
    attack_defend={1: 0, 2: 1, 3: 2, 4: 3},
    hatespeech={1: 0, 2: 1})
# Only get comments targeting black / white people
data = data[data['target_race_white'] | data['target_race_black']]
# Keep rows whose race-target columns sum to exactly one, so the remaining rows
# target either white or black people but nothing else.
# .copy() detaches the result from the filtered slice so that the column
# assignment below writes to an independent frame instead of raising
# SettingWithCopyWarning.
data = data[data[keys.target_race_cols].sum(axis=1) == 1].copy()
# Encode the target race: 1 where 'target_race_white' is set, 2 otherwise.
data['target_race'] = np.where(data['target_race_white'], 1, 2)

In [None]:
# Load the experiment's text output as a list of lines
out_path = os.path.join(results_path, f"exp{exp}_out.txt")
with open(out_path) as out_file:
    # Iterating a text file yields its lines with the trailing newline kept,
    # which is the same list readlines() would return.
    lines = list(out_file)

In [None]:
# Hard-coded window of the output file that holds the bias table.
# NOTE(review): these offsets are tied to this specific output file; they must
# be re-derived if the experiment output changes.
BIAS_TABLE_START, BIAS_TABLE_STOP = 17588, 30364
# Number of lines dropped from the start / end of that window
# (presumably table header and footer rows -- confirm against the file).
LEADING_SKIP, TRAILING_SKIP = 4, 7

bias_lines = lines[BIAS_TABLE_START:BIAS_TABLE_STOP]
bias_cut = bias_lines[LEADING_SKIP:-TRAILING_SKIP]

In [None]:
# Extract elements of each line.
# One array per extracted column, each with one entry per row of the bias table.
n_samples = len(bias_cut)
observed = np.zeros(n_samples)
expected = np.zeros(n_samples)
bias_size = np.zeros(n_samples)
t_stats = np.zeros(n_samples)
p_vals = np.zeros(n_samples)
labeler_id = np.zeros(n_samples)
measure = np.zeros(n_samples)
race = np.zeros(n_samples)

for idx, line in enumerate(bias_cut):
    # Turn the '|', '>' and '<' characters into spaces so the row splits into
    # plain whitespace-separated tokens.
    processed = line.replace('|', ' ').replace('>', ' ').replace('<', ' ').split()
    observed[idx] = float(processed[0])
    expected[idx] = float(processed[1])
    bias_size[idx] = float(processed[4])
    t_stats[idx] = float(processed[6])
    p_vals[idx] = float(processed[8])
    labeler_id[idx] = int(processed[13])
    # Fix: write into the row's slot. The previous `measure = float(...)`
    # rebound the name to a scalar, so every row of the results frame ended up
    # with the measure of the last parsed line.
    measure[idx] = float(processed[14])
    # Encode the target race as 0 (white) / 1 (black).
    # NOTE(review): any other token leaves race[idx] at its initial 0, i.e. it
    # is indistinguishable from 'white' -- confirm no other labels occur.
    if processed[16] == 'white':
        race[idx] = 0
    elif processed[16] == 'black':
        race[idx] = 1

In [None]:
# Assemble the parsed columns into one frame (labeler ids cast to int)
parsed_columns = {
    'labeler_id': labeler_id,
    'observed': observed,
    'expected': expected,
    'bias': bias_size,
    't_statistic': t_stats,
    'p_value': p_vals,
    'measure': measure,
    'race': race,
}
results = pd.DataFrame(parsed_columns).astype({'labeler_id': int})
# Derived columns: sign-flipped bias, its magnitude, and a readable race label
# (race code 1 -> 'black', anything else -> 'white')
results = results.assign(
    bias_corrected=lambda frame: -frame['bias'],
    bias_abs=lambda frame: frame['bias'].abs(),
    target_race_name=lambda frame: np.where(frame['race'] == 1, 'black', 'white'))
# Attach each annotator's race columns (one row per labeler)
annotator_race = data[['labeler_id'] + keys.annotator_race_cols].drop_duplicates('labeler_id')
results = results.merge(right=annotator_race, how='left', on='labeler_id')
# Label the sign of the corrected bias; values that are neither below nor
# above zero fall through to 'zero'
is_negative = results['bias_corrected'] < 0
is_positive = results['bias_corrected'] > 0
results['bias_sign'] = np.select(
    [is_negative, is_positive],
    ['negative', 'positive'],
    default='zero')

In [None]:
def _bias_for_target(race_name):
    """Return (rows of `results` for one target race, their per-labeler bias).

    The second element keeps only labelers that also appear in
    `severity_diffs`, sorted by 'labeler_id', with the 'labeler_id' and
    'bias_corrected' columns.
    """
    targets = results[results['target_race_name'] == race_name]
    has_severity = targets['labeler_id'].isin(severity_diffs['labeler_id'])
    bias = targets[has_severity].sort_values('labeler_id')[['labeler_id', 'bias_corrected']]
    return targets, bias


black_targets, black_bias = _bias_for_target('black')
white_targets, white_bias = _bias_for_target('white')
# One row per labeler present for both target races
all_bias = black_bias.merge(
    right=white_bias,
    on='labeler_id',
    how='inner',
    suffixes=('_black', '_white'))
# Interaction difference: corrected bias on black targets minus white targets
black_minus_white = all_bias['bias_corrected_black'] - all_bias['bias_corrected_white']
all_bias['interaction_diffs'] = black_minus_white

In [None]:
# Pair each labeler's interaction difference with their severity difference
interaction_by_labeler = all_bias[['labeler_id', 'interaction_diffs']]
diffs = interaction_by_labeler.merge(severity_diffs, how='inner', on='labeler_id')

In [None]:
# Hexbin of severity differences (x, labelled "Annotator Lean") against
# interaction differences (y); both axes share the same range and ticks.
axis_min, axis_max = -6, 6
axis_ticks = [-6, -3, 0, 3, 6]

fig, ax = plt.subplots(1, 1, figsize=(6, 6))
img = ax.hexbin(
    diffs['severity_diffs'],
    diffs['interaction_diffs'],
    gridsize=30,
    bins='log',
    extent=(axis_min, axis_max, axis_min, axis_max),
    cmap='Greys')

# Square panel with identical limits and tick positions on both axes
ax.set_xlim([axis_min, axis_max])
ax.set_ylim([axis_min, axis_max])
ax.set_aspect('equal')
ax.set_xticks(axis_ticks)
ax.set_yticks(axis_ticks)

# Axis labels and tick label sizes
ax.set_xlabel(r'\textbf{Annotator Lean}', fontsize=20)
ax.set_ylabel(r'\textbf{Interaction Difference}', fontsize=20)
ax.tick_params(labelsize=18)

# Colorbar for the hexbin counts
cb, cax = append_colorbar_to_axis(ax, img)
cb.ax.tick_params(labelsize=15)
cb.set_label(
    mplego.labels.bold_text('Number of Annotators'),
    rotation=270,
    fontsize=15,
    labelpad=20)

plt.savefig('figureA6.pdf', bbox_inches='tight')