In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
import mpl_lego as mplego
import pandas as pd

from hatespeech import keys, utils
from IPython.display import Markdown
from scipy.stats import chi2_contingency

%matplotlib inline

In [None]:
# Switch plotting to mpl_lego's LaTeX style before any figure is created.
# NOTE(review): presumably this adjusts matplotlib rcParams and needs a working
# LaTeX installation -- confirm against the mpl_lego documentation.
mplego.style.use_latex_style()

In [None]:
# Input locations (home-relative): the rater quality-check table (CSV) and
# the unfiltered ratings table (Feather).
rater_quality_path = "~/data/hatespeech/rater_quality_check.csv"
data_path = "~/data/hatespeech/unfiltered_ratings.feather"

In [None]:
# Load the ratings, drop rows with missing items, then drop rows from raters
# that do not pass the quality check. The rater-quality table is read after
# the missing-item filter and is kept in `rater_quality`.
data = utils.filter_missing_items(pd.read_feather(data_path))
rater_quality = pd.read_csv(rater_quality_path)
data = utils.filter_annotator_quality(data, rater_quality)

In [None]:
# Keep only the rows where the Black and White annotator-race indicators
# sum to exactly 1.
race_cols = ['annotator_race_black', 'annotator_race_white']
exactly_one_race = data[race_cols].sum(axis=1).eq(1)
data = data[exactly_one_race]

In [None]:
# One row per labeler_id (the first occurrence), copied so that the columns
# added later do not touch `data`.
unique_annotators = data.drop_duplicates(subset='labeler_id', keep='first').copy()

In [None]:
# Source indicator columns that are collapsed into each derived group flag.
# NOTE(review): 'extremeley' is reproduced exactly as written here; it is
# presumably the spelling of the column in the dataset -- confirm.
liberal_cols = [
    'annotator_ideology_extremeley_liberal',
    'annotator_ideology_liberal',
    'annotator_ideology_slightly_liberal',
]
non_religious_cols = ['annotator_religion_atheist', 'annotator_religion_nothing']
queer_cols = [
    'annotator_sexuality_bisexual',
    'annotator_sexuality_gay',
    'annotator_sexuality_other',
]

# A group flag is True when any of its source indicators is truthy in that row.
# Liberal
unique_annotators['annotator_liberal'] = unique_annotators[liberal_cols].any(axis=1)
# Non-religious
unique_annotators['annotator_non_religious'] = unique_annotators[non_religious_cols].any(axis=1)
# Women: copied directly from the existing gender indicator.
unique_annotators['annotator_women'] = unique_annotators['annotator_gender_women']
# Queer
unique_annotators['annotator_queer'] = unique_annotators[queer_cols].any(axis=1)

In [None]:
# Pairwise Spearman correlations between the annotator group indicators,
# computed over the one-row-per-annotator frame.
demographic_cols = [
    'annotator_race_black',
    'annotator_liberal',
    'annotator_non_religious',
    'annotator_women',
    'annotator_queer',
]
corrs = unique_annotators[demographic_cols].corr(method='spearman')

In [None]:
# Heatmap of the correlation matrix, saved as figureA3.pdf.
# Tick labels follow the column order used to build `corrs`.
group_labels = ['Black', 'Liberal', 'Non-religious', 'Women', 'Queer']
tick_positions = np.arange(len(group_labels))

fig, ax = plt.subplots(1, 1, figsize=(10, 10))
# The color scale is pinned to [0, 1]; any negative correlation is drawn with
# the color of 0.
img = ax.imshow(corrs, vmin=0, vmax=1)
cb, cax = mplego.colorbar.append_colorbar_to_axis(ax, img, spacing=0.025)

# Same labels on both axes, since the matrix is symmetric in its rows/columns.
ax.set_xticks(tick_positions)
ax.set_xticklabels(group_labels, fontsize=18)
ax.set_yticks(tick_positions)
ax.set_yticklabels(group_labels, fontsize=18)
mplego.labels.bold_axis_ticklabels(ax, which='both')

# Colorbar: quarter-step ticks over the same [0, 1] range as the image.
cb.set_ticks([0, 0.25, 0.50, 0.75, 1])
cb.ax.set_ylim([0, 1])
cb.ax.tick_params(labelsize=20)
cb.set_label(
    mplego.labels.bold_text('Spearman Correlation'),
    rotation=270,
    labelpad=25,
    fontsize=20,
)
plt.savefig('figureA3.pdf', bbox_inches='tight')