In [None]:
import datasets 
import matplotlib.pyplot as plt
import mpl_lego as mplego
import numpy as np
import simpledorff
import pandas as pd
import pickle

from mpl_lego.colorbar import append_colorbar_to_axis
from mpl_lego.labels import bold_text
from hate_measure.keys import items, item_labels
from hate_measure.utils import recode_responses
from pyprojroot import here
from scipy.stats import bootstrap

%matplotlib inline

In [None]:
# Apply mpl_lego's LaTeX plotting style to all figures created below.
# NOTE(review): presumably this turns on LaTeX text rendering, which the
# bold_text(...) labels and the $\alpha$ axis label rely on — confirm.
mplego.style.use_latex_style()

In [None]:
# Number of survey items; sets how many bar rows the figure draws.
n_items = len(item_labels)

In [None]:
# Fetch the Measuring Hate Speech corpus ('binary' configuration) from the
# Hugging Face hub and materialise its 'train' split as a pandas DataFrame.
# NOTE(review): `data` is not referenced by the cells visible here — confirm
# it is needed before paying for the download on every run.
dataset = datasets.load_dataset(
    'ucberkeley-dlab/measuring-hate-speech',
    'binary',
)
data = dataset['train'].to_pandas()

In [None]:
# Load the precomputed per-item Krippendorff results for the original and the
# recoded labels. Paths are resolved relative to the project root via here().
# NOTE: pickle.load can execute arbitrary code; only load trusted project files.
krippendorff_path = here('data/krippendorff_items.pkl')
with open(krippendorff_path, 'rb') as handle:
    krippendorffs = pickle.load(handle)

krippendorff_recoded_path = here('data/krippendorff_items_recoded.pkl')
with open(krippendorff_recoded_path, 'rb') as handle:
    krippendorffs_recoded = pickle.load(handle)

In [None]:
# Bar length and error bars for the original labels.
# Note that `y_mean` is the midpoint of the confidence interval, so the two
# error rows (distance down to the lower bound, distance up to the upper
# bound) are symmetric by construction.
ci_low = krippendorffs['confidence_low']
ci_high = krippendorffs['confidence_high']

y_mean = (ci_low + ci_high) / 2.
# Stack as two rows: row 0 = lower error, row 1 = upper error.
y_err = np.vstack((y_mean - ci_low, ci_high - y_mean))

In [None]:
# Bar length and error bars for the recoded labels, computed the same way as
# for the original labels: the bar is the midpoint of the confidence interval
# and the error rows are the distances to its lower and upper bounds.
ci_low_recoded = krippendorffs_recoded['confidence_low']
ci_high_recoded = krippendorffs_recoded['confidence_high']

y_mean_recoded = (ci_low_recoded + ci_high_recoded) / 2.
# Stack as two rows: row 0 = lower error, row 1 = upper error.
y_err_recoded = np.vstack(
    (y_mean_recoded - ci_low_recoded, ci_high_recoded - y_mean_recoded))

In [None]:
# Figure 3: grouped horizontal bars comparing Krippendorff's alpha per survey
# item for the original vs. recoded labels, with confidence-interval error bars.
fig, ax = plt.subplots(1, 1, figsize=(5, 10))

# Each item occupies a slot of total thickness 0.8, shared by two bars.
height = 0.8 / 2
# Centre of each item's slot on the y-axis (index 0 is the bottom row).
positions = np.arange(n_items)

# Values are reversed so the first survey item is drawn at the top of the axis.
# The (2, n_items) error arrays are reversed along the item axis only (axis=1)
# so row 0 stays the lower error and row 1 the upper error.
# Bars are offset by half a bar thickness so the pair sits side by side;
# the offset uses `height` (the previous `width` name was never defined).
ax.barh(y=positions + height / 2,
        width=np.flip(y_mean),
        height=height,
        xerr=np.flip(y_err, axis=1),
        color='gainsboro',
        edgecolor='black',
        error_kw={'capsize': 3},
        label='Original Labels')

ax.barh(y=positions - height / 2,
        width=np.flip(y_mean_recoded),
        height=height,
        xerr=np.flip(y_err_recoded, axis=1),
        color='slategray',
        edgecolor='black',
        error_kw={'capsize': 3},
        label='Recoded Labels')

# Tick positions run top-to-bottom so they pair with item_labels in order.
ax.set_yticks(np.flip(np.arange(n_items)))
ax.set_yticklabels(bold_text(item_labels), ha='right')
ax.set_xlim([0, 1])
ax.tick_params(labelsize=13)
ax.grid(axis='x')
# Keep the grid lines behind the bars.
ax.set_axisbelow(True)
ax.set_xlabel(bold_text(r"Krippendorff's $\alpha$"), fontsize=18)
ax.set_ylabel(bold_text("Survey Item"), fontsize=19)

ax.legend(loc='best', prop={'size': 14})

# Written relative to the current working directory.
plt.savefig('figure3.pdf', bbox_inches='tight')