In [None]:
import datasets 
import matplotlib.pyplot as plt
import mpl_lego as mplego
import numpy as np

from mpl_lego.colorbar import append_colorbar_to_axis
from mpl_lego.labels import bold_text
from hate_measure.keys import items, item_labels

%matplotlib inline

In [None]:
# Apply mpl_lego's LaTeX plotting style to all figures created below.
# NOTE(review): presumably this turns on LaTeX text rendering, which the
# bold_text() labels used in the figure cell would rely on — confirm against
# the mpl_lego documentation.
mplego.style.use_latex_style()

In [None]:
# Load the 'binary' configuration of the Measuring Hate Speech dataset through
# the `datasets` library, then convert its 'train' split to pandas for the
# groupby/correlation analysis below.
dataset = datasets.load_dataset(
    'ucberkeley-dlab/measuring-hate-speech',
    'binary',
)
data = dataset['train'].to_pandas()

In [None]:
# Average every item column over all rows that share a comment_id, giving one
# row per comment and one column per item. (.mean() already returns a new
# frame, so no extra copy is needed.)
avg_items = data.groupby('comment_id')[items].mean()

# Rank-based (Spearman) correlation between every pair of per-comment averages.
item_corr = avg_items.corr(method='spearman')

# Relabel the second item for display. Work on a notebook-local copy so the
# list object owned by hate_measure.keys is not mutated in place, which would
# silently change the labels for every other importer in this kernel.
item_labels = list(item_labels)
item_labels[1] = '(Dis)respect'
n_items = len(item_labels)

In [None]:
# Figure 1
#   (a) horizontal violin plots of each item's per-comment average, scaled by
#       that item's maximum;
#   (b) lower-triangular heatmap of the Spearman correlations between items.
fig, axes = plt.subplots(1, 2, figsize=(14, 5), gridspec_kw={'width_ratios': [1, 0.7]})
plt.subplots_adjust(wspace=0.4)
dist_ax, corr_ax = axes

# ---- Panel (a): distributions ------------------------------------------------
# Flipped positions put the first item at the top of the axis.
item_positions = np.flip(np.arange(n_items))

violins = dist_ax.violinplot(
    # Divide each column by its own maximum so that its largest value is 1.
    dataset=[avg_items[col] / np.max(avg_items[col]) for col in avg_items.columns],
    positions=item_positions,
    vert=False,
    bw_method=0.35,
    showmedians=True,
    widths=0.65)

for pc in violins['bodies']:
    pc.set_facecolor('gray')
    pc.set_edgecolor('black')
    pc.set_linewidth(1.5)

# Median marker in red; range bar and end caps in black.
for part, color, width in (
        ('cmedians', 'red', 2),
        ('cbars', 'black', 1),
        ('cmins', 'black', 1.5),
        ('cmaxes', 'black', 1.5)):
    violins[part].set_edgecolor(color)
    violins[part].set_linewidth(width)

dist_ax.set_xlim([-0.02, 1.02])
dist_ax.set_xticks([0, 0.25, 0.50, 0.75, 1.0])
dist_ax.set_yticks(item_positions)
dist_ax.set_yticklabels(bold_text(item_labels), ha='right')
dist_ax.tick_params(labelsize=13)
dist_ax.set_xlabel(bold_text('Average Normalized Score'), fontsize=16)

# ---- Panel (b): correlations -------------------------------------------------
# Hide the diagonal and everything above it with an explicit boolean mask.
# Passing the correlation values themselves as the mask (np.triu(item_corr))
# relies on float -> bool coercion and would leave any upper-triangle
# correlation that is exactly 0 unmasked.
corr_values = item_corr.to_numpy()
upper_mask = np.triu(np.ones_like(corr_values, dtype=bool), k=0)
masked = np.ma.array(corr_values, mask=upper_mask)

# NOTE(review): interpolation=None defers to matplotlib's rcParams default;
# the string 'none' is what disables interpolation. Confirm which is intended.
img = corr_ax.imshow(masked, vmin=0, vmax=1, interpolation=None, cmap='Greys')
cb, cax = append_colorbar_to_axis(corr_ax, img)
cax.tick_params(labelsize=13)
cb.set_ticks([0, 0.25, 0.5, 0.75, 1.0])
cb.set_label(bold_text('Spearman Correlation'), fontsize=15, rotation=270, labelpad=20)

# With the diagonal masked, the first row and the last column are empty, so
# show only rows 1..n_items-1 and columns 0..n_items-2. Limits are derived
# from n_items rather than hard-coded (for 10 items: x -> 8.5, y -> 9.5).
corr_ax.set_xlim([-0.5, n_items - 1.5])
corr_ax.set_ylim([n_items - 0.5, 0.5])  # descending limits keep row 1 at the top

corr_ax.set_yticks(1 + np.arange(n_items - 1))
corr_ax.set_yticklabels(bold_text(item_labels[1:]), ha='right')
corr_ax.set_xticks(np.arange(n_items - 1))
corr_ax.set_xticklabels(bold_text(item_labels[:-1]), ha='right', rotation=30)
corr_ax.tick_params(labelsize=13)

for spine in corr_ax.spines.values():
    spine.set_visible(False)

# ---- Panel labels and output ---------------------------------------------------
mplego.labels.apply_subplot_labels(axes, bold=True, x=-0.05, y=1.07, fontsize=22)
plt.savefig('figure1.pdf', bbox_inches='tight')
plt.show()