In [None]:
import matplotlib.pyplot as plt
import mpl_lego as mplego
import itertools
import krippendorff
import numpy as np
import pandas as pd
import seaborn as sns

from mpl_lego.labels import bold_text
from pyprojroot import here

from normative_evaluation_llms_everyday_dilemmas import keys, utils

In [None]:
# Apply mpl_lego's LaTeX plotting style.
# A LaTeX installation must be available on this machine for this to work.
mplego.style.use_latex_style()

In [None]:
# Load the dilemmas dataset, resolving the path relative to the project root
dataset_path = here('data/normative_evaluation_everyday_dilemmas_dataset.csv')
df = pd.read_csv(dataset_path)

In [None]:
# Self-agreement per model: Krippendorff's alpha across that model's own
# label columns (those named "<model>_label_<one character>").
self_agreements = {}

for model in keys.MODELS:
    label_columns = df.filter(regex=f'^{model}_label_.$')
    n_label_columns = label_columns.shape[1]

    if n_label_columns > 1:
        # Transpose so each label column becomes one row of the reliability
        # matrix, then score with the nominal level of measurement.
        reliability_data = utils.label_to_num(label_columns).values.T
        self_agreements[model] = krippendorff.alpha(reliability_data,
                                                    level_of_measurement='nominal')
    else:
        # Fewer than two label columns: no agreement can be computed, so
        # store a placeholder 0 (expected only for the Redditors entry).
        self_agreements[model] = 0

In [None]:
# Display the self-agreement dictionary (model key -> Krippendorff's alpha)
# so the values can be reported. Entries with at most one label column hold
# the placeholder 0 rather than a computed alpha.
self_agreements

In [None]:
# Build the symmetric model-by-model matrix of pairwise Krippendorff's alpha.
# The frame is initialised with a float fill value: alpha is a float, and
# writing floats into integer columns forces an implicit int -> float upcast,
# which recent pandas releases deprecate.
alphas = pd.DataFrame(data=0.0,
                      index=keys.MODELS,
                      columns=keys.MODELS)

for model1, model2 in itertools.combinations(keys.MODELS, 2):
    # Label column registered for each model in keys.models_to_labels
    col1 = keys.models_to_labels[model1]
    col2 = keys.models_to_labels[model2]
    # Transpose so the two label columns become the two rows of the
    # reliability matrix. The level of measurement is passed explicitly as
    # 'nominal' so the pairwise values use the same metric as the
    # self-agreement values; omitting it falls back to the library default.
    alpha = krippendorff.alpha(utils.label_to_num(df[[col1, col2]]).values.T,
                               level_of_measurement='nominal')
    # Agreement is symmetric, so mirror the value across the diagonal
    alphas.loc[model1, model2] = alpha
    alphas.loc[model2, model1] = alpha

In [None]:
# Fill the diagonal of the agreement matrix with each model's self-agreement
for model in keys.MODELS:
    diagonal_value = self_agreements[model]
    alphas.loc[model, model] = diagonal_value

In [None]:
# The 'reddit' entry only carries a placeholder self-agreement, so blank out
# its diagonal cell. np.nan is the native missing value for a float frame and
# keeps the matrix numeric for plotting.
alphas.loc['reddit', 'reddit'] = np.nan

In [None]:
# Styling constants for the agreement heatmap
CBAR_LABEL_PAD = 15
CBAR_TICK_FONTSIZE = 9
XAXIS_TICK_ROTATION = 30
XAXIS_TICK_FONTSIZE = 12
YAXIS_TICK_FONTSIZE = 12

fig, ax = plt.subplots(1, 1, figsize=(6, 5))

# Draw the annotated agreement matrix on a diverging colour scale that spans
# the full [-1, 1] range.
heatmap_options = dict(
    cmap='RdGy',
    vmin=-1,
    vmax=1,
    annot=True,
    cbar_kws={'label': "Krippendorff's Alpha"},
    ax=ax,
)
sns.heatmap(alphas, **heatmap_options)

# Colorbar: fetch it from the heatmap's mesh, then restyle its label and ticks
cbar = ax.collections[0].colorbar
cbar.set_label(
    bold_text("Krippendorff's Alpha"),
    rotation=270,
    labelpad=CBAR_LABEL_PAD,
)
cbar.ax.tick_params(labelsize=CBAR_TICK_FONTSIZE)

# Tick labels: bold display names on both axes (x rotated, y horizontal)
xtick_options = dict(rotation=XAXIS_TICK_ROTATION,
                     fontsize=XAXIS_TICK_FONTSIZE,
                     ha='right')
ytick_options = dict(fontsize=YAXIS_TICK_FONTSIZE,
                     ha='right',
                     rotation=0)
ax.set_xticklabels(bold_text(keys.MODEL_LABELS_PLOT), **xtick_options)
ax.set_yticklabels(bold_text(keys.MODEL_LABELS_PLOT), **ytick_options)

# Write the figure to the current working directory with a tight bounding box
plt.savefig('fig2_agreement.pdf', bbox_inches='tight')