In [None]:
import matplotlib.pyplot as plt
import numpy as np
from analysis import *

In [None]:
# Pull the Surge evaluation annotations into notebook-level variables; the
# comparative annotations come from their own accessor.
# NOTE(review): `data` is not defined in this notebook; presumably it is
# provided by `from analysis import *` -- confirm.
surge_annotations = data.surge_evaluation.annotation_dataframe()
surge_annotations_comparative = data.surge_evaluation.comparative_annotation_dataframe()

# Bare last expression so the notebook renders it as this cell's output.
surge_annotations

# 6 Evaluation


### Worker Group Completed Work


In [None]:
# Annotation counts for the Surge evaluation; the return value is rendered as
# the cell output.
data.surge_evaluation.annotation_counts()

In [None]:
# Annotation counts for the student evaluation; the return value is rendered
# as the cell output.
data.student_evaluation.annotation_counts()

In [None]:
# Annotation counts for the MTurk evaluation; the return value is rendered as
# the cell output.
data.mturk_evaluation.annotation_counts()

### Worker Group Screening


In [None]:
# Apply `screening_rates_by_label` across the onboarding rounds listed below.
# The list order is passed through unchanged; note that the two pilot rounds
# ([-2] and [-1]) are deliberately not adjacent.
# NOTE(review): `reload` presumably names a cached-results location handled by
# `across_evaluations` -- confirm in the analysis module.
screening = across_evaluations(
    [
        data.annotation_pilots_onboarding[-2],
        data.student_onboarding,
        data.mturk_onboarding,
        data.annotation_pilots_onboarding[-1],
        data.surge_onboarding,
    ],
    screening_rates_by_label,
    reload='results/evaluation_screening',
)

### Agreements


In [None]:
# Step 1: agreement statistics for the Surge annotations.
# NOTE(review): `reload` presumably points at a cached result -- confirm.
agreements = agreement_dataframe(surge_annotations, reload='results/surge_agreements')

# Step 2: format the table for reporting. Rows are ordered by category, then by
# alpha within each category; `n` is cast to int.
# NOTE(review): `to_csv` presumably also writes the formatted table to that
# path -- confirm in `prettify`.
agreements = prettify(
    agreements,
    float_prec=3,
    sort_by=["category", "Krippendorff's alpha"],
    col_types={"n": int},
    to_csv='results/paper/surge_agreements',
    index=False,
)

# Bare last expression so the notebook renders the formatted table.
agreements

In [None]:
# Build the plot
# NOTE: rcParams is matplotlib's global configuration, so this default size
# also applies to any figure created after this cell runs.
plt.rcParams["figure.figsize"] = (10,5)

# Explicit figure/axes pair; the remainder of this cell draws on `ax`.
fig, ax = plt.subplots()

def plot_by_category(ax, df, category, color, xaxis_start):
    """Plot one category's alpha values with confidence-interval error bars.

    Rows of ``df`` whose ``"category"`` column equals ``category`` are drawn
    as markers at consecutive integer x positions starting at ``xaxis_start``.

    Args:
        ax: Object exposing an ``errorbar`` method (a matplotlib Axes).
        df: Frame with ``"category"``, ``"Krippendorff's alpha"``,
            ``"CI low"`` and ``"CI high"`` columns.
        category: Value of the ``"category"`` column to select.
        color: Colour passed through to ``errorbar``.
        xaxis_start: x position of the first selected row.

    Returns:
        The first unused x position, i.e. ``xaxis_start`` plus the number of
        rows selected, so calls can be chained left to right.
    """
    rows = df[df["category"] == category]
    alpha = rows["Krippendorff's alpha"]

    # errorbar expects distances from the point, not absolute bounds.
    err_below = alpha - rows["CI low"]
    err_above = rows["CI high"] - alpha

    xaxis_end = xaxis_start + len(rows)
    positions = np.arange(xaxis_start, xaxis_end)
    ax.errorbar(
        positions,
        alpha,
        yerr=[err_below, err_above],
        fmt='o',
        elinewidth=1,
        color=color,
    )
    return xaxis_end

# Colour assigned to each label category (used for markers and tick labels).
likert_turn_color = "blue"
likert_dialogue_color = "red"
comparative_color = "green"
behavior_color = "orange"

# Drop the last four columns of the formatted table and expose the index as
# ordinary columns.
# NOTE(review): the slice assumes the trailing four columns are not needed for
# this plot -- confirm against the columns produced by agreement_dataframe.
krip_agreements = agreements.iloc[:, :-4]
krip_agreements = krip_agreements.reset_index()

# Draw the categories left to right; each call returns the x position at which
# the next category starts.
likert_dialogue_start = plot_by_category(ax, krip_agreements, "likert turn", likert_turn_color, 0)
comparative_start = plot_by_category(ax, krip_agreements, "likert dialogue", likert_dialogue_color, likert_dialogue_start)
behavior_start = plot_by_category(ax, krip_agreements, "comparative", comparative_color, comparative_start)
misc_start = plot_by_category(ax, krip_agreements, "behavior", behavior_color, behavior_start)

# Categories in the same left-to-right order used by the calls above.
plotted_categories = ["likert turn", "likert dialogue", "comparative", "behavior"]

# (end position, colour) per category. A list of pairs rather than a dict keyed
# by position: when a category has no rows, two boundaries coincide and a dict
# would silently overwrite the earlier category's colour.
category_range = [
    (likert_dialogue_start, likert_turn_color),
    (comparative_start, likert_dialogue_color),
    (behavior_start, comparative_color),
    (misc_start, behavior_color),
]
xaxis_colors = {}
prev_idx = 0
for idx, color in category_range:
    for i in range(prev_idx, idx):
        xaxis_colors[i] = color
    prev_idx = idx

# Tick labels are collected in plotting order (same boolean mask and row order
# as plot_by_category), so each label sits under its own point even if the
# dataframe rows are ordered differently from the plotted categories.
tick_labels = [
    label
    for category in plotted_categories
    for label in krip_agreements.loc[krip_agreements["category"] == category, "label"]
]

ax.set_ylabel("Krippendorff's alpha")
# Only positions that actually received a point get a tick; rows belonging to
# any other category are not plotted and therefore have no tick or colour.
xpos = np.arange(misc_start)
ax.set_xlabel("Evaluation Label")
ax.set_xticks(xpos)
ax.set_xticklabels(tick_labels, rotation=90)
for tickloc, ticklabel in zip(xpos, ax.get_xticklabels()):
    ticklabel.set_color(xaxis_colors[int(tickloc)])
ax.set_title('Interannotator Agreement')
ax.yaxis.grid(True)

# Tighten the layout and render the figure (nothing is written to disk here).
plt.tight_layout()
plt.show()