In [None]:
import pandas as pd
from analysis import *

In [None]:
# Annotation table of the Surge evaluation; passed to every get_example() call below.
surge_annotations = data.surge_evaluation.annotation_dataframe()
# One seed per entry of `behavior`; zip() stops at the shorter of the two sequences.
seeds = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
for s, b in zip(seeds, behavior):
    print(
        get_example(
            data.surge_evaluation, category.behavior, b,
            context=0, mark=1, seed=s, annotations=surge_annotations,
        )
    )

### Pilot Filtering

In [None]:
from copy import deepcopy
def filter_pilot(pilot, qualifications):
    """Return a deep copy of ``pilot`` restricted to work from qualified workers.

    A work unit is kept when its task name contains neither ``'likert'`` nor
    ``'comparative'`` and ``qualifications[worker_id][task]`` is truthy.

    Args:
        pilot: object exposing ``work_units`` (mapping id -> unit with ``task``
            and ``worker_id``) and ``dialogues`` (mapping id -> dialogue whose
            ``turns`` each carry a ``behavior_annotations`` mapping of
            label -> list of annotations with ``work_unit_id``).
        qualifications: nested mapping ``worker_id -> task -> bool``. With a
            plain dict, a missing worker or task raises ``KeyError``.

    Returns:
        The copy, with every behavior-annotation list reduced to annotations
        from kept work units and ``work_units`` reduced to the kept units.
        ``pilot`` itself is not modified; the kept unit and annotation objects
        in the result are the ones from ``pilot``, not copies.
    """
    filtered = deepcopy(pilot)

    # Ids of the work units that survive the task and qualification checks.
    kept_ids = {
        unit_id
        for unit_id, unit in pilot.work_units.items()
        if 'likert' not in unit.task
        and 'comparative' not in unit.task
        and qualifications[unit.worker_id][unit.task]
    }

    # Overwrite each annotation list in the copy with its kept subset.
    for dialogue_id, dialogue in pilot.dialogues.items():
        for position, turn in enumerate(dialogue.turns):
            for label, annotations in turn.behavior_annotations.items():
                copied_turn = filtered.dialogues[dialogue_id].turns[position]
                copied_turn.behavior_annotations[label] = [
                    annotation
                    for annotation in annotations
                    if annotation.work_unit_id in kept_ids
                ]

    filtered.work_units = {
        unit_id: unit
        for unit_id, unit in pilot.work_units.items()
        if unit_id in kept_ids
    }
    return filtered

In [None]:
def get_task(dialogue_id):
    """Derive the task name from a dialogue identifier.

    Args:
        dialogue_id: identifier string.

    Returns:
        ``'personal_information'`` if that substring occurs in the id;
        otherwise ``'interpretability'`` if the id contains ``'grammar'``;
        otherwise the text before the first underscore, or the whole id when
        it contains no underscore.
    """
    if 'personal_information' in dialogue_id:
        return 'personal_information'
    if 'grammar' in dialogue_id:
        return 'interpretability'
    # partition() yields the whole string when '_' is absent. The previous
    # slice up to find('_') used -1 in that case and dropped the last character.
    return dialogue_id.partition('_')[0]

In [None]:
# Keep only the PhD+ workers' annotations in pilots 0 and 1.
phd_plus = ["liyan", "zihao", "jinho", "han", "greg", "sichang"]
under = ["sophy", "jessica", "samir", "angela", "chen", "dan"]

# Pilot 0: qualify `phd_plus` and disqualify `under` for every task in the pilot.
tasks_pilot0 = {unit.task for unit in data.annotation_pilots[0].work_units.values()}
qualifications_pilot0 = {
    **{worker: dict.fromkeys(tasks_pilot0, True) for worker in phd_plus},
    **{worker: dict.fromkeys(tasks_pilot0, False) for worker in under},
}
grad_filtered_pilot0 = filter_pilot(data.annotation_pilots[0], qualifications_pilot0)

# Pilot 1: same construction over its own task set.
tasks_pilot1 = {unit.task for unit in data.annotation_pilots[1].work_units.values()}
qualifications_pilot1 = {
    **{worker: dict.fromkeys(tasks_pilot1, True) for worker in phd_plus},
    **{worker: dict.fromkeys(tasks_pilot1, False) for worker in under},
}
grad_filtered_pilot1 = filter_pilot(data.annotation_pilots[1], qualifications_pilot1)

In [None]:
# Graduate view: filtered pilots 0 and 1, followed by pilots 2+ as loaded.
grad_pilots = [
    grad_filtered_pilot0,
    grad_filtered_pilot1,
    *data.annotation_pilots[2:],
]

In [None]:
# Inverse of the graduate filter: qualify `under`, disqualify `phd_plus`.
# Rebinds qualifications_pilot0/1 and reuses tasks_pilot0/1 from the earlier cell.
qualifications_pilot0 = {
    **{worker: dict.fromkeys(tasks_pilot0, True) for worker in under},
    **{worker: dict.fromkeys(tasks_pilot0, False) for worker in phd_plus},
}
undergrad_filtered_pilot0 = filter_pilot(data.annotation_pilots[0], qualifications_pilot0)

qualifications_pilot1 = {
    **{worker: dict.fromkeys(tasks_pilot1, True) for worker in under},
    **{worker: dict.fromkeys(tasks_pilot1, False) for worker in phd_plus},
}
undergrad_filtered_pilot1 = filter_pilot(data.annotation_pilots[1], qualifications_pilot1)

In [None]:
# Undergraduate view: filtered pilots 0 and 1, followed by pilots 2+ as loaded.
undergrad_pilots = [
    undergrad_filtered_pilot0,
    undergrad_filtered_pilot1,
    *data.annotation_pilots[2:],
]

# 3 Behavior Evaluation Procedure

### Behavior Examples

In [None]:
# Same example printout as the first code cell; relies on `surge_annotations`
# having been defined there.
# One seed per entry of `behavior`; zip() stops at the shorter of the two sequences.
seeds = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
for s, b in zip(seeds, behavior):
    print(
        get_example(
            data.surge_evaluation, category.behavior, b,
            context=0, mark=1, seed=s, annotations=surge_annotations,
        )
    )

### Behavior Annotation Pilot Agreements

In [None]:
@to_file
def agreement_summaries(evaluations):
    """Tabulate macro-averaged agreement statistics, one row per evaluation.

    For each evaluation, its annotation dataframe is passed to
    ``agreement_dataframe(..., ci=False)``; rows with missing values are
    dropped and the remaining rows are averaged column-wise.

    Args:
        evaluations: iterable of objects exposing ``annotation_dataframe()``.

    Returns:
        DataFrame with one row per evaluation, in input order, whose two
        columns are labelled ``stat.kripp_alpha`` and ``stat.n``.
        NOTE(review): ``to_file`` is defined elsewhere and may wrap or persist
        this value -- confirm in ``analysis``.
    """
    macro_rows = []
    for evaluation in evaluations:
        annotations = evaluation.annotation_dataframe()
        agreement = agreement_dataframe(annotations, ci=False)
        # Column-wise mean over the rows that have no missing values.
        macro_rows.append(agreement.dropna().mean())
    sum_df = pd.concat(macro_rows, axis=1).transpose()
    # Relabel by assignment: set_axis(..., inplace=True) is rejected by
    # pandas >= 2.0, whereas assigning .columns works on every version.
    sum_df.columns = [stat.kripp_alpha, stat.n]
    return sum_df

In [None]:
# undergraduates
# Pilots 0-1 filtered to the `under` workers, plus pilots 2+ as loaded.
# NOTE(review): `distributions` comes from the `analysis` star import; confirm what it reports.
distributions(undergrad_pilots)

In [None]:
# Macro-averaged agreement per pilot for the undergraduate view.
agreement_summaries(undergrad_pilots)

In [None]:
# graduates
# Pilots 0-1 filtered to the `phd_plus` workers, plus pilots 2+ as loaded.
# NOTE(review): `distributions` comes from the `analysis` star import; confirm what it reports.
distributions(grad_pilots)

In [None]:
# Macro-averaged agreement per pilot for the graduate view.
agreement_summaries(grad_pilots)

In [None]:
# all
# Every pilot as loaded, with no worker filtering applied.
distributions(data.annotation_pilots)

In [None]:
# TODO: include ALL pilot annotations in the agreement calculation (not just double annotation).
# Macro-averaged agreement per pilot over the unfiltered pilots.
agreement_summaries(data.annotation_pilots)

### Behavior Annotation Pilot Screening

In [None]:
# Apply `screening_rates_by_label` across the onboarding data at indices 2, 3 and 4.
# NOTE(review): `across_evaluations` is defined in `analysis`; presumably `load`
# names a stored results location -- confirm before relying on fresh computation.
across_evaluations(
    data.annotation_pilots_onboarding[2:5],
    screening_rates_by_label,
    load='results/annotation_pilot_screening'
)