In [1]:
from pathlib import Path

while Path.cwd().name != 'ambient':
    %cd ..

/mmfs1/gscratch/xlab/alisaliu/ambient/notebooks
/mmfs1/gscratch/xlab/alisaliu/ambient


In [4]:
import pandas as pd
from mturk.annotation_utils import read_batch, time_format, clean_validation_batch
from collections import defaultdict
from utils.constants import id2label, NLI_LABELS
from statsmodels.stats.inter_rater import fleiss_kappa, aggregate_raters

In [5]:
batch_df = read_batch(369686)

In [6]:
'''
Zhaofeng: A2AX828Q4WXK3Z
Julian: A3AA2VKV87R6PG
Alane: A14KPHOYAQCFWH
Alisa: A1KBELVHWNE4D5
'''

batch_df.worker_id.value_counts()

A14KPHOYAQCFWH    50
A3AA2VKV87R6PG    50
A1KBELVHWNE4D5    50
A2AX828Q4WXK3Z    50
Name: worker_id, dtype: int64

In [7]:
results = []
for i, example_df in batch_df.groupby('id'):
    dummy_row = example_df.iloc[0]
    premise, hypothesis = dummy_row['premise'], dummy_row['hypothesis']
    annotations = example_df.q0_gold.tolist()
    results.append({
        'premise': premise,
        'hypothesis': hypothesis,
        'annotations': annotations
    })

In [8]:
pd.DataFrame(results).to_json('annotation/validation/batches/batch_369686/annotations.jsonl', orient='records', lines=True)

In [9]:
processed_examples = []
for i, example_df in batch_df.groupby('id'):
    dummy_row = example_df.iloc[0]
    annotations = example_df['q0_gold'].tolist()
    rewrites = defaultdict(list)
    for _, row in example_df.iterrows():
        for i in range(1, 5):
            if f'q{i}_gold' in row and row[f'q{i}_gold'] != 'nan':
                label = row[f'q{i}_gold']
                rewrites[label].append({
                    'premise': row[f'premise{i}'],
                    'hypothesis': row[f'hypothesis{i}']
                })
    processed_examples.append({
        'premise': dummy_row['premise'],
        'hypothesis': dummy_row['hypothesis'],
        'annotations': annotations,
        'rewrites': rewrites,
        'comments': example_df['feedback'].tolist()
    })

In [68]:
pd.DataFrame(processed_examples).to_json('annotation/validation/batches/batch_369686/annotations.json', indent=2, orient='index')

In [10]:
processed_df = pd.DataFrame(processed_examples)

In [12]:
def get_biggest_annotation(annotations):
    """
    get annotation with the most labels
    """
    def get_length(annotation):
        return len(annotation.split('|'))

    biggest_annotation = ''
    for annot in annotations:
        if get_length(annot) > get_length(biggest_annotation):
            biggest_annotation = annot
    return biggest_annotation

In [13]:
exact_match_ct = 0
union_ct = 0
tot = 0
discards = 0
discards_individual = 0
annotations = []

for i, row in processed_df.iterrows():
    if 'discard' in row['annotations']:
        discards_individual += row['annotations'].count('discard')
        discards += 1
        continue
    tot += 1
    annotations.append(row['annotations'])
    unique_annotations = set(row['annotations']).difference({'discard'})
    labels_union = set('|'.join(unique_annotations).split('|'))
    biggest_label = get_biggest_annotation(unique_annotations)
    if len(unique_annotations) == 1:
        exact_match_ct += 1
    if labels_union.issubset(set(biggest_label.split('|'))):
        union_ct += 1

In [14]:
union_ct/tot

0.6486486486486487

In [15]:
exact_match_ct/tot

0.40540540540540543

In [16]:
arr = aggregate_raters(annotations)[0]
fleiss_kappa(arr)

0.419607843137255

In [17]:
binary_annotations = {}
for label in NLI_LABELS:
    label_annotations = []
    for ex_annotations in annotations:
        ex_annotation = []
        for annotation in ex_annotations:
            if label in annotation:
                ex_annotation.append(1)
            else:
                ex_annotation.append(0)
        label_annotations.append(ex_annotation)
    binary_annotations[label] = label_annotations

In [18]:
for label in NLI_LABELS:
    arr = aggregate_raters(binary_annotations[label])[0]
    print(f'{label}: {fleiss_kappa(arr)}')

contradiction: 0.6170312867560573
entailment: 0.645402815214136
neutral: 0.44206349206349227
