In [128]:
import csv
import json
# Set the csv parser's maximum field size to 1131072; the call returns the
# previous limit, which is the value echoed as this cell's output.
# NOTE(review): presumably needed because the JSON columns read by
# load_annotation exceed the csv module's default field limit -- confirm.
csv.field_size_limit(1131072)

1131072

In [129]:
def load_annotation(path):
    """
    Load annotations from a CSV file and group them by document.

    path: Path to the annotation CSV file. Each row is read through
        csv.DictReader and is expected to carry two JSON-encoded columns,
        'Input.json_data' and 'Answer.answer_spans'.

    return: dictionary of annotation {}
        key: The first ten tokens of the first sentence, joined by spaces
        value: list of decoded 'Answer.answer_spans' values, one per row
            that shares the key, in file order

    Rows whose 'Input.json_data' cell is missing or is not valid JSON are
    skipped. Errors in the rest of the row (no 'sentences' entry, bad
    'Answer.answer_spans') are not suppressed and propagate to the caller.
    """
    output = {}
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires for files passed to a reader.
    with open(path, 'r', newline='') as f:
        annotation_reader = csv.DictReader(f)
        for instance in annotation_reader:
            try:
                json_data = json.loads(instance['Input.json_data'])
            except (KeyError, TypeError, ValueError):
                # KeyError: the column is absent; TypeError: a short row left
                # the cell as None; ValueError: malformed JSON
                # (json.JSONDecodeError is a ValueError subclass).
                # Deliberately narrower than a bare except so that
                # KeyboardInterrupt and unrelated bugs are not swallowed.
                continue
            key = " ".join(json_data['sentences'][0][:10])
            anno = json.loads(instance['Answer.answer_spans'])
            output.setdefault(key, []).append(anno)
    return output

In [181]:
def _valid_mention_spans(queries):
    """Return the set of (sentenceIndex, startToken, endToken) for every query not flagged 'notMention'."""
    spans = set()
    for query in queries:
        if query['notMention']:
            continue
        span = query['querySpan']
        spans.add((span['sentenceIndex'], span['startToken'], span['endToken']))
    return spans


def mention_inter_agreement(annotation, reference):
    """
    Calculate the inter-agreement of mention between an annotation and the reference

    annotation: iterable of query dicts from one annotator. Each has a
        'notMention' flag and, when the flag is falsy, a 'querySpan' dict
        with 'sentenceIndex', 'startToken' and 'endToken'.
    reference: iterable of query dicts in the same format, used as the
        denominator of the score.

    return: accuracy of mention, i.e. the fraction of distinct valid
        reference spans that also appear (exact match on all three span
        fields) among the annotation's valid spans. If the reference has no
        valid spans the ratio is undefined; 1.0 is returned when the
        annotation has none either (full agreement) and 0.0 otherwise.
    """
    # collect valid mentions into a set
    anno_mentions = _valid_mention_spans(annotation)
    refer_mentions = _valid_mention_spans(reference)

    if not refer_mentions:
        # Avoid ZeroDivisionError on an empty reference.
        return 0.0 if anno_mentions else 1.0

    inter = anno_mentions & refer_mentions
    return len(inter) / len(refer_mentions)

In [158]:
# Load the two annotation sets. Each is a dict that maps a document key (the
# first ten tokens of its first sentence) to the list of annotations
# collected for that document.
mturk = load_annotation('data/mturk.csv')
turkle = load_annotation('data/turkle.csv')

In [184]:
# Score every MTurk annotation against the first Turkle annotation of the
# same document; documents that have no Turkle annotation are skipped.
for x in mturk:
    if x in turkle:
        reference = turkle[x][0]
        for annotation in mturk[x]:
            accuracy = mention_inter_agreement(annotation, reference)
            print(accuracy)

0.853448275862069
0.6637931034482759
0.98989898989899
1.0
0.8888888888888888
