# Merge Assessor Annotations

Assessors annotate at the span level, so we map each annotated span to the facts it overlaps.

We produce a single JSON file (`final-annotated-facts-results.json`) containing, for each request, the annotated spans together with the facts they cover.

In [1]:
import collections
import glob 
import json
import pandas as pd

In [2]:
# Load the per-request summary metadata. Each entry's "meta" record is read
# later for its "spans_facts" and "spans_list" fields.
# The encoding is pinned because JSON text is UTF-8, while open() would
# otherwise fall back to the platform's locale encoding.
with open(
    "collapsed-event-days-summaries.k=512.meta.json", "r", encoding="utf-8"
) as in_file:
    summary_metadata = json.load(in_file)

In [3]:
# For every request, build a lookup from each position index covered by a
# fact span to the tuple (fact id, span start, span end). Spans are treated
# as half-open [start, end); if two spans cover the same position, the one
# listed later wins.
req_fact_span_map = {}

for request_id, summary in summary_metadata.items():
    meta = summary["meta"]
    fact_spans = zip(meta["spans_facts"], meta["spans_list"])

    req_fact_span_map[request_id] = {
        pos: (fact_id, lo, hi)
        for fact_id, (lo, hi) in fact_spans
        for pos in range(lo, hi)
    }


In [4]:
# Load the merged assessor annotations, keyed by request id. Each request
# record is expected to carry a "spans" list (read by the merge step below).
# The encoding is pinned because JSON text is UTF-8, while open() would
# otherwise fall back to the platform's locale encoding.
with open("merged-annotations.json", "r", encoding="utf-8") as in_file:
    annotation_data = json.load(in_file)

In [5]:
# Raw annotator account id -> assessor alias written to the output in place
# of the account id.
annotators = dict([
    ("redacted_user_id_01", "assr_01"),
    ("redacted_user_id_02", "assr_02"),
    ("redacted_user_id_03", "assr_03"),
    ("redacted_user_id_04", "assr_04"),
    ("redacted_user_id_05", "assr_05"),
    ("redacted_user_id_06", "assr_06"),
])

In [6]:
# Attach to every annotated span the facts it overlaps, replace the raw
# annotator id with its alias, and build a flat per-request "fact_list".
#
# This cell mutates annotation_data in place. Aliases that were already
# written by a previous run are left untouched, so re-running the cell does
# not collapse every uid to None.
assigned_aliases = set(annotators.values())

for req_id, req_data in annotation_data.items():
    print(req_id)

    # position index -> (fact id, start, end) for this request
    this_fact_span_map = req_fact_span_map[req_id]

    for span in req_data["spans"]:
        # Collect the fact under every position of the span; positions not
        # covered by any fact contribute None. dict.fromkeys de-duplicates
        # while keeping first-seen order, so the output is deterministic
        # (a plain set would yield a hash-dependent order).
        span["facts"] = list(dict.fromkeys(
            this_fact_span_map.get(pos, (None,))[0]
            for pos in range(span["start"], span["end"])
        ))

        # Unknown annotator ids still map to None, as before.
        if span["uid"] not in assigned_aliases:
            span["uid"] = annotators.get(span["uid"])

    # One (fact, annotator alias, label) triple per fact of every span,
    # with spans visited in order of their start position.
    this_fact_list = [
        (f, s["uid"], s["label"])
        for s in sorted(req_data["spans"], key=lambda d: d["start"])
        for f in s["facts"]
    ]
    req_data["fact_list"] = this_fact_list

CrisisFACTS-003-r5
CrisisFACTS-003-r6
CrisisFACTS-003-r7
CrisisFACTS-003-r8
CrisisFACTS-003-r9
CrisisFACTS-003-r10
CrisisFACTS-003-r11
CrisisFACTS-004-r8
CrisisFACTS-004-r9
CrisisFACTS-004-r10
CrisisFACTS-004-r11
CrisisFACTS-004-r12
CrisisFACTS-004-r13
CrisisFACTS-004-r14
CrisisFACTS-004-r15
CrisisFACTS-004-r16
CrisisFACTS-004-r17
CrisisFACTS-004-r18
CrisisFACTS-004-r19
CrisisFACTS-004-r20
CrisisFACTS-004-r21
CrisisFACTS-004-r22
CrisisFACTS-006-r4
CrisisFACTS-006-r5
CrisisFACTS-006-r6
CrisisFACTS-006-r7
CrisisFACTS-007-r13
CrisisFACTS-007-r14
CrisisFACTS-013-r0
CrisisFACTS-013-r1
CrisisFACTS-013-r2
CrisisFACTS-013-r3
CrisisFACTS-013-r4
CrisisFACTS-013-r5
CrisisFACTS-014-r0
CrisisFACTS-014-r1
CrisisFACTS-014-r2
CrisisFACTS-014-r3
CrisisFACTS-014-r4
CrisisFACTS-014-r5
CrisisFACTS-014-r6
CrisisFACTS-001-r3
CrisisFACTS-001-r4
CrisisFACTS-001-r5
CrisisFACTS-001-r6
CrisisFACTS-001-r7
CrisisFACTS-001-r8
CrisisFACTS-001-r9
CrisisFACTS-001-r10
CrisisFACTS-001-r11
CrisisFACTS-011-r0
CrisisFACTS-

In [8]:
# Persist the enriched annotations (spans with their "facts" and the
# per-request "fact_list") as JSON. The encoding is pinned so the file is
# written the same way regardless of the platform's locale.
with open("final-annotated-facts-results.json", "w", encoding="utf-8") as out_file:
    json.dump(annotation_data, out_file)