# Notebook for linking question annotations with answer annotations

Assumes that all the annotations were already created by `Public - Document Annotations`

In [304]:
import sys
import json
import pandas as pd
import os

# Base directory that every file path in this notebook is built from.
# NOTE: while this is left as '', '{}/...'.format(DIR) yields paths that start
# with '/', i.e. at the filesystem root -- set it before running the notebook.
DIR = ''
# Data folder directly below DIR.
DATA_DIR = DIR + '/data'

In [305]:
# Load the annotations that the rest of the notebook links together.
# The encoding is passed explicitly: without it open() falls back to the
# platform's locale encoding, whereas JSON files are normally UTF-8.
with open('{}/auto_PAWLS_SPUI_annotations.json'.format(DIR), 'r', encoding='utf-8') as f:
    sle_annotations = json.load(f)
    
    

In [306]:
# Preview the answer-sentence annotations. By default only the last expression
# of a cell is displayed, so this is the single expression kept in the cell.
[annotation for annotation in sle_annotations if annotation['type'] == 'answerSentence']


[{'id': '227',
  'type': 'answerSentence',
  'attributes': {'bounding_boxes': [{'page': 1,
     'left': 0.07386836306237274,
     'top': 0.5271002556378284,
     'width': 0.42122310480450376,
     'height': 0.15559889681378758}],
   'Name': '987bb16f-331a-4d47-ac5f-32f655c1ff14',
   'text': 'Systemic lupus erythematosus (SLE) is the prototypical auto- activated in SLE, and this justifies the use of medications immune connective tissue disease, affecting 5 million indivi- like steroids, immunosuppressants and disease-modifying duals worldwide, mainly women during the fertile age [1]. anti-rheumatic drugs (DMARDs), which unselectively coun- Clinical presentation broadly varies from patient to patient, teract the immune response. Such a combo-therapy can with kidney and central nervous system (CNS) involvement indeed have many summing side effects that can be further representing the most severe complications [2]. The disease exacerbated by SLE-related organ failure, coagulopathy, or has 

# Link answers to their question

In [307]:
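# The filtered lists built below hold references to the original annotation dicts
# (nothing is copied), so every update made through 'questions' and 'answers' is
# reflected in the original annotations.
    
# For the same reason this function works in place: there is no need to return anything!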
def link_q_a(annotations):
    """Point every answer sentence at the question that lists it.

    Each 'question' annotation names its answers under
    ``relationships['definition_sentences']``. For every 'answerSentence'
    annotation whose id appears there, ``relationships['question']['id']`` is
    set to that question's id. When an answer is listed by several questions,
    the question that comes last in ``annotations`` wins.

    Args:
        annotations: list of annotation dicts, each with at least 'id' and
            'type' keys. The dicts are updated in place.

    Returns:
        None. The changes are visible through ``annotations`` itself.
    """
    questions = [ann for ann in annotations if ann['type'] == 'question']
    answers = [ann for ann in annotations if ann['type'] == 'answerSentence']

    for question in questions:
        # A question without linked sentences simply has nothing to update.
        sentence_refs = (question.get('relationships') or {}).get('definition_sentences') or []
        # A set makes the membership test below O(1) per answer.
        linked_ids = {ref['id'] for ref in sentence_refs}

        for answer in answers:
            if answer['id'] not in linked_ids:
                continue

            relationships = answer.setdefault('relationships', {})
            if relationships.get('question') is None:
                # No back-reference yet: create one in the same {'type', 'id'}
                # shape that link_coasters uses for its references.
                relationships['question'] = {'type': 'question'}
            # An existing reference keeps its other keys; only the id changes.
            relationships['question']['id'] = question['id']

In [308]:
# Updates the dicts inside sle_annotations in place; link_q_a returns nothing.
link_q_a(sle_annotations)


In [309]:
# Serialise before opening the file: mode 'w' truncates it immediately, so a
# failure inside json.dump would otherwise leave the annotation file (which is
# also the file this notebook loads) empty or half-written.
serialized_annotations = json.dumps(sle_annotations)
with open('{}/auto_PAWLS_SPUI_annotations.json'.format(DIR), 'w', encoding='utf-8') as f:
    f.write(serialized_annotations)


# Link multiple answers together

In [311]:
# Each inner list is one "coaster": an ordered list of answerSentence ids (strings).
# link_coasters links every id to the next one in its list via 'more_details' and
# back to the previous one via 'less_details'.

SLE_COASTERS = [['226', '241', '240'], ['233', '237']]

def link_coasters(coasters, annotations):
    """Chain the answer sentences of each coaster together, in place.

    For every list of ids in ``coasters``, the 'answerSentence' annotations
    with those ids are put in the order given by the list. Each one then gets,
    under its 'relationships' dict:

    * 'more_details' -- reference to the next answer found (all but the last),
    * 'less_details' -- reference to the previous answer found (all but the first),
    * 'coaster'      -- references to every id of the coaster as written in
      ``coasters``, including ids that match no annotation.

    Args:
        coasters: list of lists of answer ids.
        annotations: list of annotation dicts; matching answers are mutated.

    Returns:
        None.
    """
    answer_sentences = [ann for ann in annotations if ann['type'] == 'answerSentence']

    for coast in coasters:
        # Position of each id inside the coaster, used to restore its ordering.
        position = {answer_id: idx for idx, answer_id in enumerate(coast)}
        chain = sorted(
            (ans for ans in answer_sentences if ans['id'] in coast),
            key=lambda ans: position[ans['id']],
        )
        last = len(chain) - 1

        for idx, current in enumerate(chain):
            links = current['relationships']
            if idx < last:
                # forward link to the next answer in the chain
                links['more_details'] = {'type': 'answerSentence', 'id': chain[idx + 1]['id']}
            if idx > 0:
                # backward link to the previous answer in the chain
                links['less_details'] = {'type': 'answerSentence', 'id': chain[idx - 1]['id']}
            # every answer also carries its own copy of the full coaster
            links['coaster'] = [{'type': 'answerSentence', 'id': member_id} for member_id in coast]
            
            
            
# Adds the coaster links to the dicts inside sle_annotations in place.
link_coasters(SLE_COASTERS, sle_annotations)


In [312]:
# Serialise before opening the file: mode 'w' truncates it immediately, so a
# failure inside json.dump would otherwise leave the annotation file (which is
# also the file this notebook loads) empty or half-written.
serialized_annotations = json.dumps(sle_annotations)
with open('{}/auto_PAWLS_SPUI_annotations.json'.format(DIR), 'w', encoding='utf-8') as f:
    f.write(serialized_annotations)
