In [1]:
import pandas as pd
import sys
import json
import numpy as np

In [39]:
# Input JSON-lines file parsed by get_questions; the path is relative, so it is
# resolved against the kernel's current working directory.
test_file = '../../../data/squad/test-qar_squad_all.jsonl'

In [40]:
# Keys (inside each QA entry) that hold candidate answer spans; each maps to a
# list of dicts from which only the 'text' field is read.
span_keys = [
    'answers_sentence_ir',
    'answers_sentence_bleu2',
    'answers_sentence_bleu4',
    'answers_snippet_spans_bleu2',
    'answers_snippet_spans_bleu4',
    'answers_snippet_spans_rouge',
]

def get_questions(filename):
    """Load questions from a JSON-lines file into a dict keyed by question id.

    Every non-blank line must be a JSON object with a 'context' entry and a
    'qas' list. Only the first element of 'qas' is used; from it the function
    reads 'id', 'question', 'human_answers' and every key in ``span_keys``.

    Args:
        filename: Path of the UTF-8 encoded JSON-lines file to read.

    Returns:
        A dict mapping each question id to a dict with the keys 'context',
        'question', 'answers', 'qid', plus one entry per key in ``span_keys``
        holding the list of non-empty span texts.

    Raises:
        ValueError: If a non-blank line is not valid JSON. The original
            ``json.JSONDecodeError`` is attached as ``__cause__``.
    """
    questions = {}
    with open(filename, 'r', encoding='utf-8') as fp_inp:
        for line in fp_inp:
            # Whitespace-only lines (e.g. a stray newline at end of file)
            # carry no record and must not abort the whole load.
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as err:
                raise ValueError('\"%s\" is not a valid json' % line) from err
            # Only the first QA entry of each record is used.
            qa = record['qas'][0]
            question = {'context': record['context']}
            for key in span_keys:
                # Drop spans whose text is empty.
                question[key] = [a['text'] for a in qa[key] if a['text'] != '']
            question['question'] = qa['question']
            question['answers'] = qa['human_answers']
            question['qid'] = qa['id']
            questions[qa['id']] = question
    return questions

# Parse the whole input file into a dict keyed by question id.
data = get_questions(test_file)

In [41]:
# Number of loaded questions (the dict has one entry per question id).
len(data)

59527

In [42]:
# Seed NumPy's global RNG so the random sample of questions is reproducible.
# This must be a call: assigning `np.random.seed = 1` only replaces the
# function with an int and leaves the generator unseeded.
np.random.seed(1)

In [43]:
# Shuffle all question ids with NumPy's global RNG and keep the first 400.
ids = np.random.permutation(list(data))[:400]

In [44]:
# Collect the full record for every sampled id, in sample order.
entries = [data[sampled_id] for sampled_id in ids]

In [45]:
# Tabulate the sampled records with a fixed column order:
# identifiers first, then the span columns, then the human answers.
df = pd.DataFrame(entries)[
    ['qid', 'question', 'context', *span_keys, 'answers']
]
def enum_value(l):
    """Render the items of ``l`` as a numbered list, one item per line.

    Items are numbered from 1 and formatted as ``'<n>) <item>'``; the lines
    are joined with newlines. An empty input yields an empty string.
    """
    numbered = []
    for position, item in enumerate(l, start=1):
        numbered.append('%d) %s' % (position, item))
    return '\n'.join(numbered)

# Replace each list-valued column with its numbered, newline-joined rendering.
for key in [*span_keys, 'answers']:
    df[key] = df[key].apply(enum_value)

In [46]:
# Preview the first rows of the annotation table.
df.head()

Unnamed: 0,qid,question,context,answers_sentence_ir,answers_sentence_bleu2,answers_sentence_bleu4,answers_snippet_spans_bleu2,answers_snippet_spans_bleu4,answers_snippet_spans_rouge,answers
0,65093,Does it works for HTC sensation battery?,I bought this for my HTC Sensation battery and...,1) It works with my HTC Sensation Z710e batter...,"1) The battery overhangs a bit, but works grea...","1) The battery overhangs a bit, but works grea...",1) it still charged fine. The battery overhang...,1) it still charged fine. The battery overhang...,1) quickly and it works like a charm. Charges ...,"1) Yes, the one I ordered works great. It's qu..."
1,70733,"Can you use this to make almond flour, or othe...",My wife is Gluten intolerant and likes to expe...,1) After one use I sent this back and replaced...,1) Such is life.\n2) This grinder is fantastic...,1) Such is life.\n2) This grinder is fantastic...,1) My wife is Gluten intolerant and likes to e...,1) My wife is Gluten intolerant and likes to e...,1) My wife is Gluten intolerant and likes to e...,1) No\n2) Yes\n3) No\n4) Yes
2,40475,"With 2 $700 S & W guns plus 6 mags, just what ...",since i own a S&W M&P Shield i figured what be...,"1) Well made, holds 2 guns and 6 clips.\n2) 2 ...",1) perfect little bag to take to the range!\n2...,1) I preffer to put both of my single cases an...,1) the bare minimum of packaging to the range ...,1) single case cause I only use 2 of my guns\n...,1) a comparison to the almost identicle in loo...,1) I suppose it depends on what you mean by da...
3,33830,What type of plastic are these straws made of?,"Be aware, these straws are not the clear acryl...","1) These are made of a softer plastic, but are...","1) Be aware, these straws are not the clear ac...","1) Be aware, these straws are not the clear ac...",1) acrylic type you sometimes see. These are m...,1) acrylic type you sometimes see. These are m...,"1) Be aware, these straws are not the clear ac...",1) It doesn't say on the package. There isn't ...
4,39226,Does it come with an echelon? That is the opti...,Super heavy faucet with a beautiful finish. Co...,1) Comes with a plate that you can use if you ...,1) Neither stopper will hold water in the bott...,1) The quality of the Kohler brand can't be be...,1) Neither stopper will hold water in the bott...,1) Neither stopper will hold water in the bott...,"1) contemporary style. This one is perfect, I ...",1) It does come with what I call a plate. Thi...


In [48]:
# Write the annotation table to CSV, omitting the DataFrame index.
# NOTE(review): this is an absolute, machine-specific path, unlike the relative
# input path used for `test_file` — consider a path relative to the notebook;
# confirm the intended output location before changing it.
df.to_csv('/home/ubuntu/capstone/src/evaluation/span_analysis/span_annotations_new.csv', index=False)