In [1]:
# Load in necessary packages and functions/classes
import jsonlines
import json
import numpy as np
import pandas as pd
import re

from show_prediction import create_short_answer, create_long_answer, get_span_from_token_offsets, ShowPrediction
from google.cloud import storage
from google.cloud.storage import Blob

In [2]:
# Define parameters needed
# Paths are relative to the notebook's working directory.
INPUT_FILE = '../input/nq-dev-all.jsonl'   # examples file; read by ShowPrediction and by jsonlines.open below
PREDICTIONS_FILE = '../output/nq-dev-ensemble-predictions.jsonl'   # alternative: '../output/predictions-dev-sample.jsonl'
REMOVE_HTML = True    # passed as the HTML-removal flag to the display helpers (True presumably strips HTML tags; confirm in show_prediction)
SAMPLE_NUM = 10       # number of random predictions to display

# Optional parameters to explore single examples
# These are forwarded to get_span_from_token_offsets; LA / SA presumably stand
# for "long answer" / "short answer" (confirm against show_prediction).
QAS_ID = 3221262508309669486   # id of the example to look up
START_TOKEN_LA = 1577
END_TOKEN_LA = 1673
START_TOKEN_SA = 1607
END_TOKEN_SA = 1609

In [4]:
# Read the predictions file and flatten the fields of interest out of the
# nested "predictions" records into their own columns.
pred_answers_df = pd.read_json(PREDICTIONS_FILE)
raw_predictions = pred_answers_df["predictions"]
pred_answers_df = pred_answers_df.assign(
    # Scores are copied straight out of each prediction record.
    long_answer_score=raw_predictions.apply(lambda record: record["long_answer_score"]),
    short_answers_score=raw_predictions.apply(lambda record: record["short_answers_score"]),
    # Answer columns are built by the helpers imported from show_prediction.
    long_answer=raw_predictions.apply(create_long_answer),
    short_answer=raw_predictions.apply(create_short_answer),
    # Example ids are stored as strings.
    example_id=raw_predictions.apply(lambda record: str(record["example_id"])),
)
pred_answers_df

Unnamed: 0,predictions,long_answer_score,short_answers_score,long_answer,short_answer,example_id
0,"{'example_id': 3531668162728794161, 'long_answ...",0.366470,0.366325,576:773,706:707,3531668162728794161
1,"{'example_id': 8933237987316701470, 'long_answ...",0.052687,0.047039,320:409,321:327,8933237987316701470
2,"{'example_id': 8477727800446299328, 'long_answ...",2.061583,1.911145,284:399,304:317,8477727800446299328
3,"{'example_id': 4610181708280547488, 'long_answ...",0.092611,0.092611,140:207,141:145,4610181708280547488
4,"{'example_id': -8081431644783557175, 'long_ans...",2.899866,2.899866,191:288,240:241,-8081431644783557175
...,...,...,...,...,...,...
7825,"{'example_id': -2384219425712559173, 'long_ans...",0.008852,0.008852,79:215,80:94,-2384219425712559173
7826,"{'example_id': -1342189058950802702, 'long_ans...",2.899709,2.899680,199:299,266:268,-1342189058950802702
7827,"{'example_id': -1570872939061821343, 'long_ans...",2.899867,2.899867,60:651,398:399,-1570872939061821343
7828,"{'example_id': -1770082893868912092, 'long_ans...",0.514848,0.500787,71:655,72:74,-1770082893868912092


In [5]:
%%time
# Define a predictions class object
# Instantiates ShowPrediction on the full input file. This cell is slow: the
# recorded run stepped through 7830 items and took about 4.5 minutes of wall
# time, so avoid re-running it unnecessarily. `show_pred` is later called on
# individual prediction records to render them.
show_pred = ShowPrediction(INPUT_FILE)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7830/7830 [03:55<00:00, 33.23it/s]


Wall time: 4min 26s


In [9]:
# Display predictions for a random sample of distinct examples.
# Sampling is done without replacement so the same prediction is never shown
# twice, and the sample size is capped at the number of available predictions
# so a large SAMPLE_NUM does not raise.
all_predictions = pred_answers_df['predictions']
n_samples = min(SAMPLE_NUM, len(all_predictions))
for pred in np.random.choice(all_predictions, n_samples, replace=False):
    # Honour the notebook-level REMOVE_HTML switch instead of a hard-coded True.
    print(json.dumps(show_pred(pred, include_full_text=False, remove_html=REMOVE_HTML), indent=4))

{
    "question": "who is the main character in black midas",
    "long_answer": "Jan Rynveld Carew ( 24 September 1920 -- 6 December 2012 ) was a Guyana - born novelist , playwright , poet and educator , who lived at various times in The Netherlands , Mexico , England , France , Spain , Ghana , Jamaica , Canada and the United States . His works , diverse in form and multifaceted , make Jan Carew an important intellectual of the Caribbean world . His poetry and his first two novels , Black Midas and The Wild Coast ( both published in 1958 by Secker & Warburg in London ) , were significant landmarks of the West Indian literature then attempting to cope with its colonial past and assert its wish for autonomy . He worked with the late President Cheddi Jagan in the fight for Guianese independence . Carew also played an important part in the Black movement gaining strength in England and North America , publishing reviews and newspapers , producing programs and plays for the radio and the t

In [9]:
# Show the question and the predicted long/short answer text for the single
# example configured via QAS_ID and the START/END token constants.
with jsonlines.open(INPUT_FILE) as reader:
    question, long_answer, short_answer = get_span_from_token_offsets(
        reader,
        START_TOKEN_LA, END_TOKEN_LA,
        START_TOKEN_SA, END_TOKEN_SA,
        QAS_ID, REMOVE_HTML,
    )

# "\033[1m" / "\033[0m" are ANSI escape codes that switch bold text on and off.
print("\033[1mQuestion: \033[0m", question, "?")
print("\033[1mPredicted Long Answer: \033[0m", long_answer)
print("\033[1mPredicted Short Answer: \033[0m", short_answer)

[1mQuestion: [0m who was the governor general of india when country became independent ?
[1mPredicted Long Answer: [0m Upon independence in August 1947 , the title of Viceroy was abolished . The representative of the British Sovereign became known once again as the Governor - General . C. Rajagopalachari became the only Indian Governor - General . However , once India acquired independence , the Governor - General 's role became almost entirely ceremonial , with power being exercised on a day - to - day basis by the Indian cabinet . After the nation became a republic in 1950 , the President of India continued to perform the same functions .
[1mPredicted Short Answer: [0m C. Rajagopalachari
