In [1]:
# Load in necessary packages and functions/classes
import jsonlines
import json
import numpy as np
import pandas as pd
import re

from show_prediction import create_short_answer, create_long_answer, get_span_from_token_offsets, ShowPrediction
from google.cloud import storage
from google.cloud.storage import Blob

In [10]:
# Define parameters needed
INPUT_FILE = '../input/nq-dev-sample.jsonl'
PREDICTIONS_FILE = '../output/predictions-dev-sample.jsonl'
REMOVE_HTML = True    # whether or not to include HTML tags
SAMPLE_NUM = 10      

# Optional parameters to explore single examples
QAS_ID = 3221262508309669486
START_TOKEN_LA = 1577
END_TOKEN_LA = 1673
START_TOKEN_SA = 1607
END_TOKEN_SA = 1609

In [3]:
# Load in predictions.json to data frame
pred_answers_df = pd.read_json(PREDICTIONS_FILE)
for var_name in ['long_answer_score', 'short_answers_score']:
    pred_answers_df[var_name] = pred_answers_df['predictions'].apply(lambda q: q[var_name])
pred_answers_df["long_answer"] = pred_answers_df["predictions"].apply(create_long_answer)
pred_answers_df["short_answer"] = pred_answers_df["predictions"].apply(create_short_answer)
pred_answers_df["example_id"] = pred_answers_df["predictions"].apply(lambda q: str(q["example_id"]))
pred_answers_df

Unnamed: 0,predictions,long_answer_score,short_answers_score,long_answer,short_answer,example_id
0,"{'example_id': 3262882280361419647, 'long_answ...",8.526313,8.526313,388:480,407:410,3262882280361419647
1,"{'example_id': 6194915277794840386, 'long_answ...",7.100236,7.100236,267:345,268:277,6194915277794840386
2,"{'example_id': 1658149178541467525, 'long_answ...",5.322349,5.322349,917:983,945:955,1658149178541467525
3,"{'example_id': -3290814144789249484, 'long_ans...",12.326050,12.326050,233:388,245:251,-3290814144789249484
4,"{'example_id': -7350738264545175214, 'long_ans...",7.656608,7.656608,458:535,532:533,-7350738264545175214
...,...,...,...,...,...,...
195,"{'example_id': -6965315175406025099, 'long_ans...",10.503764,10.503764,14:608,383:384,-6965315175406025099
196,"{'example_id': -7115492812897878586, 'long_ans...",13.244329,13.244329,291:329,305:312,-7115492812897878586
197,"{'example_id': 8112877552630777245, 'long_answ...",6.160147,6.160147,481:553,525:529,8112877552630777245
198,"{'example_id': 4398207059107866695, 'long_answ...",5.518811,5.518811,547:592,548:563,4398207059107866695


In [4]:
# Define a predictions class object
show_pred = ShowPrediction(INPUT_FILE)

In [13]:
# Display predictions for random samples
for pred in np.random.choice(pred_answers_df['predictions'], SAMPLE_NUM):
    print(json.dumps(show_pred(pred, include_full_text=False, remove_html=True), indent=4))

{
    "question": "where does the story the great gatsby take place",
    "long_answer": "The Great Gatsby is a 1925 novel written by American author F. Scott Fitzgerald that follows a cast of characters living in the fictional town of West Egg on prosperous Long Island in the summer of 1922 . The story primarily concerns the young and mysterious millionaire Jay Gatsby and his quixotic passion and obsession for the beautiful former debutante Daisy Buchanan . Considered to be Fitzgerald 's magnum opus , The Great Gatsby explores themes of decadence , idealism , resistance to change , social upheaval , and excess , creating a portrait of the Jazz Age or the Roaring Twenties that has been described as a cautionary tale regarding the American Dream .",
    "short_answers": "the fictional town of West Egg on prosperous Long Island"
}
{
    "question": "how many countries are a part of opec",
    "long_answer": "Organization of the Petroleum Exporting Countries ( OPEC , / \u02c8o\u028ap\u025

In [9]:
# Display prediction for single specified example (long or short answer)
with jsonlines.open(INPUT_FILE) as f:
    question, long_answer, short_answer = get_span_from_token_offsets(f, START_TOKEN_LA, END_TOKEN_LA,
                                                                      START_TOKEN_SA, END_TOKEN_SA, 
                                                                      QAS_ID, REMOVE_HTML)

print("\033[1m""Question: ""\033[0m", question,"?")

print("\033[1m""Predicted Long Answer: ""\033[0m", long_answer)

print("\033[1m""Predicted Short Answer: ""\033[0m", short_answer)

[1mQuestion: [0m who was the governor general of india when country became independent ?
[1mPredicted Long Answer: [0m Upon independence in August 1947 , the title of Viceroy was abolished . The representative of the British Sovereign became known once again as the Governor - General . C. Rajagopalachari became the only Indian Governor - General . However , once India acquired independence , the Governor - General 's role became almost entirely ceremonial , with power being exercised on a day - to - day basis by the Indian cabinet . After the nation became a republic in 1950 , the President of India continued to perform the same functions .
[1mPredicted Short Answer: [0m C. Rajagopalachari
