In [1]:
import torch
import numpy as np
import json

In [2]:
def ensemble_triple(model_x, model_y, model_z, tokenizer_x, tokenizer_y, tokenizer_z, question, answer_text, alpha=.5):
    """
    Answer a question by ensembling three extractive-QA transformers.

    Each (model, tokenizer) pair encodes the (question, answer_text) pair,
    padded/truncated to a fixed length, and produces start/end logits. The
    logits are soft-maxed per model and handed to `combine_scores2`, which
    returns the index of the model to decode with and the start/end token
    positions of the answer span.

    Args:
        model_x, model_y, model_z: callables that accept the tokenizer's
            encoding as keyword arguments and return start logits and end
            logits as their first two outputs.
        tokenizer_x, tokenizer_y, tokenizer_z: tokenizers matching the models,
            providing `encode_plus` and `decode`.
        question: question string.
        answer_text: context passage the answer is extracted from.
        alpha: unused; kept so existing callers keep working.

    Returns:
        The decoded answer string (special tokens stripped). The string is
        empty when the selected end position precedes the start position,
        because the token slice is then empty.
    """
    # Fixed sequence length so every model yields score vectors of equal size,
    # which the span-matrix summation in the ensemble relies on.
    max_len = 300

    models = (model_x, model_y, model_z)
    tokenizers = (tokenizer_x, tokenizer_y, tokenizer_z)

    # ======== Tokenize ========
    encodings = [
        tokenizer.encode_plus(question, answer_text, max_length=max_len, truncation=True,
                              padding='max_length', return_tensors='pt')
        for tokenizer in tokenizers
    ]

    # ======== Evaluate ========
    start, end = [], []
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for qa_model, encoding in zip(models, encodings):
            outputs = qa_model(**encoding)
            # Index positionally instead of tuple-unpacking: this works for
            # plain tuples and for dict-like ModelOutput objects (unpacking
            # the latter would yield its keys, not the logits).
            start_scores, end_scores = outputs[0], outputs[1]
            start.append(torch.nn.functional.softmax(start_scores, dim=1).detach().numpy()[0])
            end.append(torch.nn.functional.softmax(end_scores, dim=1).detach().numpy()[0])

    # ======== Reconstruct Answer ========
    # mode = {'equal', 'ranks', 'f1'}
    model, answer_start, answer_end = combine_scores2(start, end, mode="equal")

    # Decode with the tokenizer/inputs of the model chosen by the ensemble.
    # Any index other than 0 or 1 falls through to the third model, as before.
    chosen = model if model in (0, 1) else 2
    answer = tokenizers[chosen].decode(
        encodings[chosen]['input_ids'][0, answer_start:answer_end + 1],
        skip_special_tokens=True,
    )
    return answer

In [3]:
def ensemble_confidence_score(start_scores, end_scores):
  """Select the model whose top start/end scores have the largest product.

  For each model the highest start score and the highest end score are
  located independently and multiplied. The model with the strictly largest
  product wins (the earliest model is kept on ties).

  Args:
    start_scores: sequence with one start-score vector per model.
    end_scores: sequence with one end-score vector per model.

  Returns:
    (model, answer_start, answer_end); (0, 0, 0) if no product exceeds 0.
  """
  best_conf = 0
  chosen_model, chosen_start, chosen_end = 0, 0, 0

  for idx, start_vec in enumerate(start_scores):
    end_vec = end_scores[idx]
    start_peak = np.argmax(start_vec)
    end_peak = np.argmax(end_vec)
    confidence = start_vec[start_peak] * end_vec[end_peak]
    if not confidence > best_conf:
      continue
    best_conf = confidence
    chosen_model, chosen_start, chosen_end = idx, start_peak, end_peak

  return chosen_model, chosen_start, chosen_end

def ensemble_confidence_prob(start_prob, end_prob):
  """Pick the single most confident (start, end) span across all models.

  For model i the confidence of a span is start_prob[i][s] * end_prob[i][e].
  Every (s, e) combination of every model is considered and the strictly
  largest product wins; ties keep the earliest model, then the earliest
  start, then the earliest end.

  Args:
    start_prob: sequence with one 1-D start-probability vector per model.
    end_prob: sequence with one 1-D end-probability vector per model.

  Returns:
    (model, answer_start, answer_end) as plain Python ints; (0, 0, 0) if no
    span has a confidence greater than 0.
  """
  # NOTE(review): spans with end < start are not excluded here; callers that
  # slice tokens with start:end+1 get an empty span in that case.
  best_conf, model, answer_start, answer_end = 0.0, 0, 0, 0
  for i in range(len(start_prob)):
    # joint[s, e] == start_prob[i][s] * end_prob[i][e]
    joint = np.outer(start_prob[i], end_prob[i])
    if joint.size == 0:
      continue
    # argmax on the flattened matrix returns the first maximum in row-major
    # order, i.e. smallest start first, then smallest end.
    s, e = np.unravel_index(np.argmax(joint), joint.shape)
    if joint[s, e] > best_conf:
      best_conf = joint[s, e]
      model, answer_start, answer_end = i, int(s), int(e)

  return model, answer_start, answer_end

In [4]:
def ensemble_weighted_average_score(ranks, start_scores, end_scores):
  """Select the model with the largest rank-weighted top-score product.

  Per model, the highest start score and the highest end score are found
  independently; their product is scaled by that model's weight. The model
  with the strictly largest weighted product wins (earliest model on ties).

  Args:
    ranks: one weight per model (the original comment suggests F1-based ranks).
    start_scores: sequence with one start-score vector per model.
    end_scores: sequence with one end-score vector per model.

  Returns:
    (model, answer_start, answer_end); (0, 0, 0) if no weighted product
    exceeds 0.
  """
  best_weighted = 0.0
  chosen_model, chosen_start, chosen_end = 0, 0, 0

  for idx, start_vec in enumerate(start_scores):
    end_vec = end_scores[idx]
    start_peak = np.argmax(start_vec)
    end_peak = np.argmax(end_vec)
    weighted = ranks[idx] * start_vec[start_peak] * end_vec[end_peak]
    if not weighted > best_weighted:
      continue
    best_weighted = weighted
    chosen_model, chosen_start, chosen_end = idx, start_peak, end_peak

  return chosen_model, chosen_start, chosen_end

def ensemble_weighted_average_prob(ranks, start_prob, end_prob):
  """Pick the most confident span across models, weighting each model.

  For model i the weighted confidence of a span is
  ranks[i] * start_prob[i][s] * end_prob[i][e]. All (s, e) combinations of
  all models are compared and the strictly largest value wins; ties keep the
  earliest model, then the earliest start, then the earliest end.

  Args:
    ranks: one weight per model (the original comment suggests F1-based ranks).
    start_prob: sequence with one 1-D start-probability vector per model.
    end_prob: sequence with one 1-D end-probability vector per model.

  Returns:
    (model, answer_start, answer_end) as plain Python ints; (0, 0, 0) if no
    span has a weighted confidence greater than 0.
  """
  # NOTE(review): spans with end < start are not excluded here.
  best_conf, model, answer_start, answer_end = 0.0, 0, 0, 0
  for i in range(len(start_prob)):
    # Weight the start vector first so each cell is (rank * start) * end,
    # the same association order as the scalar formula above.
    weighted_start = ranks[i] * np.asarray(start_prob[i])
    joint = np.outer(weighted_start, end_prob[i])
    if joint.size == 0:
      continue
    # First maximum in row-major order: smallest start, then smallest end.
    s, e = np.unravel_index(np.argmax(joint), joint.shape)
    if joint[s, e] > best_conf:
      best_conf = joint[s, e]
      model, answer_start, answer_end = i, int(s), int(e)

  return model, answer_start, answer_end

def add_lists(tup):
  """Element-wise sum of all sequences in `tup`.

  Args:
    tup: non-empty sequence of equally shaped (or broadcastable) lists/arrays.

  Returns:
    A new numpy array holding the element-wise sum. The inputs are left
    untouched.

  Raises:
    IndexError: if `tup` is empty.
  """
  # Copy the first operand so the caller's data is never modified.
  total = np.array(tup[0])

  for i in range(1, len(tup)):
    # Keep the result: np.add returns a new array, it does not update `total`.
    total = np.add(total, tup[i])

  return total

def ensemble_weighted_average_new(ranks, start_prob, end_prob):
  """Combine several models' span probabilities by weighted summation.

  For every model i a span matrix is built with
  cell[s, e] = ranks[i] * start_prob[i][s] * end_prob[i][e].
  Two answers are derived from these matrices:

  * the single best cell over all individual matrices (which model was the
    most confident on its own, and for which span), and
  * the best cell of the element-wise SUM of all matrices (the ensemble
    answer).

  All models must produce vectors of the same length so the matrices can be
  summed.

  Args:
    ranks: one weight per model.
    start_prob: sequence with one 1-D start-probability vector per model.
    end_prob: sequence with one 1-D end-probability vector per model.

  Returns:
    (max_model, max_start, max_end, answer_start, answer_end), all plain
    Python ints. The first three describe the most confident individual
    model/span ((0, 0, 0) if nothing exceeds 0); the last two are the
    start/end of the best span in the summed matrix.

  Raises:
    ValueError: if no models are supplied or the summed matrix is empty.
  """
  # NOTE(review): spans with end < start are not excluded from either search.
  best_val, max_model, max_start, max_end = 0.0, 0, 0, 0
  answer_mat = None

  for i in range(len(start_prob)):
    # Weight the start vector first so each cell is (rank * start) * end.
    weighted_start = ranks[i] * np.asarray(start_prob[i])
    joint = np.outer(weighted_start, np.asarray(end_prob[i]))

    if joint.size:
      # First maximum in row-major order: smallest start, then smallest end.
      s, e = np.unravel_index(np.argmax(joint), joint.shape)
      if joint[s, e] > best_val:
        best_val = joint[s, e]
        max_model, max_start, max_end = i, int(s), int(e)

    answer_mat = joint if answer_mat is None else np.add(answer_mat, joint)

  if answer_mat is None or answer_mat.size == 0:
    raise ValueError("ensemble_weighted_average_new needs at least one model with non-empty scores")

  answer_start, answer_end = np.unravel_index(np.argmax(answer_mat), answer_mat.shape)

  return max_model, max_start, max_end, int(answer_start), int(answer_end)

def combine_scores2(start_scores, end_scores, mode="equal", ranks=None):
  """Ensemble per-model start/end scores into one answer span.

  Builds one weight per model according to `mode` and delegates to
  `ensemble_weighted_average_new`.

  Args:
    start_scores: sequence with one start-score vector per model.
    end_scores: sequence with one end-score vector per model.
    mode: weighting scheme.
      "equal" - every model gets weight 1/n.
      "ranks" - model k (1-based position) gets weight k / (1 + 2 + ... + n),
                so later models count more.
      "f1"    - weights are taken from `ranks` (e.g. per-model F1 scores) and
                normalised to sum to 1.
    ranks: per-model weights, required only for mode "f1".

  Returns:
    (model, start, end): index of the individually most confident model and
    the start/end positions of the best span in the summed span matrix.

  Raises:
    ValueError: for an unknown mode, or for mode "f1" without usable `ranks`.
  """
  n_models = len(start_scores)

  if mode == "equal":
    weights = [1.0 / n_models] * n_models

  elif mode == "ranks":
    rank_total = n_models * (n_models + 1) // 2
    weights = [position / float(rank_total) for position in range(1, n_models + 1)]

  elif mode == "f1":
    # Previously this branch passed an empty weight list, which could only
    # fail with an IndexError; the weights must come from the caller.
    if ranks is None or len(ranks) != n_models:
      raise ValueError("mode 'f1' requires `ranks` with one weight per model")
    weight_total = float(sum(ranks))
    if weight_total <= 0:
      raise ValueError("mode 'f1' requires weights with a positive sum")
    weights = [weight / weight_total for weight in ranks]

  else:
    # Previously an unknown mode silently returned None.
    raise ValueError("unknown mode %r; expected 'equal', 'ranks' or 'f1'" % (mode,))

  model, max_start, max_end, start, end = ensemble_weighted_average_new(weights, start_scores, end_scores)
  return model, start, end

In [5]:
from transformers import BertForQuestionAnswering, BertTokenizer


def _load_squad_bert(checkpoint, loaded_message):
    """Load the QA model and tokenizer for `checkpoint`, then print `loaded_message`."""
    qa_model = BertForQuestionAnswering.from_pretrained(checkpoint)
    qa_tokenizer = BertTokenizer.from_pretrained(checkpoint)
    print(loaded_message)
    return qa_model, qa_tokenizer


# Three BERT checkpoints of increasing size, each fine-tuned on SQuAD v2
# (per the checkpoint names).
bert_mini_model, bert_mini_tokenizer = _load_squad_bert(
    'mrm8488/bert-mini-5-finetuned-squadv2', "Bert Mini loaded...")

bert_small_model, bert_small_tokenizer = _load_squad_bert(
    'mrm8488/bert-small-finetuned-squadv2', "Bert Small loaded...")

bert_med_model, bert_med_tokenizer = _load_squad_bert(
    'mrm8488/bert-medium-finetuned-squadv2', "Bert Medium loaded...")

Bert Mini loaded...
Bert Small loaded...
Bert Medium loaded...


In [6]:
def answer_question(question, text, alpha=.5):
  """
  Answer `question` from the passage `text` with the three-model BERT ensemble.

  Thin wrapper around `ensemble_triple`; swap the models/tokenizers below (or
  the function called) to change which ensemble is used.
  """
  qa_models = (bert_mini_model, bert_small_model, bert_med_model)
  qa_tokenizers = (bert_mini_tokenizer, bert_small_tokenizer, bert_med_tokenizer)

  return ensemble_triple(*qa_models, *qa_tokenizers, question, text, alpha)

In [7]:
# Builtin preprocessor
# Uses the SQuAD v2 processor shipped with `transformers` to read the dev set.
from transformers.data.processors.squad import SquadV2Processor

processor = SquadV2Processor()
# Directory expected to contain the dev file named below.
squad_data_dir='./SQUAD_V2'
# `eval_examples` is the collection the inspection loop below iterates over;
# each example exposes fields such as qas_id, question_text and is_impossible.
eval_examples = processor.get_dev_examples(squad_data_dir,filename='dev-v2.0.json')

100%|██████████| 35/35 [00:05<00:00,  6.72it/s]


In [8]:
from tqdm import tqdm

# Fields dumped, one per line and in this order, for every example flagged
# as impossible (unanswerable).
example_fields = (
    "qas_id",
    "question_text",
    "context_text",
    "answer_text",
    "title",
    "is_impossible",
    "answers",
    "start_position",
    "end_position",
)

for example in tqdm(eval_examples):
    if not example.is_impossible:
        continue
    for field_name in example_fields:
        print(getattr(example, field_name))

  2%|▏         | 288/11873 [00:00<00:04, 2434.21it/s]

5ad39d53604f3c001a3fe8d1
Who gave their name to Normandy in the 1000's and 1100's
The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.
None
Normans
True
[]
0
0
5ad39d53604f3c001a3fe8d2
What is France a region of?
The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10t

  6%|▌         | 699/11873 [00:00<00:05, 2218.09it/s]


Computational_complexity_theory
True
[]
0
0
5ad561c85b96ef001a10ad3e
What expression does not usually contain DTIME(n)?
For the complexity classes defined in this way, it is desirable to prove that relaxing the requirements on (say) computation time indeed defines a bigger set of problems. In particular, although DTIME(n) is contained in DTIME(n2), it would be interesting to know if the inclusion is strict. For time and space requirements, the answer to such questions is given by the time and space hierarchy theorems respectively. They are called hierarchy theorems because they induce a proper hierarchy on the classes defined by constraining the respective resources. Thus there are pairs of complexity classes such that one is properly included in the other. Having deduced such proper set inclusions, we can proceed to make quantitative statements about how much more additional time or space is needed in order to increase the number of problems that can be solved.
None
Computational_com

 10%|▉         | 1147/11873 [00:00<00:04, 2216.13it/s]

5ad04de377cf76001a686fa7
What is the busiest container port in the United States?
Southern California is also home to the Port of Los Angeles, the United States' busiest commercial port; the adjacent Port of Long Beach, the United States' second busiest container port; and the Port of San Diego.
None
Southern_California
True
[]
0
0
5ad04de377cf76001a686fa8
What is the Port of San Diego adjacent to?
Southern California is also home to the Port of Los Angeles, the United States' busiest commercial port; the adjacent Port of Long Beach, the United States' second busiest container port; and the Port of San Diego.
None
Southern_California
True
[]
0
0
5ad04de377cf76001a686fa9
What is the United State's second-busiest commercial port?
Southern California is also home to the Port of Los Angeles, the United States' busiest commercial port; the adjacent Port of Long Beach, the United States' second busiest container port; and the Port of San Diego.
None
Southern_California
True
[]
0
0
5ad04ecf77

 13%|█▎        | 1521/11873 [00:00<00:05, 1733.66it/s]

0
0
5a63787868151a001a9222fa
When were writs issued for the election of the New South Wales president?
On 1 July 1851, writs were issued for the election of the first Victorian Legislative Council, and the absolute independence of Victoria from New South Wales was established proclaiming a new Colony of Victoria. Days later, still in 1851 gold was discovered near Ballarat, and subsequently at Bendigo. Later discoveries occurred at many sites across Victoria. This triggered one of the largest gold rushes the world has ever seen. The colony grew rapidly in both population and economic power. In ten years the population of Victoria increased sevenfold from 76,000 to 540,000. All sorts of gold records were produced including the "richest shallow alluvial goldfield in the world" and the largest gold nugget. Victoria produced in the decade 1851–1860 20 million ounces of gold, one third of the world's output[citation needed].
None
Victoria_(Australia)
True
[]
0
0
5a63787868151a001a9222fb
When

 14%|█▍        | 1687/11873 [00:00<00:06, 1515.84it/s]

What was one occupation of the native English?
Other evidence of the Walloons and Huguenots in Canterbury includes a block of houses in Turnagain Lane, where weavers' windows survive on the top floor, as many Huguenots worked as weavers. The Weavers, a half-timbered house by the river, was the site of a weaving school from the late 16th century to about 1830. (It has been adapted as a restaurant—see illustration above. The house derives its name from a weaving school which was moved there in the last years of the 19th century, reviving an earlier use.) Others refugees practised the variety of occupations necessary to sustain the community as distinct from the indigenous population. Such economic separation was the condition of the refugees' initial acceptance in the City. They also settled elsewhere in Kent, particularly Sandwich, Faversham and Maidstone—towns in which there used to be refugee churches.
None
Huguenot
True
[]
0
0
5ad26c87d7d075001a42939c
Which English town had the large

 18%|█▊        | 2119/11873 [00:01<00:05, 1788.02it/s]


[]
0
0
5ad40ec6604f3c001a400130
What happens to the lead fusible plugs if the water level of the fire drops?
None
Steam_engine
True
[]
0
0
5ad40ec6604f3c001a400131
What happens after the firebox melts?
None
Steam_engine
True
[]
0
0
5ad40ec6604f3c001a400132
After the operators are warned by the escape of the firebox, what may they then do?
None
Steam_engine
True
[]
0
0
5ad40ec6604f3c001a400133
What is the escape of the firebox unlikely to accomplish in all but the smallest boilers?
None
Steam_engine
True
[]
0
0
5ad3bf63604f3c001a3fef5b
Who patented a steam engine in 1883?
In 1781 James Watt patented a steam engine that produced continuous rotary motion. Watt's ten-horsepower engines enabled a wide range of manufacturing machinery to be powered. The engines could be sited anywhere that water and coal or wood fuel could be obtained. By 1883, engines that could provide 10,000 hp had become feasible. The stationary steam engine was a key component of the Industrial Revolution, allowing fac

 22%|██▏       | 2592/11873 [00:01<00:04, 2012.72it/s]


0
5ad26235d7d075001a429082
What is AlO?
Oxygen is present in the atmosphere in trace quantities in the form of carbon dioxide (CO
2). The Earth's crustal rock is composed in large part of oxides of silicon (silica SiO
2, as found in granite and quartz), aluminium (aluminium oxide Al
2O
3, in bauxite and corundum), iron (iron(III) oxide Fe
2O
3, in hematite and rust), and calcium carbonate (in limestone). The rest of the Earth's crust is also made of oxygen compounds, in particular various complex silicates (in silicate minerals). The Earth's mantle, of much larger mass than the crust, is largely composed of silicates of magnesium and iron.
None
Oxygen
True
[]
0
0
5ad26235d7d075001a429083
What is Fe2O?
Oxygen is present in the atmosphere in trace quantities in the form of carbon dioxide (CO
2). The Earth's crustal rock is composed in large part of oxides of silicon (silica SiO
2, as found in granite and quartz), aluminium (aluminium oxide Al
2O
3, in bauxite and corundum), iron (iron(I

 26%|██▌       | 3101/11873 [00:01<00:04, 2114.74it/s]


0
0
5a75066297ca42001a521ded
How much did they say it was fair to raise the prices? 
On October 6, 1973, Syria and Egypt, with support from other Arab nations, launched a surprise attack on Israel, on Yom Kippur. This renewal of hostilities in the Arab–Israeli conflict released the underlying economic pressure on oil prices. At the time, Iran was the world's second-largest oil exporter and a close US ally. Weeks later, the Shah of Iran said in an interview: "Of course [the price of oil] is going to rise... Certainly! And how!... You've [Western nations] increased the price of the wheat you sell us by 300 percent, and the same for sugar and cement... You buy our crude oil and sell it back to us, refined as petrochemicals, at a hundred times the price you've paid us... It's only fair that, from now on, you should pay more for oil. Let's say ten times more."
None
1973_oil_crisis
True
[]
0
0
5a38b29da4b263001a8c18a7
What did Geroge Lenczowski do to the price of oil on October 16, 1973?
In

 31%|███       | 3628/11873 [00:01<00:03, 2277.82it/s]


What did Article 65 not agree upon?
EU Competition law has its origins in the European Coal and Steel Community (ECSC) agreement between France, Italy, Belgium, the Netherlands, Luxembourg and Germany in 1951 following the second World War. The agreement aimed to prevent Germany from re-establishing dominance in the production of coal and steel as members felt that its dominance had contributed to the outbreak of the war. Article 65 of the agreement banned cartels and article 66 made provisions for concentrations, or mergers, and the abuse of a dominant position by companies. This was the first time that competition law principles were included in a plurilateral regional agreement and established the trans-European model of competition law. In 1957 competition rules were included in the Treaty of Rome, also known as the EC Treaty, which established the European Economic Community (EEC). The Treaty of Rome established the enactment of competition law as one of the main aims of the EEC 

 35%|███▍      | 4106/11873 [00:02<00:03, 2106.41it/s]

0
0
5a837038e60761001a2eb6f7
Why do do ciliary rosettes need to decrease density in seawater?
It is uncertain how ctenophores control their buoyancy, but experiments have shown that some species rely on osmotic pressure to adapt to water of different densities. Their body fluids are normally as concentrated as seawater. If they enter less dense brackish water, the ciliary rosettes in the body cavity may pump this into the mesoglea to increase its bulk and decrease its density, to avoid sinking. Conversely if they move from brackish to full-strength seawater, the rosettes may pump water out of the mesoglea to reduce its volume and increase its density.
None
Ctenophora
True
[]
0
0
5a8371dfe60761001a2eb725
What is the largest feature in the mouth?
The largest single sensory feature is the aboral organ (at the opposite end from the mouth). Its main component is a statocyst, a balance sensor consisting of a statolith, a solid particle supported on four bundles of cilia, called "balancers", 

 36%|███▋      | 4326/11873 [00:02<00:03, 2075.26it/s]

None
Fresno,_California
True
[]
0
0
5a7b359f21c2de001afe9dd2
What did the transportation law ban?
Formed in 1946, Sierra Sky Park Airport is a residential airport community born of a unique agreement in transportation law to allow personal aircraft and automobiles to share certain roads. Sierra Sky Park was the first aviation community to be built[citation needed] and there are now numerous such communities across the United States and around the world. Developer William Smilie created the nation's first planned aviation community. Still in operation today, the public use airport provides a unique neighborhood that spawned interest and similar communities nationwide.
None
Fresno,_California
True
[]
0
0
5a7b359f21c2de001afe9dd3
What did planes and automobiles have to divide separately?
Formed in 1946, Sierra Sky Park Airport is a residential airport community born of a unique agreement in transportation law to allow personal aircraft and automobiles to share certain roads. Sierra Sky Pa

 40%|███▉      | 4742/11873 [00:02<00:03, 1850.98it/s]

True
[]
0
0
5a6695e8f038b7001ab0c016
How were TRANSPAC and TYMNET modified?
There were two kinds of X.25 networks. Some such as DATAPAC and TRANSPAC were initially implemented with an X.25 external interface. Some older networks such as TELENET and TYMNET were modified to provide a X.25 host interface in addition to older host connection schemes. DATAPAC was developed by Bell Northern Research which was a joint venture of Bell Canada (a common carrier) and Northern Telecom (a telecommunications equipment supplier). Northern Telecom sold several DATAPAC clones to foreign PTTs including the Deutsche Bundespost. X.75 and X.121 allowed the interconnection of national X.25 networks. A user or host could call a host on a foreign network by including the DNIC of the remote network as part of the destination address.[citation needed]
None
Packet_switching
True
[]
0
0
5a6695e8f038b7001ab0c017
Bell Northern Research developed TRANSPAC as a joint venture between which two companies?
There were tw

 43%|████▎     | 5123/11873 [00:02<00:03, 1761.79it/s]


Why is the most recent era expanded in the first scale?
The following four timelines show the geologic time scale. The first shows the entire time from the formation of the Earth to the present, but this compresses the most recent eon. Therefore, the second scale shows the most recent eon with an expanded scale. The second scale compresses the most recent era, so the most recent era is expanded in the third scale. Since the Quaternary is a very short period with short epochs, it is further expanded in the fourth scale. The second, third, and fourth timelines are therefore each subsections of their preceding timeline as indicated by asterisks. The Holocene (the latest epoch) is too small to be shown clearly on the third timeline on the right, another reason for expanding the fourth scale. The Pleistocene (P) epoch. Q stands for the Quaternary period.
None
Geology
True
[]
0
0
5a57d364770dc0001aeefdcb
Where is the Holocene further expanded since it is very short with short epochs?
The fo

 47%|████▋     | 5534/11873 [00:02<00:03, 1850.92it/s]

What did Hutton believe was formed in one catastrophic event?
Sir Charles Lyell first published his famous book, Principles of Geology, in 1830. This book, which influenced the thought of Charles Darwin, successfully promoted the doctrine of uniformitarianism. This theory states that slow geological processes have occurred throughout the Earth's history and are still occurring today. In contrast, catastrophism is the theory that Earth's features formed in single, catastrophic events and remained unchanged thereafter. Though Hutton believed in uniformitarianism, the idea was not widely accepted at the time.
None
Geology
True
[]
0
0
5a592f3f3e1742001a15cffe
What book did Darwin publish in 1830?
Sir Charles Lyell first published his famous book, Principles of Geology, in 1830. This book, which influenced the thought of Charles Darwin, successfully promoted the doctrine of uniformitarianism. This theory states that slow geological processes have occurred throughout the Earth's history and 

 50%|█████     | 5976/11873 [00:02<00:02, 2008.31it/s]

None
Civil_disobedience
True
[]
0
0
5a6681d1f038b7001ab0bf02
What action did Brownlee take against GCSB Waihopai that caused them to close temporarily?
Some forms of civil disobedience, such as illegal boycotts, refusals to pay taxes, draft dodging, distributed denial-of-service attacks, and sit-ins, make it more difficult for a system to function. In this way, they might be considered coercive. Brownlee notes that "although civil disobedients are constrained in their use of coercion by their conscientious aim to engage in moral dialogue, nevertheless they may find it necessary to employ limited coercion in order to get their issue onto the table." The Plowshares organization temporarily closed GCSB Waihopai by padlocking the gates and using sickles to deflate one of the large domes covering two satellite dishes.
None
Civil_disobedience
True
[]
0
0
5a6681d1f038b7001ab0bf03
What method does the government use to get their issue heard?
Some forms of civil disobedience, such as illegal bo

 54%|█████▍    | 6395/11873 [00:03<00:02, 2012.79it/s]

[]
0
0
5a67986af038b7001ab0c312
What do public schools operate outside of in Germany?
Ergänzungsschulen are secondary or post-secondary (non-tertiary) schools, which are run by private individuals, private organizations or rarely, religious groups and offer a type of education which is not available at public schools. Most of these schools are vocational schools. However, these vocational schools are not part of the German dual education system. Ergänzungsschulen have the freedom to operate outside of government regulation and are funded in whole by charging their students tuition fees.
None
Private_school
True
[]
0
0
5a67986af038b7001ab0c313
How are public schools funded?
Ergänzungsschulen are secondary or post-secondary (non-tertiary) schools, which are run by private individuals, private organizations or rarely, religious groups and offer a type of education which is not available at public schools. Most of these schools are vocational schools. However, these vocational schools are 

 58%|█████▊    | 6861/11873 [00:03<00:02, 2161.45it/s]


0
5a82458131013a001a335389
In what office has Barack Obama recently served his last term?
Politics: U.N. Secretary General Ban Ki-moon; American political leaders John Hancock, John Adams, John Quincy Adams, Rutherford B. Hayes, Theodore Roosevelt, Franklin D. Roosevelt, John F. Kennedy, Al Gore, George W. Bush and Barack Obama; Chilean President Sebastián Piñera; Colombian President Juan Manuel Santos; Costa Rican President José María Figueres; Mexican Presidents Felipe Calderón, Carlos Salinas de Gortari and Miguel de la Madrid; Mongolian President Tsakhiagiin Elbegdorj; Peruvian President Alejandro Toledo; Taiwanese President Ma Ying-jeou; Canadian Governor General David Lloyd Johnston; Indian Member of Parliament Jayant Sinha; Albanian Prime Minister Fan S. Noli; Canadian Prime Ministers Mackenzie King and Pierre Trudeau; Greek Prime Minister Antonis Samaras; Israeli Prime Minister Benjamin Netanyahu; former Pakistani Prime Minister Benazir Bhutto; U. S. Secretary of Housing and U

 62%|██████▏   | 7340/11873 [00:03<00:02, 2181.19it/s]

[]
0
0
5ad25f42d7d075001a428f72
What characteristic in recent years has been weakly tied with health in developed countries?
In recent years the characteristic that has strongly correlated with health in developed countries is income inequality. Creating an index of "Health and Social Problems" from nine factors, authors Richard Wilkinson and Kate Pickett found health and social problems "more common in countries with bigger income inequalities", and more common among states in the US with larger income inequalities. Other studies have confirmed this relationship. The UNICEF index of "child well-being in rich countries", studying 40 indicators in 22 countries, correlates with greater equality but not per capita income.
None
Economic_inequality
True
[]
0
0
5ad25f42d7d075001a428f73
Who rejected an index of health and social problems?
In recent years the characteristic that has strongly correlated with health in developed countries is income inequality. Creating an index of "Health and So

 66%|██████▌   | 7781/11873 [00:03<00:01, 2151.86it/s]


0
0
5acf9d3477cf76001a685516
What year did the College have the highest acceptance rate in its history?
In the fall quarter of 2014, the University of Chicago enrolled 5,792 students in the College, 3,468 students in its four graduate divisions, 5,984 students in its professional schools, and 15,244 students overall. In the 2012 Spring Quarter, international students comprised almost 19% of the overall study body, over 26% of students were domestic ethnic minorities, and about 44% of enrolled students were female. Admissions to the University of Chicago is highly selective. The middle 50% band of SAT scores for the undergraduate class of 2015, excluding the writing section, was 1420–1530, the average MCAT score for entering students in the Pritzker School of Medicine in 2011 was 36, and the median LSAT score for entering students in the Law School in 2011 was 171. In 2015, the College of the University of Chicago had an acceptance rate of 7.8% for the Class of 2019, the lowest in the 

 70%|██████▉   | 8260/11873 [00:04<00:01, 2156.72it/s]


Since its invention in 1269, the 'Phags-pa script, a unified script for spelling Mongolian, Tibetan, and Chinese languages, was preserved in the court until the end of the dynasty. Most of the Emperors could not master written Chinese, but they could generally converse well in the language. The Mongol custom of long standing quda/marriage alliance with Mongol clans, the Onggirat, and the Ikeres, kept the imperial blood purely Mongol until the reign of Tugh Temur, whose mother was a Tangut concubine. The Mongol Emperors had built large palaces and pavilions, but some still continued to live as nomads at times. Nevertheless, a few other Yuan emperors actively sponsored cultural activities; an example is Tugh Temur (Emperor Wenzong), who wrote poetry, painted, read Chinese classical texts, and ordered the compilation of books.
None
Yuan_dynasty
True
[]
0
0
5ad40955604f3c001a3fff2d
 How poorly did the Mongol Emperors know spoken Chinese?
Since its invention in 1269, the 'Phags-pa script, 

 71%|███████▏  | 8482/11873 [00:04<00:01, 2085.02it/s]


Unlike animals, plants lack phagocytic cells, but many plant immune responses involve systemic chemical signals that are sent through a plant. Individual plant cells respond to molecules associated with pathogens known as Pathogen-associated molecular patterns or PAMPs. When a part of a plant becomes infected, the plant produces a localized hypersensitive response, whereby cells at the site of infection undergo rapid apoptosis to prevent the spread of the disease to other parts of the plant. Systemic acquired resistance (SAR) is a type of defensive response used by plants that renders the entire plant resistant to a particular infectious agent. RNA silencing mechanisms are particularly important in this systemic response as they can block virus replication.
None
Immune_system
True
[]
0
0
5ad4e3a15b96ef001a10a564
What is unimportant in SAR?
Unlike animals, plants lack phagocytic cells, but many plant immune responses involve systemic chemical signals that are sent through a plant. Indi

 75%|███████▌  | 8923/11873 [00:04<00:01, 2080.69it/s]


0
0
5a3e539a378766001a00258b
What date was the Fourth Assessment Report published?
In February 2010, in response to controversies regarding claims in the Fourth Assessment Report, five climate scientists – all contributing or lead IPCC report authors – wrote in the journal Nature calling for changes to the IPCC. They suggested a range of new organizational options, from tightening the selection of lead authors and contributors, to dumping it in favor of a small permanent body, or even turning the whole climate science assessment process into a moderated "living" Wikipedia-IPCC. Other recommendations included that the panel employ a full-time staff and remove government oversight from its processes to avoid political interference.
None
Intergovernmental_Panel_on_Climate_Change
True
[]
0
0
5a3e539a378766001a00258c
In which journal did five lead IPCC authors call for support of the IPCC?
In February 2010, in response to controversies regarding claims in the Fourth Assessment Report, five

 79%|███████▉  | 9399/11873 [00:04<00:01, 2198.45it/s]


0
0
5ad275c8d7d075001a429506
What is the last major city in the stream of the Rhine called?
In the centre of Basel, the first major city in the course of the stream, is located the "Rhine knee"; this is a major bend, where the overall direction of the Rhine changes from West to North. Here the High Rhine ends. Legally, the Central Bridge is the boundary between High and Upper Rhine. The river now flows North as Upper Rhine through the Upper Rhine Plain, which is about 300 km long and up to 40 km wide. The most important tributaries in this area are the Ill below of Strasbourg, the Neckar in Mannheim and the Main across from Mainz. In Mainz, the Rhine leaves the Upper Rhine Valley and flows through the Mainz Basin.
None
Rhine
True
[]
0
0
5ad275c8d7d075001a429507
How long is the Central Bridge?
In the centre of Basel, the first major city in the course of the stream, is located the "Rhine knee"; this is a major bend, where the overall direction of the Rhine changes from West to North. H

 83%|████████▎ | 9840/11873 [00:04<00:00, 2098.13it/s]


5ad02a9777cf76001a686c5e
Parliament elects two MSPs to serve as what officers?
After each election to the Scottish Parliament, at the beginning of each parliamentary session, Parliament elects one MSP to serve as Presiding Officer, the equivalent of the speaker (currently Tricia Marwick), and two MSPs to serve as deputies (currently Elaine Smith and John Scott). The Presiding Officer and deputies are elected by a secret ballot of the 129 MSPs, which is the only secret ballot conducted in the Scottish Parliament. Principally, the role of the Presiding Officer is to chair chamber proceedings and the Scottish Parliamentary Corporate Body. When chairing meetings of the Parliament, the Presiding Officer and his/her deputies must be politically impartial. During debates, the Presiding Officer (or the deputy) is assisted by the parliamentary clerks, who give advice on how to interpret the standing orders that govern the proceedings of meetings. A vote clerk sits in front of the Presiding Off

 87%|████████▋ | 10291/11873 [00:05<00:00, 2135.71it/s]

Who was not the ideologue of the Iranian Revolution?
The views of Ali Shariati, ideologue of the Iranian Revolution, had resemblance with Mohammad Iqbal, ideological father of the State of Pakistan, but Khomeini's beliefs is perceived to be placed somewhere between beliefs of Sunni Islamic thinkers like Mawdudi and Qutb. He believed that complete imitation of the Prophet Mohammad and his successors such as Ali for restoration of Sharia law was essential to Islam, that many secular, Westernizing Muslims were actually agents of the West serving Western interests, and that the acts such as "plundering" of Muslim lands was part of a long-term conspiracy against Islam by the Western governments.
None
Islamism
True
[]
0
0
5acfeae177cf76001a68649f
 Mohammad Iqbal was what type of enemy to the State of Pakistan?
The views of Ali Shariati, ideologue of the Iranian Revolution, had resemblance with Mohammad Iqbal, ideological father of the State of Pakistan, but Khomeini's beliefs is perceived to

 90%|█████████ | 10719/11873 [00:05<00:00, 2088.41it/s]


True
[]
0
0
5ad02fda77cf76001a686d90
 When was the military-political complex reflected upon not within the scope of understanding imperialism?
The correlation between capitalism, aristocracy, and imperialism has long been debated among historians and political theorists. Much of the debate was pioneered by such theorists as J. A. Hobson (1858–1940), Joseph Schumpeter (1883–1950), Thorstein Veblen (1857–1929), and Norman Angell (1872–1967). While these non-Marxist writers were at their most prolific before World War I, they remained active in the interwar years. Their combined work informed the study of imperialism and it's impact on Europe, as well as contributed to reflections on the rise of the military-political complex in the United States from the 1950s. Hobson argued that domestic social reforms could cure the international disease of imperialism by removing its economic foundation. Hobson theorized that state intervention through taxation could boost broader consumption, creat

 94%|█████████▍| 11172/11873 [00:05<00:00, 2015.60it/s]

0
0
5ad4d9be5b96ef001a10a3b0
What style was he Paris Philharmony edifice built in?
Exceptional examples of the bourgeois architecture of the later periods were not restored by the communist authorities after the war (like mentioned Kronenberg Palace and Insurance Company Rosja building) or they were rebuilt in socialist realism style (like Warsaw Philharmony edifice originally inspired by Palais Garnier in Paris). Despite that the Warsaw University of Technology building (1899–1902) is the most interesting of the late 19th-century architecture. Some 19th-century buildings in the Praga district (the Vistula’s right bank) have been restored although many have been poorly maintained. Warsaw’s municipal government authorities have decided to rebuild the Saxon Palace and the Brühl Palace, the most distinctive buildings in prewar Warsaw.
None
Warsaw
True
[]
0
0
5ad4d9be5b96ef001a10a3b1
What were the Saxon Palace and Bruhl Palace in prewar Paris?
Exceptional examples of the bourgeois architec

 96%|█████████▌| 11380/11873 [00:05<00:00, 1874.49it/s]

5ad3a79a604f3c001a3fead7
What was abnormal British defense?
At the start of the war, no French regular army troops were stationed in North America, and few British troops. New France was defended by about 3,000 troupes de la marine, companies of colonial regulars (some of whom had significant woodland combat experience). The colonial government recruited militia support when needed. Most British colonies mustered local militia companies, generally ill trained and available only for short periods, to deal with native threats, but did not have any standing forces.
None
French_and_Indian_War
True
[]
0
0
5ad3a7ef604f3c001a3feadd
What was intent of Celeron's expedition?
Céloron's expedition force consisted of about 200 Troupes de la marine and 30 Indians. The expedition covered about 3,000 miles (4,800 km) between June and November 1749. It went up the St. Lawrence, continued along the northern shore of Lake Ontario, crossed the portage at Niagara, and followed the southern shore of Lake Er

100%|██████████| 11873/11873 [00:05<00:00, 2025.59it/s]


0
5ad3bb12604f3c001a3feebd
What naval battles did France lose in 1795?
In the aftermath of generally poor French results in most theaters of the Seven Years' War in 1758, France's new foreign minister, the duc de Choiseul, decided to focus on an invasion of Britain, to draw British resources away from North America and the European mainland. The invasion failed both militarily and politically, as Pitt again planned significant campaigns against New France, and sent funds to Britain's ally on the mainland, Prussia, and the French Navy failed in the 1759 naval battles at Lagos and Quiberon Bay. In one piece of good fortune, some French supply ships managed to depart France, eluding the British blockade of the French coast.
None
French_and_Indian_War
True
[]
0
0
5ad3bb68604f3c001a3feec3
Who was defeated by Montcalm at Quebec?
British victories continued in all theaters in the Annus Mirabilis of 1759, when they finally captured Ticonderoga, James Wolfe defeated Montcalm at Quebec (in a ba




In [9]:
from transformers import squad_convert_examples_to_features
# from transformers.data.processors.squad import squad_convert_examples_to_features

In [10]:
# import multiprocessing
# features, dataset = squad_convert_examples_to_features(
#             examples=eval_examples,
#             tokenizer=bert_mini_tokenizer,
#             max_seq_length=384,
#             doc_stride=128,
#             max_query_length=100,
#             is_training=False,
#             return_dataset="pt",
#             threads=multiprocessing.cpu_count(),
#         )

In [11]:
"""
self.qas_id = qas_id
self.question_text = question_text
self.context_text = context_text
self.answer_text = answer_text
self.title = title
self.is_impossible = is_impossible
self.answers = answers
"""
# Show every field of the first evaluation example, one value per line.
print(*(getattr(eval_examples[0], field)
        for field in ('qas_id', 'question_text', 'context_text', 'answer_text',
                      'title', 'is_impossible', 'answers')),
      sep='\n')

56ddde6b9a695914005b9628
In what country is Normandy located?
The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.
None
Normans
False
[{'text': 'France', 'answer_start': 159}, {'text': 'France', 'answer_start': 159}, {'text': 'France', 'answer_start': 159}, {'text': 'France', 'answer_start': 159}]


In [12]:
from tqdm import tqdm
def run_predictions(eval_set, out_filename, n_examples=None, padding=False):
  """
  Run `answer_question` over an evaluation set and dump the predictions to JSON.

  Args:
    eval_set: sequence of examples exposing `qas_id`, `question_text` and
      `context_text` attributes.
    out_filename: file name (not a path); the predictions are written to
      './SQUAD_V2/' + out_filename as a {qas_id: answer} JSON object.
    n_examples: when falsy (None or 0) every example is processed and an
      example whose prediction raises is recorded as "" instead of aborting
      the run. Otherwise only the first `n_examples` examples are processed
      (clamped to the size of `eval_set`) and errors propagate.
    padding: only used together with `n_examples`; when true, the examples
      that were not processed are written with an empty prediction so the
      output file still has an entry for every example.
  """
  predictions = dict()
  error_count = 0

  if not n_examples:
    for example in tqdm(eval_set):
      try:
        predictions[example.qas_id] = answer_question(example.question_text, example.context_text)
      except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit can still stop a long run.
        predictions[example.qas_id] = ""
        error_count += 1
  else:
    # Never index past the end of the evaluation set.
    n_examples = min(n_examples, len(eval_set))
    for i in tqdm(range(n_examples)):
      example = eval_set[i]
      predictions[example.qas_id] = answer_question(example.question_text, example.context_text)

    if padding:
      for i in range(n_examples, len(eval_set)):
        example = eval_set[i]
        predictions[example.qas_id] = ""

  if error_count:
    # Make swallowed failures visible; they would otherwise look like
    # ordinary "no answer" predictions in the output file.
    print("%d of %d predictions failed and were recorded as empty strings"
          % (error_count, len(predictions)))

  with open('./SQUAD_V2/'+out_filename, 'w') as fp:
    json.dump(predictions, fp)

In [13]:
# Predict every example in eval_examples; the result is written to
# ./SQUAD_V2/mini_small_med_eq.json, which is the file Config.pred_file reads.
# The recorded run of this cell took about 1h14m for 11873 examples.
run_predictions(eval_examples, "mini_small_med_eq.json")
print("Done")

100%|██████████| 11873/11873 [1:14:18<00:00,  2.66it/s]    

Done





In [14]:
import argparse
import collections
import json
import numpy as np
import os
import re
import string
import sys

In [15]:
class Config(object):
    """
    Options consumed by the evaluation cell through the global `OPTS`.

    Every option is a keyword argument whose default is the value that used
    to be hard-coded, so `Config()` behaves exactly as before while another
    prediction file can be scored with e.g. `Config(pred_file=...)`.

    Attributes:
        data_file: JSON dataset file; its 'data' entry is evaluated.
        pred_file: JSON file mapping question id -> predicted answer text.
        out_file: where the metrics are written as JSON; when falsy they are
            printed instead.
        na_prob_file: optional JSON file mapping question id -> no-answer
            probability; when falsy every probability is taken as 0.0.
        na_prob_thresh: a prediction is treated as "no answer" when its
            no-answer probability is strictly greater than this value.
        out_image_dir: directory for precision-recall curves and histograms
            (only used when `na_prob_file` is set).
    """
    def __init__(self,
                 data_file='./SQUAD_V2/dev-v2.0.json',
                 pred_file='./SQUAD_V2/mini_small_med_eq.json',
                 out_file='./SQUAD_V2/eval.json',
                 na_prob_file=None,
                 na_prob_thresh=1.0,
                 out_image_dir='./SQUAD_V2'):
        self.data_file = data_file
        self.pred_file = pred_file
        self.out_file = out_file
        self.na_prob_file = na_prob_file
        self.na_prob_thresh = na_prob_thresh
        self.out_image_dir = out_image_dir
OPTS=Config()

In [16]:
def make_qid_to_has_ans(dataset):
  """Map each question id in `dataset` to True when it has at least one gold answer.

  `dataset` is a list of articles, each holding 'paragraphs' whose 'qas'
  entries carry an 'id' and an 'answers' list.
  """
  return {
      qa['id']: bool(qa['answers'])
      for article in dataset
      for paragraph in article['paragraphs']
      for qa in paragraph['qas']
  }

def normalize_answer(s):
  """Normalize an answer string for comparison.

  Applied in order: lowercase, drop ASCII punctuation, replace the standalone
  words a/an/the with a space, then collapse all whitespace runs to one space.
  """
  punctuation = set(string.punctuation)
  article_pattern = re.compile(r'\b(a|an|the)\b', re.UNICODE)

  lowered = s.lower()
  without_punc = ''.join(filter(lambda ch: ch not in punctuation, lowered))
  without_articles = article_pattern.sub(' ', without_punc)
  return ' '.join(without_articles.split())

def get_tokens(s):
  """Return the whitespace-separated tokens of the normalized answer; [] for a falsy input."""
  return normalize_answer(s).split() if s else []

def compute_exact(a_gold, a_pred):
  """Return 1 when gold and predicted answers are equal after normalization, else 0."""
  gold_norm = normalize_answer(a_gold)
  pred_norm = normalize_answer(a_pred)
  return 1 if gold_norm == pred_norm else 0

def compute_f1(a_gold, a_pred):
  """Token-level F1 between a gold answer and a predicted answer.

  When either side has no tokens the result is 1 if both are empty and 0
  otherwise; with no shared tokens the result is 0.
  """
  gold_toks = get_tokens(a_gold)
  pred_toks = get_tokens(a_pred)
  if not gold_toks or not pred_toks:
    # If either is no-answer, then F1 is 1 if they agree, 0 otherwise
    return int(gold_toks == pred_toks)

  # Multiset intersection: each shared token counts min(gold, pred) times.
  overlap = collections.Counter(gold_toks) & collections.Counter(pred_toks)
  num_same = sum(overlap.values())
  if num_same == 0:
    return 0

  precision = 1.0 * num_same / len(pred_toks)
  recall = 1.0 * num_same / len(gold_toks)
  return (2 * precision * recall) / (precision + recall)

def get_raw_scores(dataset, preds):
  """Score every predicted question against its gold answers.

  Returns a pair of dicts (exact_scores, f1_scores) keyed by question id.
  Each score is the best one over all gold answers. Questions with no entry
  in `preds` are reported on stdout and left out of both dicts.
  """
  exact_scores, f1_scores = {}, {}
  all_qas = (qa
             for article in dataset
             for paragraph in article['paragraphs']
             for qa in paragraph['qas'])
  for qa in all_qas:
    qid = qa['id']
    # Keep only gold answers that are non-empty after normalization; for
    # unanswerable questions the only correct answer is the empty string.
    gold_answers = [ans['text'] for ans in qa['answers']
                    if normalize_answer(ans['text'])] or ['']
    if qid not in preds:
      print('Missing prediction for %s' % qid)
      continue
    a_pred = preds[qid]
    exact_scores[qid] = max(compute_exact(gold, a_pred) for gold in gold_answers)
    f1_scores[qid] = max(compute_f1(gold, a_pred) for gold in gold_answers)
  return exact_scores, f1_scores

def apply_no_ans_threshold(scores, na_probs, qid_to_has_ans, na_prob_thresh):
  """Re-score predictions whose no-answer probability exceeds the threshold.

  For a question with na_probs[qid] > na_prob_thresh the prediction is treated
  as "no answer": it scores 1.0 if the question really has no answer and 0.0
  otherwise. All other questions keep their original score.
  """
  return {
      qid: float(not qid_to_has_ans[qid]) if na_probs[qid] > na_prob_thresh else raw_score
      for qid, raw_score in scores.items()
  }

def make_eval_dict(exact_scores, f1_scores, qid_list=None):
  """Average exact-match and F1 scores into percentages.

  With a non-empty `qid_list` only those question ids are averaged; otherwise
  every value in the score dicts is used. Returns an OrderedDict with the
  keys 'exact', 'f1' and 'total' (the number of questions averaged).
  """
  if qid_list:
    total = len(qid_list)
    exact_sum = sum(exact_scores[qid] for qid in qid_list)
    f1_sum = sum(f1_scores[qid] for qid in qid_list)
  else:
    total = len(exact_scores)
    exact_sum = sum(exact_scores.values())
    f1_sum = sum(f1_scores.values())

  summary = collections.OrderedDict()
  summary['exact'] = 100.0 * exact_sum / total
  summary['f1'] = 100.0 * f1_sum / total
  summary['total'] = total
  return summary

def merge_eval(main_eval, new_eval, prefix):
  """Copy every entry of `new_eval` into `main_eval` under the key '<prefix>_<key>'."""
  for key, value in new_eval.items():
    main_eval['%s_%s' % (prefix, key)] = value

def plot_pr_curve(precisions, recalls, out_image, title):
  """Draw a step-style precision-recall curve and save it to `out_image`.

  Args:
    precisions: precision values, one per curve point.
    recalls: recall values, one per curve point (used as the x axis).
    out_image: path handed to `plt.savefig`.
    title: figure title.

  NOTE(review): relies on a module-level `plt`; no matplotlib import is
  visible in this part of the notebook -- confirm one exists before calling.
  """
  plt.step(recalls, precisions, color='b', alpha=0.2, where='post')
  plt.fill_between(recalls, precisions, step='post', alpha=0.2, color='b')
  plt.xlabel('Recall')
  plt.ylabel('Precision')
  # Both axes extend slightly past 1.0 so points at 1.0 are not drawn on the border.
  plt.xlim([0.0, 1.05])
  plt.ylim([0.0, 1.05])
  plt.title(title)
  plt.savefig(out_image)
  # Clear the current figure so the next plot starts from an empty canvas.
  plt.clf()

def make_precision_recall_eval(scores, na_probs, num_true_pos, qid_to_has_ans,
                               out_image=None, title=None):
  """Compute average precision over questions ranked by no-answer probability.

  Questions are visited in increasing order of na_probs. A curve point is
  recorded only where a threshold could be placed, i.e. at the last question
  or where the next question has a different probability. When `out_image`
  is given the curve is also plotted through `plot_pr_curve`.

  Returns {'ap': average precision as a percentage}.
  """
  ranked = sorted(na_probs, key=na_probs.__getitem__)
  last_index = len(ranked) - 1
  precisions, recalls = [1.0], [0.0]
  true_pos = 0.0
  avg_prec = 0.0
  for idx, qid in enumerate(ranked):
    if qid_to_has_ans[qid]:
      true_pos += scores[qid]
    precision = true_pos / float(idx + 1)
    recall = true_pos / float(num_true_pos)
    at_threshold = idx == last_index or na_probs[qid] != na_probs[ranked[idx + 1]]
    if not at_threshold:
      continue
    # Area of the step from the previously recorded recall to this one.
    avg_prec += precision * (recall - recalls[-1])
    precisions.append(precision)
    recalls.append(recall)
  if out_image:
    plot_pr_curve(precisions, recalls, out_image, title)
  return {'ap': 100.0 * avg_prec}

def run_precision_recall_analysis(main_eval, exact_raw, f1_raw, na_probs,
                                  qid_to_has_ans, out_image_dir):
  """Add exact-match, F1 and oracle average-precision results to `main_eval`.

  Creates `out_image_dir` if it is set and missing, then does nothing more
  when no question has an answer. Otherwise three precision-recall curves are
  computed (and saved as PNG files inside `out_image_dir`) and their results
  are merged into `main_eval` under the prefixes 'pr_exact', 'pr_f1' and
  'pr_oracle'.
  """
  if out_image_dir and not os.path.exists(out_image_dir):
    os.makedirs(out_image_dir)
  num_true_pos = len([flag for flag in qid_to_has_ans.values() if flag])
  if not num_true_pos:
    return

  def curve(raw_scores, image_name, title):
    # One precision-recall evaluation whose plot lands in out_image_dir.
    return make_precision_recall_eval(
        raw_scores, na_probs, num_true_pos, qid_to_has_ans,
        out_image=os.path.join(out_image_dir, image_name),
        title=title)

  pr_exact = curve(exact_raw, 'pr_exact.png',
                   'Precision-Recall curve for Exact Match score')
  pr_f1 = curve(f1_raw, 'pr_f1.png',
                'Precision-Recall curve for F1 score')
  # The oracle scores 1.0 on every answerable question and 0.0 elsewhere.
  oracle_scores = dict((qid, float(flag)) for qid, flag in qid_to_has_ans.items())
  pr_oracle = curve(oracle_scores, 'pr_oracle.png',
                    'Oracle Precision-Recall curve (binary task of HasAns vs. NoAns)')

  for prefix, result in (('pr_exact', pr_exact),
                         ('pr_f1', pr_f1),
                         ('pr_oracle', pr_oracle)):
    merge_eval(main_eval, result, prefix)

def histogram_na_prob(na_probs, qid_list, image_dir, name):
  """Save a histogram of the no-answer probabilities of the given questions.

  Args:
    na_probs: mapping from question id to no-answer probability.
    qid_list: question ids to include; nothing is drawn when it is empty.
    image_dir: directory the image is written to.
    name: label used in the title and in the file name
      'na_prob_hist_<name>.png'.

  NOTE(review): relies on a module-level `plt`; no matplotlib import is
  visible in this part of the notebook -- confirm one exists before calling.
  """
  if not qid_list:
    return
  x = [na_probs[k] for k in qid_list]
  # Weight every sample by 1/len(x) so bar heights are proportions, not counts.
  weights = np.ones_like(x) / float(len(x))
  plt.hist(x, weights=weights, bins=20, range=(0.0, 1.0))
  plt.xlabel('Model probability of no-answer')
  plt.ylabel('Proportion of dataset')
  plt.title('Histogram of no-answer probability: %s' % name)
  plt.savefig(os.path.join(image_dir, 'na_prob_hist_%s.png' % name))
  # Clear the current figure so the next plot starts from an empty canvas.
  plt.clf()

def find_best_thresh(preds, scores, na_probs, qid_to_has_ans):
  """Search for the no-answer threshold that maximizes the total score.

  The running score starts at the number of unanswerable questions and is
  updated while questions are visited in increasing order of na_probs: an
  answerable question adds its score, an unanswerable one with a non-empty
  prediction subtracts 1. Questions missing from `scores` are skipped.

  Returns (best score as a percentage of len(scores), threshold at which it
  was reached; 0.0 if the starting score was never beaten).
  """
  running = sum(1 for has_ans in qid_to_has_ans.values() if not has_ans)
  best_score, best_thresh = running, 0.0
  for qid in sorted(na_probs, key=na_probs.__getitem__):
    if qid not in scores:
      continue
    if qid_to_has_ans[qid]:
      delta = scores[qid]
    else:
      delta = -1 if preds[qid] else 0
    running += delta
    if running > best_score:
      best_score, best_thresh = running, na_probs[qid]
  return 100.0 * best_score / len(scores), best_thresh

def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, qid_to_has_ans):
  """Store the best achievable exact-match and F1 scores and their thresholds.

  Writes 'best_exact', 'best_exact_thresh', 'best_f1' and 'best_f1_thresh'
  into `main_eval`, using `find_best_thresh` for each metric.
  """
  # Run both searches before touching main_eval.
  exact_result = find_best_thresh(preds, exact_raw, na_probs, qid_to_has_ans)
  f1_result = find_best_thresh(preds, f1_raw, na_probs, qid_to_has_ans)
  main_eval['best_exact'], main_eval['best_exact_thresh'] = exact_result
  main_eval['best_f1'], main_eval['best_f1_thresh'] = f1_result

In [17]:
  # Evaluation driver: scores the predictions in OPTS.pred_file against the
  # dataset in OPTS.data_file and writes (or prints) the resulting metrics.

  # Load the gold dataset (only its 'data' entry is used) and the predictions.
  with open(OPTS.data_file) as f:
    dataset_json = json.load(f)
    dataset = dataset_json['data']
  with open(OPTS.pred_file) as f:
    preds = json.load(f)
  # No-answer probabilities are optional; without a file every predicted
  # question gets 0.0.
  if OPTS.na_prob_file:
    with open(OPTS.na_prob_file) as f:
      na_probs = json.load(f)
  else:
    na_probs = {k: 0.0 for k in preds}
  qid_to_has_ans = make_qid_to_has_ans(dataset)  # maps qid to True/False
  has_ans_qids = [k for k, v in qid_to_has_ans.items() if v]
  no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
  # Raw per-question scores, then the same scores after treating predictions
  # with na_prob > OPTS.na_prob_thresh as "no answer".
  exact_raw, f1_raw = get_raw_scores(dataset, preds)
  exact_thresh = apply_no_ans_threshold(exact_raw, na_probs, qid_to_has_ans,
                                        OPTS.na_prob_thresh)
  f1_thresh = apply_no_ans_threshold(f1_raw, na_probs, qid_to_has_ans,
                                     OPTS.na_prob_thresh)
  # Overall metrics, plus 'HasAns_*' / 'NoAns_*' breakdowns when those
  # subsets are non-empty.
  out_eval = make_eval_dict(exact_thresh, f1_thresh)
  if has_ans_qids:
    has_ans_eval = make_eval_dict(exact_thresh, f1_thresh, qid_list=has_ans_qids)
    merge_eval(out_eval, has_ans_eval, 'HasAns')
  if no_ans_qids:
    no_ans_eval = make_eval_dict(exact_thresh, f1_thresh, qid_list=no_ans_qids)
    merge_eval(out_eval, no_ans_eval, 'NoAns')
  # Threshold search and plots only run when real no-answer probabilities
  # were supplied.
  if OPTS.na_prob_file:
    find_all_best_thresh(out_eval, preds, exact_raw, f1_raw, na_probs, qid_to_has_ans)
  if OPTS.na_prob_file and OPTS.out_image_dir:
    run_precision_recall_analysis(out_eval, exact_raw, f1_raw, na_probs, 
                                  qid_to_has_ans, OPTS.out_image_dir)
    histogram_na_prob(na_probs, has_ans_qids, OPTS.out_image_dir, 'hasAns')
    histogram_na_prob(na_probs, no_ans_qids, OPTS.out_image_dir, 'noAns')
  # Persist the metrics as JSON when an output file is configured; otherwise
  # print them.
  if OPTS.out_file:
    with open(OPTS.out_file, 'w') as f:
      json.dump(out_eval, f)
  else:
    print(json.dumps(out_eval, indent=2))