In [1]:
# Source: https://towardsdatascience.com/question-answering-with-bert-xlnet-xlm-and-distilbert-using-simple-transformers-4d8785ee762a

In [2]:
import json
from simpletransformers.question_answering import QuestionAnsweringModel
import os

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Read the training JSON. The encoding is pinned to UTF-8 because open()
# otherwise falls back to the platform locale, which can fail or garble
# non-ASCII text in the dataset on non-UTF-8 systems.
with open('data/train-v2.0.json', 'r', encoding='utf-8') as f:
    train_squad = json.load(f)

# Flatten the nesting data -> topic -> paragraphs into one list of
# paragraph records, which is what is later passed to the model.
train_data = [
    paragraph
    for topic in train_squad['data']
    for paragraph in topic['paragraphs']
]

In [4]:
# Options handed to the question-answering wrapper via its `args` parameter.
# NOTE(review): batch size 1 with 4 accumulation steps presumably yields an
# effective batch of 4 -- confirm against the library's semantics.
train_args = dict(
    learning_rate=3e-5,
    num_train_epochs=2,
    max_seq_length=384,
    doc_stride=128,
    overwrite_output_dir=True,
    reprocess_input_data=False,
    train_batch_size=1,
    gradient_accumulation_steps=4,
)

# Build the DistilBERT question-answering model from the named checkpoint.
model = QuestionAnsweringModel(
    'distilbert',
    'distilbert-base-uncased-distilled-squad',
    args=train_args,
)

KeyboardInterrupt: 

In [None]:
# Train the model on the flattened list of training paragraphs.
model.train_model(train_data)

In [None]:
# Read the dev JSON with an explicit UTF-8 encoding; the open() default is
# locale-dependent and may fail or garble non-ASCII text on some platforms.
with open('data/dev-v2.0.json', 'r', encoding='utf-8') as f:
    dev_data = json.load(f)

# Flatten data -> topic -> paragraphs into a single list of paragraph records.
dev_data = [item for topic in dev_data['data'] for item in topic['paragraphs']]

# Predict on the first 200 paragraphs only.
preds = model.predict(dev_data[:200])

os.makedirs('results', exist_ok=True)

# Map each question id to the answer text predicted for it.
submission = {pred['id']: pred['answer'] for pred in preds}

# Persist the id -> answer mapping as JSON.
with open('results/submission.json', 'w', encoding='utf-8') as f:
    json.dump(submission, f)

In [None]:
# NOTE(review): the dev file is re-read here instead of reusing the earlier
# dev_data; presumably predict() alters the in-memory records -- confirm.
# UTF-8 is pinned because the open() default encoding is locale-dependent.
with open('data/dev-v2.0.json', 'r', encoding='utf-8') as f:
    dev_data = json.load(f)

# Flatten data -> topic -> paragraphs into a single list of paragraph records.
dev_data = [item for topic in dev_data['data'] for item in topic['paragraphs']]

# Map every question id to the set of distinct reference answer texts.
# Questions whose 'answers' list is empty map to an empty set.
answer_key = {
    entry['id']: {answer['text'] for answer in entry['answers']}
    for paragraph in dev_data
    for entry in paragraph['qas']
}

In [None]:
# Exact-match scoring of the predictions against the reference answers.
# Questions with no reference answer are skipped and not counted in `total`.
# Matching is plain string membership, so a prediction that differs from a
# reference only by case or trailing punctuation counts as a miss.
total = 0
hit = 0

for key, pred in submission.items():
    answers = answer_key[key]
    if not answers:
        continue

    total += 1
    if pred in answers:
        hit += 1

# Guard the ratio: with no scored questions the division would raise
# ZeroDivisionError, so report that explicitly instead.
if total:
    print(hit, '/', total, '=', float(hit) / total)
else:
    print(hit, '/', total, '=', 'n/a (no answerable questions were scored)')

In [33]:
# Display the question-id -> predicted-answer mapping.
submission

{'56ddde6b9a695914005b9628': 'France.',
 '56ddde6b9a695914005b9629': '10th and 11th centuries',
 '56ddde6b9a695914005b962a': 'Denmark, Iceland and Norway',
 '56ddde6b9a695914005b962b': 'Rollo,',
 '56ddde6b9a695914005b962c': '10th',
 '5ad39d53604f3c001a3fe8d1': 'The Normans',
 '5ad39d53604f3c001a3fe8d2': 'Normandy,',
 '5ad39d53604f3c001a3fe8d3': 'West Francia.',
 '5ad39d53604f3c001a3fe8d4': 'first half of the 10th century',
 '56dddf4066d3e219004dad5f': 'William the Conqueror,',
 '56dddf4066d3e219004dad60': 'Richard I',
 '56dddf4066d3e219004dad61': 'Catholic',
 '5ad3a266604f3c001a3fea27': 'political, cultural and military',
 '5ad3a266604f3c001a3fea28': 'The Normans',
 '5ad3a266604f3c001a3fea29': 'The Normans',
 '5ad3a266604f3c001a3fea2a': 'Richard I',
 '5ad3a266604f3c001a3fea2b': 'Antioch',
 '56dde0379a695914005b9636': 'plural of Normant,',
 '56dde0379a695914005b9637': '9th century',
 '5ad3ab70604f3c001a3feb89': '"Normans"',
 '5ad3ab70604f3c001a3feb8a': '9th century',
 '56dde0ba66d3e2190

In [5]:
# Display the first flattened dev paragraph record.
dev_data[0]

{'qas': [{'question': 'In what country is Normandy located?',
   'id': '56ddde6b9a695914005b9628',
   'answers': [{'text': ' ', 'answer_start': 0}],
   'is_impossible': False},
  {'question': 'When were the Normans in Normandy?',
   'id': '56ddde6b9a695914005b9629',
   'answers': [{'text': ' ', 'answer_start': 0}],
   'is_impossible': False},
  {'question': 'From which countries did the Norse originate?',
   'id': '56ddde6b9a695914005b962a',
   'answers': [{'text': ' ', 'answer_start': 0}],
   'is_impossible': False},
  {'question': 'Who was the Norse leader?',
   'id': '56ddde6b9a695914005b962b',
   'answers': [{'text': ' ', 'answer_start': 0}],
   'is_impossible': False},
  {'question': 'What century did the Normans first gain their separate identity?',
   'id': '56ddde6b9a695914005b962c',
   'answers': [{'text': ' ', 'answer_start': 0}],
   'is_impossible': False},
  {'plausible_answers': [{'text': 'Normans', 'answer_start': 4}],
   'question': "Who gave their name to Normandy in th

In [31]:

# Rebuild the question-id -> predicted-answer mapping from the predictions.
submission = dict(
    (prediction['id'], prediction['answer'])
    for prediction in preds
)

In [35]:
# Re-read the dev JSON from disk. UTF-8 is pinned because the open() default
# encoding is locale-dependent and may fail or garble non-ASCII text.
with open('data/dev-v2.0.json', 'r', encoding='utf-8') as f:
    dev_data = json.load(f)

# Flatten data -> topic -> paragraphs into a single list of paragraph records.
dev_data = [item for topic in dev_data['data'] for item in topic['paragraphs']]

In [9]:
# Run prediction on just the first two dev paragraphs and display the result.
model.predict(dev_data[:2])


  0%|          | 0/17 [00:00<?, ?it/s][A
 47%|████▋     | 8/17 [00:00<00:00, 76.37it/s][A

Converting to features started.



100%|██████████| 17/17 [00:00<00:00, 60.44it/s][A


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




[{'id': '56ddde6b9a695914005b9628', 'answer': 'France.'},
 {'id': '56ddde6b9a695914005b9629', 'answer': '10th and 11th centuries'},
 {'id': '56ddde6b9a695914005b962a', 'answer': 'Denmark, Iceland and Norway'},
 {'id': '56ddde6b9a695914005b962b', 'answer': 'Rollo,'},
 {'id': '56ddde6b9a695914005b962c', 'answer': '10th'},
 {'id': '5ad39d53604f3c001a3fe8d1', 'answer': 'The Normans'},
 {'id': '5ad39d53604f3c001a3fe8d2', 'answer': 'Normandy,'},
 {'id': '5ad39d53604f3c001a3fe8d3', 'answer': 'West Francia.'},
 {'id': '5ad39d53604f3c001a3fe8d4',
  'answer': 'first half of the 10th century'},
 {'id': '56dddf4066d3e219004dad5f', 'answer': 'William the Conqueror,'},
 {'id': '56dddf4066d3e219004dad60', 'answer': 'Richard I'},
 {'id': '56dddf4066d3e219004dad61', 'answer': 'Catholic'},
 {'id': '5ad3a266604f3c001a3fea27',
  'answer': 'political, cultural and military'},
 {'id': '5ad3a266604f3c001a3fea28', 'answer': 'The Normans'},
 {'id': '5ad3a266604f3c001a3fea29', 'answer': 'The Normans'},
 {'id': 