In [2]:
from transformers import BertTokenizer, BertForQuestionAnswering
import torch

  from .autonotebook import tqdm as notebook_tqdm


### Load the model and tokenizer, then run them on an example

In [3]:
# The tokenizer and the question-answering model are both loaded from the same
# checkpoint identifier, so it is named once and reused.
QA_CHECKPOINT = 'deepset/bert-base-cased-squad2'

tokenizer = BertTokenizer.from_pretrained(QA_CHECKPOINT)
model = BertForQuestionAnswering.from_pretrained(QA_CHECKPOINT, return_dict=True)


In [4]:
# Example pair: the question to answer and the passage in which to look for
# the answer span.
question = "Who was Jim Henson?"
text = "Jim Henson was a nice puppet"

In [5]:
# Encode the (question, passage) pair as a single model input, returned as
# PyTorch tensors.
inputs = tokenizer(question, text, return_tensors="pt")

# Map the ids of the first (and here only) encoded sequence back to their
# token strings so that logit positions can be read as tokens.
first_sequence_ids = inputs["input_ids"][0]
tokens = tokenizer.convert_ids_to_tokens(first_sequence_ids)

In [6]:
# Plain forward pass without labels; autograd is disabled because nothing is
# back-propagated from this call.
with torch.no_grad():
    outputs = model(**inputs)

# `outputs` carries `start_logits` and `end_logits`: for every token position,
# a score for that position being the start / the end of the answer span.
# No targets were passed, so `loss` is None.
outputs

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.0374, -6.9344, -6.9556, -2.8814, -7.0174, -8.2111, -7.6869, -7.3035,
          0.8260, -4.2656, -5.2627,  0.3830,  7.0740,  5.2306,  5.6687, -7.3035]]), end_logits=tensor([[ 0.6911, -7.3395, -7.9609, -7.4926, -8.3033, -2.5775, -7.3290, -6.7480,
         -4.3683, -6.6555, -0.8412, -1.0332, -1.5339,  0.9238,  6.3177, -6.7480]]), hidden_states=None, attentions=None)

### Compare the loss when the target span matches the model's prediction and when it differs

In [7]:
# Take the model's own most likely start / end position for every sequence in
# the batch. Reducing over the last (token) axis with `argmax(dim=-1)` yields
# one index per sequence, i.e. a 1-D tensor of shape (batch,), which is what
# is passed as labels below. A bare `argmax()` would instead flatten the whole
# (batch, seq_len) tensor into a single index, which is only a valid position
# when the batch holds exactly one sequence.
answer_start_index = outputs.start_logits.argmax(dim=-1)
answer_end_index = outputs.end_logits.argmax(dim=-1)

# Feed the predicted span back in as the target: this is the lowest loss the
# model can report for this input, since the targets sit on its highest logits.
# Note: this rebinds `outputs` to the result of the labelled forward pass.
outputs = model(**inputs, start_positions=answer_start_index, end_positions=answer_end_index)
outputs.loss

tensor(0.1765, grad_fn=<DivBackward0>)

In [14]:
# Deliberately use a target span that differs from the model's prediction.
# In the logits displayed earlier the highest start score is at position 12
# and the highest end score at position 14, so (14, 15) is a mismatching span;
# 15 is the last position of the 16-position sequence.
target_start_index, target_end_index = torch.tensor([14]), torch.tensor([15])

# Forward pass with the mismatching labels; `outputs` is rebound to its result.
outputs = model(
    start_positions=target_start_index,
    end_positions=target_end_index,
    **inputs,
)

In [19]:
# Display the complete result (loss plus start/end logits) of whichever forward
# pass was assigned to `outputs` most recently; several cells rebind that name,
# so what is shown depends on the order in which the cells were run.
outputs

QuestionAnsweringModelOutput(loss=tensor(7.4121, grad_fn=<DivBackward0>), start_logits=tensor([[ 1.0374, -6.9344, -6.9556, -2.8814, -7.0174, -8.2111, -7.6869, -7.3035,
          0.8261, -4.2656, -5.2627,  0.3830,  7.0740,  5.2306,  5.6687, -7.3035]],
       grad_fn=<CloneBackward0>), end_logits=tensor([[ 0.6911, -7.3395, -7.9609, -7.4926, -8.3033, -2.5775, -7.3290, -6.7480,
         -4.3683, -6.6555, -0.8412, -1.0332, -1.5339,  0.9238,  6.3177, -6.7480]],
       grad_fn=<CloneBackward0>), hidden_states=None, attentions=None)

In [15]:
# Display only the loss of the forward pass currently stored in `outputs`.
outputs.loss

tensor(7.4121, grad_fn=<DivBackward0>)