# Q&A with BERT

In [1]:
from transformers import BertTokenizer, BertForQuestionAnswering
import torch

import warnings
# Install a blanket "ignore" filter so warnings raised by later cells are not
# printed into the notebook output. Warnings emitted before this line runs
# (e.g. during the imports above) are not affected.
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint identifier: BERT-large (uncased, whole-word masking) fine-tuned on
# SQuAD, the Stanford Question Answering Dataset.
model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"

# The tokenizer and the model are both built from the same checkpoint name so
# that the token ids produced by one match what the other expects.
tokenizer = BertTokenizer.from_pretrained(model_name)  # text -> token ids

# Question-answering variant of BERT; its forward pass yields start/end scores
# for an answer span (consumed by predict_answer below).
model = BertForQuestionAnswering.from_pretrained(model_name)

def predict_answer(context, question, max_length=512):
    """
    Extracts the answer to a question from a context passage using the BERT model.

    Args:
        context (str): Text passage containing the answer
        question (str): Question to be answered
        max_length (int): Maximum number of tokens (question + context +
            special tokens) given to the model; longer inputs are truncated.
            Defaults to 512.

    Returns:
        str: Text of the best-scoring answer span taken from the context
        tokens, or an empty string if the encoded input contains no context
        token to choose from.

    Process:
    1. Tokenize question and context as a pair using the BERT tokenizer
    2. Run the model with input ids, attention mask AND segment ids
    3. Score every (start, end) pair and keep only spans that lie inside the
       context and satisfy start <= end
    4. Convert the winning span's tokens back to readable text
    5. Return the answer string
    """
    # Tokenize the (question, context) pair and add BERT's special tokens.
    # Calling the tokenizer directly replaces the legacy encode_plus API.
    encoding = tokenizer(question, context, return_tensors='pt', max_length=max_length, truncation=True)  # pt: PyTorch format

    input_ids = encoding['input_ids'][0]         # token ids of the single example
    segment_ids = encoding['token_type_ids'][0]  # 0 = question part, 1 = context part

    # Forward every tensor the tokenizer produced. The original call dropped
    # token_type_ids, so the model could not tell question from context.
    with torch.no_grad():
        start_scores, end_scores = model(**encoding, return_dict=False)

    # Answer tokens must come from the context: segment id 1, excluding the
    # trailing [SEP] that shares that segment id.
    is_context = (segment_ids == 1) & (input_ids != tokenizer.sep_token_id)
    if not bool(is_context.any()):
        return ""

    # Build a (start, end) validity grid: both ends inside the context and
    # start <= end. Independent argmax over start and end could produce
    # end < start, which silently yielded an empty answer.
    positions = torch.arange(input_ids.size(0), device=input_ids.device)
    is_valid_span = (
        (positions.unsqueeze(1) <= positions.unsqueeze(0))
        & is_context.unsqueeze(1)
        & is_context.unsqueeze(0)
    )

    # Joint score of a span = start score + end score; invalid spans are
    # pushed to -inf so argmax can never select them.
    span_scores = start_scores[0].unsqueeze(1) + end_scores[0].unsqueeze(0)
    span_scores = span_scores.masked_fill(~is_valid_span, float('-inf'))

    # argmax works on the flattened grid; recover (row, column) = (start, end).
    best_flat = int(torch.argmax(span_scores))
    start_index, end_index = divmod(best_flat, span_scores.size(1))

    # Extract answer tokens from the selected span (end index is inclusive)
    answer_tokens = tokenizer.convert_ids_to_tokens(input_ids[start_index:end_index + 1].tolist())

    # Convert tokens to readable text
    answer = tokenizer.convert_tokens_to_string(answer_tokens)

    return answer

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
# Minimal smoke test: a short passage that states the answer explicitly.
question = "What is the capital of France?"
context = "France, officially the French Republic, is a country whose capital is Paris."

# Arguments are passed by keyword because predict_answer takes the context
# first and the question second.
answer = predict_answer(context=context, question=question)

print(f"Question: {question}\nAnswer: {answer}")


Question: What is the capital of France?
Answer: paris
