In [4]:
import wikipedia
import re
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

# Query the English edition of Wikipedia
wikipedia.set_lang("en")

# The tokenizer and the QA model come from the same SQuAD-fine-tuned BERT
# checkpoint, so the name is spelled out once and shared by both loads.
_QA_CHECKPOINT = 'bert-large-uncased-whole-word-masking-finetuned-squad'
tokenizer = AutoTokenizer.from_pretrained(_QA_CHECKPOINT)
model = AutoModelForQuestionAnswering.from_pretrained(_QA_CHECKPOINT)

# Define a function to create context from a question using Wikipedia
def create_context(question, max_tokens=512):
    """Build a question-answering context from the matching Wikipedia page.

    A single leading question word (what/who/when/where/which/how) is
    stripped from *question* and the remainder is used as the page title to
    look up. The page text is then shortened so that the question plus the
    context fit in ``max_tokens`` tokens of the module-level ``tokenizer``.

    Args:
        question: The natural-language question.
        max_tokens: Total token budget for the (question, context) pair,
            special tokens included. Defaults to 512, the length
            ``answer_question`` truncates to.

    Returns:
        A prefix of the page content (possibly the whole content), or an
        empty string when the question alone uses up the budget.

    Raises:
        Whatever ``wikipedia.page`` raises apart from the first
        ``DisambiguationError``, which is handled by retrying with the
        first suggested option.
    """
    # Keep the original question untouched: it is what gets paired with the
    # context later, so the token budget must be computed from it.
    topic = re.sub(r'^(what|who|when|where|which|how)\s', '', question, flags=re.IGNORECASE)

    try:
        page = wikipedia.page(topic)
    except wikipedia.exceptions.DisambiguationError as e:
        # Ambiguous title: fall back to the first suggestion.
        page = wikipedia.page(e.options[0])

    context = page.content

    # encode() already counts the special tokens wrapped around the question;
    # a sequence pair needs one extra separator after the context
    # (BERT-style layout: [CLS] question [SEP] context [SEP]).
    budget = max_tokens - len(tokenizer.encode(question)) - 1
    if budget <= 0:
        return ""

    if getattr(tokenizer, "is_fast", False):
        # Truncate by tokens, then cut the ORIGINAL string at the end offset
        # of the last kept token so casing and punctuation are preserved.
        encoded = tokenizer(
            context,
            add_special_tokens=False,
            truncation=True,
            max_length=budget,
            return_offsets_mapping=True,
        )
        offsets = encoded["offset_mapping"]
        if offsets:
            context = context[:offsets[-1][1]]
    else:
        # Slow tokenizers expose no offsets; rebuild the text from the kept
        # tokens instead (the result is tokenizer-normalized text).
        kept = tokenizer.tokenize(context)[:budget]
        context = tokenizer.convert_tokens_to_string(kept)

    return context

# Define a function to perform question answering
def answer_question(question, context, max_length=512):
    """Extract the answer to *question* from *context* with the QA model.

    The pair is encoded by the module-level ``tokenizer`` and scored by the
    module-level ``model``. The answer is the token span running from the
    highest-scoring start position to the highest-scoring end position.

    Args:
        question: The question text.
        context: The passage to search for the answer.
        max_length: Maximum number of tokens fed to the model, special
            tokens included. Defaults to 512.

    Returns:
        The answer span converted back to a string. Empty when the predicted
        end position lies before the predicted start position.
    """
    # Let the tokenizer build the whole pair encoding. This supplies
    # token_type_ids (so the model can tell question from context) and
    # truncates *inside* the sequences, keeping the closing separator that a
    # raw slice of the id list would cut off.
    encoding = tokenizer(
        question,
        context,
        add_special_tokens=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    tokens = tokenizer.convert_ids_to_tokens(encoding["input_ids"][0].tolist())

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**encoding)

    # Most likely start token and (exclusive) end of the answer span.
    start_index = int(torch.argmax(outputs.start_logits))
    end_index = int(torch.argmax(outputs.end_logits)) + 1

    # An end before the start yields an empty slice and thus an empty answer.
    answer_tokens = tokens[start_index:end_index]
    return tokenizer.convert_tokens_to_string(answer_tokens)

# Read one question from standard input, fetch a context for it, and answer it
query = input()
context = create_context(query)
answer = answer_question(query, context)

# Print the answer framed by a horizontal rule above and below
rule = "________________________________"
print(rule, answer, rule, sep="\n")

What is the real name of Iron Man?
________________________________
tony stark
________________________________
