# 🤔 Question Answering: BERT & T5 for QA

Build extractive and generative QA systems.

## Learning Outcomes
- Extractive QA with BERT/RoBERTa
- Generative QA with T5/FLAN-T5
- Multi-document QA
- Production deployment

**Level**: Advanced | **Time**: 60 min | **GPU**: Recommended

In [None]:
import torch
from transformers import (
    AutoModelForQuestionAnswering, AutoTokenizer,
    T5ForConditionalGeneration, T5Tokenizer,
    pipeline
)
import warnings
warnings.filterwarnings('ignore')

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Device: {device}")

## 1. Extractive QA with BERT

In [None]:
# Checkpoint shared by the tokenizer and the extractive QA model.
qa_model_name = 'distilbert-base-cased-distilled-squad'

# Load both pieces from the same checkpoint, then move the model to `device`.
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
qa_model = qa_model.to(device)
qa_model.eval()  # evaluation mode: this notebook only runs inference

# Report the checkpoint name and its total parameter count.
print(f"Model: {qa_model_name}")
print("Parameters: {:,}".format(sum(w.numel() for w in qa_model.parameters())))

In [None]:
def extract_answer(question, context, model, tokenizer, max_answer_len=None):
    """Extract the highest-scoring answer span from ``context``.

    The question/context pair is tokenized (truncated to 512 tokens), run
    through ``model``, and the span ``(start, end)`` maximising
    ``start_logit[start] + end_logit[end]`` is decoded back to text.

    Only spans with ``start <= end`` are considered. Picking the start and
    end positions independently (two separate argmax calls) can put the end
    before the start, which silently decodes to an empty answer. Whenever
    the independent argmax pair is already a valid span, the result is the
    same as before.

    NOTE: positions belonging to the question or to special tokens are not
    excluded from the search; special tokens are dropped when decoding.

    Args:
        question: Question text.
        context: Passage expected to contain the answer.
        model: Question-answering model whose output exposes
            ``start_logits`` and ``end_logits`` and which has a ``device``
            attribute.
        tokenizer: Tokenizer matching ``model``.
        max_answer_len: Optional maximum span length in tokens. ``None``
            (the default) means no limit.

    Returns:
        ``(answer, confidence)`` where ``confidence`` is the mean of the
        chosen start and end logits. It is a raw logit average, not a
        probability, so it is not bounded to ``[0, 1]``.

    Raises:
        ValueError: If ``max_answer_len`` is given and is smaller than 1.
    """
    if max_answer_len is not None and max_answer_len < 1:
        raise ValueError("max_answer_len must be at least 1")

    inputs = tokenizer(question, context, return_tensors='pt', truncation=True, max_length=512)
    # Follow the device of the model that was passed in rather than a global,
    # so the inputs always sit on the same device as the weights.
    inputs = inputs.to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)

    start_logits = outputs.start_logits[0]
    end_logits = outputs.end_logits[0]

    # span_scores[i, j] = score of the span starting at token i and ending at j.
    span_scores = start_logits[:, None] + end_logits[None, :]
    positions = torch.arange(span_scores.size(0), device=span_scores.device)
    span_len = positions[None, :] - positions[:, None] + 1

    # Rule out spans that end before they start (and over-long ones, if asked).
    valid = span_len >= 1
    if max_answer_len is not None:
        valid &= span_len <= max_answer_len
    span_scores = span_scores.masked_fill(~valid, float('-inf'))

    # argmax works on the flattened matrix; recover the (row, column) pair.
    start_idx, end_idx = divmod(span_scores.argmax().item(), span_scores.size(1))

    tokens = inputs.input_ids[0][start_idx:end_idx + 1]
    answer = tokenizer.decode(tokens, skip_special_tokens=True)

    confidence = (start_logits[start_idx] + end_logits[end_idx]).item() / 2

    return answer, confidence

# Example: a short passage and three questions whose answers appear in it.
context = """Machine learning is a subset of artificial intelligence that enables computers 
to learn from data without being explicitly programmed. It was founded by Arthur Samuel in 1959. 
Today, ML is used in recommendation systems, autonomous vehicles, and medical diagnosis."""

questions = [
    "What is machine learning?",
    "Who founded machine learning?",
    "What is ML used for today?"
]

# Show only the first 100 characters of the passage.
# (The emoji was previously mis-encoded and printed as garbage characters.)
print("📚 Context:", context[:100], "...\n")
for q in questions:
    answer, conf = extract_answer(q, context, qa_model, qa_tokenizer)
    print(f"Q: {q}")
    print(f"A: {answer} (confidence: {conf:.2f})\n")

## 2. Using Pipeline

In [None]:
# Same task through the high-level `pipeline` helper.
qa_pipeline = pipeline(
    'question-answering',
    model=qa_model_name,
    device=-1 if not torch.cuda.is_available() else 0,  # device 0 with CUDA, otherwise -1
)

# The result is indexed by 'answer', 'score', 'start' and 'end' below.
result = qa_pipeline(context=context, question="When was ML founded?")
print("Answer: {}".format(result['answer']))
print("Score: {:.4f}".format(result['score']))
print("Span: {} - {}".format(result['start'], result['end']))

## 3. Generative QA with T5

In [None]:
# Generative QA uses the 't5-small' checkpoint for both tokenizer and model.
t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')
t5_model = T5ForConditionalGeneration.from_pretrained('t5-small')
t5_model = t5_model.to(device)
t5_model.eval()  # evaluation mode: inference only

def generative_qa(question, context, max_length=100):
    """Generate a free-form answer with the module-level T5 model.

    The prompt is built as ``"question: <question> context: <context>"``,
    tokenized with truncation at 512 tokens, and passed to
    ``t5_model.generate``.

    Args:
        question: Question text.
        context: Supporting passage placed after the question in the prompt.
        max_length: Forwarded to ``generate`` as ``max_length``.

    Returns:
        The first generated sequence decoded to text, special tokens skipped.
    """
    prompt = "question: {} context: {}".format(question, context)

    encoded = t5_tokenizer(prompt, return_tensors='pt', truncation=True, max_length=512)
    encoded = encoded.to(device)

    # No gradients are needed for generation.
    with torch.no_grad():
        generated = t5_model.generate(**encoded, max_length=max_length)

    first_sequence = generated[0]
    return t5_tokenizer.decode(first_sequence, skip_special_tokens=True)

# Try the generative model on an open-ended prompt.
# (The emoji was previously mis-encoded and printed as garbage characters.)
answer = generative_qa("Explain what machine learning does", context)
print(f"🤖 Generative Answer: {answer}")

## 4. Multi-Document QA

In [None]:
# Toy knowledge base: one sentence per programming language.
documents = [
    "Python is a high-level programming language created by Guido van Rossum in 1991.",
    "JavaScript was created by Brendan Eich in 1995 for web browsers.",
    "Java was developed by James Gosling at Sun Microsystems in 1995.",
]

def multi_doc_qa(question, documents, top_k=1):
    """Run extractive QA on each document and return the best-scoring answers.

    Every document is queried with ``extract_answer`` using the module-level
    ``qa_model`` and ``qa_tokenizer``. Documents whose answer is empty or
    whitespace-only are dropped.

    Args:
        question: Question text.
        documents: Iterable of context strings.
        top_k: Number of highest-scoring results to keep.

    Returns:
        A list of dicts with keys ``'answer'``, ``'score'`` and ``'source'``
        (the first 50 characters of the document), sorted by descending score.
    """
    candidates = []
    for text in documents:
        span, span_score = extract_answer(question, text, qa_model, qa_tokenizer)
        if not span.strip():
            continue  # nothing usable was extracted from this document
        candidates.append({'answer': span, 'score': span_score, 'source': text[:50]})

    ranked = sorted(candidates, key=lambda item: item['score'], reverse=True)
    return ranked[:top_k]

# Ask one question across the whole document list and show the top result(s).
question = "Who created Python?"
results = multi_doc_qa(question, documents)
print("Q: {}".format(question))
for r in results:
    print("A: {} (from: {}...)".format(r['answer'], r['source']))

## 5. Model Comparison

In [None]:
import pandas as pd

# One row per model; every cell is kept as a display string.
comparison = pd.DataFrame(
    [
        ['BERT-base', 'Extractive', '88.5', '1x', 'General'],
        ['DistilBERT', 'Extractive', '86.9', '2x', 'Fast'],
        ['RoBERTa', 'Extractive', '91.5', '0.9x', 'Accuracy'],
        ['T5', 'Generative', '89.0', '0.5x', 'Complex'],
        ['FLAN-T5', 'Generative', '91.2', '0.5x', 'Instructions'],
    ],
    columns=['Model', 'Type', 'SQuAD F1', 'Speed', 'Best For'],
)

# (The emoji was previously mis-encoded and printed as garbage characters.)
print("📊 QA Model Comparison:")
# NOTE(review): `display` is not imported in this file; presumably the
# Jupyter/IPython built-in. Confirm before running outside a notebook.
display(comparison)

## 6. Production QA System

In [None]:
class QASystem:
    """Question answering over an in-memory list of documents.

    Each stored document is queried separately with a question-answering
    pipeline, and the answer with the highest pipeline score wins.

    Attributes are documented inline below.
    """

    # Returned by `answer` when no document yields a good enough answer.
    NO_ANSWER = "I don't have enough information to answer that."

    def __init__(self, model_name='distilbert-base-cased-distilled-squad', device=None):
        """Create the pipeline and an empty knowledge base.

        Args:
            model_name: Checkpoint passed to the question-answering pipeline.
            device: Device index passed to the pipeline. ``None`` (default)
                selects device 0 when CUDA is available and -1 otherwise,
                matching the standalone pipeline example in this file.
        """
        if device is None:
            device = 0 if torch.cuda.is_available() else -1
        # Callable invoked as pipeline(question=..., context=...).
        self.pipeline = pipeline('question-answering', model=model_name, device=device)
        # Context documents, searched in insertion order.
        self.knowledge_base = []

    def add_documents(self, documents):
        """Append documents to the knowledge base.

        Args:
            documents: A single string or an iterable of documents. A bare
                string is treated as one document; extending a list with it
                directly would add one entry per character. Blank strings
                are skipped because they contain no answer.
        """
        if isinstance(documents, str):
            documents = [documents]
        for doc in documents:
            if isinstance(doc, str) and not doc.strip():
                continue
            self.knowledge_base.append(doc)

    def answer(self, question, threshold=0.1):
        """Return the best answer across all documents.

        Args:
            question: Question text.
            threshold: Minimum pipeline score required to return an answer.

        Returns:
            The highest-scoring answer string, or ``NO_ANSWER`` when the
            knowledge base is empty, no document scores above 0, or the best
            score is below ``threshold``.
        """
        best_answer = None
        best_score = 0

        for doc in self.knowledge_base:
            result = self.pipeline(question=question, context=doc)
            if result['score'] > best_score:
                best_score = result['score']
                best_answer = result['answer']

        # The `best_answer is None` check covers threshold <= 0, where nothing
        # may have been found at all; never hand None back to the caller.
        if best_answer is None or best_score < threshold:
            return self.NO_ANSWER

        return best_answer

# Demo: index the sample documents, then ask a single question.
qa_system = QASystem()
qa_system.add_documents(documents)
print("Answer: {}".format(qa_system.answer('Who created JavaScript?')))

## 🎯 Key Takeaways
1. Extractive QA finds answer spans
2. Generative QA synthesizes answers
3. Multi-doc QA for knowledge bases
4. Confidence thresholds let the system decline to answer instead of returning a low-confidence guess

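## 📚 Further Reading
- Devlin et al., "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" (2019)
- Rajpurkar et al., "SQuAD: 100,000+ Questions for Machine Comprehension of Text" (2016)
- Lewis et al., "Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks" (2020), for retrieval-augmented QA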