In [1]:
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from typing import Dict, List, Tuple
import gradio as gr

class QuestionAnsweringSystem:
    """Extractive question answering on top of a pre-trained span-prediction model.

    The context is tokenized into overlapping chunks, every chunk is scored by
    the model, and the highest-scoring answer span across all chunks is
    returned as a substring of the original context.
    """

    def __init__(
        self,
        model_checkpoint: str = "distilbert-base-cased-distilled-squad",
        max_length: int = 384,
        stride: int = 128,
        n_best: int = 20,
        max_answer_length: int = 30,
    ):
        """Load the tokenizer and model and store the decoding settings.

        Args:
            model_checkpoint: Name or path passed to ``from_pretrained``.
            max_length: Token length every chunk is truncated/padded to.
            stride: Number of tokens shared by consecutive chunks.
            n_best: How many top start and top end positions are combined
                when searching for the best span in a chunk.
            max_answer_length: Longest answer span allowed, in tokens.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
        self.model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint).to(self.device)
        self.max_length = max_length
        self.stride = stride
        self.n_best = n_best
        self.max_answer_length = max_answer_length

    def preprocess_for_prediction(self, question: str, context: str) -> Dict:
        """Tokenize a question/context pair into fixed-length, overlapping chunks.

        Only the context (second sequence) is truncated; the part that does not
        fit is emitted as additional chunks. Character offsets are returned so
        token positions can be mapped back to the context string.

        Args:
            question: The question text.
            context: The passage to search for the answer.

        Returns:
            The tokenizer output (PyTorch tensors), including
            ``offset_mapping``.
        """
        inputs = self.tokenizer(
            question,
            context,
            max_length=self.max_length,
            truncation="only_second",
            stride=self.stride,
            return_overflowing_tokens=True,
            return_offsets_mapping=True,
            padding="max_length",
            return_tensors="pt"
        )
        return inputs

    def _best_span(self, start_logit, end_logit,
                   context_start: int, context_end: int) -> Tuple[int, int, float]:
        """Return the best valid ``(start, end, score)`` span for one chunk.

        Candidates are the ``n_best`` highest start positions combined with the
        ``n_best`` highest end positions. A span is valid when it lies inside
        ``[context_start, context_end]``, does not end before it starts and is
        at most ``max_answer_length`` tokens long. The score is the sum of the
        start and end logits. ``(-1, -1, -inf)`` is returned when no candidate
        is valid.
        """
        best = (-1, -1, float("-inf"))
        # Highest logits first, so ties are resolved in favour of the
        # higher-ranked candidate (only a strictly better score replaces it).
        start_indexes = np.argsort(start_logit)[-1:-self.n_best - 1:-1].tolist()
        end_indexes = np.argsort(end_logit)[-1:-self.n_best - 1:-1].tolist()

        for start_index in start_indexes:
            if start_index < context_start:
                continue
            for end_index in end_indexes:
                if (end_index > context_end or
                        start_index > end_index or
                        end_index - start_index + 1 > self.max_answer_length):
                    continue
                score = float(start_logit[start_index] + end_logit[end_index])
                if score > best[2]:
                    best = (start_index, end_index, score)
        return best

    def predict_answer(self, question: str, context: str) -> Tuple[str, float]:
        """Predict the answer span for a question within a context.

        Args:
            question: The question text.
            context: The passage to search for the answer.

        Returns:
            ``(answer, score)`` where ``answer`` is a substring of ``context``
            and ``score`` is the sum of the start and end logits of the chosen
            span (a raw logit sum, not a probability). When no valid span
            exists the result is ``("", float("-inf"))``.
        """
        inputs = self.preprocess_for_prediction(question, context)

        # Forward only the tensors the model consumes; token_type_ids is not
        # produced by every tokenizer.
        model_inputs = {
            name: inputs[name].to(self.device)
            for name in ("input_ids", "attention_mask", "token_type_ids")
            if name in inputs
        }
        with torch.no_grad():
            outputs = self.model(**model_inputs)

        start_logits = outputs.start_logits.cpu().numpy()
        end_logits = outputs.end_logits.cpu().numpy()
        # Plain Python ints, so the context string is sliced with ints
        # rather than 0-dim tensors.
        offset_mapping = inputs["offset_mapping"].tolist()

        best_answer = ""
        best_score = float("-inf")

        for chunk_index, (start_logit, end_logit) in enumerate(zip(start_logits, end_logits)):
            sequence_ids = inputs.sequence_ids(chunk_index)
            if 1 not in sequence_ids:
                # No context tokens in this chunk: every offset would refer to
                # the question or to special/padding tokens, so no span here
                # can be mapped back onto the context.
                continue
            context_start = sequence_ids.index(1)
            context_end = len(sequence_ids) - 1 - sequence_ids[::-1].index(1)

            start_index, end_index, score = self._best_span(
                start_logit, end_logit, context_start, context_end
            )
            if score > best_score:
                best_score = score
                offsets = offset_mapping[chunk_index]
                best_answer = context[offsets[start_index][0]:offsets[end_index][1]]

        return best_answer, float(best_score)

    def batch_predict(self, questions: List[str], contexts: List[str]) -> List[Tuple[str, float]]:
        """Predict answers for question/context pairs, one pair at a time.

        Pairs are formed with ``zip``, so extra items in the longer list are
        ignored.

        Returns:
            One ``(answer, score)`` tuple per pair, in input order.
        """
        return [
            self.predict_answer(question, context)
            for question, context in zip(questions, contexts)
        ]

# Initialize the QA system
# A single module-level instance is shared by gradio_interface() and
# test_qa_system(). Constructing it loads the tokenizer and the model, which
# is why progress is printed before and after.
print("Initializing QA System...")
qa_system = QuestionAnsweringSystem()
print("QA System ready!")

def gradio_interface(question: str, context: str) -> str:
    """Answer a question about a context and format the result for display.

    Args:
        question: The question typed by the user; ``None`` is treated as empty.
        context: The passage to search; ``None`` is treated as empty.

    Returns:
        A prompt to fill in both fields when either is blank, a "no answer"
        message when the model finds no span, an ``Error: ...`` message when
        prediction raises, or otherwise the answer and its score.
    """
    # `or ""` keeps a missing (None) field on the friendly-message path
    # instead of raising AttributeError outside the try block.
    if not (question or "").strip() or not (context or "").strip():
        return "Please provide both question and context."

    try:
        answer, confidence = qa_system.predict_answer(question, context)
        if not answer:
            return "I couldn't find an answer in the given context."
        return f"Answer: {answer}\nConfidence Score: {confidence:.2f}"
    except Exception as e:
        # UI boundary: show the failure to the user rather than crash the app.
        return f"Error: {str(e)}"

def create_gradio_app():
    """Build the Gradio interface for the QA system and return it.

    The interface is only constructed here; launching it is left to the caller.
    """
    sample_question = "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?"
    sample_context = "Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary."

    # Two free-text inputs: a short question box and a taller context box.
    question_box = gr.Textbox(
        label="Question",
        placeholder="Enter your question here...",
        lines=2,
    )
    context_box = gr.Textbox(
        label="Context",
        placeholder="Enter the context/passage here...",
        lines=10,
    )
    answer_box = gr.Textbox(label="Answer", lines=3)

    return gr.Interface(
        fn=gradio_interface,
        inputs=[question_box, context_box],
        outputs=answer_box,
        title="🤖 Question Answering System",
        description="Ask questions about any given text passage. The system will find the most relevant answer from the context.",
        examples=[[sample_question, sample_context]],
        theme=gr.themes.Soft(),
    )

# Test the system
def test_qa_system():
    """Run one sample prediction as a smoke test.

    Prints the question, the predicted answer and its score. Returns True when
    the prediction and printing complete without raising, and False (after
    printing the error) otherwise.
    """
    question = "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?"
    passage = """Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary."""

    try:
        prediction = qa_system.predict_answer(question, passage)
        answer, confidence = prediction
        print(f"Test Question: {question}")
        print(f"Test Answer: {answer}")
        print(f"Confidence: {confidence:.2f}")
    except Exception as exc:
        # Any failure (model, tokenizer, formatting) counts as a failed test.
        print(f"Test failed with error: {exc}")
        return False
    return True

# Smoke-test the system first; the UI is launched only when the test succeeds.
print("\nTesting the system...")
if not test_qa_system():
    print("❌ System test failed!")
else:
    print("✅ System test passed!")

    # share=True requests a public share link in addition to the local URL.
    print("\nLaunching Gradio interface...")
    iface = create_gradio_app()
    iface.launch(share=True)

Initializing QA System...


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

2025-07-21 09:26:51.058203: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753090011.293118      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753090011.362167      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

QA System ready!

Testing the system...
Test Question: To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?
Test Answer: Saint Bernadette Soubirous
Confidence: 26.12
✅ System test passed!

Launching Gradio interface...
* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://701d5bbaf502a04728.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
