In [None]:
# Install nbstripout, the command-line tool invoked in the next two cells.
!pip install nbstripout

In [None]:
# Run nbstripout in its --install mode (presumably registers it with the
# current git repository -- confirm against the nbstripout docs).
# NOTE(review): this cell runs before the repository is cloned and before any
# %cd, so the working directory may not be a git repository yet -- confirm it
# succeeds on a fresh runtime.
!nbstripout --install

In [None]:
# Run nbstripout on this notebook file.
# NOTE(review): the path is relative and no %cd has happened yet -- confirm
# the notebook actually exists in the working directory at this point.
!nbstripout Question_Answering_with_Transformers.ipynb

In [None]:
# Read the GitHub token from Colab's secret store rather than hardcoding it.
# `key` is interpolated into the `git push` URL in a later cell.
from google.colab import userdata
key = userdata.get('GITHUB_TOKEN_KEY')

In [None]:
# Mount Google Drive at /content/drive; the %cd cells below work inside it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Work inside the Drive folder that the next cell clones the repository into.
%cd /content/drive/MyDrive/Internship Tasks

In [None]:
# Clone the project repository into the current directory.
!git clone https://github.com/Zayaad-Wajid/Question-Answering-with-Transformers.git

In [None]:
# Set the global git identity (author e-mail and name) used by the commit below.
!git config --global user.email "zayaadw@example.com"
!git config --global user.name "Zayaad-Wajid"

In [None]:
# Switch into the project folder so the git add/commit/push cells act on it.
# NOTE(review): the clone cell above passes no target directory, so git would
# presumably create "Question-Answering-with-Transformers" (no "3-" prefix) --
# confirm that this folder exists and is the intended repository.
%cd /content/drive/MyDrive/Internship Tasks/3-Question-Answering-with-Transformers

In [None]:
# Stage every change under the current directory.
!git add .

In [None]:
# Record the staged changes as a commit.
!git commit -m "Initial Commits"

In [None]:
# Push `main` to GitHub; {key} is replaced with the Python variable `key`
# (the token read from Colab secrets) before the command runs.
# NOTE(review): the token becomes part of the command line and may be echoed
# in this cell's output -- confirm outputs are cleared before the notebook is
# shared or committed.
!git push https://Zayaad-Wajid:{key}@github.com/Zayaad-Wajid/Question-Answering-with-Transformers.git main

In [None]:
# Open Colab's file-upload dialog and keep what it returns in `uploaded`.
# Presumably this is where train-v1.1.json (read a few cells below) is
# provided -- confirm.
from google.colab import files
uploaded = files.upload()

In [None]:
!pip install transformers datasets evaluate
!pip install evaluate

In [None]:
import json
import torch
import evaluate
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline

In [None]:
# Load the SQuAD v1.1 training file. The encoding is stated explicitly so the
# non-ASCII text in the dataset decodes the same way on every platform instead
# of depending on the locale default.
with open("train-v1.1.json", "r", encoding="utf-8") as f:
  squad_data = json.load(f)

In [None]:
# Flatten the nested SQuAD structure (articles -> paragraphs -> questions) into
# one record per answerable question, using the id / context / question /
# answers layout that the evaluation cells below read.
MAX_EXAMPLES = 1000  # size of the evaluation subset; raise for a fuller (slower) run

examples = []
for article in squad_data["data"]:
  for paragraph in article["paragraphs"]:
    context = paragraph["context"]
    for qa in paragraph["qas"]:
      # Questions without any gold answer cannot be scored; skip them.
      if not qa["answers"]:
        continue
      examples.append({
          # Read the id straight from `qa`: binding it to a name called `id`
          # would shadow the builtin for the rest of the kernel session.
          "id": qa["id"],
          "context": context,
          "question": qa["question"],
          "answers": {
              "text": [a["text"] for a in qa["answers"]],
              "answer_start": [a["answer_start"] for a in qa["answers"]]
          }
      })

examples = examples[:MAX_EXAMPLES]

# Show the count and a one-record preview instead of dumping every example
# into the cell output.
print(f"Prepared {len(examples)} examples; preview of the first one:")
print(examples[:1])

# Loading Pre-trained QA Pipeline

In [None]:
# The checkpoint id lives in its own name so that `model` only ever refers to
# the loaded network, never to a string.
MODEL_NAME = "distilbert-base-uncased-distilled-squad"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)

# Use the first GPU when the runtime has one; -1 selects the CPU.
device = 0 if torch.cuda.is_available() else -1

# The name `qa_pipleline` (sic) is kept because the evaluation cell refers to it.
qa_pipleline = pipeline("question-answering", model = model, tokenizer = tokenizer, device = device)

In [None]:
# Ask the pipeline every question and collect, per example, the predicted
# answer text and the gold answers, both keyed by the example id.
predictions, references = [], []

for example in tqdm(examples):
  result = qa_pipleline(question = example["question"], context = example["context"])
  gold = example["answers"]

  predictions.append({"id": example["id"], "prediction_text": result["answer"]})
  references.append({
      "id": example["id"],
      "answers": {"text": gold["text"], "answer_start": gold["answer_start"]},
  })

In [None]:
# Score the collected predictions against the gold answers with the "squad"
# metric, then report the two scores it returns.
squad_metric = evaluate.load("squad")
results = squad_metric.compute(references=references, predictions=predictions)

print(f"Exact Match (EM): {results['exact_match']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

# Bonus Tasks

# Comparing BERT and ALBERT

In [None]:
# Display name -> Hugging Face checkpoint id for each model in the comparison.
# NOTE(review): the ALBERT id ends in "squad2" -- presumably fine-tuned on
# SQuAD 2.0, so scores on the v1.1 examples may not be like-for-like; confirm.
models = dict(
    BERT="bert-large-uncased-whole-word-masking-finetuned-squad",
    ALBERT="twmkn9/albert-base-v2-squad2",
)


In [None]:
def evaluate_model(model_name, examples, device=None):
    """Score one question-answering checkpoint on SQuAD-style examples.

    Args:
        model_name: Hugging Face checkpoint id passed to ``from_pretrained``.
        examples: Iterable of dicts with ``id``, ``question``, ``context`` and
            ``answers`` (itself a dict with ``text`` and ``answer_start``).
        device: Device index handed to the pipeline. ``None`` (the default)
            picks GPU 0 when CUDA is available and the CPU (-1) otherwise.

    Returns:
        The dict returned by the "squad" metric's ``compute``; callers in this
        notebook read its ``exact_match`` and ``f1`` entries.
    """
    if device is None:
        # Large checkpoints are very slow on CPU, so prefer the GPU if present.
        device = 0 if torch.cuda.is_available() else -1

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForQuestionAnswering.from_pretrained(model_name)

    qa_pipeline_model = pipeline(
        "question-answering", model=model, tokenizer=tokenizer, device=device
    )

    predictions = []
    references = []

    for example in tqdm(examples):
        result = qa_pipeline_model(
            question=example["question"], context=example["context"]
        )

        # Prediction and reference are matched by the metric through the id.
        predictions.append({
            "id": example["id"],
            "prediction_text": result["answer"]
        })

        references.append({
            "id": example["id"],
            "answers": {
                "text": example["answers"]["text"],
                "answer_start": example["answers"]["answer_start"]
            }
        })

    squad_metric = evaluate.load("squad")
    results = squad_metric.compute(predictions=predictions, references=references)
    return results


In [None]:
# Evaluate every configured model on the same examples, keeping each model's
# scores under its display name and printing them as they arrive.
results_dict = {}

for name, model_id in models.items():
    print(f"Evaluating {name}...")
    results_dict[name] = results = evaluate_model(model_id, examples)
    print(f"{name}  Exact Match (EM): {results['exact_match']:.2f}")
    print(f"{name}  F1 Score: {results['f1']:.2f}")


In [None]:
# Install Streamlit (runs app.py below) and pyngrok (imported by the launch cell).
!pip install streamlit -q
!pip install pyngrok


In [None]:
%%writefile app.py
import streamlit as st
from transformers import pipeline

st.set_page_config(page_title="Question Answering App")
st.title("📘 Question Answering with DistilBERT")

# Load DistilBERT QA pipeline
MODEL_NAME = "distilbert-base-uncased-distilled-squad"
qa_pipeline = pipeline("question-answering", model=MODEL_NAME, tokenizer=MODEL_NAME)

# Input fields
context = st.text_area("Enter the context for the question:")
question = st.text_input("Enter your question:")

# Get answer
if st.button("Get Answer"):
    if context.strip() == "" or question.strip() == "":
        st.warning("Please enter both context and question.")
    else:
        result = qa_pipeline(question=question, context=context)
        st.success(f"Answer: {result['answer']}")
        st.info(f"Confidence Score: {round(result['score'] * 100, 2)}%")


In [None]:
import socket
import subprocess
import time

from google.colab import userdata
from pyngrok import ngrok

STREAMLIT_PORT = 8501
STARTUP_TIMEOUT_S = 60

# The ngrok token is read from Colab's secret store (add a secret named
# NGROK_AUTH_TOKEN) so it never appears in the notebook. Any token that was
# ever saved in a notebook should be treated as leaked and rotated.
ngrok.set_auth_token(userdata.get("NGROK_AUTH_TOKEN"))

# Shut down any ngrok process left over from an earlier run of this cell.
ngrok.kill()

# Keep the handle so the server can be inspected or stopped later
# (e.g. streamlit_proc.terminate()).
streamlit_proc = subprocess.Popen(
    ["streamlit", "run", "app.py", f"--server.port={STREAMLIT_PORT}"]
)

# Wait until the port accepts connections instead of sleeping a fixed time.
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    try:
        with socket.create_connection(("127.0.0.1", STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        if streamlit_proc.poll() is not None:
            raise RuntimeError(
                f"streamlit exited with code {streamlit_proc.returncode} before serving"
            )
        if time.monotonic() > deadline:
            raise TimeoutError(
                f"streamlit did not open port {STREAMLIT_PORT} within {STARTUP_TIMEOUT_S}s"
            )
        time.sleep(0.5)

public_url = ngrok.connect(STREAMLIT_PORT)
print("🔗 Streamlit app is live at:", public_url)
