In [1]:
from transformers import (
    AutoTokenizer,
    AutoModelForQuestionAnswering,
    pipeline
)

In [2]:
from datasets import load_dataset
import evaluate

In [3]:
# 1) Folder that holds the saved QA model; change this to your own checkpoint location.
CHECKPOINT = "./qa_roberta_checkpoint"

# 2) Restore the QA model and its matching tokenizer from that same folder.
model = AutoModelForQuestionAnswering.from_pretrained(CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)


In [4]:
# Wrap the reloaded model and tokenizer in a question-answering pipeline.
# device=0 targets the first CUDA GPU; pass -1 instead to run on the CPU.
qa = pipeline(task="question-answering", model=model, tokenizer=tokenizer, device=0)


Device set to use cuda:0


In [5]:
# SQuAD scorer; it is used further down to obtain exact-match and F1.
squad = evaluate.load("squad")

# Read val.json (records are taken from its top-level "data" field) and keep only
# the first paragraph of the first entry; every question below is asked against it.
raw = load_dataset(
    "json",
    data_files={"validation": "val.json"},
    field="data",
)["validation"][0]["paragraphs"][0]
context = raw["context"]



Downloading builder script:   0%|          | 0.00/4.53k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.32k [00:00<?, ?B/s]

Generating validation split: 0 examples [00:00, ? examples/s]

In [6]:
# Run the model over every question of the selected paragraph and collect the
# predictions / references in the layout expected by `squad.compute`.
preds, refs = [], []
for qa_pair in raw["qas"]:
    qid = qa_pair["id"]

    # Keep every annotated gold answer together with its own annotated offset.
    # Re-deriving the offset with `context.find` returns the FIRST textual match,
    # which for short answers (e.g. "2" or "nan") is usually not the labelled span,
    # so it is used only as a fallback when the annotation carries no offset.
    golds = []
    for gold_answer in qa_pair["answers"] or []:
        start = gold_answer.get("answer_start")
        if start is None:
            start = context.find(gold_answer["text"])  # -1 when the text is absent
        golds.append({"text": gold_answer["text"], "answer_start": start})

    if not golds:
        # Nothing to score against: skip instead of failing on `answers[0]`.
        continue

    out = qa(question=qa_pair["question"], context=context)

    # collect for SQuAD metric
    preds.append({"id": qid, "prediction_text": out["answer"]})
    refs.append({"id": qid, "answers": golds})


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [7]:
# Score the collected predictions against the gold references.
metrics = squad.compute(references=refs, predictions=preds)

# Report both headline numbers, one per line.
print(
    "→ Exact Match: {:.2f}".format(metrics["exact_match"]),
    "→ F1 Score:    {:.2f}".format(metrics["f1"]),
    sep="\n",
)

# 5) The following cell runs a quick manual spot-check with hand-written questions.


→ Exact Match: 0.00
→ F1 Score:    0.00


In [8]:
# Ad-hoc sanity check: put a few hand-written questions to the same context.
print("\n=== Manual Tests ===")

questions = [
    "Who won the race?",
    "How many points did Lewis Hamilton score?",
    "What was the fastest lap overall?",
    "Which driver retired?",
]

for q in questions:
    ans = qa(context=context, question=q)
    # Show the extracted span next to the score reported by the pipeline.
    print(f"Q: {q}\nA: {ans['answer']}  (score {ans['score']:.3f})\n")


=== Manual Tests ===
Q: Who won the race?
A: ).  (score 0.000)

Q: How many points did Lewis Hamilton score?
A: ).  (score 0.000)

Q: What was the fastest lap overall?
A: ).  (score 0.000)

Q: Which driver retired?
A: ).  (score 0.000)



In [10]:
# Spot-check the first few prediction/reference pairs collected in `preds` and `refs`.
# Slicing + zip (instead of indexing with range(5)) keeps this cell from raising
# IndexError when fewer than five examples were collected.
for p, r in zip(preds[:5], refs[:5]):
    print(f"ID: {p['id']}")
    print("  Predicted:", repr(p["prediction_text"]))
    print("  Gold     :", repr(r["answers"][0]["text"]))
    print("  answer_start:", r["answers"][0].get("answer_start"))
    print("---")


ID: q19_driverLogan Sargeant_CompoundMEDIUM
  Predicted: ').'
  Gold     : '18'
  answer_start: 383
---
ID: q3_driverFernando Alonso
  Predicted: ').'
  Gold     : '2'
  answer_start: 127
---
ID: q20_driverGeorge Russell_CompoundSOFT
  Predicted: '.'
  Gold     : 'nan'
  answer_start: 330
---
ID: q20_driverGuanyu Zhou_CompoundSOFT
  Predicted: ').'
  Gold     : 'nan'
  answer_start: 330
---
ID: q3_driverSergio Perez
  Predicted: ').'
  Gold     : '16'
  answer_start: 1579
---


Device set to use cuda:0


🔎 Validation examples: 72

ID: q19_driverKevin Magnussen_CompoundMEDIUM
 Predicted: '.'
 Gold     : '0'
 Start    : 89
---
ID: q20_driverLewis Hamilton_CompoundINTERMEDIATE
 Predicted: ').'
 Gold     : '01:24.651'
 Start    : 5841
---
ID: q20_driverCarlos Sainz_CompoundHARD
 Predicted: ').'
 Gold     : '01:17.171'
 Start    : 6021
---
ID: q3_driverCharles Leclerc
 Predicted: '.'
 Gold     : '6'
 Start    : 704
---
ID: q20_driverPierre Gasly_CompoundHARD
 Predicted: ').'
 Gold     : '01:16.839'
 Start    : 2238
---

✅ Exact Match: 0.00%
✅ F1 Score:    0.00%


In [1]:
# Kernel sanity check: prints a fixed greeting and touches no notebook state.
print("Hello world")

Hello world


Device set to use cuda:0



⚡️ F1 QA Interactive Demo
Type any question about the race (or ‘exit’ to quit).



In [1]:

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline

# 1) Saved checkpoint folder, written as a bare relative path (no leading "./")
CHECKPOINT = "qa_roberta_checkpoint"

# 2) Restore the QA model and its tokenizer, restricted to files already on disk
model = AutoModelForQuestionAnswering.from_pretrained(CHECKPOINT, local_files_only=True)
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT, local_files_only=True)


In [2]:
# 3) Assemble the Hugging Face question-answering pipeline from the reloaded pieces.
#    device=0 targets the first CUDA GPU; use -1 when only a CPU is available.
qa = pipeline(task="question-answering", model=model, tokenizer=tokenizer, device=0)


Device set to use cuda:0


In [3]:
# 4) Load f1_gp_qa.json (records are taken from its top-level "data" field)
raw = load_dataset("json", data_files={"full": "f1_gp_qa.json"}, field="data")["full"]

# Use the first paragraph of the first entry as the context
# (assumes the file describes a single race -- confirm against the data file).
context = raw[0]["paragraphs"][0]["context"]

# Banner for the interactive session
print(
    "\n⚡️ F1 QA Interactive Demo",
    "Type any question about the race (or ‘exit’ to quit).\n",
    sep="\n",
)



⚡️ F1 QA Interactive Demo
Type any question about the race (or ‘exit’ to quit).



In [None]:
# 5) Interactive loop: keep answering typed questions until the user submits an
#    empty line or types "exit" / "quit".
while True:
    question = input("Q: ").strip()
    if not question or question.lower() in ("exit", "quit"):
        print("👋 Goodbye!")
        break

    # Chunk long contexts into overlapping windows. The question-answering pipeline
    # names the window size `max_seq_len`; `max_length` and `return_overflowing_tokens`
    # are tokenizer arguments that the pipeline drops without applying them, and
    # overflowing chunks are produced automatically from `max_seq_len` / `doc_stride`.
    # NOTE(review): 384 / 128 should mirror the training-time chunking -- confirm
    # against the training script.
    result = qa(
        question=question,
        context=context,
        max_seq_len=384,
        doc_stride=128,
        top_k=1,  # a single best answer, returned as one dict
    )

    print(f"A: {result['answer']}   (confidence {result['score']:.2f})\n")

Device set to use cuda:0



⚡️ F1 QA Interactive (type ‘exit’ to quit)

A: ).   (confidence 0.00)

A: ).   (confidence 0.00)



ValueError: `question` cannot be empty