In [1]:
# Compute EM/F1 from saved predictions in outputs_spoken_squad_1.1

import os, json
import evaluate

# Directory holding the saved predictions; metrics.json is written here as well.
OUTPUT_DIR = "outputs_spoken_squad_1.1"
# JSON file of saved predictions, read below and looked up by question id.
PRED_PATH  = os.path.join(OUTPUT_DIR, "predictions.json")
# Test-set JSON from which the reference answers are loaded.
TEST_FILE  = "spoken_test-v1.1.json"

def _flat_answers(ans):
    texts  = ans.get("text", [])
    starts = ans.get("answer_start", [])
    if not isinstance(texts, list):  texts = [texts]
    if not isinstance(starts, list): starts = [starts]
    if texts and isinstance(texts[0], list):
        texts = [t for sub in texts for t in sub]
    if starts and isinstance(starts[0], list):
        starts = [s for sub in starts for s in sub]
    n = min(len(texts), len(starts))
    return {
        "text": [str(t) for t in texts[:n]],
        "answer_start": [int(s) for s in starts[:n]],
    }

def load_references_from_test_json(path):
    """Load references (id, answers) from Spoken-SQuAD v1.1 test JSON.

    Walks ``data`` -> ``paragraphs`` -> ``qas`` in the file at ``path`` and
    returns one entry per question, in file order. Each entry is a dict with
    ``"id"`` (the question id converted with ``str``) and ``"answers"`` (the
    answer texts and start offsets normalized by ``_flat_answers``).

    Missing levels are treated as empty; an answer without ``"text"`` or
    ``"answer_start"`` contributes ``""`` or ``0`` respectively.
    """

    def _iter_questions(dataset):
        # Yield every question dict across all articles and paragraphs.
        for article in dataset.get("data", []):
            for paragraph in article.get("paragraphs", []):
                yield from paragraph.get("qas", [])

    with open(path, "r", encoding="utf-8") as handle:
        dataset = json.load(handle)

    collected = []
    for question in _iter_questions(dataset):
        question_id = str(question.get("id"))
        # A null or absent "answers" entry is treated as no answers.
        answer_items = question.get("answers", []) or []
        gold = {
            "text": [item.get("text", "") for item in answer_items],
            "answer_start": [item.get("answer_start", 0) for item in answer_items],
        }
        collected.append({"id": question_id, "answers": _flat_answers(gold)})
    return collected

# Load the saved predictions and the reference answers.
# The lookup below treats the predictions file as a mapping of
# question id -> predicted answer text.
with open(PRED_PATH, "r", encoding="utf-8") as f:
    pred_text = json.load(f)

references = load_references_from_test_json(TEST_FILE)

# Questions without a saved prediction are still scored (as an empty answer)
# so the metrics cover every reference. Report how many there are, because a
# mismatch between the two files would otherwise only show up as low scores.
missing_ids = [r["id"] for r in references if r["id"] not in pred_text]
if missing_ids:
    print(
        f"Warning: {len(missing_ids)} of {len(references)} questions have no "
        "saved prediction; scoring them as empty answers."
    )

formatted_preds = [{"id": r["id"], "prediction_text": pred_text.get(r["id"], "")} for r in references]

# Compute EM/F1 with the "squad" metric from the evaluate library.
squad_metric = evaluate.load("squad")
metrics = squad_metric.compute(predictions=formatted_preds, references=references)
print("Exact Match (EM):", metrics.get("exact_match"))
print("F1 Score:", metrics.get("f1"))

# Save the metrics next to the predictions; build the path once so the file
# written and the path reported cannot drift apart.
metrics_path = os.path.join(OUTPUT_DIR, "metrics.json")
with open(metrics_path, "w", encoding="utf-8") as f:
    json.dump(metrics, f, ensure_ascii=False, indent=2)
print(f"Saved EM/F1 to {metrics_path}")

  from pandas.core import (


Exact Match (EM): 60.04485142963932
F1 Score: 69.56580173817838
Saved EM/F1 to outputs_spoken_squad_1.1/metrics.json
