In [2]:
# Token-level IoU metric utilities
from typing import Optional, Tuple


def char_span_to_token_span(tokenizer, context: str, answer_start: int, answer_text: str) -> Optional[Tuple[int,int]]:
    """Convert a character-level span to a token span (start, end_exclusive).

    The context is tokenized without special tokens, and every token whose
    character range overlaps ``[answer_start, answer_start + len(answer_text))``
    is treated as part of the span.

    Args:
        tokenizer: callable that, when invoked with
            ``return_offsets_mapping=True``, returns a mapping containing an
            ``"offset_mapping"`` entry of ``(char_start, char_end)`` pairs
            (e.g. a HuggingFace fast tokenizer).
        context: text the character offsets refer to.
        answer_start: character index where the answer begins.
        answer_text: answer string; only its length is used.

    Returns:
        ``(first_token, last_token + 1)``, or ``None`` when the span is
        invalid (negative start or empty text) or no token overlaps it.
    """
    start_char = int(answer_start)
    # A negative start or an empty answer is a "no span" sentinel. Without
    # this guard a zero-width span lying inside a token, or a negative start
    # followed by a long enough text, would still match tokens.
    if start_char < 0 or not answer_text:
        return None
    end_char = start_char + len(answer_text)

    enc = tokenizer(context, add_special_tokens=False, return_offsets_mapping=True)
    offsets = enc.get("offset_mapping", [])

    # Keep tokens that overlap the half-open character range.
    token_indices = [
        i for i, (s, e) in enumerate(offsets)
        if s < end_char and e > start_char
    ]
    if not token_indices:
        return None
    return token_indices[0], token_indices[-1] + 1


def token_level_iou(pred_span: Tuple[int,int], true_span: Tuple[int,int]) -> float:
    """Intersection-over-union of two half-open token index ranges.

    Each span is ``(start, end_exclusive)``. A span whose start is not below
    its end contributes no tokens. When neither span contains any token the
    result is 0.0.
    """
    predicted = frozenset(range(pred_span[0], pred_span[1]))
    expected = frozenset(range(true_span[0], true_span[1]))

    combined = predicted.union(expected)
    if len(combined) == 0:
        # Nothing to compare: report zero overlap instead of dividing by zero.
        return 0.0

    shared = predicted.intersection(expected)
    return len(shared) / len(combined)


def token_level_iou_from_char_spans(tokenizer, context: str, pred_start: int, pred_text: str, true_start: int, true_text: str) -> float:
    """Token-level IoU between a predicted and a reference character span.

    Both spans are mapped onto token indices of ``context`` with
    ``char_span_to_token_span`` and then scored with ``token_level_iou``.
    Returns 0.0 when either span cannot be mapped to tokens.
    """
    pred_tokens = char_span_to_token_span(tokenizer, context, pred_start, pred_text)
    true_tokens = char_span_to_token_span(tokenizer, context, true_start, true_text)

    if pred_tokens is not None and true_tokens is not None:
        return token_level_iou(pred_tokens, true_tokens)
    return 0.0

# Visible confirmation that this cell ran and the metric helpers above are defined.
print('Metric utilities loaded.')

Metric utilities loaded.


In [4]:
# Setup: install required packages and import
import sys, subprocess, os

def pip_install(packages):
    """Install packages with pip, using the interpreter that runs this code.

    Args:
        packages: a single requirement specifier string, or any iterable of
            requirement specifier strings (list, tuple, generator, ...).

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    # A bare string would otherwise be unpacked character by character.
    if isinstance(packages, str):
        packages = [packages]
    # Going through sys.executable targets the running interpreter's
    # environment rather than whichever `pip` happens to be first on PATH.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet", *packages])

# Try imports; install if missing
import importlib

# tqdm is imported further down, so it is probed together with the others.
REQUIRED_MODULES = ("transformers", "datasets", "kagglehub", "tqdm")
missing_modules = []
for module_name in REQUIRED_MODULES:
    try:
        importlib.import_module(module_name)
    except Exception:
        # Broad on purpose: a damaged install can raise more than ImportError,
        # and a reinstall attempt is the best-effort response either way.
        missing_modules.append(module_name)
need_install = bool(missing_modules)

if need_install:
    print('Missing or unimportable modules:', ', '.join(missing_modules))
    print('Installing required packages (transformers, datasets, kagglehub)...')
    pip_install(["transformers[torch]","datasets","kagglehub","tqdm"])  # torch provided by user environment if available
    # Let the import system notice packages that appeared on disk after the
    # interpreter started.
    importlib.invalidate_caches()

try:
    from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
except ImportError as exc:
    # Same exception type as before, with guidance attached.
    raise ImportError(
        "Could not import the question-answering classes from transformers. "
        "If packages were just installed or upgraded, restart the kernel and "
        "re-run this cell; otherwise check that the transformers installation "
        "is complete and compatible with this Python version."
    ) from exc
from tqdm.auto import tqdm
import json

# Initialize a small QA model for speed
MODEL_NAME = 'distilbert-base-uncased-distilled-squad'
print('Loading model:', MODEL_NAME)
qa_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
qa_model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)
# The same tokenizer object is handed to the pipeline and kept under its own
# name for separate use.
qa_pipe = pipeline('question-answering', model=qa_model, tokenizer=qa_tokenizer)

print('QA pipeline ready.')

ImportError: cannot import name 'pipeline' from 'transformers' (C:\Users\91983\AppData\Roaming\Python\Python313\site-packages\transformers\__init__.py)

In [None]:
# Inference: download SQuAD (kagglehub) if needed, run QA on the dev set, compute token-level IoU
import os
import json
from tqdm.auto import tqdm

# Used only for the CSV export and the summary statistic at the end.
import numpy as np
import pandas as pd

DATA_DIR = '.'
SQUAD_KAGGLE_HANDLE = 'stanfordu/stanford-question-answering-dataset'
train_file = os.path.join(DATA_DIR, 'train-v1.1.json')
dev_file = os.path.join(DATA_DIR, 'dev-v1.1.json')


def _find_in_tree(root, filename):
    """Return the first path under `root` whose basename is `filename`, else None."""
    for dirpath, _dirnames, filenames in os.walk(root):
        if filename in filenames:
            return os.path.join(dirpath, filename)
    return None


# Try to download via kagglehub if files missing
if not (os.path.exists(train_file) and os.path.exists(dev_file)):
    try:
        import kagglehub
        print('Attempting dataset download via kagglehub...')
        # dataset_download() returns the local directory holding the dataset.
        # Its optional `path` argument selects a file inside the dataset (it is
        # not a destination folder) and it has no `unzip` argument, so only the
        # handle is passed and the JSON files are located afterwards.
        download_dir = kagglehub.dataset_download(SQUAD_KAGGLE_HANDLE)
        if not os.path.exists(train_file):
            train_file = _find_in_tree(download_dir, 'train-v1.1.json') or train_file
        if not os.path.exists(dev_file):
            dev_file = _find_in_tree(download_dir, 'dev-v1.1.json') or dev_file
    except Exception as e:
        print('kagglehub download failed or not available:', str(e))
        print('Please place train-v1.1.json and dev-v1.1.json in the notebook folder and re-run.')

# Validate files
if not os.path.exists(dev_file):
    raise FileNotFoundError('dev-v1.1.json not found in notebook folder')

with open(dev_file, 'r', encoding='utf-8') as f:
    dev = json.load(f)

# Flatten dev examples (context, question, answers[0])
examples = []
for article in dev['data']:
    for para in article['paragraphs']:
        context = para['context']
        for qa in para['qas']:
            # Only the first listed answer is used as the reference;
            # an empty text with start -1 marks "no reference available".
            answers = qa.get('answers', [])
            if answers:
                true_text = answers[0]['text']
                true_start = answers[0]['answer_start']
            else:
                true_text = ''
                true_start = -1
            examples.append({
                'id': qa.get('id'),
                'context': context,
                'question': qa['question'],
                'true_text': true_text,
                'true_start': true_start,
            })

print('Loaded', len(examples), 'dev examples')

# Run QA pipeline and compute token-level IoU
results = []
qa_failures = 0       # examples where the pipeline raised
iou_failures = 0      # examples where the metric computation raised
first_failure = None  # repr of the first exception seen, for diagnosis
for ex in tqdm(examples, total=len(examples)):
    try:
        qa_res = qa_pipe(question=ex['question'], context=ex['context'])
        pred_text = qa_res.get('answer', '')
        pred_start = qa_res.get('start', -1)
        score = qa_res.get('score', 0.0)
    except Exception as e:
        # Best effort: keep going, but remember that this example failed so
        # the summary does not silently treat it as an ordinary miss.
        qa_failures += 1
        if first_failure is None:
            first_failure = repr(e)
        pred_text = ''
        pred_start = -1
        score = 0.0
    # compute token IoU using metrics cell utilities
    try:
        iou = token_level_iou_from_char_spans(qa_tokenizer, ex['context'], pred_start, pred_text, ex['true_start'], ex['true_text'])
    except Exception as e:
        iou_failures += 1
        if first_failure is None:
            first_failure = repr(e)
        iou = 0.0
    results.append({'id': ex['id'], 'question': ex['question'], 'pred_text': pred_text, 'pred_start': pred_start, 'score': score, 'true_text': ex['true_text'], 'true_start': ex['true_start'], 'token_iou': iou})

# Save results
with open('preds.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

# Also CSV summary
pd.DataFrame(results).to_csv('preds_with_iou.csv', index=False)

# Print quick stats
ious = [r['token_iou'] for r in results]
# np.mean of an empty list emits a warning and yields nan; make that explicit.
mean_iou = np.mean(ious) if ious else float('nan')
print('Mean token-level IoU on dev (quick):', mean_iou)
if qa_failures or iou_failures:
    print('Warning:', qa_failures, 'pipeline failures and', iou_failures,
          'metric failures were scored as IoU 0.0; first error:', first_failure)
print('Saved preds.json and preds_with_iou.csv')