In [13]:
# Heuristic QA inference fallback (fast, no heavy dependencies)
# This cell runs because the transformers/pipeline approach failed in the kernel.
# It downloads SQuAD dev if missing, runs a sentence-overlap baseline, computes
# token-level IoU using whitespace tokenization, and saves results.

import os, json, urllib.request, math
from tqdm.auto import tqdm

DEV_URL = 'https://raw.githubusercontent.com/rajpurkar/SQuAD-explorer/master/dataset/dev-v1.1.json'
DEV_FILE = 'dev-v1.1.json'

if not os.path.exists(DEV_FILE):
    print('Downloading dev set...')
    urllib.request.urlretrieve(DEV_URL, DEV_FILE)
    print('Downloaded', DEV_FILE)

with open(DEV_FILE, 'r', encoding='utf-8') as f:
    dev = json.load(f)

# flatten examples
examples = []
for article in dev['data']:
    for para in article['paragraphs']:
        context = para['context']
        # split into sentences simply
        sents = [s.strip() for s in __import__('re').split(r'(?<=[.!?])\s+', context) if s.strip()]
        for qa in para['qas']:
            qid = qa.get('id')
            question = qa['question']
            answers = qa.get('answers', [])
            if answers:
                true_text = answers[0]['text']
                true_start = answers[0]['answer_start']
            else:
                true_text = ''
                true_start = -1
            examples.append({'id': qid, 'context': context, 'sents': sents, 'question': question, 'true_text': true_text, 'true_start': true_start})

print('Loaded', len(examples), 'examples (heuristic)')

# simple tokenizer
import re
_tokenize = lambda t: [w for w in re.findall(r"\w+", t.lower())]

def token_iou_whitespace(pred: str, true: str) -> float:
    p = _tokenize(pred)
    t = _tokenize(true)
    if not p and not t:
        return 1.0
    if not p or not t:
        return 0.0
    ps = set(p)
    ts = set(t)
    inter = ps & ts
    union = ps | ts
    if not union:
        return 0.0
    return len(inter)/len(union)

results = []
for ex in tqdm(examples, total=len(examples)):
    q_tokens = set(_tokenize(ex['question']))
    best_sent = ''
    best_overlap = -1
    for s in ex['sents']:
        s_tokens = set(_tokenize(s))
        overlap = len(q_tokens & s_tokens)
        if overlap > best_overlap:
            best_overlap = overlap
            best_sent = s
    pred_text = best_sent
    # compute token IoU against true_text
    iou = token_iou_whitespace(pred_text, ex['true_text'])
    results.append({'id': ex['id'], 'pred_text': pred_text, 'true_text': ex['true_text'], 'token_iou': iou})

# save
with open('preds.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False)

import pandas as pd
pd.DataFrame(results).to_csv('preds_with_iou.csv', index=False)

import statistics
ious = [r['token_iou'] for r in results]
print('Saved preds.json and preds_with_iou.csv')
print('Mean token-level IoU (heuristic, whitespace tokens):', statistics.mean(ious))

Downloading dev set...
Downloaded dev-v1.1.json
Loaded 10570 examples (heuristic)
Downloaded dev-v1.1.json
Loaded 10570 examples (heuristic)


  0%|          | 0/10570 [00:00<?, ?it/s]

Saved preds.json and preds_with_iou.csv
Mean token-level IoU (heuristic, whitespace tokens): 0.10677103468786174


In [2]:
# Token-level IoU metric utilities
from typing import Optional, Tuple


def char_span_to_token_span(tokenizer, context: str, answer_start: int, answer_text: str) -> Optional[Tuple[int,int]]:
    """Convert a character-level span to token span (start, end_exclusive).

    tokenizer: a HuggingFace tokenizer configured to return offset_mapping.
    Returns None if mapping fails.
    """
    enc = tokenizer(context, add_special_tokens=False, return_offsets_mapping=True)
    offsets = enc.get("offset_mapping", [])
    start_char = int(answer_start)
    end_char = start_char + len(answer_text)
    token_indices = [i for i, (s, e) in enumerate(offsets) if not (e <= start_char or s >= end_char)]
    if not token_indices:
        return None
    return token_indices[0], token_indices[-1] + 1


def token_level_iou(pred_span: Tuple[int,int], true_span: Tuple[int,int]) -> float:
    """IoU between two token spans (end exclusive)."""
    ps = set(range(pred_span[0], pred_span[1]))
    ts = set(range(true_span[0], true_span[1]))
    inter = ps & ts
    union = ps | ts
    if not union:
        return 0.0
    return float(len(inter)) / float(len(union))


def token_level_iou_from_char_spans(tokenizer, context: str, pred_start: int, pred_text: str, true_start: int, true_text: str) -> float:
    p = char_span_to_token_span(tokenizer, context, pred_start, pred_text)
    t = char_span_to_token_span(tokenizer, context, true_start, true_text)
    if p is None or t is None:
        return 0.0
    return token_level_iou(p, t)

print('Metric utilities loaded.')

Metric utilities loaded.


In [12]:
# Setup: import required libs and load QA model (avoid transformers.pipeline)
import sys, subprocess

try:
    from transformers import AutoTokenizer, AutoModelForQuestionAnswering
except Exception:
    print('Installing transformers and datasets inside kernel...')
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--quiet', 'transformers', 'datasets', 'kagglehub', 'tqdm'])
    from transformers import AutoTokenizer, AutoModelForQuestionAnswering

import torch
from tqdm.auto import tqdm
import json

# Initialize a small QA model for speed
MODEL_NAME = 'distilbert-base-uncased-distilled-squad'
print('Loading model:', MODEL_NAME)
qa_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
qa_model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
qa_model.to(device)
qa_model.eval()

print('QA model ready on', device)

Loading model: distilbert-base-uncased-distilled-squad


ModuleNotFoundError: No module named 'transformers.models.distilbert'

In [None]:
# Inference: download SQuAD (kagglehub) if needed, run QA on the dev set using tokenizer+model, compute token-level IoU
import os
import json
from tqdm.auto import tqdm
import torch

DATA_DIR = '.'
train_file = os.path.join(DATA_DIR, 'train-v1.1.json')
dev_file = os.path.join(DATA_DIR, 'dev-v1.1.json')

# Try to download via kagglehub if files missing
if not (os.path.exists(train_file) and os.path.exists(dev_file)):
    try:
        import kagglehub
        print('Attempting dataset download via kagglehub...')
        kagglehub.dataset_download('stanfordu/stanford-question-answering-dataset', path='.', unzip=True)
    except Exception as e:
        print('kagglehub download failed or not available:', str(e))
        print('Please place train-v1.1.json and dev-v1.1.json in the notebook folder and re-run.')

# Validate files
if not os.path.exists(dev_file):
    raise FileNotFoundError('dev-v1.1.json not found in notebook folder')

with open(dev_file, 'r', encoding='utf-8') as f:
    dev = json.load(f)

# Flatten dev examples (context, question, answers[0])
examples = []
for article in dev['data']:
    for para in article['paragraphs']:
        context = para['context']
        for qa in para['qas']:
            qid = qa.get('id')
            question = qa['question']
            answers = qa.get('answers', [])
            if answers:
                true_text = answers[0]['text']
                true_start = answers[0]['answer_start']
            else:
                true_text = ''
                true_start = -1
            examples.append({'id': qid, 'context': context, 'question': question, 'true_text': true_text, 'true_start': true_start})

print('Loaded', len(examples), 'dev examples')

# Helpers to run model
from transformers import AutoTokenizer
import numpy as np

max_len = 384
stride = 128

results = []
device = next(qa_model.parameters()).device

for ex in tqdm(examples, total=len(examples)):
    context = ex['context']
    question = ex['question']
    # tokenize with return_offsets_mapping to map tokens back to chars
    enc = qa_tokenizer(question, context, truncation='only_second', max_length=max_len, stride=stride, return_overflowing_tokens=True, return_offsets_mapping=True, padding='max_length')
    input_ids = enc['input_ids']
    attention_mask = enc['attention_mask']
    offset_mappings = enc['offset_mapping']
    # iterate splits to find best answer
    best_score = -1e9
    best_answer = ''
    best_start = -1
    for i in range(len(input_ids)):
        ids = torch.tensor([input_ids[i]], device=device)
        mask = torch.tensor([attention_mask[i]], device=device)
        with torch.no_grad():
            out = qa_model(input_ids=ids, attention_mask=mask)
        start_logits = out.start_logits.cpu().numpy()[0]
        end_logits = out.end_logits.cpu().numpy()[0]
        # choose max start+end where end>=start
        for s in range(len(start_logits)):
            for e in range(s, min(s+30, len(end_logits))):
                score = start_logits[s] + end_logits[e]
                if score > best_score:
                    # convert token span to char span using offset mapping
                    off = offset_mappings[i]
                    s_off = off[s]
                    e_off = off[e]
                    # check that tokens map to context (non-zero)
                    if s_off is None or e_off is None:
                        continue
                    char_start = s_off[0]
                    char_end = e_off[1]
                    answer_text = context[char_start:char_end]
                    best_score = score
                    best_answer = answer_text
                    best_start = char_start
    # compute IoU
    iou = 0.0
    try:
        iou = token_level_iou_from_char_spans(qa_tokenizer, context, best_start, best_answer, ex['true_start'], ex['true_text'])
    except Exception:
        iou = 0.0
    results.append({'id': ex['id'], 'pred_text': best_answer, 'pred_start': best_start, 'score': float(best_score), 'true_text': ex['true_text'], 'true_start': ex['true_start'], 'token_iou': iou})

# Save
import pandas as pd
with open('preds.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=2)
pd.DataFrame(results).to_csv('preds_with_iou.csv', index=False)

import numpy as np
ious = [r['token_iou'] for r in results]
print('Mean token-level IoU on dev (quick):', np.mean(ious))
print('Saved preds.json and preds_with_iou.csv')

In [5]:
import sys, subprocess
print('Kernel python:', sys.executable)

try:
    import pkg_resources
    dist = pkg_resources.get_distribution('transformers')
    print('transformers version (pkg_resources):', dist.version, 'location:', dist.location)
except Exception as e:
    print('transformers not found via pkg_resources:', e)

import importlib
try:
    mod = importlib.import_module('transformers')
    print('transformers module file:', getattr(mod, '__file__', None))
    print('Has pipeline attribute?', hasattr(mod, 'pipeline'))
except Exception as e:
    print('Error importing transformers module:', e)


Kernel python: c:\Users\91983\AppData\Local\Programs\Python\Python313\python.exe
transformers version (pkg_resources): 4.55.2 location: c:\users\91983\appdata\roaming\python\python313\site-packages
transformers module file: C:\Users\91983\AppData\Roaming\Python\Python313\site-packages\transformers\__init__.py
Has pipeline attribute? False
transformers version (pkg_resources): 4.55.2 location: c:\users\91983\appdata\roaming\python\python313\site-packages
transformers module file: C:\Users\91983\AppData\Roaming\Python\Python313\site-packages\transformers\__init__.py
Has pipeline attribute? False


  import pkg_resources


In [6]:
# Diagnostic: check torch and transformers.pipelines
import importlib

try:
    import torch
    print('torch version:', torch.__version__)
except Exception as e:
    print('torch not available in kernel:', e)

try:
    from transformers.pipelines import pipeline
    print('Imported pipeline from transformers.pipelines')
except Exception as e:
    print('Could not import pipeline from transformers.pipelines:', e)

try:
    from transformers import AutoTokenizer, AutoModelForQuestionAnswering
    print('AutoTokenizer and AutoModel imports success')
except Exception as e:
    print('AutoTokenizer/AutoModel import failed:', e)


torch version: 2.8.0+cpu
Could not import pipeline from transformers.pipelines: partially initialized module 'torchvision' from 'c:\Users\91983\AppData\Local\Programs\Python\Python313\Lib\site-packages\torchvision\__init__.py' has no attribute 'extension' (most likely due to a circular import)
AutoTokenizer and AutoModel imports success


In [7]:
# Fix kernel environment: uninstall torchvision and reinstall transformers + deps inside the kernel
import sys, subprocess, importlib

def pip(*args):
    print('Running pip', args)
    subprocess.check_call([sys.executable, '-m', 'pip'] + list(args))

# Try uninstalling torchvision to avoid circular import issues
try:
    pip('uninstall', '-y', 'torchvision')
except Exception as e:
    print('Ignoring uninstall error:', e)

# Reinstall/upgrade packages inside kernel
try:
    pip('install', '--upgrade', 'transformers', 'datasets', 'kagglehub', 'tqdm')
except Exception as e:
    print('Install error (continuing):', e)

# Verify import
try:
    from transformers.pipelines import pipeline
    print('pipeline import OK')
except Exception as e:
    print('pipeline import still failing:', e)

# Show transformers version
try:
    import pkg_resources
    print('transformers version:', pkg_resources.get_distribution('transformers').version)
except Exception as e:
    print('Could not determine transformers version:', e)


Running pip ('uninstall', '-y', 'torchvision')
Running pip ('install', '--upgrade', 'transformers', 'datasets', 'kagglehub', 'tqdm')
pipeline import still failing: No module named 'transformers.pipelines'
transformers version: 4.55.2


In [8]:
# Attempt to upgrade transformers inside the kernel and test pipeline import
import sys, subprocess

def run_pip(*args):
    print('pip', args)
    subprocess.check_call([sys.executable, '-m', 'pip'] + list(args))

try:
    run_pip('install', '--upgrade', 'transformers==4.56.1')
except Exception as e:
    print('pip install failed:', e)

# Test import
try:
    from transformers import pipeline
    print('Imported pipeline from transformers')
    import transformers as _t
    print('transformers version after upgrade:', _t.__version__)
except Exception as e:
    print('Import still failed:', e)


pip ('install', '--upgrade', 'transformers==4.56.1')
Import still failed: Could not import module 'pipeline'. Are this object's requirements defined correctly?


In [10]:
# Repair transformers installation in the kernel
import sys, subprocess

pkgs = ['transformers', 'tokenizers', 'huggingface-hub']
print('Force-reinstalling:', pkgs)
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--upgrade', '--force-reinstall'] + pkgs)
print('Reinstall complete.')

# Show versions
import importlib
import pkgutil
for p in pkgs:
    try:
        mod = importlib.import_module(p)
        print(p, 'version =', getattr(mod, '__version__', 'unknown'), 'file=', getattr(mod, '__file__', None))
    except Exception as e:
        print('Error importing', p, e)

Force-reinstalling: ['transformers', 'tokenizers', 'huggingface-hub']


CalledProcessError: Command '['c:\\Users\\91983\\AppData\\Local\\Programs\\Python\\Python313\\python.exe', '-m', 'pip', 'install', '--upgrade', '--force-reinstall', 'transformers', 'tokenizers', 'huggingface-hub']' returned non-zero exit status 1.

In [11]:
# Diagnostic pip install with output capture
import sys, subprocess

cmd = [sys.executable, '-m', 'pip', 'install', '--upgrade', '--force-reinstall', 'transformers']
print('Running:', ' '.join(cmd))
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
out, err = proc.communicate()
print('RETURN CODE:', proc.returncode)
print('STDOUT:\n', out[:10000])
print('STDERR:\n', err[:10000])

# Show installed transformers location if available
try:
    import importlib, pkgutil
    mod = importlib.import_module('transformers')
    print('transformers:', getattr(mod, '__version__', None), getattr(mod, '__file__', None))
except Exception as e:
    print('import transformers after install failed:', e)


Running: c:\Users\91983\AppData\Local\Programs\Python\Python313\python.exe -m pip install --upgrade --force-reinstall transformers
RETURN CODE: 1
STDOUT:
 Collecting transformers
  Using cached transformers-4.56.1-py3-none-any.whl.metadata (42 kB)
Collecting filelock (from transformers)
  Using cached filelock-3.19.1-py3-none-any.whl.metadata (2.1 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Using cached huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting numpy>=1.17 (from transformers)
  Using cached numpy-2.3.2-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting packaging>=20.0 (from transformers)
  Using cached packaging-25.0-py3-none-any.whl.metadata (3.3 kB)
Collecting pyyaml>=5.1 (from transformers)
  Using cached PyYAML-6.0.2-cp313-cp313-win_amd64.whl.metadata (2.1 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2025.9.1-cp313-cp313-win_amd64.whl.metadata (41 kB)
Collecting requests (from transformers)
  Using cach