# Extraction Annotation Notebook

Use this notebook to review and correct the automated `extract_parameters` results produced by `scripts/diagnose_nlp_samples.py`.

How to use: run the **View / Edit** cell below (Cell 5), pick an entry index and press **View** to review the original text and the extracted JSON. Edit the JSON if needed, press **Update Entry** to apply the change to the in-memory report, then press **Save All** to write the report to disk. If `ipywidgets` is not available, the notebook falls back to a simple text-based edit flow: call `view_entry(i)` and `edit_entry_fallback(i)` manually.

In [None]:
# Initialization: make the local `src` directory importable and import helpers
import json
import sys
from pathlib import Path

# Absolute path of the directory the notebook is running from.
ROOT = Path.cwd().resolve()
SRC = ROOT.joinpath('src')
# Put `src` at the front of the import path, but only once across re-runs.
if sys.path.count(str(SRC)) == 0:
    sys.path.insert(0, str(SRC))
print('Root:', ROOT)

In [None]:
# Load the diagnostic report (default path)
report_path = ROOT.joinpath('diagnose_report.json')
if report_path.exists():
    # Parse the whole report into memory; later cells read and mutate `rows`.
    rows = json.loads(report_path.read_text(encoding='utf-8'))
else:
    raise FileNotFoundError(f'Report not found: {report_path} - run scripts/diagnose_nlp_samples.py first')
print(f'Loaded {len(rows)} entries from {report_path}')

In [None]:
# Print a brief summary of each entry (index, truncated text, keys present)
# NOTE(review): the format string of the final print was truncated in this copy
# of the notebook; the layout below follows the comment above and the
# `[{i}] {short}` style used by list_indices() - confirm against the original.
for i, r in enumerate(rows):
    txt = r.get('text','')
    # Keep the line readable: show at most the first 120 characters.
    short = (txt[:120] + '...') if len(txt) > 120 else txt
    keys = list(r.get('extraction', {}).keys())
    print(f'[{i}] {short} | keys: {keys}')

In [None]:
# View / Edit a single entry. ipywidgets drives the UI when it can be
# imported; otherwise the input()-based fallback editor is used instead.
from pprint import pformat
try:
    import ipywidgets as widgets
    from IPython.display import display, clear_output
except Exception:
    # Any failure while importing the widget stack selects the fallback path.
    WIDGETS = False
else:
    WIDGETS = True

def save_report(path=ROOT / 'diagnose_report.json'):
    """Write the in-memory ``rows`` list to ``path`` as indented UTF-8 JSON.

    Args:
        path: Object providing ``write_text`` (a ``pathlib.Path``). The default
            is computed from ``ROOT`` once, when this ``def`` is executed.
    """
    text = json.dumps(rows, indent=2, ensure_ascii=False)
    path.write_text(text, encoding='utf-8')
    print(f'Saved {len(rows)} entries to {path}')

def view_entry(index):
    """Print the original text and the pretty-printed extraction of one entry.

    Args:
        index: Position of the entry in the module-level ``rows`` list.

    Raises:
        IndexError: If ``index`` is out of range for ``rows``.
    """
    r = rows[index]
    print('--- Original text ---')
    print(r.get('text',''))
    # The leading '\n' leaves a blank line between the text and this header.
    print('\n--- Extracted (JSON) ---')
    print(pformat(r.get('extraction', {})))

def edit_entry_fallback(index):
    """Replace one entry's extraction with JSON typed at an ``input()`` prompt.

    Shows the current extraction, reads a single line, and, when the line is
    non-empty and parses as JSON, stores the parsed value in
    ``rows[index]['extraction']`` and persists the report via ``save_report()``.
    An empty line leaves the entry unchanged.

    Args:
        index: Position of the entry in the module-level ``rows`` list.

    Raises:
        IndexError: If ``index`` is out of range for ``rows``.
    """
    print('Fallback editor: paste replacement JSON for the extraction (single line), or just press Enter to skip')
    cur = rows[index].get('extraction', {})
    print('Current extraction:')
    print(pformat(cur))
    s = input('New extraction JSON (or Enter to keep): ').strip()
    if not s:
        return
    # Only the parse step can legitimately produce an "Invalid JSON" message.
    try:
        newv = json.loads(s)
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        print('Invalid JSON:', e)
        return
    rows[index]['extraction'] = newv
    # Report disk failures separately so they are not mistaken for bad JSON.
    try:
        save_report()
    except OSError as e:
        print('Entry updated in memory, but saving the report failed:', e)
        return
    print('Updated entry and saved report.')

if WIDGETS:
    # Interactive editor: an index picker, View / Update Entry / Save All
    # buttons, and a text area holding the selected entry's extraction as JSON.
    idx = widgets.BoundedIntText(value=0, min=0, max=max(0, len(rows)-1), description='Index:')
    view_btn = widgets.Button(description='View')
    save_btn = widgets.Button(description='Save All')
    # `rows` may be empty; fall back to an empty object instead of indexing rows[0].
    edit_area = widgets.Textarea(value=json.dumps(rows[0].get('extraction',{}) if rows else {}, ensure_ascii=False, indent=2), layout=widgets.Layout(width='100%', height='200px'))
    update_btn = widgets.Button(description='Update Entry')

    def on_view(b):
        """Redraw the controls, print the selected entry, and load its JSON into the editor."""
        i = int(idx.value)
        clear_output(wait=True)
        # clear_output removed the controls as well, so show them again first.
        display(idx, view_btn, update_btn, save_btn, edit_area)
        if not rows:
            print('No entries loaded.')
            return
        view_entry(i)
        edit_area.value = json.dumps(rows[i].get('extraction',{}), ensure_ascii=False, indent=2)

    def on_update(b):
        """Parse the editor contents and store them on the selected entry (in memory only)."""
        i = int(idx.value)
        if not rows:
            print('No entries loaded.')
            return
        # Keep the try body to the parse step so only JSON errors are reported as such.
        try:
            newv = json.loads(edit_area.value)
        except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
            print('Invalid JSON:', e)
            return
        rows[i]['extraction'] = newv
        print('Updated entry in memory (use Save All to persist).')

    def on_save(b):
        """Write the whole in-memory report to disk."""
        save_report()

    view_btn.on_click(on_view)
    update_btn.on_click(on_update)
    save_btn.on_click(on_save)

    display(idx, view_btn, update_btn, save_btn, edit_area)
    print('Use the controls above to view/edit entries.\nChoose an index and press View.')
else:
    print('ipywidgets not available. Use view_entry(i) and edit_entry_fallback(i) functions manually.')

In [None]:
# Helper functions: list indices and quick save
def list_indices():
    """Print one line per entry in ``rows``: its index and up to 80 characters of its text."""
    for pos, entry in enumerate(rows):
        text = entry.get('text','')
        if len(text) > 80:
            preview = text[:80] + '...'
        else:
            preview = text
        print(f'[{pos}] {preview}')

def save_all(path=ROOT / 'diagnose_report.json'):
    """Serialize ``rows`` as indented JSON and write it to ``path`` in UTF-8.

    Args:
        path: Object providing ``write_text`` (a ``pathlib.Path``). The default
            is computed from ``ROOT`` once, when this ``def`` is executed.
    """
    serialized = json.dumps(rows, ensure_ascii=False, indent=2)
    path.write_text(serialized, encoding='utf-8')
    count = len(rows)
    print(f'Saved {count} entries to {path}')

# Quick reminder, printed each time this cell runs, pointing at list_indices()
# and at the View/Edit cell.
print('Run list_indices() to see entries, then use the View/Edit cell (Cell 5) to inspect and edit.')