# Enrichment API Exploration

This notebook mirrors the admin enrichment tooling so you can experiment with the same API endpoints and parsing logic outside the web UI.

## Requirements

Make sure the environment where you plan to run the notebook provides the following:

- Python 3.10 or newer
- [`requests`](https://pypi.org/project/requests/)
- [`python-dotenv`](https://pypi.org/project/python-dotenv/) *(optional, for loading a `.env` file with API tokens)*
- [`ipykernel`](https://pypi.org/project/ipykernel/) *(if you need to register the environment as a Jupyter kernel)*


## Configuration

Set `GVM_API_BASE_URL` to point at the German Verb Master backend (e.g. `http://localhost:3000`).
Optionally set `GVM_ADMIN_TOKEN`; when present, it is sent as the `x-admin-token` header on every request, which is needed if the enrichment endpoints require admin authentication.


In [None]:
from __future__ import annotations

import json
import os
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, TypedDict, Literal

import requests

# Connection settings are read from the environment once, when this cell runs.
# The base URL is stored without trailing slashes so paths can be appended directly.
API_BASE_URL = os.environ.get("GVM_API_BASE_URL", "http://localhost:3000").rstrip('/')
# None when GVM_ADMIN_TOKEN is not set.
ADMIN_TOKEN = os.environ.get("GVM_ADMIN_TOKEN")


In [None]:
class RunEnrichmentPayload(TypedDict, total=False):
    """JSON body passed to `run_bulk_enrichment`; every key is optional (``total=False``)."""
    # presumably the maximum number of words to process in one run -- confirm against the backend
    limit: int
    # Which words to scan; restricted to the three literal values below.
    mode: Literal['non-canonical', 'canonical', 'all']
    onlyIncomplete: bool
    # The remaining flags use the same key names as WordEnrichmentOptions.
    enableAi: bool
    allowOverwrite: bool
    collectSynonyms: bool
    collectExamples: bool
    collectTranslations: bool
    collectWiktionary: bool

class WordEnrichmentOptions(TypedDict, total=False):
    """JSON body passed to `preview_word_enrichment`; every key is optional (``total=False``)."""
    enableAi: bool
    allowOverwrite: bool
    # NOTE(review): the collect* flags presumably toggle individual data providers -- confirm against the backend.
    collectSynonyms: bool
    collectExamples: bool
    collectTranslations: bool
    collectWiktionary: bool

class EnrichmentPatch(TypedDict, total=False):
    """Field changes sent by `apply_word_enrichment` under the ``patch`` key.

    Every key is optional (``total=False``) and every value may be ``None``.
    """
    english: Optional[str]
    exampleDe: Optional[str]
    exampleEn: Optional[str]
    sourcesCsv: Optional[str]
    complete: Optional[bool]
    # Verb-form fields; `aux` is limited to the two auxiliary verbs listed.
    praeteritum: Optional[str]
    partizipIi: Optional[str]
    perfekt: Optional[str]
    aux: Optional[Literal['haben', 'sein']]

@dataclass
class EnrichmentTranslationCandidate:
    """A proposed translation together with the source that supplied it."""
    value: str
    source: str
    # Optional score; its scale is not defined in this notebook.
    confidence: Optional[float] = None

@dataclass
class EnrichmentExampleCandidate:
    """A proposed example sentence pair together with the source that supplied it."""
    source: str
    # Either side of the pair may be absent.
    exampleDe: Optional[str] = None
    exampleEn: Optional[str] = None

@dataclass
class EnrichmentVerbFormSuggestion:
    """Verb forms proposed by a single source; any individual form may be missing."""
    source: str
    praeteritum: Optional[str] = None
    partizipIi: Optional[str] = None
    perfekt: Optional[str] = None
    # Typed as a plain string here, whereas EnrichmentPatch restricts `aux` to 'haben' / 'sein'.
    aux: Optional[str] = None

@dataclass
class EnrichmentProviderDiagnostic:
    """Outcome reported for one enrichment provider."""
    id: str
    label: str
    status: Literal['success', 'error', 'skipped']
    error: Optional[str] = None
    # Kept exactly as received; its structure is not modelled in this notebook.
    payload: Optional[Any] = None

@dataclass
class EnrichmentFieldUpdate:
    """A single field change: the field name plus its previous and next values."""
    field: str
    # Values are untyped because they depend on which field changed.
    previous: Any
    next: Any
    source: Optional[str] = None

@dataclass
class WordEnrichmentSuggestions:
    """All suggestions collected for one word; list fields default to empty lists."""
    translations: List[EnrichmentTranslationCandidate] = field(default_factory=list)
    examples: List[EnrichmentExampleCandidate] = field(default_factory=list)
    synonyms: List[str] = field(default_factory=list)
    englishHints: List[str] = field(default_factory=list)
    wiktionarySummary: Optional[str] = None
    # A list of suggestions here, unlike EnrichmentWordSummary.verbForms, which holds at most one.
    verbForms: List[EnrichmentVerbFormSuggestion] = field(default_factory=list)
    providerDiagnostics: List[EnrichmentProviderDiagnostic] = field(default_factory=list)

@dataclass
class EnrichmentWordSummary:
    """Per-word enrichment summary as produced by `_map_word_summary`."""
    id: int
    lemma: str
    pos: str
    # These list fields default to empty lists rather than None.
    missingFields: List[str] = field(default_factory=list)
    synonyms: List[str] = field(default_factory=list)
    sources: List[str] = field(default_factory=list)
    applied: bool = False
    aiUsed: bool = False
    # The remaining fields default to None; `_map_word_summary` also stores None
    # for these list fields when the response list is empty.
    translation: Optional[EnrichmentTranslationCandidate] = None
    englishHints: Optional[List[str]] = None
    wiktionarySummary: Optional[str] = None
    example: Optional[EnrichmentExampleCandidate] = None
    verbForms: Optional[EnrichmentVerbFormSuggestion] = None
    updates: Optional[List[EnrichmentFieldUpdate]] = None
    errors: Optional[List[str]] = None

@dataclass
class WordEnrichmentPreview:
    """Parsed result of the single-word preview endpoint (see `parse_word_preview`)."""
    summary: EnrichmentWordSummary
    # Stored as the raw dict from the response; it is not validated against EnrichmentPatch.
    patch: EnrichmentPatch
    hasUpdates: bool
    suggestions: WordEnrichmentSuggestions

@dataclass
class BulkEnrichmentResponse:
    """Parsed result of the bulk enrichment endpoint (see `parse_bulk_response`)."""
    # Counters reported by the response, plus one summary per listed word.
    scanned: int
    updated: int
    words: List[EnrichmentWordSummary] = field(default_factory=list)

@dataclass
class ApplyEnrichmentResponse:
    """Parsed result of the apply endpoint (see `parse_apply_response`)."""
    # Kept as the raw `word` value from the response; its shape is not modelled here.
    word: Any
    appliedFields: List[str] = field(default_factory=list)


In [None]:
def _map_translation_candidate(data: Dict[str, Any]) -> EnrichmentTranslationCandidate:
    """Convert a raw translation dict into an EnrichmentTranslationCandidate.

    Missing ``value`` / ``source`` keys become empty strings; a missing
    ``confidence`` becomes ``None``.
    """
    text = data.get('value', '')
    origin = data.get('source', '')
    score = data.get('confidence')
    return EnrichmentTranslationCandidate(value=text, source=origin, confidence=score)

def _map_example_candidate(data: Dict[str, Any]) -> EnrichmentExampleCandidate:
    """Convert a raw example dict into an EnrichmentExampleCandidate.

    A missing ``source`` becomes an empty string; missing sentences become ``None``.
    """
    origin = data.get('source', '')
    german = data.get('exampleDe')
    english = data.get('exampleEn')
    return EnrichmentExampleCandidate(source=origin, exampleDe=german, exampleEn=english)

def _map_verb_form_suggestion(data: Dict[str, Any]) -> EnrichmentVerbFormSuggestion:
    """Convert a raw verb-form dict into an EnrichmentVerbFormSuggestion.

    A missing ``source`` becomes an empty string; each missing form becomes ``None``.
    """
    form_keys = ('praeteritum', 'partizipIi', 'perfekt', 'aux')
    forms = {key: data.get(key) for key in form_keys}
    return EnrichmentVerbFormSuggestion(source=data.get('source', ''), **forms)

def _map_provider_diagnostic(data: Dict[str, Any]) -> EnrichmentProviderDiagnostic:
    """Convert a raw diagnostic dict into an EnrichmentProviderDiagnostic.

    Missing ``id`` / ``label`` become empty strings, a missing ``status``
    becomes ``'skipped'``, and missing ``error`` / ``payload`` become ``None``.
    """
    provider_id = data.get('id', '')
    provider_label = data.get('label', '')
    outcome = data.get('status', 'skipped')
    return EnrichmentProviderDiagnostic(
        id=provider_id,
        label=provider_label,
        status=outcome,
        error=data.get('error'),
        payload=data.get('payload'),
    )

def _map_field_update(data: Dict[str, Any]) -> EnrichmentFieldUpdate:
    """Convert a raw update dict into an EnrichmentFieldUpdate.

    A missing ``field`` becomes an empty string; the other keys default to ``None``.
    """
    attrs = {
        'field': data.get('field', ''),
        'previous': data.get('previous'),
        'next': data.get('next'),
        'source': data.get('source'),
    }
    return EnrichmentFieldUpdate(**attrs)

def _map_word_summary(data: Dict[str, Any]) -> EnrichmentWordSummary:
    """Convert a raw word-summary dict into an EnrichmentWordSummary.

    List-valued keys that are missing *or* explicitly ``null`` in the JSON are
    treated as empty lists, so a ``null`` no longer raises ``TypeError``.
    ``englishHints``, ``updates`` and ``errors`` are stored as ``None`` when
    they end up empty. Nested ``translation`` / ``example`` / ``verbForms``
    objects are mapped only when present and truthy.
    """
    translation = data.get('translation')
    example = data.get('example')
    verb_forms = data.get('verbForms')
    # `data.get(key, [])` would hand back None for an explicit JSON null, so
    # every list key uses the `or []` guard instead.
    updates = [_map_field_update(item) for item in data.get('updates') or []]
    return EnrichmentWordSummary(
        id=data.get('id'),
        lemma=data.get('lemma', ''),
        pos=data.get('pos', ''),
        missingFields=list(data.get('missingFields') or []),
        synonyms=list(data.get('synonyms') or []),
        sources=list(data.get('sources') or []),
        applied=bool(data.get('applied', False)),
        aiUsed=bool(data.get('aiUsed', False)),
        translation=_map_translation_candidate(translation) if translation else None,
        englishHints=list(data.get('englishHints') or []) or None,
        wiktionarySummary=data.get('wiktionarySummary'),
        example=_map_example_candidate(example) if example else None,
        verbForms=_map_verb_form_suggestion(verb_forms) if verb_forms else None,
        updates=updates or None,
        errors=list(data.get('errors') or []) or None,
    )

def _map_suggestions(data: Dict[str, Any]) -> WordEnrichmentSuggestions:
    """Convert a raw suggestions dict into a WordEnrichmentSuggestions.

    List-valued keys that are missing *or* explicitly ``null`` in the JSON are
    treated as empty lists, so a ``null`` no longer raises ``TypeError``.
    """

    def _items(key: str) -> List[Any]:
        # `data.get(key, [])` would return None for an explicit JSON null.
        return list(data.get(key) or [])

    return WordEnrichmentSuggestions(
        translations=[_map_translation_candidate(item) for item in _items('translations')],
        examples=[_map_example_candidate(item) for item in _items('examples')],
        synonyms=_items('synonyms'),
        englishHints=_items('englishHints'),
        wiktionarySummary=data.get('wiktionarySummary'),
        verbForms=[_map_verb_form_suggestion(item) for item in _items('verbForms')],
        providerDiagnostics=[_map_provider_diagnostic(item) for item in _items('providerDiagnostics')],
    )

def parse_word_preview(data: Dict[str, Any]) -> WordEnrichmentPreview:
    """Turn the decoded JSON of a preview response into a WordEnrichmentPreview.

    Missing ``summary`` / ``patch`` / ``suggestions`` keys are treated as empty
    dicts; ``hasUpdates`` is coerced to ``bool`` and defaults to ``False``.
    """
    summary = _map_word_summary(data.get('summary', {}))
    patch = data.get('patch', {})
    has_updates = bool(data.get('hasUpdates', False))
    suggestions = _map_suggestions(data.get('suggestions', {}))
    return WordEnrichmentPreview(
        summary=summary,
        patch=patch,
        hasUpdates=has_updates,
        suggestions=suggestions,
    )

def parse_bulk_response(data: Dict[str, Any]) -> BulkEnrichmentResponse:
    """Turn the decoded JSON of a bulk-run response into a BulkEnrichmentResponse.

    Missing counters default to 0 and are coerced with ``int``; a missing
    ``words`` key yields an empty list.
    """
    scanned_count = int(data.get('scanned', 0))
    updated_count = int(data.get('updated', 0))
    summaries = []
    for raw_word in data.get('words', []):
        summaries.append(_map_word_summary(raw_word))
    return BulkEnrichmentResponse(scanned=scanned_count, updated=updated_count, words=summaries)

def parse_apply_response(data: Dict[str, Any]) -> ApplyEnrichmentResponse:
    """Turn the decoded JSON of an apply response into an ApplyEnrichmentResponse.

    ``word`` is passed through untouched; a missing ``appliedFields`` key
    yields an empty list.
    """
    applied_fields = list(data.get('appliedFields', []))
    updated_word = data.get('word')
    return ApplyEnrichmentResponse(word=updated_word, appliedFields=applied_fields)


In [None]:
def _request(
    path: str,
    *,
    method: str = 'GET',
    payload: Optional[Dict[str, Any]] = None,
    admin_token: Optional[str] = None,
    timeout: float | tuple[float, float | None] | None = (10.0, None),
) -> Dict[str, Any]:
    """Send one JSON request to the backend and return the decoded JSON body.

    Args:
        path: Endpoint path, with or without a leading slash; it is appended
            to ``API_BASE_URL``.
        method: HTTP method name passed to ``requests.request``.
        payload: Optional JSON-serialisable body.
        admin_token: Overrides the module-level ``ADMIN_TOKEN`` when truthy.
        timeout: Passed straight to ``requests.request``. The default gives
            up after 10 seconds if no connection can be established but puts
            no limit on how long the server may take to answer, so
            long-running enrichment calls are not cut off.

    Raises:
        RuntimeError: If the response status is 400 or above, or the body is
            not valid JSON.
        requests.exceptions.RequestException: Connection errors and timeouts
            raised by ``requests`` propagate unchanged.
    """
    url = f"{API_BASE_URL}{path}" if path.startswith('/') else f"{API_BASE_URL}/{path}"
    headers = {'Content-Type': 'application/json'}
    # Strip before the truthiness check so a whitespace-only token does not
    # produce an empty x-admin-token header.
    token = (admin_token or ADMIN_TOKEN or '').strip()
    if token:
        headers['x-admin-token'] = token

    response = requests.request(method, url, headers=headers, json=payload, timeout=timeout)

    if response.status_code >= 400:
        message = response.text or f"Request failed with status {response.status_code}"
        raise RuntimeError(message)

    try:
        return response.json()
    except ValueError as exc:
        # requests raises a decode error derived from either the stdlib json
        # module or simplejson (when installed); both subclass ValueError.
        raise RuntimeError('Response did not contain valid JSON') from exc

def run_bulk_enrichment(payload: RunEnrichmentPayload, *, admin_token: Optional[str] = None) -> BulkEnrichmentResponse:
    """POST ``payload`` to ``/api/enrichment/run`` and return the parsed bulk response."""
    return parse_bulk_response(
        _request(
            '/api/enrichment/run',
            method='POST',
            payload=payload,
            admin_token=admin_token,
        )
    )

def preview_word_enrichment(word_id: int, options: WordEnrichmentOptions, *, admin_token: Optional[str] = None) -> WordEnrichmentPreview:
    """POST ``options`` to the preview endpoint of ``word_id`` and return the parsed preview."""
    endpoint = f'/api/enrichment/words/{word_id}/preview'
    raw_preview = _request(endpoint, method='POST', payload=options, admin_token=admin_token)
    return parse_word_preview(raw_preview)

def apply_word_enrichment(word_id: int, patch: EnrichmentPatch, *, admin_token: Optional[str] = None) -> ApplyEnrichmentResponse:
    """POST ``patch`` (wrapped under the ``patch`` key) to the apply endpoint of ``word_id``."""
    endpoint = f'/api/enrichment/words/{word_id}/apply'
    body = {'patch': patch}
    raw_result = _request(endpoint, method='POST', payload=body, admin_token=admin_token)
    return parse_apply_response(raw_result)


## Usage Examples

Uncomment and adapt the snippets below to run live calls once the backend is available.

In [None]:
# Example: run a non-canonical enrichment scan
# response = run_bulk_enrichment({
#     'mode': 'non-canonical',
#     'limit': 5,
#     'collectTranslations': True,
#     'collectWiktionary': True,
# })
# response


In [None]:
# Example: preview enrichment for a specific word
# preview = preview_word_enrichment(
#     word_id=123,
#     options={
#         'collectTranslations': True,
#         'collectExamples': True,
#         'collectWiktionary': True,
#     },
# )
# preview


In [None]:
# Example: apply a curated patch after reviewing suggestions
# apply_word_enrichment(
#     word_id=123,
#     patch={
#         'english': 'to do',
#         'praeteritum': 'tat',
#         'partizipIi': 'getan',
#         'aux': 'haben',
#     },
# )
