### Menu Document Processing Pipeline

Deploys a DLT pipeline using a **bronze / silver / gold** medallion architecture:

- **Bronze** — Raw ingestion from `brands_metadata`, `inspections`, and `violations` source
  tables with DLT data-quality expectations (`@dlt.expect`) that enforce non-null keys and
  valid value ranges.
- **Silver** — Cleaned and enriched: price tiers, calorie categories, macronutrient ratios,
  allergen counts (menus); pass/fail status, score bands, severity index (inspections);
  urgency scoring and immediate-action flags (violations).
- **Gold** — Business-ready tables consumed by the Genie space: `menu_items`,
  `nutritional_info`, `allergens`, `brand_nutrition_summary`, `inspection_details`,
  `violation_analysis`, `location_compliance_summary`.

In [None]:
%pip install --upgrade databricks-sdk
# Upgrades the Databricks SDK; the deploy cell below imports WorkspaceClient and
# the `pipelines` service module from it.

In [None]:
# Restart the Python process after the %pip upgrade so that later cells import
# the newly installed databricks-sdk rather than a previously loaded version.
dbutils.library.restartPython()

In [None]:
# Catalog name supplied through the notebook's "CATALOG" widget. It is used
# below as the pipeline's target catalog and passed to the pipeline as the
# MENU_CATALOG configuration value.
CATALOG = dbutils.widgets.get("CATALOG")

##### Deploy the document processing pipeline

In [None]:
import os
import sys

from databricks.sdk import WorkspaceClient
from databricks.sdk.service import pipelines

w = WorkspaceClient()

PIPELINE_NAME = "Menu Document Processing Pipeline"

# Resolve the pipeline source folder relative to this notebook and express it
# as a /Workspace path. Only a leading workspace-root prefix is rewritten:
#   * an empty DATABRICKS_WORKSPACE_ROOT falls back to "/Workspace" (a plain
#     str.replace("", ...) would insert text between every character);
#   * a trailing slash on the root is ignored so the path separator survives;
#   * occurrences of the root string deeper in the path are left untouched.
workspace_root = (
    os.environ.get("DATABRICKS_WORKSPACE_ROOT") or "/Workspace"
).rstrip("/")
root_abs_path = os.path.abspath("../pipelines/menu_documents")
if root_abs_path == workspace_root or root_abs_path.startswith(workspace_root + "/"):
    root_dbx_path = "/Workspace" + root_abs_path[len(workspace_root):]
else:
    # Path is not under the configured root; use it unchanged.
    root_dbx_path = root_abs_path

# Settings shared by both the create and the update call.
pipeline_config = dict(
    catalog=CATALOG,
    schema="menu_documents",
    continuous=False,
    name=PIPELINE_NAME,
    serverless=True,
    configuration={"MENU_CATALOG": CATALOG},
    root_path=root_dbx_path,
    libraries=[
        pipelines.PipelineLibrary(
            glob=pipelines.PathPattern(include=f"{root_dbx_path}/**")
        )
    ],
)

# The LIKE filter narrows the listing server-side; the exact-name comparison
# then discards any pipeline whose name merely matches the pattern.
existing = [
    p for p in w.pipelines.list_pipelines(filter=f"name LIKE '{PIPELINE_NAME}'")
    if p.name == PIPELINE_NAME
]

# Deploy idempotently: update the pipeline in place when one with this name
# already exists, otherwise create it.
if existing:
    pipeline_id = existing[0].pipeline_id
    w.pipelines.update(pipeline_id=pipeline_id, **pipeline_config)
    print(f"Updated existing pipeline: {pipeline_id}")
else:
    created = w.pipelines.create(**pipeline_config)
    pipeline_id = created.pipeline_id
    print(f"Created new pipeline: {pipeline_id}")

# Make the sibling utils folder importable. The membership check keeps
# sys.path from growing when this cell is re-run.
utils_dir = '../utils'
if utils_dir not in sys.path:
    sys.path.append(utils_dir)
from uc_state import add

# Register the deployed pipeline under the "pipelines" key for this catalog.
# NOTE(review): presumably uc_state persists this so later stages or teardown
# can find the pipeline -- confirm against utils/uc_state.
add(CATALOG, "pipelines", {"pipeline_id": pipeline_id, "name": PIPELINE_NAME})

##### Trigger the pipeline and wait for completion

In [None]:
import time

# Seconds to wait between status polls.
POLL_INTERVAL_SECONDS = 15

update = w.pipelines.start_update(pipeline_id=pipeline_id)
update_id = update.update_id
print(f"Started pipeline update: {update_id}")

# Block until the update started above reaches a terminal state.
# The status is fetched for this specific update_id rather than read from the
# pipeline's latest_updates list: if the new update is not yet listed first,
# the list's head is an earlier run whose COMPLETED/FAILED state would be
# mistaken for the result of this one.
while True:
    update_info = w.pipelines.get_update(
        pipeline_id=pipeline_id, update_id=update_id
    ).update
    # Treat a response without update details as "not started yet".
    state_str = str(update_info.state) if update_info else "STARTING"
    if "COMPLETED" in state_str:
        print(f"Pipeline finished: {state_str}")
        break
    if "FAILED" in state_str:
        raise RuntimeError(f"Pipeline failed: {state_str}")
    if "CANCELED" in state_str:
        raise RuntimeError(f"Pipeline canceled: {state_str}")
    print(f"  Pipeline state: {state_str}...")
    time.sleep(POLL_INTERVAL_SECONDS)

print("\u2705 Menu pipeline stage complete")