### Inspection Knowledge Agent

Creates an Agent Bricks Knowledge Assistant over the food safety inspection
report PDFs stored in the Unity Catalog volume. The Knowledge Assistant uses
Databricks' Instructed Retriever to answer compliance and safety questions
with citations from the original inspection documents.

In [None]:
%pip install --upgrade databricks-sdk
# Restart the Python interpreter so the freshly upgraded databricks-sdk is the
# version picked up by the imports in the following cells.
dbutils.library.restartPython()

In [None]:
# Notebook parameters, read from Databricks widgets.
# CATALOG is used below to build the volume path, the agent name and the
# uc_state table reference.
CATALOG = dbutils.widgets.get("CATALOG")
# ENDPOINT_NAME is sent in the Knowledge Assistant request body and is the key
# used to find this agent's entry in uc_state.
ENDPOINT_NAME = dbutils.widgets.get("INSPECTION_KNOWLEDGE_ENDPOINT_NAME")

In [None]:
from databricks.sdk import WorkspaceClient
import json

# Workspace client; its low-level api_client issues the raw REST calls below.
w = WorkspaceClient()

# Names and paths derived from the target catalog.
AGENT_NAME = f"{CATALOG}-inspection-knowledge"
VOLUME_PATH = f"/Volumes/{CATALOG}/food_safety/reports"
API_BASE = "/api/2.0/knowledge-assistants"

# Free-text parts of the request, kept apart so the payload shape stays readable.
_AGENT_DESCRIPTION = (
    "Answers questions about food safety inspections at Caspers Kitchens "
    "ghost kitchen locations. Covers inspection scores, violations, "
    "corrective actions, and compliance status across all 4 locations."
)
_SOURCE_DESCRIPTION = (
    "Food safety inspection report PDFs for 4 ghost kitchen locations "
    "(San Francisco, Silicon Valley, Bellevue, Chicago). Each PDF contains "
    "the full inspection report with facility information, overall score, "
    "letter grade, violation details, corrective actions, and follow-up status."
)
_AGENT_INSTRUCTIONS = (
    "You are a food safety compliance assistant for Caspers Kitchens. "
    "Always cite the specific inspection report (location and date) when answering. "
    "Be precise about violation severities (critical, major, minor), corrective "
    "actions, and deadlines. Flag any critical violations prominently."
)

# The only knowledge source: the inspection report files under VOLUME_PATH.
_REPORTS_SOURCE = {
    "files_source": {
        "name": "inspection_reports",
        "type": "files",
        "files": {"path": VOLUME_PATH},
        "description": _SOURCE_DESCRIPTION,
    },
}

# Request body shared by the create (POST) and update (PUT) calls.
KA_BODY = {
    "name": AGENT_NAME,
    "description": _AGENT_DESCRIPTION,
    "endpoint_name": ENDPOINT_NAME,
    "knowledge_sources": [_REPORTS_SOURCE],
    "instructions": _AGENT_INSTRUCTIONS,
}

def find_existing_id():
    """Look up the agent ID previously recorded in uc_state for this endpoint.

    Scans the ``endpoints`` rows of ``{CATALOG}._internal_state.resources``
    from newest to oldest and returns the ``agent_id`` of the first row whose
    ``endpoint_name`` equals ``ENDPOINT_NAME`` and that actually carries an
    agent ID.

    The lookup is best-effort: if the query itself fails, the reason is
    printed and ``None`` is returned so the caller can fall back to creating
    the agent. Rows whose ``resource_data`` cannot be parsed are skipped
    instead of aborting the whole scan.

    Returns:
        The stored agent ID, or ``None`` when no usable entry is found.
    """
    try:
        rows = spark.sql(f"""
            SELECT resource_data FROM {CATALOG}._internal_state.resources
            WHERE resource_type = 'endpoints'
            ORDER BY created_at DESC
        """).collect()
    except Exception as exc:
        # Best-effort by design (presumably the state table may not exist on a
        # first run), but say why instead of failing silently.
        print(f"  uc_state lookup skipped: {type(exc).__name__}: {exc}")
        return None

    for row in rows:
        try:
            info = json.loads(row.resource_data)
        except (TypeError, ValueError):
            # One unreadable row must not hide older, valid entries.
            continue
        if not isinstance(info, dict):
            continue
        if info.get("endpoint_name") == ENDPOINT_NAME and info.get("agent_id"):
            return info["agent_id"]
    return None

def try_get(agent_ref):
    """GET the agent by ID or name, return the response or None.

    Args:
        agent_ref: Agent ID or agent name; appended to ``API_BASE`` to form
            the request path.

    Returns:
        The response of the GET call, or ``None`` if the call raised.
    """
    try:
        return w.api_client.do("GET", f"{API_BASE}/{agent_ref}")
    except Exception as e:
        # Deliberately best-effort: callers treat None as "not found" and fall
        # back to another path. Print the reason so an auth or network problem
        # is not mistaken for a missing agent.
        print(f"  GET {agent_ref} failed: {type(e).__name__}: {e}")
        return None

# Create-or-update the Knowledge Assistant so the notebook can be re-run safely.
# Results used by the later cells:
#   agent_id      - ID of the agent, or its name when no ID could be determined
#   needs_polling - True only when a brand-new agent was created in this run
existing_id = find_existing_id()
agent_id = None
needs_polling = True

# Path 1: found ID in uc_state — verify and update
if existing_id:
    info = try_get(existing_id)
    if info:
        print(f"Inspection Knowledge Assistant exists ({existing_id}), updating...")
        w.api_client.do("PUT", f"{API_BASE}/{existing_id}", body=KA_BODY)
        agent_id = existing_id
        needs_polling = False
        print(f"\u2705 Updated Inspection Knowledge Assistant: {agent_id}")

# Path 2: no ID in state (or GET failed) — try to create
if not agent_id:
    try:
        ka = w.api_client.do("POST", API_BASE, body=KA_BODY)
    except Exception as e:
        err = str(e)
        # Only an "already exists" conflict is recoverable; anything else is a
        # real failure and must surface.
        if not ("already exists" in err.lower() or "ALREADY_EXISTS" in err):
            raise

        # The agent is already there, so there is nothing new to wait for.
        needs_polling = False
        info = try_get(AGENT_NAME)
        found_id = (info.get("id") or info.get("agent_id")) if info else None
        if found_id:
            agent_id = found_id
            print(f"Found by name ({agent_id}), updating...")
            w.api_client.do("PUT", f"{API_BASE}/{agent_id}", body=KA_BODY)
            print(f"\u2705 Updated Inspection Knowledge Assistant: {agent_id}")
        else:
            # Lookup by name failed or returned no ID: refer to the agent by name.
            agent_id = AGENT_NAME
            print(f"Agent {AGENT_NAME} already exists and is running. Proceeding.")
    else:
        agent_id = ka.get("id") or ka.get("agent_id")
        if not agent_id:
            # Never carry a None ID into polling or uc_state; fall back to the
            # name, the same reference the "already exists" path uses.
            agent_id = AGENT_NAME
            print(f"Create response had no ID; referring to agent by name: {AGENT_NAME}")
        print(f"\u2705 Created Inspection Knowledge Assistant: {agent_id}")

print(f"   Endpoint: {ENDPOINT_NAME}")

In [None]:
import time

# Wait for a newly created agent's endpoint to report a ready state.
# Skipped entirely when the agent already existed (needs_polling is False).
if needs_polling:
    MAX_WAIT = 300       # upper bound on total waiting time, in seconds
    POLL_INTERVAL = 30   # pause between status checks, in seconds
    READY_STATES = ("ACTIVE", "READY", "ONLINE")
    # Measure against a wall-clock deadline so time spent inside slow or
    # retried GET calls counts toward MAX_WAIT, not just the sleeps.
    deadline = time.monotonic() + MAX_WAIT
    print(f"Checking if Inspection Knowledge Assistant endpoint is ready (max {MAX_WAIT}s)...")

    while True:
        try:
            ka_status = w.api_client.do("GET", f"{API_BASE}/{agent_id}")
            state = ka_status.get("endpoint_status", "")
            print(f"  endpoint_status: {state}")
            if str(state).upper() in READY_STATES:
                print(f"\u2705 Inspection Knowledge Assistant {AGENT_NAME} is READY")
                break
        except Exception as e:
            # A failed status check is not fatal; report it and keep polling.
            print(f"  GET status check failed: {type(e).__name__}: {e}")

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            # Timed out: do not fail the stage, the endpoint may come up later.
            print(f"\u2705 Endpoint may still be provisioning — proceeding.")
            break
        # Never sleep past the deadline.
        time.sleep(min(POLL_INTERVAL, remaining))
else:
    print(f"\u2705 Inspection Knowledge Assistant already running — skipping polling.")

In [None]:
import sys
# Make the shared helpers in ../utils importable from this notebook.
sys.path.append('../utils')
from uc_state import add

# Record the endpoint/agent pair under the "endpoints" resource type; this is
# the entry find_existing_id() looks for on a later run.
state_entry = {"endpoint_name": ENDPOINT_NAME, "agent_id": agent_id}
add(CATALOG, "endpoints", state_entry)
print("\u2705 Inspection Knowledge Agent stage complete")