In [None]:
import os
from databricks.connect import DatabricksSession

# Spark session via Databricks Connect on serverless compute
# (this is what you need when running from a local IDE).
spark = (
    DatabricksSession.builder
    .remote(serverless=True)
    .getOrCreate()
)

# ---------------------------------------------------------------------------
# Notebook settings -- every value marked TODO should be adapted before running
# ---------------------------------------------------------------------------

# TODO: user email
USER = "david.huang@databricks.com"

# TODO: catalog
CATALOG = "users"

# TODO: schema
SCHEMA = "david_huang"

# TODO: ground truth data (with inputs & labels)
GT_TABLE = "lease_docs_short"

# TODO: evaluation dataset name
EVAL_TABLE = "lease_docs_eval_dataset"

# TODO: experiment name & location on Databricks workspace
MLFLOW_EXPERIMENT_NAME = f"/Users/{USER}/client-demos/evals/eval-demo"

# TODO: Databricks hosted LLM
MODEL_SERVING_ENDPOINT = "databricks-claude-3-7-sonnet"

# TODO: workspace URL
MODEL_SERVING_BASE_URL = "https://adb-984752964297111.11.azuredatabricks.net/serving-endpoints/"

In [0]:
# create or set MLflow experiment on Databricks
import mlflow

# Use Unity Catalog as the model registry and the Databricks workspace as the
# tracking server.
mlflow.set_registry_uri("databricks-uc")
mlflow.set_tracking_uri("databricks")

# `mlflow.set_experiment` activates the named experiment and creates it first
# when it does not exist yet, so there is no need to call `create_experiment`
# and inspect the text of the "already exists" error. The lookup beforehand is
# only used to report whether the experiment is new.
experiment_is_new = mlflow.get_experiment_by_name(MLFLOW_EXPERIMENT_NAME) is None

# Bind `experiment_id` on every run (not only on the run that creates the
# experiment) so later cells can rely on it after a fresh-kernel re-run.
experiment_id = mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME).experiment_id

if experiment_is_new:
    print(
        f"Created and set new experiment: {MLFLOW_EXPERIMENT_NAME} (ID: {experiment_id})"
    )
else:
    print(f"Set to existing MLflow experiment: {MLFLOW_EXPERIMENT_NAME}")

In [0]:
import mlflow

# Turn on MLflow's autologging integration for the OpenAI client library.
# NOTE(review): presumably this is what captures the chat-completion calls made
# in `extract_lease_data` below under the active experiment -- confirm.
mlflow.openai.autolog()


# define the LLM extraction function
def extract_lease_data(query: str):
    """Ask the serving endpoint to extract structured lease fields from text.

    Builds an OpenAI client pointed at ``MODEL_SERVING_BASE_URL``, sends
    ``query`` together with a fixed system prompt to the model named by
    ``MODEL_SERVING_ENDPOINT``, and requests output that follows the
    ``lease_agreement_extraction`` JSON schema defined below.

    The API token is read from the ``dhuang`` / ``dbx-token`` secret when
    ``dbutils`` is available. When it is not (nothing in this notebook defines
    it for a local IDE session), the ``DATABRICKS_TOKEN`` environment variable
    is used instead.

    Args:
        query: Unstructured text of a lease agreement.

    Returns:
        The object returned by ``client.chat.completions.create``, unmodified.

    Raises:
        RuntimeError: If ``dbutils`` is unavailable and ``DATABRICKS_TOKEN``
            is not set.
    """
    import os

    from openai import OpenAI

    # `dbutils` is referenced as a bare global; outside an environment that
    # injects it, the lookup raises NameError. Only the name lookup is guarded,
    # so genuine secret-scope errors still propagate unchanged.
    try:
        secrets = dbutils.secrets
    except NameError:
        secrets = None

    if secrets is not None:
        api_key = secrets.get(scope="dhuang", key="dbx-token")
    else:
        api_key = os.environ.get("DATABRICKS_TOKEN")
        if not api_key:
            raise RuntimeError(
                "No API token available: `dbutils` is not defined and the "
                "DATABRICKS_TOKEN environment variable is not set."
            )

    client = OpenAI(
        api_key=api_key,
        base_url=MODEL_SERVING_BASE_URL,
    )

    # Structured-output request: every extracted field is typed as a string.
    response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "lease_agreement_extraction",
            "schema": {
                "type": "object",
                "properties": {
                    "start_date": {"type": "string"},
                    "end_date": {"type": "string"},
                    "leased_space": {"type": "string"},
                    "lessee": {"type": "string"},
                    "lessor": {"type": "string"},
                    "signing_date": {"type": "string"},
                    "term_of_payment": {"type": "string"},
                    "designated_use": {"type": "string"},
                    "extension_period": {"type": "string"},
                    "expiration_date_of_lease": {"type": "string"},
                },
            },
            "strict": True,
        },
    }

    messages = [
        {
            "role": "system",
            "content": (
                "You are an expert at structured data extraction. "
                "You will be given unstructured text from a lease agreement "
                "and you must convert it into the given structure."
            ),
        },
        {
            "role": "user",
            "content": f"Extract from the following lease agreement:\n{query}",
        },
    ]

    response = client.chat.completions.create(
        model=MODEL_SERVING_ENDPOINT,
        messages=messages,
        response_format=response_format,
    )

    return response