In [None]:
# ---------------------------------------------------------------------------
# Notebook configuration -- replace every placeholder value before running.
# ---------------------------------------------------------------------------

# TODO: Your user email
USER = "first.last@databricks.com"

# TODO: Your existing catalog and schema
CATALOG = "catalog_name"
SCHEMA = "schema_name"

# TODO: Name of existing ground truth table (with inputs & labels)
GT_TABLE = "lease_docs_short"
# TODO: Name of the eval dataset to create
EVAL_TABLE = "lease_docs_eval_dataset"

# TODO: MLflow experiment full path on Databricks (built from USER above)
MLFLOW_EXPERIMENT_NAME = f"/Users/{USER}/path/to/experiment"

# TODO: Model endpoint and your Databricks workspace URL
MODEL_SERVING_ENDPOINT = "databricks-gpt-5-2"
MODEL_SERVING_BASE_URL = "https://your-workspace.cloud.databricks.com/"

# TODO: Your secret scope and secret key
SECRET_SCOPE = "your_scope"
SECRET_KEY = "your_key"

In [0]:
# create or set MLflow experiment on Databricks
import mlflow

# Registry URI is set to "databricks-uc" and tracking URI to "databricks".
mlflow.set_registry_uri("databricks-uc")
mlflow.set_tracking_uri("databricks")

# Only the create call lives inside the `try`, so the "already exists"
# handler cannot accidentally swallow an error raised by the later
# `set_experiment` / `print` calls.
try:
    experiment_id = mlflow.create_experiment(name=MLFLOW_EXPERIMENT_NAME)
except mlflow.exceptions.RestException as e:
    # Anything other than a name clash is re-raised unchanged; a bare
    # `raise` keeps the original traceback intact.
    if "already exists" not in str(e):
        raise
    print(f"Set to existing MLflow experiment: {MLFLOW_EXPERIMENT_NAME}")
    mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)
else:
    # Creation succeeded: make the new experiment the active one.
    mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)
    print(
        f"Created and set new experiment: {MLFLOW_EXPERIMENT_NAME} (ID: {experiment_id})"
    )

In [None]:
import mlflow
from openai import OpenAI
from pydantic import BaseModel
from typing import Optional

# Turn on MLflow's OpenAI autologging before the OpenAI client below is used.
mlflow.openai.autolog()


# Define Pydantic schema for structured output.
#
# This class is passed as `text_format` to the Responses API call in
# `extract_lease_data`, and that function returns the parsed result.
# Every field is an optional string that defaults to None.
#
# NOTE: documented with `#` comments on purpose. Pydantic copies a model's
# docstring into the generated JSON schema description, so adding one would
# change what is sent along with the request.
class LeaseAgreementExtraction(BaseModel):
    # Dates are typed as plain strings here; no date parsing or validation
    # is performed by this schema.
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    leased_space: Optional[str] = None
    # Parties to the agreement.
    lessee: Optional[str] = None
    lessor: Optional[str] = None
    signing_date: Optional[str] = None
    term_of_payment: Optional[str] = None
    designated_use: Optional[str] = None
    extension_period: Optional[str] = None
    expiration_date_of_lease: Optional[str] = None


# define the LLM extraction function using Responses API
def extract_lease_data(query: str) -> LeaseAgreementExtraction:
    """Extract structured lease fields from raw lease-agreement text.

    Builds an OpenAI client that targets the ``serving-endpoints`` path of
    ``MODEL_SERVING_BASE_URL``, sends ``query`` to ``MODEL_SERVING_ENDPOINT``
    through the Responses API, and requests that the reply be parsed into
    ``LeaseAgreementExtraction``.

    Args:
        query: Unstructured text of a lease agreement.

    Returns:
        The ``output_parsed`` attribute of the response.
        NOTE(review): this is presumably ``None`` when the model produces no
        parsable output -- confirm against the OpenAI SDK before relying on
        a non-None result.
    """
    # Strip trailing slashes first so a workspace URL configured as
    # "https://host/" does not turn into "https://host//serving-endpoints/".
    serving_url = MODEL_SERVING_BASE_URL.rstrip("/") + "/serving-endpoints/"

    client = OpenAI(
        # The credential is read from the secret identified by
        # SECRET_SCOPE / SECRET_KEY; TODO: point those at your own API key.
        api_key=dbutils.secrets.get(scope=SECRET_SCOPE, key=SECRET_KEY),
        base_url=serving_url,
    )

    response = client.responses.parse(
        model=MODEL_SERVING_ENDPOINT,
        input=[
            {
                "role": "system",
                "content": (
                    "You are an expert at structured data extraction. "
                    "You will be given unstructured text from a lease agreement "
                    "and you must convert it into the given structure."
                ),
            },
            {
                "role": "user",
                "content": f"Extract from the following lease agreement:\n{query}",
            },
        ],
        # Ask the SDK to parse the reply into the Pydantic schema.
        text_format=LeaseAgreementExtraction,
    )

    return response.output_parsed