# Driver Notebook

For more information, check out how to [author and deploy an MCP tool-calling OpenAI Responses API agent](https://docs.databricks.com/aws/en/notebooks/source/generative-ai/openai-mcp-tool-calling-agent.html).

## Setup
This repo is designed to run in a local IDE with Databricks Connect enabled.

In [None]:
from databricks.connect import DatabricksSession

# Configure a Databricks Connect session builder for serverless compute, then
# get (or create) the session used by the rest of the notebook.
session_builder = DatabricksSession.builder.remote(serverless=True)
spark = session_builder.getOrCreate()

In [None]:
import os
import mlflow

# TODO: update ./config.yaml with your own values before running this cell.

# Load the development config and pull out its two top-level sections.
configs = mlflow.models.ModelConfig(development_config="./config.yaml")
databricks_config = configs.get("databricks")
agent_config = configs.get("agent")

# Workspace, Unity Catalog, and agent/tool settings used by later cells.
CATALOG = databricks_config["catalog"]
SCHEMA = databricks_config["schema"]
UC_MODEL = databricks_config["model"]
WORKSPACE_URL = databricks_config["workspace_url"]
SQL_WAREHOUSE_ID = databricks_config["sql_warehouse_id"]
UC_TABLES = agent_config["tools"]["tables"]
UC_FUNCTIONS = [
    uc_func["function_name"] for uc_func in agent_config["tools"]["uc_functions"]
]
UC_CONNECTION = agent_config["tools"]["uc_connection"]
LLM_ENDPOINT_NAME = agent_config["llm"]["endpoint_name"]
VECTOR_SEARCH_INDEX = agent_config["tools"]["vector_search"]["index_name"]
GENIE_SPACE_ID = agent_config["tools"]["genie"]["space_id"]
MLFLOW_EXPERIMENT_ID = databricks_config["mlflow_experiment_id"]
AGENT_NAME = agent_config["name"]

# Secret scope/key that hold the Databricks personal access token. Indexing
# (instead of chained .get() calls) makes a missing entry fail right here with
# a KeyError naming the absent key, rather than an AttributeError on None or a
# None value being handed to the secrets lookup below.
pat_config = databricks_config["databricks_pat"]
SECRET_SCOPE_NAME = pat_config["secret_scope_name"]
SECRET_KEY_NAME = pat_config["secret_key_name"]

# Set up authentication for Databricks services
# NOTE(review): `dbutils` is not defined anywhere in this notebook; presumably
# it is injected by the notebook environment — confirm for local IDE runs.
databricks_token = dbutils.secrets.get(scope=SECRET_SCOPE_NAME, key=SECRET_KEY_NAME)

# Export the workspace URL and token through environment variables.
os.environ["DB_MODEL_SERVING_HOST_URL"] = WORKSPACE_URL
os.environ["DATABRICKS_GENIE_PAT"] = databricks_token
os.environ["DATABRICKS_TOKEN"] = databricks_token

In [None]:
import mlflow

# Use the Unity Catalog model registry and the Databricks tracking server.
mlflow.set_registry_uri("databricks-uc")
mlflow.set_tracking_uri("databricks")

# Point MLflow at the configured experiment. A "does not exist" REST error is
# reported with a message instead of being raised; any other REST error
# propagates unchanged.
try:
    experiment = mlflow.get_experiment(experiment_id=MLFLOW_EXPERIMENT_ID)
    mlflow.set_experiment(experiment_id=MLFLOW_EXPERIMENT_ID)
    print(f"Set to existing experiment: {MLFLOW_EXPERIMENT_ID}")
except mlflow.exceptions.RestException as e:
    if "does not exist" not in str(e):
        # Bare raise re-raises the active exception with its original traceback.
        raise
    print("Experiment not found. Must create one first.")

## Load & test agent

Make sure you go to the MLflow experiment to look at trace data as you develop & debug the agent.

In [None]:
from agent import AGENT

In [None]:
# Prompts for smoke-testing the agent. Only the uncommented entry is active;
# the first element of the list is the one sent to the agent.
sample_questions = [
    # "Compare and contrast the annual net income growth in the past 10 years between AAPL and AXP",
    # "What risks face AAPL in 2022 and 2023?",
    "What was Apple's stock price on 10/2/2025?"
]

# Request payload: a single user message carrying the first sample question.
input_example = {"input": [{"role": "user", "content": sample_questions[0]}]}

In [None]:
# Run one non-streaming prediction and print the response with None-valued
# fields left out of the dump.
result = AGENT.predict(input_example)
result_payload = result.model_dump(exclude_none=True)
print(result_payload)

In [None]:
# Send the same request through the streaming API and print each event,
# followed by a separator line.
for stream_event in AGENT.predict_stream(input_example):
    print(stream_event, "-----------\n")

## Log the agent as an MLflow model

In [None]:
from mlflow.models.resources import (
    DatabricksUCConnection,
    DatabricksFunction,
    DatabricksGenieSpace,
    DatabricksSQLWarehouse,
    DatabricksServingEndpoint,
    DatabricksTable,
    DatabricksVectorSearchIndex,
)

# TODO: Define your resources here
# Fixed resources: the LLM serving endpoint, Genie space, SQL warehouse,
# vector search index, and UC connection named in the config.
resources = [
    DatabricksServingEndpoint(endpoint_name=LLM_ENDPOINT_NAME),
    DatabricksGenieSpace(genie_space_id=GENIE_SPACE_ID),
    DatabricksSQLWarehouse(warehouse_id=SQL_WAREHOUSE_ID),
    DatabricksVectorSearchIndex(index_name=VECTOR_SEARCH_INDEX),
    DatabricksUCConnection(connection_name=UC_CONNECTION),
]

# Then one entry per configured UC function, followed by one per UC table.
resources.extend(DatabricksFunction(function_name=fn) for fn in UC_FUNCTIONS)
resources.extend(DatabricksTable(table_name=tbl) for tbl in UC_TABLES)

print(resources)

In [None]:
# Resolve the agent script and its config against the current working
# directory, then log the agent as a pyfunc model inside a new MLflow run.
working_dir = os.getcwd()
agent_script_path = os.path.join(working_dir, "agent.py")
agent_config_path = os.path.join(working_dir, "config.yaml")

with mlflow.start_run():
    logged_agent_info = mlflow.pyfunc.log_model(
        name="agent",
        python_model=agent_script_path,
        model_config=agent_config_path,
        input_example=input_example,
        resources=resources,
        pip_requirements=["-r ../requirements.txt"],
    )

## Evaluate the agent with MLflow 3

In [None]:
import json

# Location of the evaluation questions, relative to the working directory.
evals_json_path = "./evals/eval-questions.json"

# Read the file as UTF-8 explicitly so the result does not depend on the
# platform's default locale encoding.
with open(evals_json_path, encoding="utf-8") as f:
    eval_dataset_list = json.load(f)

In [None]:
import mlflow
from mlflow.genai.scorers import (
    Correctness,
    RelevanceToQuery,
    RetrievalGroundedness,
    RetrievalRelevance,
    Safety,
)

# To avoid concurrency issue
# os.environ["MLFLOW_GENAI_EVAL_MAX_WORKERS"] = "1"

# Scorers applied to the evaluation run; add more scorers here if they're
# applicable.
eval_scorers = [
    Correctness(),
    RelevanceToQuery(),
    Safety(),
    RetrievalGroundedness(),
    RetrievalRelevance(),
]

# NOTE(review): the lambda parameter is named `input` (shadowing the builtin),
# presumably because evaluate() passes each record's inputs by keyword —
# confirm against the eval dataset schema before renaming it.
eval_results = mlflow.genai.evaluate(
    data=eval_dataset_list,
    predict_fn=lambda input: AGENT.predict({"input": input}),
    scorers=eval_scorers,
)

## Run pre-deployment agent validation

In [None]:
# Validate the logged model before registering it: send a minimal chat request
# through mlflow.models.predict using the uv environment manager.
validation_model_uri = f"runs:/{logged_agent_info.run_id}/agent"
validation_request = {"input": [{"role": "user", "content": "Hello!"}]}

mlflow.models.predict(
    model_uri=validation_model_uri,
    input_data=validation_request,
    env_manager="uv",
)

## Register the model to Unity Catalog

In [None]:
# Fully qualified model name in the form <catalog>.<schema>.<model>.
FULL_UC_MODEL_NAME = f"{CATALOG}.{SCHEMA}.{UC_MODEL}"

# Register the logged agent under that name and keep the returned
# registration info for later cells.
uc_registered_model_info = mlflow.register_model(
    logged_agent_info.model_uri,
    FULL_UC_MODEL_NAME,
)

## Deploy the agent

In [None]:
from databricks import agents

# Secret reference of the form {{secrets/<scope>/<key>}}. The doubled braces in
# the f-string emit literal "{{" and "}}", so the token value itself is never
# embedded here (presumably it is resolved by the serving endpoint — confirm).
genie_pat_secret_ref = f"{{{{secrets/{SECRET_SCOPE_NAME}/{SECRET_KEY_NAME}}}}}"

# Deploy the registered model version, passing the Genie PAT as an environment
# variable that points at the secret reference.
agents.deploy(
    FULL_UC_MODEL_NAME,
    uc_registered_model_info.version,
    tags={"endpointSource": "docs"},
    environment_vars={"DATABRICKS_GENIE_PAT": genie_pat_secret_ref},
)

## Next steps

* Test the agent endpoint via Playground or the Review App
* Continue to iterate on the agent
* Use the full Agent Evaluation Suite on MLflow 3