# Driver Notebook

For more information, check out how to [author and deploy an MCP tool-calling OpenAI Responses API agent](https://docs.databricks.com/aws/en/notebooks/source/generative-ai/openai-mcp-tool-calling-agent.html).

## Setup
This repo is designed to run in a local IDE with Databricks Connect enabled.

In [1]:
from databricks.connect import DatabricksSession

# Get (or create) the Databricks Connect session, requesting serverless compute
# through `remote(serverless=True)`.
spark = (
    DatabricksSession.builder
    .remote(serverless=True)
    .getOrCreate()
)

In [2]:
import os
import mlflow

# TODO: update ./config.yaml with your own values before running this cell.

# Load the development config and pull out its two top-level sections.
configs = mlflow.models.ModelConfig(development_config="./config.yaml")
databricks_config = configs.get("databricks")
agent_config = configs.get("agent")

# Settings read from the `databricks` section.
CATALOG = databricks_config["catalog"]
SCHEMA = databricks_config["schema"]
UC_MODEL = databricks_config["model"]
WORKSPACE_URL = databricks_config["workspace_url"]
SQL_WAREHOUSE_ID = databricks_config["sql_warehouse_id"]
MLFLOW_EXPERIMENT_ID = databricks_config["mlflow_experiment_id"]

# Settings read from the `agent` section.
AGENT_NAME = agent_config["name"]
LLM_ENDPOINT_NAME = agent_config["llm"]["endpoint_name"]
UC_TABLES = agent_config["tools"]["tables"]
UC_FUNCTIONS = [
    uc_func["function_name"] for uc_func in agent_config["tools"]["uc_functions"]
]
UC_CONNECTION = agent_config["tools"]["uc_connection"]
VECTOR_SEARCH_INDEX = agent_config["tools"]["vector_search"]["index_name"]
GENIE_SPACE_ID = agent_config["tools"]["genie"]["space_id"]

# Subscript instead of chained `.get()`: a missing `databricks_pat` section or
# key now fails right here with a KeyError naming it, rather than an
# AttributeError on None (or a None scope/key passed to the secrets lookup).
pat_config = databricks_config["databricks_pat"]
SECRET_SCOPE_NAME = pat_config["secret_scope_name"]
SECRET_KEY_NAME = pat_config["secret_key_name"]

# Set up authentication for Databricks services
# NOTE(review): `dbutils` is not defined in any cell of this notebook; it is
# presumably injected by the IDE/runtime. Confirm it exists on a fresh kernel.
databricks_token = dbutils.secrets.get(scope=SECRET_SCOPE_NAME, key=SECRET_KEY_NAME)

# The same secret value is exported under both token variable names.
os.environ["DB_MODEL_SERVING_HOST_URL"] = WORKSPACE_URL
os.environ["DATABRICKS_GENIE_PAT"] = databricks_token
os.environ["DATABRICKS_TOKEN"] = databricks_token

In [3]:
import mlflow

# Use Unity Catalog as the model registry and the Databricks workspace for tracking.
mlflow.set_registry_uri("databricks-uc")
mlflow.set_tracking_uri("databricks")

try:
    # The lookup's return value is not needed; it runs first so that a bad ID
    # fails before set_experiment (presumably with the RestException handled
    # below; confirm against the MLflow version in use).
    mlflow.get_experiment(experiment_id=MLFLOW_EXPERIMENT_ID)
    mlflow.set_experiment(experiment_id=MLFLOW_EXPERIMENT_ID)
    print(f"Set to existing experiment: {MLFLOW_EXPERIMENT_ID}")
except mlflow.exceptions.RestException as e:
    if "does not exist" not in str(e):
        # Unrelated failure: propagate it with the original traceback.
        raise
    # Best-effort: report and continue. No experiment is set by this cell in
    # this case, so create one and update the config before going further.
    print("Experiment not found. Must create one first.")

Set to existing experiment: 3013595711630322


## Load & test agent

Make sure you go to the MLflow experiment to look at trace data as you develop & debug the agent.

In [4]:
from agent import AGENT

In [9]:
# Candidate questions for smoke-testing the agent. Only the uncommented entry is
# active; `input_example` below always uses the first active one.
sample_questions = [
    # "Compare and contrast the annual net income growth in the past 10 years between AAPL and AXP",
    "What risks face AAPL in 2022 and 2023?",  # ticker corrected (was misspelled "APPL")
    # "What was Apple's stock price on 10/2/2025?"
]

# Request payload with a single user message. It is passed to AGENT.predict /
# AGENT.predict_stream and logged as the model's input example.
input_example = {
    "input": [
        {
            "role": "user",
            "content": sample_questions[0],
        }
    ]
}

In [10]:
# Run one non-streaming prediction, then print it as a dict dumped with
# `exclude_none=True`.
result = AGENT.predict(input_example)
result_payload = result.model_dump(exclude_none=True)
print(result_payload)

{'object': 'response', 'output': [{'type': 'message', 'id': 'msg_bdrk_01U9BQXRKSN6FyyocvhGJ6f1', 'content': [{'text': "I'll search for information about risks facing Apple (AAPL) in 2022 and 2023 using the financial documents database.", 'type': 'output_text'}], 'role': 'assistant'}, {'type': 'function_call', 'id': 'msg_bdrk_01U9BQXRKSN6FyyocvhGJ6f1', 'call_id': 'toolu_bdrk_0135NqbRx8fy2Zi53ZFbtxJj', 'name': 'users__david_huang__sec_docs_chunks_index', 'arguments': '{"query": "AAPL Apple risks 2022 2023 risk factors challenges"}'}, {'type': 'function_call_output', 'call_id': 'toolu_bdrk_0135NqbRx8fy2Zi53ZFbtxJj', 'output': '[{"score":"","doc_content":"COMPANY: APPLE\\nDOCUMENT TYPE: 10k\\nDOCUMENT YEAR: 2022\\nDOC SUMMARY:\\nThis document chunk is from the \\"Risk Factors\\" section (Item 1A) of Apple Inc.\'s 2022 Form 10-K filing. The section discusses various macroeconomic and industry risks that could materially and adversely affect the company\'s business, results of operations, fi

In [None]:
# Send the same request through the streaming API, printing every event
# followed by a dashed separator.
separator = "-----------\n"
for event in AGENT.predict_stream(input_example):
    print(event, separator)

## Log the agent as an MLflow model

In [13]:
from mlflow.models.resources import (
    DatabricksUCConnection,
    DatabricksFunction,
    DatabricksGenieSpace,
    DatabricksSQLWarehouse,
    DatabricksServingEndpoint,
    DatabricksTable,
    DatabricksVectorSearchIndex,
)

# TODO: Define your resources here
# Fixed resources come first: LLM endpoint, Genie space, SQL warehouse,
# vector search index and UC connection.
resources = [
    DatabricksServingEndpoint(endpoint_name=LLM_ENDPOINT_NAME),
    DatabricksGenieSpace(genie_space_id=GENIE_SPACE_ID),
    DatabricksSQLWarehouse(warehouse_id=SQL_WAREHOUSE_ID),
    DatabricksVectorSearchIndex(index_name=VECTOR_SEARCH_INDEX),
    DatabricksUCConnection(connection_name=UC_CONNECTION),
]

# Then one resource per configured UC function, followed by one per UC table.
resources.extend(
    DatabricksFunction(function_name=function_name) for function_name in UC_FUNCTIONS
)
resources.extend(DatabricksTable(table_name=table_name) for table_name in UC_TABLES)

# Print every resource name as a quick check of what will be declared.
for resource in resources:
    print(resource.name)

databricks-claude-sonnet-4-5
01f070c3397d14b49986083aca54f0c2
148ccb90800933a1
users.david_huang.sec_docs_chunks_index
you-com-api
system.ai.python_exec
users.david_huang.search_web
users.david_huang.balance_sheet
users.david_huang.income_statement


In [14]:
# Resolve the agent code and its config against the current working directory.
working_dir = os.getcwd()
agent_code_path = os.path.join(working_dir, "agent.py")
agent_config_path = os.path.join(working_dir, "config.yaml")

# Log the agent from its source file inside a new MLflow run; the returned
# info object is used by the validation and registration cells.
with mlflow.start_run():
    logged_agent_info = mlflow.pyfunc.log_model(
        name="agent",
        python_model=agent_code_path,
        model_config=agent_config_path,
        input_example=input_example,
        resources=resources,
        # NOTE(review): the `-r` path is presumably resolved relative to the
        # working directory; confirm before running from elsewhere.
        pip_requirements=["-r ../requirements.txt"],
    )

{"ts": "2025-10-26 11:56:29.506", "level": "ERROR", "logger": "pyspark.sql.connect.logging", "msg": "GRPC Error received", "context": {}, "exception": {"class": "_InactiveRpcError", "msg": "<_InactiveRpcError of RPC that terminated with:\n\tstatus = StatusCode.FAILED_PRECONDITION\n\tdetails = \"BAD_REQUEST: session_id is no longer usable. Generate a new session_id by detaching and reattaching the compute and then try again [sessionId=650e7389-8c7b-43db-a245-e6b08175dd15, reason=INACTIVITY_TIMEOUT]. (requestId=4b70c375-dc3d-44a5-a96e-024c13a52062)\"\n\tdebug_error_string = \"UNKNOWN:Error received from peer  {grpc_message:\"BAD_REQUEST: session_id is no longer usable. Generate a new session_id by detaching and reattaching the compute and then try again [sessionId=650e7389-8c7b-43db-a245-e6b08175dd15, reason=INACTIVITY_TIMEOUT]. (requestId=4b70c375-dc3d-44a5-a96e-024c13a52062)\", grpc_status:9}\"\n>", "stacktrace": [{"class": null, "method": "config", "file": "/Users/david.huang/Document

🏃 View run dapper-sow-311 at: https://adb-984752964297111.11.azuredatabricks.net/ml/experiments/3013595711630322/runs/02c9c3a61ade4831b0b8751014a234d8
🧪 View experiment at: https://adb-984752964297111.11.azuredatabricks.net/ml/experiments/3013595711630322


## Evaluate the agent with MLflow 3

In [15]:
import json

# Evaluation questions file; the path is relative to the working directory.
evals_json_path = "./evals/eval-questions.json"

# Decode explicitly as UTF-8 rather than relying on the platform's default
# encoding, so the file loads the same way on every OS.
with open(evals_json_path, "r", encoding="utf-8") as f:
    eval_dataset_list = json.load(f)

In [16]:
import mlflow
from mlflow.genai.scorers import (
    Correctness,
    RelevanceToQuery,
    RetrievalGroundedness,
    RetrievalRelevance,
    Safety,
)

# To avoid concurrency issue
# os.environ["MLFLOW_GENAI_EVAL_MAX_WORKERS"] = "1"

# Scorers applied during evaluation; add more scorers here if they're applicable.
eval_scorers = [
    Correctness(),
    RelevanceToQuery(),
    Safety(),
    RetrievalGroundedness(),
    RetrievalRelevance(),
]

# NOTE(review): the lambda parameter `input` shadows the builtin. It presumably
# has to match the key used inside each row of the eval file, so confirm
# against ./evals/eval-questions.json before renaming it.
eval_results = mlflow.genai.evaluate(
    data=eval_dataset_list,
    predict_fn=lambda input: AGENT.predict({"input": input}),
    scorers=eval_scorers,
)

2025/10/26 12:20:39 INFO mlflow.models.evaluation.utils.trace: Auto tracing is temporarily enabled during the model evaluation for computing some metrics and debugging. To disable tracing, call `mlflow.autolog(disable=True)`.
2025/10/26 12:20:39 INFO mlflow.genai.utils.data_validation: Testing model prediction with the first sample in the dataset.
  from .autonotebook import tqdm as notebook_tqdm
Evaluating: 100%|██████████| 4/4 [Elapsed: 00:49, Remaining: 00:00] , Time breakdown=(91.73% predict_fn, 8.27% scorers) 


Evaluation completed.

Metrics and evaluation results can be viewed from the MLflow run page.
To compare evaluation results across runs, view the "Evaluations" tab of the experiment.

Get aggregate metrics: `result.metrics`.
Get per-row evaluation results: `result.tables['eval_results']`.
`result` is the `EvaluationResult` object returned by `mlflow.evaluate`.

🏃 View run zealous-perch-922 at: https://adb-984752964297111.11.azuredatabricks.net/ml/experiments/3013595711630322/runs/44dbbd3fe07c4ad39decdc8e18f83d38
🧪 View experiment at: https://adb-984752964297111.11.azuredatabricks.net/ml/experiments/3013595711630322


## Run pre-deployment agent validation

In [17]:
# Pre-deployment check: load the model logged in the run above and send it a
# trivial request, using the `uv` environment manager.
validation_model_uri = f"runs:/{logged_agent_info.run_id}/agent"
validation_request = {"input": [{"role": "user", "content": "Hello!"}]}

mlflow.models.predict(
    model_uri=validation_model_uri,
    input_data=validation_request,
    env_manager="uv",
)

2025/10/26 12:23:16 INFO mlflow.models.flavor_backend_registry: Selected backend for flavor 'python_function'
2025/10/26 12:23:21 INFO mlflow.utils.virtualenv: Creating a new environment in /var/folders/9q/3l7fvp612q78vljxhnjsmx9r0000gp/T/tmp4x00tp5s/envs/virtualenv_envs/mlflow-985f213607a808827069c2c46086fc7b1f73c1f2 with python version 3.12.3 using uv
Using CPython 3.12.3 interpreter at: [36m/Library/Frameworks/Python.framework/Versions/3.12/bin/python3.12[39m
Creating virtual environment at: [36m/var/folders/9q/3l7fvp612q78vljxhnjsmx9r0000gp/T/tmp4x00tp5s/envs/virtualenv_envs/mlflow-985f213607a808827069c2c46086fc7b1f73c1f2[39m
Activate with: [32msource /var/folders/9q/3l7fvp612q78vljxhnjsmx9r0000gp/T/tmp4x00tp5s/envs/virtualenv_envs/mlflow-985f213607a808827069c2c46086fc7b1f73c1f2/bin/activate[39m
2025/10/26 12:23:21 INFO mlflow.utils.virtualenv: Installing dependencies
[2mUsing Python 3.12.3 environment at: /private/var/folders/9q/3l7fvp612q78vljxhnjsmx9r0000gp/T/tmp4x00tp5s/

{"object": "response", "output": [{"type": "message", "id": "msg_bdrk_014UG8pTW9ojs31enTkoXE83", "content": [{"text": "Hello! \ud83d\udc4b I'm here to help you analyze data and answer questions. I have access to several powerful tools:\n\n- **Genie**: A natural language query system for structured data analysis\n- **Vector search**: For searching through financial documents and company filings\n- **Python execution**: For advanced calculations and data processing\n- **Web search**: For finding current information when needed\n\nI can help you with things like:\n- Financial data analysis and metrics\n- Company information and comparisons\n- Document research from SEC filings and reports\n- Complex calculations and data processing\n- Current market information\n\nWhat would you like to explore today?", "type": "output_text"}], "role": "assistant"}]}

2025/10/26 12:24:00 INFO mlflow.tracing.export.async_export_queue: Flushing the async trace logging queue before program exit. This may take a while...


## Register the model to Unity Catalog

In [18]:
# Three-part model name built from the config: <catalog>.<schema>.<model>.
FULL_UC_MODEL_NAME = "{}.{}.{}".format(CATALOG, SCHEMA, UC_MODEL)

# Register the logged agent under that name; the returned info supplies the
# version passed to the deploy call.
uc_registered_model_info = mlflow.register_model(
    name=FULL_UC_MODEL_NAME,
    model_uri=logged_agent_info.model_uri,
)

Registered model 'users.david_huang.oai_mcp_agent' already exists. Creating a new version of this model...
Created version '7' of model 'users.david_huang.oai_mcp_agent'.


## Deploy the agent

In [19]:
from databricks import agents

# Secret reference of the form {{secrets/<scope>/<key>}}. The endpoint receives
# this reference string, not the token value itself.
genie_pat_secret_ref = "{{{{secrets/{}/{}}}}}".format(
    SECRET_SCOPE_NAME, SECRET_KEY_NAME
)

# Deploy the registered model version. The call is left as the cell's last
# expression so its return value is displayed.
agents.deploy(
    FULL_UC_MODEL_NAME,
    uc_registered_model_info.version,
    tags={"endpointSource": "docs"},
    environment_vars={"DATABRICKS_GENIE_PAT": genie_pat_secret_ref},
)


    Deployment of users.david_huang.oai_mcp_agent version 7 initiated.  This can take up to 15 minutes and the Review App & Query Endpoint will not work until this deployment finishes.

    View status: https://adb-984752964297111.11.azuredatabricks.net/ml/endpoints/agents_users-david_huang-oai_mcp_agent
    Review App: https://adb-984752964297111.11.azuredatabricks.net/ml/review-v2/808465cb7b0e47659b87e1f552144fde/chat
    Monitor: https://adb-984752964297111.11.azuredatabricks.net/ml/experiments/3013595711630322?compareRunsMode=TRACES

You can refer back to the links above from the endpoint detail page at https://adb-984752964297111.11.azuredatabricks.net/ml/endpoints/agents_users-david_huang-oai_mcp_agent.


Deployment(model_name='users.david_huang.oai_mcp_agent', model_version='7', endpoint_name='agents_users-david_huang-oai_mcp_agent', served_entity_name='users-david_huang-oai_mcp_agent_7', query_endpoint='https://adb-984752964297111.11.azuredatabricks.net/serving-endpoints/agents_users-david_huang-oai_mcp_agent/served-models/users-david_huang-oai_mcp_agent_7/invocations', endpoint_url='https://adb-984752964297111.11.azuredatabricks.net/ml/endpoints/agents_users-david_huang-oai_mcp_agent', review_app_url='https://adb-984752964297111.11.azuredatabricks.net/ml/review-v2/808465cb7b0e47659b87e1f552144fde/chat')

## Next steps

* Test the agent endpoint via Playground or the Review App
* Continue to iterate on the agent
* Use the full Agent Evaluation Suite on MLflow 3