# MLflow3 Example: Databricks ServicesをMCPで利用するAgent

**Databricksにおけるモデルコンテキストプロトコル (MCP)**
- https://docs.databricks.com/aws/ja/generative-ai/agent-framework/mcp


In [0]:
%pip install -qq mlflow[databricks]>=3.1 databricks-agents databricks-langchain langgraph databricks-mcp "mcp>=1.9" "databricks-sdk[openai]"
%pip install nest-asyncio

%restart_python

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
import nest_asyncio
nest_asyncio.apply()

## 開発

In [0]:
import asyncio

from mcp.client.streamable_http import streamablehttp_client
from mcp.client.session import ClientSession
from databricks_mcp import DatabricksOAuthClientProvider
from databricks.sdk import WorkspaceClient

workspace_client = WorkspaceClient()
workspace_hostname = workspace_client.config.host
mcp_server_url = f"{workspace_hostname}/api/2.0/mcp/functions/system/ai"
# mcp_server_url = f"{workspace_hostname}/api/2.0/mcp/genie/01f046d343c21b2aa054343c77e91ab9"


# This snippet below uses the Unity Catalog functions MCP server to expose built-in
# AI tools under `system.ai`, like the `system.ai.python_exec` code interpreter tool
async def test_connect_to_server():
    async with streamablehttp_client(
        f"{mcp_server_url}", auth=DatabricksOAuthClientProvider(workspace_client)
    ) as (read_stream, write_stream, _), ClientSession(
        read_stream, write_stream
    ) as session:
        # List and call tools from the MCP server
        await session.initialize()
        tools = await session.list_tools()
        print(
            f"Discovered tools {[t.name for t in tools.tools]} "
            f"from MCP server {mcp_server_url}"
        )
        result = await session.call_tool(
            "system__ai__python_exec", {"code": "print('Hello, world!')"}
        )
        print(
            f"Called system__ai__python_exec tool and got result " f"{result.content}"
        )


if __name__ == "__main__":
    asyncio.run(test_connect_to_server())

Discovered tools ['system__ai__python_exec'] from MCP server https://dbc-f1d46cb2-e7aa.cloud.databricks.com/api/2.0/mcp/functions/system/ai


  + Exception Group Traceback (most recent call last):
  |   File "/databricks/python/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code
  |     exec(code_obj, self.user_global_ns, self.user_ns)
  |   File "/home/spark-784f8e97-a9d8-46ae-a007-58/.ipykernel/5730/command-8794549490817940-4107433531", line 38, in <module>
  |     asyncio.run(test_connect_to_server())
  |   File "/databricks/python/lib/python3.11/site-packages/nest_asyncio.py", line 35, in run
  |     return loop.run_until_complete(task)
  |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/databricks/python/lib/python3.11/site-packages/nest_asyncio.py", line 90, in run_until_complete
  |     return f.result()
  |            ^^^^^^^^^^
  |   File "/usr/lib/python3.11/asyncio/futures.py", line 203, in result
  |     raise self._exception.with_traceback(self._exception_tb)
  |   File "/usr/lib/python3.11/asyncio/tasks.py", line 277, in __step
  |     result = coro.send(None)
  |       



## カスタムResponseAgentの定義

In [0]:
%%writefile agents/simple_responses_agent.py
import mlflow
from mlflow.pyfunc import ResponsesAgent
from mlflow.types.responses import (
    ResponsesAgentRequest,
    ResponsesAgentResponse,
    ResponsesAgentStreamEvent,
)
from mlflow.entities import SpanType
from typing import Generator, Any
from databricks_langchain import ChatDatabricks, UCFunctionToolkit
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import (
    BaseMessage,
    AIMessage,
    ToolMessage,
    AIMessageChunk,
)
from langchain_core.tools import BaseTool, tool
from functools import reduce


# ダミーツール
@tool
def get_weather(city: str) -> str:
    """指定された都市の天気を取得します

    Args:
        city (str): 都市名

    Returns:
        str: 天気情報
    """
    return f"It's always sunny in {city}!"


# Tracingの有効化
mlflow.langchain.autolog()

# Agent
class SimpleResponsesAgent(ResponsesAgent):
    def __init__(self, model, tools: list[BaseTool]):
        """SimpleResponsesAgentの初期化

        Args:
            model: 使用するモデル
            tools (list[BaseTool]): 使用するツールのリスト
        """
        self.model = model
        self.tools = tools

    @mlflow.trace(span_type=SpanType.AGENT)
    def predict(self, request: ResponsesAgentRequest) -> ResponsesAgentResponse:
        """リクエストに基づいて予測を行います

        Args:
            request (ResponsesAgentRequest): 予測リクエスト

        Returns:
            ResponsesAgentResponse: 予測結果のレスポンス
        """
        events = [
            event
            for event in self.predict_stream(request)
            if event.type == "response.output_item.done"
        ]
        outputs = [event.item for event in events]
        # usage総量を計算
        usages = [event.usage for event in events]
        total_usage = {
            "input_tokens_details": {"cached_tokens": 0},
            "output_tokens_details": {"reasoning_tokens": 0},
            **reduce(
                lambda x, y: {k: x.get(k, 0) + y.get(k, 0) for k in set(x) | set(y)},
                usages,
            ),
        }

        return ResponsesAgentResponse(output=outputs, usage=total_usage)

    @mlflow.trace(span_type=SpanType.AGENT)
    def predict_stream(
        self, request: ResponsesAgentRequest
    ) -> Generator[ResponsesAgentStreamEvent, None, None]:
        """ストリームモードで予測を行います

        Args:
            request (ResponsesAgentRequest): 予測リクエスト

        Yields:
            ResponsesAgentStreamEvent: ストリームイベント
        """
        messages, params = self._convert_request_to_lc_request(request)
        react_agent = create_react_agent(self.model.bind(**params), tools=self.tools)

        for chunk in react_agent.stream({"messages": messages}, stream_mode="updates"):
            for value in chunk.values():
                messages = value.get("messages", [])
                responses = self._convert_lc_messages_to_response(messages)
                for response in responses:
                    yield response

    @mlflow.trace(span_type=SpanType.PARSER)
    def _convert_request_to_lc_request(
        self, request: ResponsesAgentRequest
    ) -> (list[BaseMessage], dict[str, Any]):
        """リクエストをLangChainのメッセージおよびパラメータ形式に変換します

        Args:
            request (ResponsesAgentRequest): 変換するリクエスト

        Returns:
            tuple: メッセージリスト、パラメータ辞書
        """

        lc_request = request.model_dump_compat(exclude_none=True)
        custom_inputs = lc_request.pop("custom_inputs", {})

        # custom_inputsは通常のパラメータとして展開
        lc_request.update(custom_inputs)
        messages = lc_request.pop("input")

        # LangChainで有効なパラメータのみに限定
        valid_params = [
            "temperature",
            "max_output_tokens",
            "top_p",
            "top_k",
        ]
        params = {k: v for k, v in lc_request.items() if k in valid_params}
        if "max_output_tokens" in params:
            params["max_tokens"] = params.pop("max_output_tokens")

        return messages, params

    @mlflow.trace(span_type=SpanType.PARSER)
    def _convert_lc_messages_to_response(
        self, messages: list[BaseMessage]
    ) -> list[ResponsesAgentStreamEvent]:
        """LangChainメッセージをレスポンス出力に変換します

        Args:
            messages (list[BaseMessage]): 変換するメッセージリスト

        Returns:
            list[ResponsesAgentStreamEvent]: レスポンス出力のリスト
        """

        def _create_response_agent_stream_event(
            item, usage, metadata
        ) -> ResponsesAgentStreamEvent:
            return ResponsesAgentStreamEvent(
                type="response.output_item.done",
                item=item,
                usage=_convert_lc_usage_to_openai_usage(usage),
                metadata=metadata,
            )

        def _convert_lc_usage_to_openai_usage(usage: dict[str, int]) -> dict[str, int]:
            return {
                "input_tokens": usage.get("prompt_tokens", 0),
                "output_tokens": usage.get("completion_tokens", 0),
                "total_tokens": usage.get("total_tokens", 0),
            }

        outputs = []
        for message in messages:
            if isinstance(message, ToolMessage):
                item = self.create_function_call_output_item(
                    output=message.content,
                    call_id=message.tool_call_id,
                )
                metadata = message.response_metadata
                usage = metadata.pop("usage", {})
                outputs.append(
                    _create_response_agent_stream_event(item, usage, metadata)
                )
            elif (
                isinstance(message, (AIMessage, AIMessageChunk)) and message.tool_calls
            ):
                metadata = message.response_metadata
                usage = metadata.pop("usage", {})
                for tool_call in message.tool_calls:
                    item = self.create_function_call_item(
                        id=message.id,
                        call_id=tool_call.get("id"),
                        name=tool_call.get("name"),
                        arguments=str(tool_call.get("args")),
                    )
                    outputs.append(
                        _create_response_agent_stream_event(item, usage, metadata)
                    )
                    # 1件目のみusageを設定
                    usage = {}
            elif isinstance(message, (AIMessage, AIMessageChunk)):
                item = self.create_text_output_item(
                    text=message.content,
                    id=message.id,
                )
                metadata = message.response_metadata
                usage = metadata.pop("usage", {})
                outputs.append(
                    _create_response_agent_stream_event(item, usage, metadata)
                )
            else:
                raise ValueError(f"Unknown message: {message}")
        return outputs


# Databricksネイティブのllama-3-1-405b-instructを利用
LLM_ENDPOINT_NAME = "databricks-meta-llama-3-1-405b-instruct"
llm = ChatDatabricks(model=LLM_ENDPOINT_NAME)

# 利用可能なツールとして、get_weather関数とUnity Catalogのsystem.ai配下の関数を設定
func_name = f"system.ai.python_exec"
uc_toolkit = UCFunctionToolkit(function_names=[func_name])
LC_TOOLS = [get_weather] + uc_toolkit.tools

# mlflowにエージェントを設定
agent = SimpleResponsesAgent(model=llm, tools=LC_TOOLS)
mlflow.models.set_model(agent)

Overwriting agents/simple_responses_agent.py


動作試験

In [0]:
%restart_python

In [0]:
import os
import sys

sys.path.append(os.path.join(os.getcwd(), "agents"))

from simple_responses_agent import (
    SimpleResponsesAgent,
    ResponsesAgentRequest,
    agent,
)

input = {
    "input": [{"role": "user", "content": "what is the weather in Tokyo?"}],
    # "input": [{"role": "user", "content":"aa"}],    
    "context": {"conversation_id": "123", "user_id": "456"},
    "max_output_tokens": 100,
    "top_p": 0.8,
    "temperature": 0.1,
}
print(agent.predict(ResponsesAgentRequest(**input)))

input = {
    "input": [{"role": "user", "content": "what is 4*3 in python"}],
    "context": {"conversation_id": "123", "user_id": "456"},
    "top_p": 0.0,
}
for event in agent.predict_stream(ResponsesAgentRequest(**input)):
    print(event)



type='response.output_item.done' custom_outputs=None item={'type': 'function_call', 'id': 'run--69c56a83-1f65-4567-951b-7cfda9aa5639-0', 'call_id': 'call_804651ef-d4e5-41cd-ae89-07b5c9358c4f', 'name': 'system__ai__python_exec', 'arguments': "{'code': 'result = 4*3\\\\nprint(result)'}", 'content': 'system__ai__python_exec'} usage={'input_tokens': 956, 'output_tokens': 28, 'total_tokens': 984} metadata={'id': 'chatcmpl_72ed6757-9776-4251-adb3-dcbccbd335d2', 'object': 'chat.completion', 'created': 1749889674, 'model': 'meta-llama-3.1-405b-instruct-081924', 'model_name': 'meta-llama-3.1-405b-instruct-081924'}
type='response.output_item.done' custom_outputs=None item={'type': 'function_call_output', 'call_id': 'call_804651ef-d4e5-41cd-ae89-07b5c9358c4f', 'output': '{"format": "SCALAR", "value": "12\\n"}', 'content': '{"format": "SCALAR", "value": "12\\n"}'} usage={'input_tokens': 0, 'output_tokens': 0, 'total_tokens': 0} metadata={}
type='response.output_item.done' custom_outputs=None item=

Trace(trace_id=tr-1e0257132151447d84b45542b7882451)


## エージェントのロギング・テスト

In [0]:
import mlflow
from mlflow.models.resources import (
    DatabricksServingEndpoint,
    DatabricksFunction,
)
from simple_responses_agent import LLM_ENDPOINT_NAME

resources = [
    DatabricksServingEndpoint(endpoint_name=LLM_ENDPOINT_NAME),
    DatabricksFunction(function_name="system.ai.python_exec"),
]

with mlflow.start_run():
    logged_agent_info = mlflow.pyfunc.log_model(
        python_model="agents/simple_responses_agent.py",
        name="simple_responses_agent",
        pip_requirements=[
            "mlflow>=3.1.0",
            "langgraph==0.4.8",
            "databricks-langchain==0.5.1",
            "unitycatalog-langchain==0.2.0",
            "unitycatalog-ai==0.3.1",
            "protobuf==4.25.8",
        ],
        resources=resources,
    )

🔗 View Logged Model at: https://dbc-f1d46cb2-e7aa.cloud.databricks.com/ml/experiments/e4d5134ffa8248e396d26e1e1d9a67a6/models/m-5a82ded1cce34c1dad1d3d6acf8fcf6a?o=1765512908890676
2025/06/14 09:19:49 INFO mlflow.pyfunc: Predicting on input example to validate output


In [0]:
import mlflow
from pprint import pprint

model_uri = logged_agent_info.model_uri
agent = mlflow.pyfunc.load_model(model_uri)

input = {
    "input": [
        {"role": "user", "content": "what is the weather in Tokyo?"},
    ],
    "max_output_tokens": 1000,
    "top_p": 0.8,
    "temperature": 0.1,
}

for event in agent.predict_stream(input):
    pprint(event.get("item"))
    print("-----------------")

Downloading artifacts:   0%|          | 0/12 [00:00<?, ?it/s]

 - protobuf (current: 4.25.8, required: protobuf==3.20.0)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


{'arguments': "{'city': 'Tokyo'}",
 'call_id': 'call_bceea16a-766b-4fce-b3bc-380c03aee5e7',
 'content': 'get_weather',
 'id': 'run--48d60513-f67b-4dae-91f7-5cb243c3d438-0',
 'name': 'get_weather',
 'type': 'function_call'}
-----------------
{'call_id': 'call_bceea16a-766b-4fce-b3bc-380c03aee5e7',
 'content': "It's always sunny in Tokyo!",
 'output': "It's always sunny in Tokyo!",
 'type': 'function_call_output'}
-----------------
{'content': [{'text': "It's always sunny in Tokyo!", 'type': 'output_text'}],
 'id': 'run--e651d622-aa19-465c-934b-51d3523ac68b-0',
 'role': 'assistant',
 'type': 'message'}
-----------------


Trace(trace_id=tr-3a43f4de77cc02ffca366cabc0db7456)

## デプロイ

In [0]:
import mlflow

catalog = "examples"
schema = "mlflow"
model_name = f"{catalog}.{schema}.simple_responses_agent"

mlflow.set_registry_uri("databricks-uc")

registered_model = mlflow.register_model(model_uri=logged_agent_info.model_uri, name=model_name)

Registered model 'examples.mlflow.simple_responses_agent' already exists. Creating a new version of this model...


Downloading artifacts:   0%|          | 0/12 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/13 [00:00<?, ?it/s]

🔗 Created version '7' of model 'examples.mlflow.simple_responses_agent': https://dbc-f1d46cb2-e7aa.cloud.databricks.com/explore/data/models/examples/mlflow/simple_responses_agent/version/7?o=1765512908890676


In [0]:
from databricks import agents

deployment = agents.deploy(
    registered_model.name, registered_model.version, scale_to_zero=False
)

# Retrieve the query endpoint URL for making API requests
deployment.query_endpoint

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]


    Deployment of examples.mlflow.simple_responses_agent version 7 initiated.  This can take up to 15 minutes and the Review App & Query Endpoint will not work until this deployment finishes.

    View status: https://dbc-f1d46cb2-e7aa.cloud.databricks.com/ml/endpoints/agents_examples-mlflow-simple_responses_agent
    Review App: https://dbc-f1d46cb2-e7aa.cloud.databricks.com/ml/review-v2/chat?endpoint=agents_examples-mlflow-simple_responses_agent
    Monitor: https://dbc-f1d46cb2-e7aa.cloud.databricks.com/ml/experiments/e4d5134ffa8248e396d26e1e1d9a67a6?compareRunsMode=TRACES


'https://dbc-f1d46cb2-e7aa.cloud.databricks.com/serving-endpoints/agents_examples-mlflow-simple_responses_agent/served-models/examples-mlflow-simple_responses_agent_7/invocations'