In [0]:
%load_ext autoreload
%autoreload 2
# Enables autoreload; learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload; run %autoreload 0

# Generate the access token

In [0]:
%sh

export CLIENT_ID=c29a0539-c265-4284-8298-20dd41ca124c


  curl --request POST \
  --url "https://accounts.cloud.databricks.com/oidc/accounts/b4b3a7b6-ad44-4f55-9f2b-5cc18881ee38/v1/token" \
  --user "$CLIENT_ID:$CLIENT_SECRET" \
  --data 'grant_type=client_credentials&scope=all-apis'

: 

# Install all the Python libraries

In [0]:
%pip install -U -qqq mlflow langgraph==0.3.4 databricks-langchain databricks-agents uv databricks-vectorsearch
dbutils.library.restartPython()

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


# Configure the Langgraph agent

In [0]:
%%writefile agent.py
import functools
import os
from typing import Any, Generator, Literal, Optional

import mlflow
from databricks.sdk import WorkspaceClient
from databricks_langchain import (
    ChatDatabricks,
    UCFunctionToolkit,
)
from databricks_langchain.genie import GenieAgent
from databricks_langchain import DatabricksVectorSearch
from langchain_core.runnables import RunnableLambda
from langgraph.graph import END, StateGraph
from langgraph.graph.state import CompiledStateGraph
from langgraph.prebuilt import create_react_agent
from mlflow.langchain.chat_agent_langgraph import ChatAgentState
from mlflow.pyfunc import ChatAgent
from mlflow.types.agent import (
    ChatAgentChunk,
    ChatAgentMessage,
    ChatAgentResponse,
    ChatContext,
)
from pydantic import BaseModel

###################################################
## Create a GenieAgent with access to a Genie Space
###################################################

# TODO add GENIE_SPACE_ID and a description for this space
# You can find the ID in the URL of the genie room /genie/rooms/<GENIE_SPACE_ID>
GENIE_SPACE_ID = "01f0454b13a0192c9419325353b3aa99"
genie_agent_description = "This agent can help find the right Medicare plan for a person based on a given set of filtered Medicare plans that are available in the state"

genie_agent = GenieAgent(
    genie_space_id=GENIE_SPACE_ID,
    genie_agent_name="Genie",
    description=genie_agent_description,
    client=WorkspaceClient(
        host=os.getenv("DB_MODEL_SERVING_HOST_URL"),
        token=os.getenv("DATABRICKS_GENIE_PAT"),
    ),
)

#######################################
# Initialize the vector search engine
#######################################

# TODO: Configure your vector store parameters
VECTOR_SEARCH_ENDPOINT = "hackathon_search"  # Replace with your endpoint
VECTOR_INDEX_NAME = "workspace.default.benefit_search"  # Replace with your index

vector_store = DatabricksVectorSearch(
    endpoint=VECTOR_SEARCH_ENDPOINT,
    index_name=VECTOR_INDEX_NAME,
    columns=["plan_id", "contract_id"]  # Columns to retrieve
)

#######################################
# Define the clarification agent
#######################################

def clarification_agent(state):
    prompt = "Based on the conversation history, ask the user for the necessary information to find a suitable Medicare plan. Be specific about what you need (e.g., age, zip code, health conditions, current prescriptions). Be conversational and helpful."
    preprocessor = RunnableLambda(
        lambda state: state["messages"] + [{"role": "user", "content": prompt}]
    )
    clarification_chain = preprocessor | llm
    response = clarification_chain.invoke(state)
    return {
        "messages": [
            {
                "role": "assistant",
                "content": response.content,
                "name": "Clarification",
            }
        ],
        "needs_user_input": True  # Flag to indicate we need user response
    }

#######################################
# Define the RAG agent for plan retrieval
#######################################

def rag_agent(state):
    # Extract user requirements from conversation
    messages = state.get("messages", [])
    
    # Build search query from user information
    user_info = []
    search_query = ""
    
    # Parse messages to extract user requirements
    for msg in messages:
        if msg.get("role") == "user":
            user_info.append(msg.get("content", ""))
    
    # Combine user information to create search query
    search_query = " ".join(user_info)
    
    try:
        # Retrieve relevant plans from vector store
        retriever = vector_store.as_retriever(
            search_kwargs={
                "k": 10,  # Number of plans to retrieve
                "filters": {}  # Add filters if needed based on user requirements
            }
        )
        
        relevant_plans = retriever.get_relevant_documents(search_query)
        
        # Format retrieved plans
        if relevant_plans:
            plans_info = []
            plan_ids = []

            for doc in relevant_plans:
                plan_data = doc.metadata
                plan_id = plan_data.get("plan_id", "N/A")
                contract_id = plan_data.get("contract_id", "N/A")
                
                plans_info.append(f"Plan ID: {plan_id}, Name: {contract_id}")
                if plan_id != "N/A":
                    plan_ids.append(plan_id)
            
            retrieved_context = f"Found {len(plan_ids)} Medicare plans matching your requirements:\n\n" + "\n".join(plans_info)

            return {
                "messages": [
                    {
                        "role": "assistant", 
                        "content": f"{retrieved_context}\n\nI'll now get detailed benefit information for these plans to help you choose the best one.",
                        "name": "RAG",
                    }
                ],
                "retrieved_plans": [doc.metadata for doc in relevant_plans],
                "plan_ids": plan_ids,  # Store just the plan IDs for Genie
                "needs_user_input": False
            }
        else:
            retrieved_context = "I couldn't find any Medicare plans matching your specific requirements in our database."
        
        # Create response with retrieved plan information
        prompt = f"""Based on the user's requirements and the following retrieved Medicare plan information, provide a helpful response about suitable Medicare plans:

Retrieved Plans:
{retrieved_context}

Provide specific plan recommendations with plan IDs and explain why these plans might be suitable based on the user's stated needs."""

        preprocessor = RunnableLambda(
            lambda state: state["messages"] + [{"role": "user", "content": prompt}]
        )
        rag_chain = preprocessor | llm
        response = rag_chain.invoke(state)
        
        return {
            "messages": [
                {
                    "role": "assistant", 
                    "content": response.content,
                    "name": "RAG",
                }
            ],
            "retrieved_plans": [doc.metadata for doc in relevant_plans],  # Store plan metadata
            "needs_user_input": False
        }
        
    except Exception as e:
        # Fallback if RAG fails
        return {
            "messages": [
                {
                    "role": "assistant",
                    "content": f"I encountered an issue retrieving Medicare plans from our database. Let me route you to our Genie agent for assistance. Error: {str(e)}",
                    "name": "RAG",
                }
            ],
            "needs_user_input": False
        }


############################################
# Define your LLM endpoint and system prompt
############################################

# TODO: Replace with your model serving endpoint
# multi-agent Genie works best with claude 3.7 or gpt 4o models.
LLM_ENDPOINT_NAME = "databricks-llama-4-maverick"
llm = ChatDatabricks(endpoint=LLM_ENDPOINT_NAME)

#############################
# Define the supervisor agent
#############################

# TODO update the max number of iterations between supervisor and worker nodes
# before returning to the user
MAX_ITERATIONS = 5

worker_descriptions = {
    "Genie": genie_agent_description,
    "Clarification": "This agent help clarify the request of the user so that all needed information are provided.",
    "RAG": "This agent retrieves specific Medicare plan IDs and details from the vector database based on user requirements.",

}

formatted_descriptions = "\n".join(
    f"- {name}: {desc}" for name, desc in worker_descriptions.items()
)

system_prompt = f"""You are a supervisor agent. Your job is to route user requests to the appropriate worker agent or to a clarification agent if more information is needed.

The available workers are:
{formatted_descriptions}

Analyze the conversation history to determine if the user has provided enough information to find a Medicare plan. You need details like:
- Age or eligibility for Medicare
- Location (zip code or state)
- Current health status or conditions
- Prescription drug needs
- Budget considerations

Analyze the conversation history to determine the next best action:

1. If the user hasn't provided enough information for Medicare plan search (missing age, location, health conditions, prescriptions), route to 'Clarification'

2. If the user has provided sufficient information but no specific plans have been retrieved yet, route to 'RAG' to search for relevant Medicare plans

3. If specific plans have been retrieved then route to 'Genie' to retrieve benefits information about those plans and tell which plan would fit this person

4. If the user's question has been fully answered, respond with 'FINISH'

Current conversation context: Look at the messages to see what stage we're at in helping the user find a Medicare plan.

Choose the next step carefully.
"""

options = ["FINISH", "Clarification"] + list(worker_descriptions.keys())
FINISH = {"next_node": "FINISH"}

def supervisor_agent(state):
    count = state.get("iteration_count", 0) + 1
    if count > MAX_ITERATIONS:
        return FINISH
    
    class nextNode(BaseModel):
        next_node: Literal[tuple(options)]

    preprocessor = RunnableLambda(
        lambda state: [{"role": "system", "content": system_prompt}] + state["messages"]
    )
    supervisor_chain = preprocessor | llm.with_structured_output(nextNode)
    next_node = supervisor_chain.invoke(state).next_node
    
    # if routed back to the same node, exit the loop
    if state.get("next_node") == next_node:
        return FINISH
    return {
        "iteration_count": count,
        "next_node": next_node,
        "needs_user_input": False
    }

#######################################
# Define our multiagent graph structure
#######################################

def agent_node(state, agent, name):
    result = agent.invoke(state)
    return {
        "messages": [
            {
                "role": "assistant",
                "content": result["messages"][-1].content,
                "name": name,
            }
        ],
        "needs_user_input": False
    }

def should_continue_or_end(state):
    """Decide whether to continue routing or end based on state"""
    if state.get("needs_user_input", False):
        return "END"  # Stop here so user can respond to clarification
    return "supervisor"  # Continue routing

class AgentState(ChatAgentState):
    next_node: str
    iteration_count: int
    needs_user_input: bool = False
    retrieved_plans: list = []  # Store retrieved plan ids


# clarification_node = functools.partial(agent_node, agent=clarification_agent, name="Clarification")
genie_node = functools.partial(agent_node, agent=genie_agent, name="Genie")

workflow = StateGraph(AgentState)
workflow.add_node("Genie", genie_node)
workflow.add_node("supervisor", supervisor_agent)
workflow.add_node("Clarification", clarification_agent)
workflow.add_node("RAG", rag_agent)

workflow.set_entry_point("supervisor")

# - Genie and RAG always goes back to supervisor to check if more is needed
# - Clarification can either go to END (for user input) or back to supervisor
workflow.add_edge("Genie", "supervisor")
workflow.add_edge("RAG", "supervisor")
workflow.add_conditional_edges(
    "Clarification",
    should_continue_or_end,
    {
        "END": END,
        "supervisor": "supervisor"
    }
)

# Let the supervisor decide which next node to go to
conditional_edges = {k: k for k in worker_descriptions.keys()}
conditional_edges["Clarification"] = "Clarification"
conditional_edges["FINISH"] = END

# Let the supervisor decide which next node to go
workflow.add_conditional_edges(
    "supervisor",
    lambda x: x["next_node"],
    conditional_edges
)
multi_agent = workflow.compile()

###################################
# Wrap our multi-agent in ChatAgent
###################################


class LangGraphChatAgent(ChatAgent):
    def __init__(self, agent: CompiledStateGraph):
        self.agent = agent

    def predict(
        self,
        messages: list[ChatAgentMessage],
        context: Optional[ChatContext] = None,
        custom_inputs: Optional[dict[str, Any]] = None,
    ) -> ChatAgentResponse:
        request = {
            "messages": [m.model_dump_compat(exclude_none=True) for m in messages]
        }

        messages = []
        for event in self.agent.stream(request, stream_mode="updates"):
            for node_data in event.values():
                new_messages = node_data.get("messages", [])
                if new_messages:
                    messages.extend(
                        ChatAgentMessage(**msg) for msg in node_data.get("messages", [])
                    )
                    # If this is a clarification request, return immediately to allow user response
                    if node_data.get("needs_user_input", False):
                        break
        return ChatAgentResponse(messages=messages)

    def predict_stream(
        self,
        messages: list[ChatAgentMessage],
        context: Optional[ChatContext] = None,
        custom_inputs: Optional[dict[str, Any]] = None,
    ) -> Generator[ChatAgentChunk, None, None]:
        request = {
            "messages": [m.model_dump_compat(exclude_none=True) for m in messages]
        }
        for event in self.agent.stream(request, stream_mode="updates"):
            for node_data in event.values():
                new_messages = node_data.get("messages", [])
                if new_messages:
                    yield from (
                        ChatAgentChunk(**{"delta": msg})
                        for msg in node_data.get("messages", [])
                    )
                    # If this is a clarification request, stop streaming to allow user response
                    if node_data.get("needs_user_input", False):
                        break


# Create the agent object, and specify it as the agent object to use when
# loading the agent back for inference via mlflow.models.set_model()
mlflow.langchain.autolog()
AGENT = LangGraphChatAgent(multi_agent)
mlflow.models.set_model(AGENT)

Overwriting agent.py


# Test the agent with a simple user message

In [0]:
from agent import AGENT

input_example = {
    "messages": [
        {
            "role": "user",
            "content": "I am 70 years old and live in dutchess county. I am looking for a teeth routine cleaning. I do not have any other health conditions. I do not have dental insurance.",
        }
    ]
}
AGENT.predict(input_example)

  relevant_plans = retriever.get_relevant_documents(search_query)


[NOTICE] Using a notebook authentication token. Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True.


ChatAgentResponse(messages=[ChatAgentMessage(role='assistant', content="## Step 1: Understand the User's Requirements\nThe user is a 70-year-old living in Dutchess County, seeking a teeth routine cleaning without dental insurance, and is looking for suitable Medicare plans.\n\n## Step 2: Identify Relevant Medicare Plan Information\nThe retrieved Medicare plans are: H2624 (Plan IDs: 802, 803), H9808 (Plan IDs: 807, 808), H5521 (Plan IDs: 323, 459), H2775 (Plan IDs: 105, 112), H5216 (Plan ID: 382), and H2406 (Plan ID: 119).\n\n## Step 3: Analyze the Coverage for Dental Services\nOriginal Medicare (Part A and Part B) does not typically cover routine dental care, including teeth cleanings. However, some Medicare Advantage plans may offer additional benefits, such as dental coverage.\n\n## Step 4: Determine Suitable Plans Based on the User's Needs\nTo be suitable, a plan should be a Medicare Advantage plan that offers dental coverage, as the user is looking for coverage for a routine teeth 

Trace(request_id=tr-86e9ab391f1646dba8f5697d133e2db8)

# Set the environment variables

In [0]:
import os
from dbruntime.databricks_repl_context import get_context

# TODO: set secret_scope_name and secret_key_name to access your PAT
secret_scope_name = "hackathon"
secret_key_name = "pat"

os.environ["DB_MODEL_SERVING_HOST_URL"] = "https://dbc-05bab63c-84b8.cloud.databricks.com/browse/folders/workspace?o=806146813099306"
assert os.environ["DB_MODEL_SERVING_HOST_URL"] is not None
os.environ["DATABRICKS_GENIE_PAT"] = dbutils.secrets.get(
    scope=secret_scope_name, key=secret_key_name
)
assert os.environ["DATABRICKS_GENIE_PAT"] is not None, (
    "The DATABRICKS_GENIE_PAT was not properly set to the PAT secret"
)

# Register the model with mlflow

In [0]:
# Determine Databricks resources to specify for automatic auth passthrough at deployment time
import mlflow
from agent import GENIE_SPACE_ID, LLM_ENDPOINT_NAME
from databricks_langchain import UnityCatalogTool, VectorSearchRetrieverTool
from mlflow.models.resources import (
    DatabricksFunction,
    DatabricksGenieSpace,
    DatabricksServingEndpoint,
)
from pkg_resources import get_distribution

# TODO: Manually include underlying resources if needed. See the TODO in the markdown above for more information.
resources = [
    DatabricksServingEndpoint(endpoint_name=LLM_ENDPOINT_NAME),
    DatabricksGenieSpace(genie_space_id=GENIE_SPACE_ID),
#   DatabricksSQLWarehouse(warehouse_id="your_warehouse_id"),
#   DatabricksTable(table_name="your_table_name"),
]

# for tool in tools:
#     if isinstance(tool, VectorSearchRetrieverTool):
#         resources.extend(tool.resources)
#     elif isinstance(tool, UnityCatalogTool):
#         resources.append(DatabricksFunction(function_name=tool.uc_function_name))

with mlflow.start_run():
    logged_agent_info = mlflow.pyfunc.log_model(
        artifact_path="agent",
        python_model="agent.py",
        input_example=input_example,
        extra_pip_requirements=[f"databricks-connect=={get_distribution('databricks-connect').version}"],
        resources=resources,
    )

2025/06/09 18:49:09 INFO mlflow.pyfunc: Predicting on input example to validate output


Uploading artifacts:   0%|          | 0/12 [00:00<?, ?it/s]

# Test the deployed model

In [0]:
mlflow.models.predict(
    model_uri=f"runs:/{logged_agent_info.run_id}/agent",
    input_data=input_example,
    env_manager="uv",
)

Downloading artifacts:   0%|          | 0/12 [00:00<?, ?it/s]

2025/06/09 18:50:41 INFO mlflow.models.flavor_backend_registry: Selected backend for flavor 'python_function'


Downloading artifacts:   0%|          | 0/12 [00:00<?, ?it/s]

2025/06/09 18:50:43 INFO mlflow.utils.virtualenv: Creating a new environment in /tmp/virtualenv_envs/mlflow-3e34192403d42e40bc5d898d008b8ab35e3e7faf with python version 3.11.10 using uv
Using CPython 3.11.10 interpreter at: [36m/usr/bin/python3.11[39m
Creating virtual environment at: [36m/tmp/virtualenv_envs/mlflow-3e34192403d42e40bc5d898d008b8ab35e3e7faf[39m
Activate with: [32msource /tmp/virtualenv_envs/mlflow-3e34192403d42e40bc5d898d008b8ab35e3e7faf/bin/activate[39m
2025/06/09 18:50:44 INFO mlflow.utils.virtualenv: Installing dependencies
[2mUsing Python 3.11.10 environment at: /tmp/virtualenv_envs/mlflow-3e34192403d42e40bc5d898d008b8ab35e3e7faf[0m
[2mResolved [1m3 packages[0m [2min 95ms[0m[0m
[36m[1mDownloading[0m[39m setuptools [2m(1.2MiB)[0m
[36m[1mDownloading[0m[39m pip [2m(1.7MiB)[0m
 [32m[1mDownloading[0m[39m pip
 [32m[1mDownloading[0m[39m setuptools
[2mPrepared [1m3 packages[0m [2min 132ms[0m[0m
[2mInstalled [1m3 packages[0m [2min 2

{"messages": [{"role": "assistant", "content": "You're looking for a Medicare plan that covers your dental needs. I'd be happy to help you figure out the best option.\n\nTo get started, can you please tell me a bit more about your situation? \n\nSpecifically, I need to know:\n\n1. **Your age**: Are you 65 or older, or are you under 65 with a disability?\n2. **Your location**: What's your zip code? Medicare plans vary by location, so this will help me narrow down the options.\n3. **Your dental needs**: Are you looking for routine care like cleanings and check-ups, or do you need more extensive coverage for procedures like crowns, dentures, or implants?\n4. **Current Medicare coverage**: Are you already enrolled in Medicare Part A and/or Part B, or are you new to Medicare?\n5. **Additional health needs**: Do you have any other health conditions or take prescription medications that you'd like to ensure are covered by your Medicare plan?\n\nOnce I have this information, I can help you exp

In [0]:
mlflow.set_registry_uri("databricks-uc")

# TODO: define the catalog, schema, and model name for your UC model
catalog = "workspace"
schema = "models"
model_name = "first-try-model"
UC_MODEL_NAME = f"{catalog}.{schema}.{model_name}"

# register the model to UC
uc_registered_model_info = mlflow.register_model(
    model_uri=logged_agent_info.model_uri, name=UC_MODEL_NAME
)

Registered model 'workspace.models.first-try-model' already exists. Creating a new version of this model...


Downloading artifacts:   0%|          | 0/12 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/12 [00:00<?, ?it/s]

Created version '2' of model 'workspace.models.first-try-model'.


In [0]:
from databricks import agents

agents.deploy(
    UC_MODEL_NAME,
    uc_registered_model_info.version,
    tags={"endpointSource": "docs"},
    environment_vars={
        "DATABRICKS_GENIE_PAT": f"{{{{secrets/{secret_scope_name}/{secret_key_name}}}}}"
    },
)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]


    Deployment of workspace.models.first-try-model version 2 initiated.  This can take up to 15 minutes and the Review App & Query Endpoint will not work until this deployment finishes.

    View status: https://dbc-05bab63c-84b8.cloud.databricks.com/ml/endpoints/agents_workspace-models-first-try-model
    Review App: https://dbc-05bab63c-84b8.cloud.databricks.com/ml/review-v2/11a64ab1ac874e1f8d276ce4794a75ec/chat
    Monitor: https://dbc-05bab63c-84b8.cloud.databricks.com/ml/experiments/2104128721751618/evaluation-monitoring


Deployment(model_name='workspace.models.first-try-model', model_version='2', endpoint_name='agents_workspace-models-first-try-model', served_entity_name='workspace-models-first-try-model_2', query_endpoint='https://dbc-05bab63c-84b8.cloud.databricks.com/serving-endpoints/agents_workspace-models-first-try-model/served-models/workspace-models-first-try-model_2/invocations', endpoint_url='https://dbc-05bab63c-84b8.cloud.databricks.com/ml/endpoints/agents_workspace-models-first-try-model', review_app_url='https://dbc-05bab63c-84b8.cloud.databricks.com/ml/review-v2/11a64ab1ac874e1f8d276ce4794a75ec/chat')

In [0]:
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

w.secrets.put_secret("hackathon","pat",string_value ="eyJraWQiOiJkZmJjOWVmMThjZTQ2ZTlhMDg2NWZmYzlkODkxYzJmMjg2NmFjMDM3MWZiNDlmOTdhMDg1MzBjNWYyODU3ZTg4IiwidHlwIjoiYXQrand0IiwiYWxnIjoiUlMyNTYifQ.eyJjbGllbnRfaWQiOiJjMjlhMDUzOS1jMjY1LTQyODQtODI5OC0yMGRkNDFjYTEyNGMiLCJzY29wZSI6ImFsbC1hcGlzIiwiaXNzIjoiaHR0cHM6Ly9hY2NvdW50cy5jbG91ZC5kYXRhYnJpY2tzLmNvbS9vaWRjL2FjY291bnRzL2I0YjNhN2I2LWFkNDQtNGY1NS05ZjJiLTVjYzE4ODgxZWUzOCIsImF1ZCI6ImI0YjNhN2I2LWFkNDQtNGY1NS05ZjJiLTVjYzE4ODgxZWUzOCIsInN1YiI6ImMyOWEwNTM5LWMyNjUtNDI4NC04Mjk4LTIwZGQ0MWNhMTI0YyIsImlhdCI6MTc0OTQ4OTk0MywiZXhwIjoxNzQ5NDkzNTQzLCJqdGkiOiIwMzZiOTliMS0yMzFjLTRhOGMtYmNhZC1iYmY5M2IwOTM1MjkifQ.OOFt0zEgRG-6POaVj1Br5PKZ49di8dDIC2uoyFytT48ZLBiukHS3jug_EKP7MJtsyX2dXBKYi0vghX35a3UFqxzROEQ7WYbqCEhI-dgnllwY5Bs6EOcQyuBMh9bRXAc3Uhm2lV4BvpkUxmULbVWBeWyBaw8Kbh4pfmBYpzkJ2QUdYdvqXzQKXPQmcfjczBDe1GOV47zJgreNvIN_xHy3KyOm2ERQZP58Xblv6tw2mzEtqve0M58dhdO4-G3Uip5ZGlHjUAtiEjymzeXeklYQ3zqkWSTbDyPozvqBg7-C0ujLOfNaIkEtT_wsWADHxLbc3SPwkSZfIPfOx8qI6-NeIQ")