In [0]:
%pip install -U -qqqq -r requirements.txt
%restart_python

## Generate synthetic data

In [0]:
# Load the product documentation table, then rename its columns one at a time:
# `indexed_doc` becomes `content` and `product_id` becomes `doc_uri`.
docs_df = spark.table("agents_demo.data.product_docs")
for existing_name, new_name in (("indexed_doc", "content"), ("product_id", "doc_uri")):
    docs_df = docs_df.withColumnRenamed(existing_name, new_name)

# Render the renamed DataFrame in the notebook output.
display(docs_df)

In [0]:
from pyspark.sql.functions import col, to_json, struct, expr, lit
from databricks.agents.eval import generate_evals_df

# Optional free-text guidelines, passed as the `guidelines` argument of
# `generate_evals_df` in this cell. The text describes the task, the content and
# style wanted for the generated questions, example questions, and the personas.
# NOTE(review): the prompt text contains typos ("familar", "<product>f"); it is a
# runtime string, so it is left untouched here — confirm whether to correct it.
guidelines = """
# Task Description
You are generating an evaluation dataset which will be used to test a customer analytics chatbot on its ability to answer business user's questions about our product catalog.

# Content Guidelines
- Address scenarios that customer support agents may face while helping customers understand our products.
- Address scenarios that business analysts, who aren't familar with all of our products, may have

# Example questions
- how to troubleshoot <some issue>?
- how many colors of <product>f are there?
- what's the best product for <use case>?
- did we change <feature> recently?

# Style Guidelines
- Questions should be succinct, and human-like.

# Personas
- A business analyst
- A customer support agent
"""

# Generate 1 question for each of 20 documents.
# `docs_df` is a Spark DataFrame, so `limit(20)` is used to keep the input a
# DataFrame. `head(20)` would instead collect the rows to the driver and return
# a plain Python list of `Row` objects, which is not a DataFrame.
synthetic_eval_data = generate_evals_df(
    docs=docs_df.limit(20),
    guidelines=guidelines,
    num_questions_per_doc=1,
)

# Show the generated evaluation set in the notebook output.
display(synthetic_eval_data)

## Vibe check agent

In [0]:
%run ./function_calling_agent_openai_sdk

In [0]:
# Pydantic classes that make the configuration easier to use.  Developers can use these, Python dictionaries, or YAML files for their configuration.
from configs import (
    AgentConfig,
    FunctionCallingLLMConfig,
    LLMParametersConfig,
    RetrieverToolConfig,
    RetrieverParametersConfig,
    RetrieverSchemaConfig,
    UCToolConfig,
)
import yaml


# Column mapping for the Vector Search index: `product_id` serves as both the
# primary key and the document URI, and `indexed_doc` holds the chunk text.
docs_retriever_schema = RetrieverSchemaConfig(
    primary_key="product_id",
    chunk_text="indexed_doc",
    document_uri="product_id",
    # No additional metadata columns are enabled. Candidates that were left
    # disabled: issue_area, issue_category, issue_sub_category, product_category,
    # product_sub_category, conversation, timestamp, user_id.
    additional_metadata_columns=[],
)

# Search settings: a single result per query, using "ann" search.
docs_retriever_search_parameters = RetrieverParametersConfig(
    num_results=1,
    query_type="ann",  # Type of search: ann or hybrid
)

# Retriever tool over the product documentation index, built from the pieces above.
docs_retriever = RetrieverToolConfig(
    vector_search_index="agents_demo.data.product_docs_index",  # UC Vector Search index
    vector_search_schema=docs_retriever_schema,
    vector_search_parameters=docs_retriever_search_parameters,
    vector_search_threshold=0.0,
    # Tool prompt templates (not set here; kept for reference)
    # chunk_template="Passage text: {chunk_text}\nPassage metadata: {metadata}\n\n",
    # prompt_template="""Use the following pieces of retrieved context to answer the question.\nOnly use the passages from context that are relevant to the query to answer the question, ignore the irrelevant passages.  When responding, cite your source, referring to the passage by the columns in the passage's metadata.\n\nContext: {context}""",
    # Prompts describing the tool and its parameters
    retriever_query_parameter_prompt="query to look up in the product documentation",
    retriever_filter_parameter_prompt="Optional filters to apply to the search. An array of objects, each specifying a field name and the filters to apply to that field.  ONLY use the LIKE type of filter if you have a string to query in product_category, etc.  Prefer to query WITHOUT filters.",
    tool_description_prompt="Search the production documentation for product information.  If you need to know how to troubleshoot, what a product does, common issues, etc, use this tool.",
    tool_name="retrieve_product_docs",
    # Retriever internals
    tool_class_name="VectorSearchRetriever",
)

# python_exec_config = UCToolConfig(
#     uc_catalog_name="ep",
#     uc_schema_name="agent_demo",
#     uc_function_name="python_exec",
# )

# recent_orders = UCToolConfig(
#     uc_catalog_name="ep",
#     uc_schema_name="agent_demo",
#     uc_function_name="user_orders",
# )

########################
#### ✅✏️ LLM configuration
########################

# System prompt template given to the function-calling LLM.
agent_system_prompt = """You are a helpful assistant that answers questions by calling tools.  Provide responses ONLY based on the outputs from tools.  Ask follow up questions if needed.  If don't get relevant results from the retriever tool while using filters, try to call the retriever tool again with JUST a query and no filters!"""

# LLM parameters: temperature and maximum number of tokens.
llm_generation_parameters = LLMParametersConfig(temperature=0.01, max_tokens=1500)

# LLM configuration: serving endpoint, system prompt, parameters, and the
# tools the LLM may call (only the product docs retriever).
llm_config = FunctionCallingLLMConfig(
    llm_endpoint_name="agents-demo-gpt4o",  # Model serving endpoint
    llm_system_prompt_template=agent_system_prompt,
    llm_parameters=llm_generation_parameters,
    tools=[docs_retriever],
)

# Example chat request stored on the agent configuration as `input_example`.
agent_input_example = {
    "messages": [
        {"role": "user", "content": "What is the top customer issue?"},
    ]
}

# Top-level agent configuration: name, description, endpoint name, the LLM
# configuration defined earlier in this cell, and the example request.
function_calling_agent_config = AgentConfig(
    agent_name="CustomerServiceTranscripts",
    agent_description="Has access to the product documentation, transcripts from our customer service call center and information about customer's recent orders.",
    endpoint_name="agents_ep-agent_demo-customer_bot_function_calling_agent",
    llm_config=llm_config,
    input_example=agent_input_example,
)

import json

# Serialize the agent configuration once and reuse the dictionary for both outputs.
agent_config_dict = function_calling_agent_config.dict()

# Write the configuration to config.yml in block (non-flow) YAML style.
with open("config.yml", "w") as config_file:
    yaml.dump(agent_config_dict, config_file, default_flow_style=False)

# Print the same configuration as indented JSON.
print(json.dumps(agent_config_dict, indent=4))

In [0]:
# Single-turn chat request used to manually try out the agent.
vibe_check_request = {
    "messages": [
        {"role": "user", "content": "How does our blender work?"},
    ]
}

# Build the agent from the serialized configuration and send it the request.
agent = FunctionCallingAgent(agent_config=function_calling_agent_config.dict())
response = agent.predict(model_input=vibe_check_request)

## Initial quality evaluation

In [0]:
import mlflow
from mlflow.types.llm import CHAT_MODEL_INPUT_SCHEMA
from mlflow.models.signature import ModelSignature
from mlflow.models.rag_signatures import StringResponse

with mlflow.start_run(run_name="initial_agent"):
    # Agent's schema: chat-model input, string response output
    initial_agent_signature = ModelSignature(
        inputs=CHAT_MODEL_INPUT_SCHEMA,
        outputs=StringResponse(),
    )

    # Log the agent's code together with its serialized configuration to MLflow
    agent_model_info = mlflow.pyfunc.log_model(
        artifact_path="agent",
        python_model="function_calling_agent_openai_sdk",  # Agent's code
        model_config=function_calling_agent_config.dict(),  # Agent's config
        input_example=function_calling_agent_config.input_example,
        signature=initial_agent_signature,
    )

    # Evaluate the logged agent on the synthetic eval set with Agent Evaluation
    eval_results = mlflow.evaluate(
        data=synthetic_eval_data,  # Eval set
        model=agent_model_info.model_uri,  # run the logged Agent for evaluation
        model_type="databricks-agent",  # use Agent Evaluation
    )

## Make a change to fix retrieval quality

In [0]:
# Make some prompt changes to encourage the LLM to be less restrictive with filtering

# Original system prompt. It is assigned only to document the previous value and
# is overwritten by the "New" assignment immediately below.
function_calling_agent_config.llm_config.llm_system_prompt_template = """
You are a helpful assistant that answers questions by calling tools.  Provide responses ONLY based on the outputs from tools.  Ask follow up questions if needed.  If you try the retriever tool WITH filters, and don't get any results, try again without filters for the product categories, only with the user_id."""
# New
function_calling_agent_config.llm_config.llm_system_prompt_template = "You are a helpful assistant that answers questions by calling tools.  Provide responses ONLY based on the outputs from tools.  Ask follow up questions if needed.  If don't get relevant results from the retriever tool while using filters, try to call the retriever tool again with JUST a query and no filters!"

# Get the retriever tool that is backed by the product docs index
product_docs_index_name = "agents_demo.data.product_docs_index"
product_doc_retriever = next(
    (
        tool
        for tool in function_calling_agent_config.llm_config.tools
        if isinstance(tool, RetrieverToolConfig)
        and tool.vector_search_index == product_docs_index_name
    ),
    None,
)
# Fail with a descriptive message instead of an AttributeError on `None` when
# the configured tools do not include the expected retriever.
if product_doc_retriever is None:
    raise ValueError(
        f"No RetrieverToolConfig for index '{product_docs_index_name}' found in "
        "function_calling_agent_config.llm_config.tools"
    )

# Original filter prompt. It is assigned only to document the previous value and
# is overwritten by the "New" assignment immediately below.
product_doc_retriever.retriever_filter_parameter_prompt = "Optional filters to apply to the search. An array of objects, each specifying a field name and the filters to apply to that field.  ONLY use the LIKE type of filter if you have a string to query in product_category, etc."

# New
product_doc_retriever.retriever_filter_parameter_prompt = "Optional filters to apply to the search. An array of objects, each specifying a field name and the filters to apply to that field.  ONLY use the LIKE type of filter if you have a string to query in product_category, etc.  Prefer to query WITHOUT filters."

# Retrieve 5 results per query
product_doc_retriever.vector_search_parameters.num_results = 5

# The next cell logs the agent again with this updated configuration and re-runs the evaluation.

## Evaluate the change

In [0]:
with mlflow.start_run(run_name="changed_prompt"):
    # Agent's schema: chat-model input, string response output
    changed_agent_signature = ModelSignature(
        inputs=CHAT_MODEL_INPUT_SCHEMA,
        outputs=StringResponse(),
    )

    # Log the agent's code with the current (modified) configuration to MLflow
    agent_model_info = mlflow.pyfunc.log_model(
        artifact_path="agent",
        python_model="function_calling_agent_openai_sdk",  # Agent's code
        model_config=function_calling_agent_config.dict(),  # Agent's config
        input_example=function_calling_agent_config.input_example,
        signature=changed_agent_signature,
    )

    # Evaluate the newly logged agent on the same synthetic eval set
    eval_results = mlflow.evaluate(
        data=synthetic_eval_data,  # Eval set
        model=agent_model_info.model_uri,  # run the logged Agent for evaluation
        model_type="databricks-agent",  # use Agent Evaluation
    )