1. Install Python Libraries

In [0]:
%pip install -qqqq -U -r requirements.txt
# The magic above installs the packages listed in requirements.txt (-U upgrades them, -qqqq silences pip output).

In [0]:
# Restart the Python process after the %pip install in the previous cell.
# NOTE(review): presumably required so the newly installed package versions are the ones imported below - confirm.
dbutils.library.restartPython()

2. Load Agent UC Storage Locations and Set Up the MLflow Experiment

In [0]:
# Import necessary libraries
from cookbook.config.shared.agent_storage_location import AgentStorageConfig
from cookbook.databricks_utils import get_mlflow_experiment_url
from cookbook.config import load_serializable_config_from_yaml_file
import mlflow 
import os

# Read the agent's storage settings from the shared YAML config file.
agent_storage_config: AgentStorageConfig = load_serializable_config_from_yaml_file(
    "./configs/agent_storage_config.yaml"
)

# Print the loaded storage locations for a quick visual check.
agent_storage_config.pretty_print()

# Select the agent's MLflow experiment for this session...
experiment_info = mlflow.set_experiment(agent_storage_config.mlflow_experiment_name)
# ...and also expose its name through the environment (used when running in a local IDE).
os.environ["MLFLOW_EXPERIMENT_NAME"] = agent_storage_config.mlflow_experiment_name

experiment_url = get_mlflow_experiment_url(experiment_info.experiment_id)
print(f"View the MLflow Experiment `{agent_storage_config.mlflow_experiment_name}` at {experiment_url}")

3. Set Genie Space ID

In [0]:
# Databricks credentials, Genie space ID and LLM endpoint used by the rest of the notebook.
import os

# Fill these in only when the credentials are not already provided by the environment.
# Never commit a real token to source control - prefer a secret store or a
# pre-exported environment variable.
DATABRICKS_HOST = ""  # will fill in later
DATABRICKS_TOKEN = ""  # will fill in later

# Export a credential only when a value was actually provided, so that an empty
# placeholder never overwrites a value that is already set in the environment.
for _env_name, _env_value in (
    ("DATABRICKS_HOST", DATABRICKS_HOST),
    ("DATABRICKS_TOKEN", DATABRICKS_TOKEN),
):
    if _env_value:
        os.environ[_env_name] = _env_value

# ID of the Genie space queried by the Genie agent.
GENIE_SPACE_ID = ""  # will fill in later

# Llama 3.3 endpoint for LLMs
LLM_ENDPOINT_NAME = "databricks-meta-llama-3-3-70b-instruct"

4. Set Up the **Config YAML Files**

In [0]:
# First let's create our config directory
import os

# Make sure the output folder exists, then (re)write a short README describing it.
config_dir = "configs"
os.makedirs(config_dir, exist_ok=True)
with open(f"{config_dir}/README.md", "w") as readme_file:
    readme_file.write("This folder stores the configurations generated by the notebooks.")

# COMMAND ----------

from pydantic import BaseModel, Field, field_validator, FieldValidationInfo
from typing import Optional
from databricks.sdk.errors.platform import ResourceDoesNotExist, NotFound
import json
import yaml

# Serialize and deserialize configs
def serializable_config_to_yaml(obj: BaseModel) -> str:
    """Render a pydantic model as a YAML string.

    The model is converted to plain Python data with ``model_dump()`` and that
    data is handed to ``yaml.dump``.
    """
    return yaml.dump(obj.model_dump())

def serializable_config_to_yaml_file(obj: BaseModel, yaml_file_path: str) -> None:
    """Write the YAML form of ``obj`` to ``yaml_file_path``, replacing any existing file."""
    with open(yaml_file_path, "w") as out_file:
        yaml_text = serializable_config_to_yaml(obj)
        out_file.write(yaml_text)

4.1. Get tools from UC

In [0]:
from cookbook.tools.local_function import LocalFunctionTool
from cookbook.tools.uc_tool import UCTool
# Import visualization tools
from tools.visualization_tools import (
    create_bar_chart,
    create_line_chart,
    create_pie_chart,
    format_table_data
)

# Create UCTool wrappers or use local tools based on what's available.

# Set Unity Catalog info - match with what you used in 02_create_tools.py
UC_CATALOG = "main"  # Change to your catalog
UC_SCHEMA = "default"  # Change to your schema

# (function name, local implementation) pairs, in the order they appear in `viz_tools`.
_VIZ_TOOL_FUNCTIONS = [
    ("create_bar_chart", create_bar_chart),
    ("create_line_chart", create_line_chart),
    ("create_pie_chart", create_pie_chart),
    ("format_table_data", format_table_data),
]


def _load_viz_tool(function_name, local_func):
    """Return a UCTool for `function_name`, or a LocalFunctionTool for `local_func` if that fails.

    Args:
        function_name: Name of the function inside `UC_CATALOG`.`UC_SCHEMA`.
        local_func: Local Python implementation used as the fallback.

    Returns:
        The UCTool when it can be constructed, otherwise a LocalFunctionTool.
    """
    uc_function_name = f"{UC_CATALOG}.{UC_SCHEMA}.{function_name}"
    try:
        tool = UCTool(uc_function_name=uc_function_name)
    except Exception:
        # Deliberate best effort: any failure to build the UC wrapper falls back
        # to the local implementation of the same function.
        tool = LocalFunctionTool(func=local_func)
        print(f"Added local tool: {function_name}")
    else:
        print(f"Added UC tool: {uc_function_name}")
    return tool


try:
    # Try to use UC tools if already registered. Creating the client first means a
    # failure here sends the whole cell to the local-only fallback below.
    from unitycatalog.ai.core.databricks import DatabricksFunctionClient
    client = DatabricksFunctionClient()

    viz_tools = [_load_viz_tool(name, func) for name, func in _VIZ_TOOL_FUNCTIONS]
except Exception as e:
    print(f"Error loading UC tools: {e}")
    # Fall back to local tools if UC fails
    viz_tools = [LocalFunctionTool(func=func) for _, func in _VIZ_TOOL_FUNCTIONS]
    print("Using local visualization tools")

# Keep the per-tool names available for any later cell that refers to them.
bar_chart_tool, line_chart_tool, pie_chart_tool, table_format_tool = viz_tools

4.2 Set Up Prompts for the Visualization and Story Builder Agents

In [0]:
# System prompt for the visualization agent: tells the LLM to always produce a chart
# from the JSON data it receives and how to pass data to the charting tools.
visualization_system_prompt = """You are an expert data visualization specialist. Your job is to create visualizations from data, no matter the format.

IMPORTANT INSTRUCTIONS:
1. The data will be provided to you in JSON format. You MUST use this data to create a visualization.
2. If the data appears in a format like [{"usage_date": "2025-03-26", "avg_currency_conversion_rate": 9870.5}, ...] create a line chart showing the trend over time.
3. If you see a "No table data found" message, check if JSON data is still available in another part of the input.
4. NEVER give up on creating a visualization! Even with minimal or difficult data, create a simple chart.
5. When using create_line_chart or create_bar_chart, make sure to properly format any JSON data as a string.

Your visualizations should be clear, informative, and properly labeled. Quality visualization is better than no visualization.
"""

# System prompt for the story builder agent: asks the LLM to turn the analysis and
# visualizations into a narrative structured by the S.T.O.R.Y framework.
story_builder_system_prompt = """You are an expert storyteller specializing in data narratives. Your role is to craft compelling stories based on data analysis and visualizations, following the S.T.O.R.Y framework.

## S.T.O.R.Y Framework
- Situation: Set the context and background for the data analysis
- Take off: Identify the key trends, patterns, or insights that stand out
- Opportunity: Highlight the implications or potential actions suggested by the data
- Resolution: Propose potential solutions or decisions based on the insights
- Yield: Summarize the expected outcomes or benefits of taking action

## Instructions
1. Study the data analysis from the Genie agent and the visualizations created.
2. Identify the most significant insights and patterns in the data.
3. Craft a coherent narrative that flows logically through the S.T.O.R.Y framework.
4. Use clear, engaging language that makes the data accessible and meaningful.
5. Focus on business implications and actionable insights.
6. Reference specific visualizations to support key points in your story.

Your goal is to transform data and visualizations into a compelling narrative that helps stakeholders understand the significance of the information and make informed decisions.
"""

4.3 Create the required **YAML files** in configs (Genie to Story)

In [0]:
from cookbook.config.agents.genie_agent import GenieAgentConfig
from cookbook.config.agents.visualization_agent import VisualizationAgentConfig
from cookbook.config.agents.story_builder_agent import StoryBuilderAgentConfig
from cookbook.config.shared.llm import LLMParametersConfig, LLMConfig
from cookbook.config import serializable_config_to_yaml_file

# File names (under ./configs/) that each agent's configuration is written to.
GENIE_AGENT_DEFAULT_YAML_CONFIG_FILE_NAME = "genie_agent_config.yaml"
VISUALIZATION_AGENT_DEFAULT_YAML_CONFIG_FILE_NAME = "visualization_agent_config.yaml"
STORY_BUILDER_AGENT_DEFAULT_YAML_CONFIG_FILE_NAME = "story_builder_agent_config.yaml"

# Genie agent: only needs to know which Genie space to query.
genie_agent_config = GenieAgentConfig(genie_space_id=GENIE_SPACE_ID)

# Visualization agent: LLM + prompt + the charting tools loaded earlier.
viz_llm_parameters = LLMParametersConfig(temperature=0.2, max_tokens=2000)
visualization_agent_config = VisualizationAgentConfig(
    llm_endpoint_name=LLM_ENDPOINT_NAME,
    system_prompt=visualization_system_prompt,
    tools=viz_tools,
    llm_parameters=viz_llm_parameters,
)

# Story builder agent: LLM + prompt, with a higher temperature and token budget.
story_llm_parameters = LLMParametersConfig(temperature=0.5, max_tokens=3000)
story_builder_agent_config = StoryBuilderAgentConfig(
    llm_endpoint_name=LLM_ENDPOINT_NAME,
    system_prompt=story_builder_system_prompt,
    llm_parameters=story_llm_parameters,
)

# Save each agent configuration to YAML before importing the agent classes
for config_to_save, config_file_name in (
    (genie_agent_config, GENIE_AGENT_DEFAULT_YAML_CONFIG_FILE_NAME),
    (visualization_agent_config, VISUALIZATION_AGENT_DEFAULT_YAML_CONFIG_FILE_NAME),
    (story_builder_agent_config, STORY_BUILDER_AGENT_DEFAULT_YAML_CONFIG_FILE_NAME),
):
    serializable_config_to_yaml_file(config_to_save, "./configs/" + config_file_name)

Function Calling Agent Config Setup

In [0]:
from cookbook.config.agents.function_calling_agent import FunctionCallingAgentConfig

# File name (under ./configs/) that the function-calling agent's config is saved to.
# NOTE(review): this must equal the default config file name that
# cookbook.agents.function_calling_agent loads - confirm against that module.
FC_AGENT_DEFAULT_YAML_CONFIG_FILE_NAME = "function_calling_agent_config.yaml"

# System prompt for the function-calling agent (surrounding blank lines are stripped).
system_prompt = """
## Role
You are a helpful assistant that answers questions using a set of tools. If needed, you ask the user follow-up questions to clarify their request.

## Objective
Your goal is to provide accurate, relevant, and helpful response based solely on the outputs from these tools. You are concise and direct in your responses.

## Instructions
1. **Understand the Query**: Think step by step to analyze the user's question and determine the core need or problem. 

2. **Assess available tools**: Think step by step to consider each available tool and understand their capabilities in the context of the user's query.

3. **Select the appropriate tool(s) OR ask follow up questions**: Based on your understanding of the query and the tool descriptions, decide which tool(s) should be used to generate a response. If you do not have enough information to use the available tools to answer the question, ask the user follow up questions to refine their request.  If you do not have a relevant tool for a question or the outputs of the tools are not helpful, respond with: "I'm sorry, I can't help you with that."
""".strip()

# Agent configuration: LLM settings plus the visualization tools loaded earlier.
fc_agent_config = FunctionCallingAgentConfig(
    llm_config=LLMConfig(
        llm_endpoint_name=LLM_ENDPOINT_NAME,  # Model serving endpoint
        llm_system_prompt_template=system_prompt,  # System prompt
        llm_parameters=LLMParametersConfig(
            temperature=0.01, max_tokens=1500
        ),
    ),
    tools=viz_tools,
)

# Save with the file name defined at the top of this cell. The name is deliberately
# not re-imported from cookbook.agents.function_calling_agent here: that would shadow
# the constant above, and this notebook saves each config to YAML *before* importing
# the corresponding agent module.
serializable_config_to_yaml_file(fc_agent_config, "./configs/" + FC_AGENT_DEFAULT_YAML_CONFIG_FILE_NAME)

4.4 Create Supervised Agents (Genie to Story)

In [0]:
from cookbook.config import serializable_config_to_yaml
from cookbook.agents.genie_agent import GenieAgent
from cookbook.agents.visualization_agent import VisualizationAgent
from cookbook.agents.story_builder_agent import StoryBuilderAgent
from cookbook.tools.local_function import LocalFunctionTool
from cookbook.config.agents.multi_agent_supervisor import MultiAgentSupervisorConfig, SupervisedAgentConfig

In [0]:
# Wrap each agent (name, description, config, class) so it can be handed to the supervisor.
genie_description = "Queries and analyzes data from databases with natural language. Can transform complex data requests into SQL queries and return structured results."
visualization_description = "Creates data visualizations based on Genie query results. Determines the most appropriate chart types based on data structure and analysis goals."
story_builder_description = "Crafts compelling narratives from data and visualizations using the S.T.O.R.Y framework: Situation, Take off, Opportunity, Resolution, Yield. Focuses on business insights and key findings."

genie_supervised = SupervisedAgentConfig(
    name="Genie",
    description=genie_description,
    agent_config=genie_agent_config,
    agent_class=GenieAgent,
)

visualization_supervised = SupervisedAgentConfig(
    name="Visualization",
    description=visualization_description,
    agent_config=visualization_agent_config,
    agent_class=VisualizationAgent,
)

story_builder_supervised = SupervisedAgentConfig(
    name="StoryBuilder",
    description=story_builder_description,
    agent_config=story_builder_agent_config,
    agent_class=StoryBuilderAgent,
)

4.5 Create the required **YAML files** in configs (MultiAgent)

In [0]:
from cookbook.config.agents.multi_agent_supervisor import MultiAgentSupervisorConfig
from cookbook.config import serializable_config_to_yaml_file

# File name (under ./configs/) for the supervisor's configuration.
MULTI_AGENT_DEFAULT_YAML_CONFIG_FILE_NAME = "multi_agent_supervisor_config.yaml"

# Build the supervisor configuration from its parts.
supervisor_llm_parameters = LLMParametersConfig(max_tokens=1500, temperature=0.1)
supervised_agents = [genie_supervised, visualization_supervised, story_builder_supervised]

multi_agent_config = MultiAgentSupervisorConfig(
    llm_endpoint_name=LLM_ENDPOINT_NAME,
    llm_parameters=supervisor_llm_parameters,
    # True shows detailed debug info in the UI.
    playground_debug_mode=True,
    # Use local agents.
    agent_loading_mode="local",
    # Maximum number of agent turns before finishing.
    max_supervisor_loops=4,
    agents=supervised_agents,
)

# Persist the supervisor configuration next to the per-agent configs.
multi_agent_config_path = "./configs/" + MULTI_AGENT_DEFAULT_YAML_CONFIG_FILE_NAME
serializable_config_to_yaml_file(multi_agent_config, multi_agent_config_path)

4.6 Initialize the Multi-Agent System

In [0]:
# Import the supervisor class here, after the agent configs above have been saved to YAML.
from cookbook.agents.multi_agent_supervisor import MultiAgentSupervisor

# Build the supervisor from the in-memory configuration created in section 4.5.
multi_agent = MultiAgentSupervisor(agent_config=multi_agent_config)

5. Test the MultiAgent

Debug Genie Agent

In [0]:
# Sample chat request used to exercise the agents: a single user message.
user_question = "Show usage date-wise average currency conversion rate. Take data from enriched table"
test_query = {"messages": [{"role": "user", "content": user_question}]}

In [0]:
def process_genie_data_for_visualization(genie_output):
    """
    Build the visualization agent's input from a Genie agent result.

    The data is pulled out of ``genie_output`` with ``parse_genie_response`` and
    embedded in a new user message, which is appended after the messages already
    present in ``genie_output``.

    Args:
        genie_output: The output from the Genie agent; its existing messages are
            read with ``.get("messages", [])``.

    Returns:
        dict: ``{"messages": [...]}`` holding the existing messages followed by
        the new user instruction.
    """
    from tools.visualization_tools import parse_genie_response

    # Pull the data out of Genie's response.
    data_for_chart = parse_genie_response(genie_output)

    # Instruction for the visualization agent, with the extracted data inlined.
    chart_request = f"""Please create a visualization based on this data: {data_for_chart}
    
    If the data appears to be a time series (with dates), please create a line chart showing the trend over time.
    If the data is categorical, please create a bar chart to compare values.
    
    Even if there are issues with the data, please make your best effort to create a visualization with whatever data is available.
    """

    # Concatenation builds a new list, so the caller's message list is left untouched.
    prior_messages = genie_output.get("messages", [])
    request_message = {"role": "user", "content": chart_request}
    return {"messages": prior_messages + [request_message]}

In [0]:
# Define a simple debug function to print important information about the data
def debug_data(genie_output):
    """Print debugging information about the Genie output.

    Always prints the object's type. For a dict it also prints the keys, a preview
    of ``content`` (when that key is present) and a summary of ``messages`` (when
    that key is present): the count, the roles, and a preview of the last
    assistant message that has content.

    Previews are taken from ``str(value)`` so that non-string content (``None``,
    dicts, lists, ...) is shown instead of raising.

    Args:
        genie_output: The object returned by the Genie agent.

    Returns:
        None. Everything is written to stdout.
    """
    separator = "=" * 50
    print(separator)
    print("DEBUG: Genie Output Structure")
    print(f"Type: {type(genie_output)}")

    if isinstance(genie_output, dict):
        print(f"Keys: {genie_output.keys()}")

        if 'content' in genie_output:
            content = genie_output['content']
            print(f"Content type: {type(content)}")
            # str() first: content is not guaranteed to be sliceable text.
            print(f"Content preview: {str(content)[:100]}...")

        if 'messages' in genie_output:
            # Treat an explicit None like an empty message list.
            messages = genie_output['messages'] or []
            print(f"Message count: {len(messages)}")
            print(f"Message types: {[msg.get('role') for msg in messages]}")

            # Find assistant messages with content
            assistant_msgs = [
                msg for msg in messages
                if msg.get('role') == 'assistant' and msg.get('content')
            ]
            if assistant_msgs:
                last_content = assistant_msgs[-1].get('content', '')
                print(f"Last assistant message preview: {str(last_content)[:100]}...")

    print(separator)


In [0]:
# Test your multi-agent workflow step by step
from cookbook.agents.genie_agent import GenieAgent
from cookbook.agents.visualization_agent import VisualizationAgent
# Import the extract_table_from_markdown function
from tools.visualization_tools import extract_table_from_markdown
# Build the two agents exercised by this step-by-step check.
genie_agent = GenieAgent(agent_config=genie_agent_config)
visualization_agent = VisualizationAgent(agent_config=visualization_agent_config)

# Chat request sent to Genie: a single user message.
test_query = {
    "messages": [
        {"role": "user", "content": "Show usage date-wise average currency conversion rate. Take data from enriched table"}
    ]
}

# Step 1: ask Genie.
genie_output = genie_agent.predict(model_input=test_query)

# Step 2: turn Genie's answer into a request for the visualization agent.
viz_input = process_genie_data_for_visualization(genie_output)

# Step 3: run the visualization agent. On any failure, report it and substitute a
# canned assistant reply appended to Genie's messages.
try:
    viz_output = visualization_agent.predict(model_input=viz_input)
    print("Visualization completed successfully")
except Exception as e:
    print(f"Error in visualization agent: {e!s}")
    fallback_text = "I couldn't create a visualization from the provided data."
    fallback_message = {"role": "assistant", "content": fallback_text}
    viz_output = {
        "content": fallback_text,
        "messages": genie_output.get("messages", []) + [fallback_message],
    }

In [0]:
# from cookbook.agents.function_calling_agent import FunctionCallingAgent

# Test visualization agent
# viz_agent = FunctionCallingAgent(agent_config=visualization_agent_config)
# output = viz_agent.predict(model_input={"messages": [{"role": "user", "content": "Create a bar chart from this data: ..."}]})

In [0]:
# Send the test query through the multi-agent supervisor and print the "content" of its output.
output = multi_agent.predict(model_input=test_query)
final_answer = output["content"]
print(final_answer)


In [0]:
# Test the Genie agent on its own: rebuild it from genie_agent_config, send it
# the shared test_query, and print the raw result for inspection.
genie_agent = GenieAgent(agent_config=genie_agent_config)
genie_output = genie_agent.predict(model_input=test_query)
print(genie_output)



In [0]:
# Test visualization agent with Genie's output.
# Every other predict() call in this notebook passes {"messages": [<message dicts>]},
# so Genie's result is converted with process_genie_data_for_visualization rather
# than passing a bare string as "messages".
visualization_agent = VisualizationAgent(agent_config=visualization_agent_config)
viz_input = process_genie_data_for_visualization(genie_output)
viz_output = visualization_agent.predict(model_input=viz_input)
print(viz_output)

In [0]:
import json

# Pretty-print the whole supervisor output; default=str stringifies values json cannot encode.
output_as_json = json.dumps(output, indent=2, default=str)
print(output_as_json)

In [0]:
# Show the supervisor's final answer under a banner.
banner_rule = "=" * 80
print("Final Response:")
print(banner_rule)
print(output["content"])

++++++++++++++++++++++++++++++++++

6. Log the agent to MLflow

In [0]:
# Helper function to log the agent to MLflow
import mlflow
from mlflow.types.llm import CHAT_MODEL_INPUT_SCHEMA
from mlflow.models.rag_signatures import StringResponse
from mlflow.models.signature import ModelSignature
from mlflow.models.resources import DatabricksGenieSpace, DatabricksServingEndpoint

def log_multi_agent_supervisor_to_mlflow(agent_config: MultiAgentSupervisorConfig):
    """
    Log the multi-agent supervisor to MLflow for deployment.

    Paths are resolved against the current working directory: the agent code is
    read from ``cookbook/agents/multi_agent_supervisor.py``, the whole ``cookbook``
    folder is attached as a code path, and ``requirements.txt`` supplies the pip
    requirements.

    Args:
        agent_config: The configuration for the multi-agent supervisor. Its
            ``llm_endpoint_name``, ``input_example`` and ``model_dump()`` are used.

    Returns:
        The object returned by ``mlflow.pyfunc.log_model`` (callers read
        ``.model_uri`` from it).

    Raises:
        FileNotFoundError: If ``requirements.txt`` is not in the working directory.
    """
    # Build both paths the same way so they always agree on the project root.
    cookbook_dir = os.path.join(os.getcwd(), "cookbook")
    agent_code_path = os.path.join(cookbook_dir, "agents", "multi_agent_supervisor.py")

    # Get the pip requirements, skipping blank lines and comment-only lines so
    # that no empty or non-requirement entries are forwarded to MLflow.
    with open("requirements.txt", "r") as file:
        pip_requirements = [
            requirement
            for requirement in (line.strip() for line in file)
            if requirement and not requirement.startswith("#")
        ]
    pip_requirements.append("pyspark")  # manually add pyspark

    # Define the resource dependencies for automatic authorization.
    # NOTE(review): the Genie space ID comes from the notebook-level GENIE_SPACE_ID
    # rather than from `agent_config` - keep the two in sync.
    resources = [
        DatabricksServingEndpoint(endpoint_name=agent_config.llm_endpoint_name),
        DatabricksGenieSpace(genie_space_id=GENIE_SPACE_ID),
    ]

    # Log the model to MLflow
    logged_agent_info = mlflow.pyfunc.log_model(
        artifact_path="agent",
        python_model=agent_code_path,
        input_example=agent_config.input_example,
        model_config=agent_config.model_dump(),
        resources=resources,
        signature=ModelSignature(
            inputs=CHAT_MODEL_INPUT_SCHEMA,
            outputs=StringResponse()
        ),
        code_paths=[cookbook_dir],
        pip_requirements=pip_requirements,
    )

    return logged_agent_info

In [0]:
# Log the multi-agent system inside a new MLflow run and report where it went.
with mlflow.start_run() as run:
    logged_agent_info = log_multi_agent_supervisor_to_mlflow(multi_agent_config)
    run_id = run.info.run_id
    print(f"Multi-agent system logged to MLflow with run ID: {run_id}")
    print(f"Model URI: {logged_agent_info.model_uri}")

# Switch the registry to Unity Catalog, then register the logged model under the
# name taken from the agent storage config.
mlflow.set_registry_uri("databricks-uc")
model_name = agent_storage_config.uc_model_name
registered_model = mlflow.register_model(model_uri=logged_agent_info.model_uri, name=model_name)

print(f"Model registered to Unity Catalog as {model_name}, version {registered_model.version}")

In [0]:
# Deploy the model using Databricks Agents API
from databricks import agents

# Deploy the registered model version, tagging the deployment with its source.
deployment_tags = {"source": "multi_agent_story"}
deployment = agents.deploy(
    model_name=model_name,
    model_version=registered_model.version,
    tags=deployment_tags,
)

endpoint_name = deployment.endpoint_name
print(f"Model deployed successfully as: {endpoint_name}")