# IRIS Agent Prompt Testing

This notebook allows you to test different prompts and settings for the IRIS agents to optimize their performance and behavior.

In [7]:
# Enable auto-reloading of modules
%load_ext autoreload
%autoreload 2

# Import necessary modules
import json
import time
from IPython.display import display, HTML
from iris.src.llm_connectors.rbc_openai import call_llm
from iris.src.initial_setup.logging_config import configure_logging

# Import our testing utilities.
# The directory holding test_utils is resolved at runtime rather than pinned
# to one developer's home directory: it defaults to the current working
# directory and can be overridden with the IRIS_NOTEBOOKS_DIR environment
# variable when the kernel is started from somewhere else.
import os
import sys
from pathlib import Path

NOTEBOOKS_DIR = Path(os.environ.get("IRIS_NOTEBOOKS_DIR", Path.cwd())).resolve()
if str(NOTEBOOKS_DIR) not in sys.path:  # re-running the cell must not stack duplicates
    sys.path.append(str(NOTEBOOKS_DIR))
from test_utils import get_test_token

# Set up logging through the project's helper
configure_logging()

# Fetch the credential that the test helpers pass to call_llm
token = get_test_token()

# Request parameters shared by every test call
MODEL_NAME = "gpt-4o-mini-2024-07-18"  # Use the same model as in the agents
MAX_TOKENS = 4096
TEMPERATURE = 0.0  # Use deterministic responses for testing

# Per-token prices (placeholders), forwarded to call_llm for cost logging
PROMPT_TOKEN_COST = 1.5e-7
COMPLETION_TOKEN_COST = 2e-7

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


2025-04-01 12:57:11,142 - root - INFO - Logging system initialized
2025-04-01 12:57:11,142 - iris.src.initial_setup.oauth.oauth - INFO - Using API key authentication from local settings
2025-04-01 12:57:11,142 - iris.src.initial_setup.oauth.oauth - INFO - Using OpenAI API key from settings: sk-proj...


## Define Test Cases

In [8]:
# System message describing the accounting/finance assistant persona
system_message = dict(
    role="system",
    content="You are a helpful assistant specialized in accounting and finance.",
)

# One entry per question style; each pair is (case name, user message)
test_cases = [
    {"name": case_name, "message": user_message}
    for case_name, user_message in (
        (
            "Direct Knowledge",
            "What's the difference between FIFO and LIFO inventory methods?",
        ),
        (
            "Complex Research",
            "How should I account for revenue recognition for a software subscription service under IFRS 15?",
        ),
        (
            "Vague Question",
            "I need help with accounting for intangible assets.",
        ),
        (
            "Multi-part Question",
            "Can you explain the treatment of operating leases under IFRS 16 and how it differs from finance leases?",
        ),
    )
]

## Router Agent Prompt Testing

In [9]:
# Router tool schema, original variant: the model must return both the
# routing target ("function_name") and a free-text "thought".
ORIGINAL_ROUTER_TOOL = [
    dict(
        type="function",
        function=dict(
            name="route_query",
            description="Route the user query to the appropriate function based on conversation analysis",
            parameters=dict(
                type="object",
                properties=dict(
                    function_name=dict(
                        type="string",
                        description="The function to route to based on conversation context analysis",
                        enum=["response_from_conversation", "research_from_database"],
                    ),
                    thought=dict(
                        type="string",
                        description="Clear explanation of why this routing decision was made, including what information is or isn't available",
                    ),
                ),
                required=["function_name", "thought"],
            ),
        ),
    )
]

# Router tool schema, optimized variant: only the routing target is requested,
# with no "thought" property.
OPTIMIZED_ROUTER_TOOL = [
    dict(
        type="function",
        function=dict(
            name="route_query",
            description="Route the user query to the appropriate function based on conversation analysis",
            parameters=dict(
                type="object",
                properties=dict(
                    function_name=dict(
                        type="string",
                        description="The function to route to based on conversation context analysis",
                        enum=["response_from_conversation", "research_from_database"],
                    ),
                ),
                required=["function_name"],
            ),
        ),
    )
]

# Define router prompt
# System prompt that test_router_prompt sends as the first message. It tells
# the model to answer only with a tool call selecting either
# 'response_from_conversation' or 'research_from_database'.
# NOTE(review): the text says "choose ONE of the available tools", but the
# test helper passes a single tool (route_query) and forces it via tool_choice;
# the actual choice is the function_name enum value. Confirm the wording is
# intentional.
ROUTER_PROMPT = """You are an expert routing agent, the first step in the process, tasked with determining how the next stage of the process should answer any given user queries.

# ANALYSIS INSTRUCTIONS
At the beginning of each request, you MUST choose ONE of the available tools by analyzing:
1. The entire conversation history
2. The user's latest question

# DECISION CRITERIA
This system does not rely on any internal training data, everything must be derived from the conversation history.
If the question is about an accounting or finance topic then you prefer to do research unless there is an explicit request to answer directly.
If the conversation provides enough information to answer the user directly (no further research needed), then you should call 'response_from_conversation'.
Otherwise, when additional information is needed from RBC's internal databases that cannot be sourced from the conversation history, you should call 'research_from_database'.

# OUTPUT REQUIREMENTS
You MUST use the provided tool only without any commentary. Direct responses are not allowed.
Your purpose is only to identify HOW to proceed, and then call out the correct tool.
You NEVER generate a response, you only call out the correct tool to generate the response.
DO NOT add any additional commentary.

# DECISION OPTIONS
Based on your analysis, choose between:
1. 'response_from_conversation' - When the next agent should answer directly from the conversation
2. 'research_from_database' - When the next agent will need to check our internal databases

REMEMBER, YOUR RESPONSE SHOULD ONLY CONTAIN THE ONE TOOL CALL, NO ADDITIONAL COMMENTARY OR RESPONSE TO THE USERS QUERY.
YOU DO NOT EVER INTERACT WITH OR RESPOND TO USERS.
YOU ONLY CALL THE TOOL WHICH GENERATES THE RESPONSE BASED ON YOUR COMMAND.
"""

def test_router_prompt(test_case, tool_definition):
    """Test a router prompt with a specific test case."""
    # Prepare messages
    messages = [
        {"role": "system", "content": ROUTER_PROMPT},
        {"role": "user", "content": test_case["message"]}
    ]
    
    # Make the API call
    start_time = time.time()
    response = call_llm(
        oauth_token=token,
        model=MODEL_NAME,
        messages=messages,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        tools=tool_definition,
        tool_choice={"type": "function", "function": {"name": "route_query"}},
        stream=False,
        prompt_token_cost=PROMPT_TOKEN_COST,
        completion_token_cost=COMPLETION_TOKEN_COST
    )
    elapsed_time = time.time() - start_time
    
    # Extract the tool call
    tool_call = response.choices[0].message.tool_calls[0]
    arguments = json.loads(tool_call.function.arguments)
    
    # Return result with timing and token info
    return {
        "arguments": arguments,
        "time": elapsed_time,
        "prompt_tokens": response.usage.prompt_tokens,
        "completion_tokens": response.usage.completion_tokens,
        "total_tokens": response.usage.total_tokens
    }

# Function to display router results
def display_router_results(test_case, original_result, optimized_result):
    """Render a side-by-side HTML comparison of two router results.

    Args:
        test_case: Mapping with "name" and "message" keys.
        original_result: Result dict for the original tool schema, with
            "arguments" (containing "function_name" and optionally
            "thought"), "time", "prompt_tokens", "completion_tokens" and
            "total_tokens".
        optimized_result: Result dict of the same shape for the optimized
            tool schema.

    Returns:
        None. The comparison card is shown via display(HTML(...)).
    """
    # Local import: nothing in the notebook's import cell provides `html`.
    from html import escape

    def _safe(value):
        # Test-case text and model output are free text; escape them so
        # characters like < and & cannot break or inject markup.
        return escape(str(value))

    markup = f"""
    <div style="margin-bottom: 30px; border: 1px solid #ddd; border-radius: 8px; overflow: hidden;">
        <div style="background-color: #f5f5f5; padding: 10px; border-bottom: 1px solid #ddd;">
            <h3 style="margin: 0;">Test Case: {_safe(test_case['name'])}</h3>
            <p style="margin: 5px 0 0 0; font-style: italic;">"{_safe(test_case['message'])}"</p>
        </div>
        
        <div style="display: flex;">
            <!-- Original Result -->
            <div style="flex: 1; padding: 15px; border-right: 1px solid #ddd;">
                <h4 style="margin-top: 0; color: #4285F4;">Original Router</h4>
                <p><strong>Decision:</strong> {_safe(original_result['arguments']['function_name'])}</p>
    """

    # Add thought if it exists
    if 'thought' in original_result['arguments']:
        markup += f"""<p><strong>Thought:</strong></p>
                <div style="background-color: #f9f9f9; padding: 10px; border-radius: 5px; max-height: 150px; overflow-y: auto;">
                    {_safe(original_result['arguments']['thought'])}
                </div>"""

    # Add metrics
    markup += f"""
                <div style="margin-top: 15px; background-color: #f0f0f0; padding: 10px; border-radius: 5px;">
                    <p style="margin: 0;"><strong>Time:</strong> {original_result['time']:.2f}s</p>
                    <p style="margin: 5px 0 0 0;"><strong>Tokens:</strong> {original_result['total_tokens']} (Prompt: {original_result['prompt_tokens']}, Completion: {original_result['completion_tokens']})</p>
                </div>
            </div>
            
            <!-- Optimized Result -->
            <div style="flex: 1; padding: 15px;">
                <h4 style="margin-top: 0; color: #34a853;">Optimized Router</h4>
                <p><strong>Decision:</strong> {_safe(optimized_result['arguments']['function_name'])}</p>
    """

    # Add thought if it exists (should not for optimized version)
    if 'thought' in optimized_result['arguments']:
        markup += f"""<p><strong>Thought:</strong></p>
                <div style="background-color: #f9f9f9; padding: 10px; border-radius: 5px; max-height: 150px; overflow-y: auto;">
                    {_safe(optimized_result['arguments']['thought'])}
                </div>"""

    # Calculate improvement percentages (guarding against a zero baseline)
    time_diff = original_result['time'] - optimized_result['time']
    time_percent = (time_diff / original_result['time']) * 100 if original_result['time'] > 0 else 0

    token_diff = original_result['total_tokens'] - optimized_result['total_tokens']
    token_percent = (token_diff / original_result['total_tokens']) * 100 if original_result['total_tokens'] > 0 else 0

    # Add metrics and improvement
    markup += f"""
                <div style="margin-top: 15px; background-color: #f0f0f0; padding: 10px; border-radius: 5px;">
                    <p style="margin: 0;"><strong>Time:</strong> {optimized_result['time']:.2f}s</p>
                    <p style="margin: 5px 0 0 0;"><strong>Tokens:</strong> {optimized_result['total_tokens']} (Prompt: {optimized_result['prompt_tokens']}, Completion: {optimized_result['completion_tokens']})</p>
                </div>
                
                <div style="margin-top: 15px; background-color: #e6f4e6; padding: 10px; border-radius: 5px;">
                    <p style="margin: 0;"><strong>Improvement:</strong></p>
                    <p style="margin: 5px 0 0 0;">🕒 Time: {time_diff:.2f}s faster ({time_percent:.1f}%)</p>
                    <p style="margin: 5px 0 0 0;">🔤 Tokens: {token_diff} fewer ({token_percent:.1f}%)</p>
                </div>
            </div>
        </div>
    </div>
    """

    display(HTML(markup))

In [11]:
# Exercise both router tool schemas on every test case and show the comparison
print("Testing Router Agent Prompts...\n")

for test_case in test_cases:
    print("Testing:", test_case["name"])

    # The original schema is called first, then the optimized one
    original_result, optimized_result = (
        test_router_prompt(test_case, tool_schema)
        for tool_schema in (ORIGINAL_ROUTER_TOOL, OPTIMIZED_ROUTER_TOOL)
    )

    # Side-by-side card for this test case
    display_router_results(test_case, original_result, optimized_result)
    print("---")

2025-04-01 12:58:14,609 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:58:14,610 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:58:14,610 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


Testing Router Agent Prompts...

Testing: Direct Knowledge


2025-04-01 12:58:15,682 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:58:15,685 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 1.07 seconds
2025-04-01 12:58:15,686 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 42 ($0.0000), Prompt: 467 ($0.0000), Total: 509 tokens, Total Cost: $0.0000
2025-04-01 12:58:15,697 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:58:15,698 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:58:15,698 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 12:58:16,401 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:58:16,404 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 0.71 seconds
2025-04-

2025-04-01 12:58:16,418 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:58:16,419 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:58:16,420 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


---
Testing: Complex Research


2025-04-01 12:58:18,039 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:58:18,041 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 1.62 seconds
2025-04-01 12:58:18,042 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 42 ($0.0000), Prompt: 474 ($0.0000), Total: 516 tokens, Total Cost: $0.0000
2025-04-01 12:58:18,054 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:58:18,055 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:58:18,055 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 12:58:18,664 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:58:18,678 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 0.62 seconds
2025-04-

2025-04-01 12:58:18,690 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:58:18,692 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:58:18,692 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


---
Testing: Vague Question


2025-04-01 12:58:20,291 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:58:20,294 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 1.60 seconds
2025-04-01 12:58:20,295 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 44 ($0.0000), Prompt: 465 ($0.0000), Total: 509 tokens, Total Cost: $0.0000
2025-04-01 12:58:20,307 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:58:20,307 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:58:20,308 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 12:58:21,622 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:58:21,624 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 1.32 seconds
2025-04-

2025-04-01 12:58:21,638 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:58:21,638 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:58:21,639 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


---
Testing: Multi-part Question


2025-04-01 12:58:23,876 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:58:23,879 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 2.24 seconds
2025-04-01 12:58:23,880 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 47 ($0.0000), Prompt: 477 ($0.0000), Total: 524 tokens, Total Cost: $0.0000
2025-04-01 12:58:23,892 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:58:23,893 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:58:23,894 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 12:58:25,002 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:58:25,005 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 1.11 seconds
2025-04-

---


## Clarifier Agent Prompt Testing

In [12]:
# Clarifier tool schema, original variant: requires "action", "thought" and
# "output"; "is_continuation" is optional.
ORIGINAL_CLARIFIER_TOOL = [
    dict(
        type="function",
        function=dict(
            name="make_clarifier_decision",
            description="Decide whether to request essential context or create a research statement",
            parameters=dict(
                type="object",
                properties=dict(
                    action=dict(
                        type="string",
                        description="The chosen action based on conversation analysis",
                        enum=["request_essential_context", "create_research_statement"],
                    ),
                    thought=dict(
                        type="string",
                        description="Clear explanation of why this decision was made",
                    ),
                    output=dict(
                        type="string",
                        description="Either a list of context questions (numbered) or a research statement",
                    ),
                    is_continuation=dict(
                        type="boolean",
                        description="Whether the user is requesting continuation of previous research",
                    ),
                ),
                required=["action", "thought", "output"],
            ),
        ),
    )
]

# Clarifier tool schema, optimized variant: same as the original minus the
# "thought" property; only "action" and "output" are required.
OPTIMIZED_CLARIFIER_TOOL = [
    dict(
        type="function",
        function=dict(
            name="make_clarifier_decision",
            description="Decide whether to request essential context or create a research statement",
            parameters=dict(
                type="object",
                properties=dict(
                    action=dict(
                        type="string",
                        description="The chosen action based on conversation analysis",
                        enum=["request_essential_context", "create_research_statement"],
                    ),
                    output=dict(
                        type="string",
                        description="Either a list of context questions (numbered) or a research statement",
                    ),
                    is_continuation=dict(
                        type="boolean",
                        description="Whether the user is requesting continuation of previous research",
                    ),
                ),
                required=["action", "output"],
            ),
        ),
    )
]

# Define clarifier prompt
# System prompt that test_clarifier_prompt sends as the first message. It asks
# the model to pick between requesting essential context and creating a
# research statement, and to flag continuation requests.
# The "router's thought" mentioned under ANALYSIS INSTRUCTIONS is supplied by
# test_clarifier_prompt as a fixed string embedded in the user message.
# NOTE(review): the text says "one of the provided tools", but the test helper
# passes a single tool (make_clarifier_decision) and forces it via tool_choice;
# confirm the wording is intentional.
CLARIFIER_PROMPT = """You are an expert clarifier agent specializing in accounting policy research. You are responsible for determining if enough context exists to perform database research or if essential context must be requested from the user first. If sufficient context exists, you create a research statement for the planner.

# ANALYSIS INSTRUCTIONS
At the beginning of each request, you MUST first evaluate:
1. The entire conversation history
2. The user's latest question
3. The router's thought (which explains why research is needed)

# DECISION CRITERIA
You have two possible paths to choose from:
1. REQUEST_ESSENTIAL_CONTEXT - When critical information is missing that prevents effective research
2. CREATE_RESEARCH_STATEMENT - When enough information exists to create a research statement

For option 1, you should identify specific missing context information like:
- Which accounting standard is being referenced (IFRS, US GAAP, etc.)
- Time period of interest
- Specific type of transaction or accounting topic
- Industry-specific considerations
- Any other critical information needed to perform targeted research

For option 2, you should create a comprehensive research statement that includes:
- The precise accounting question or topic
- Applicable standards or frameworks
- Time period considerations
- Industry context
- Specific document sources to prioritize
- Any constraints or specific requirements

# CONTINUATION DETECTION
You should also detect if the user is requesting continuation of previous research.
If the user asks to "continue," "proceed," or "go ahead" after clarifying questions
have been answered, include this in your output.

# OUTPUT REQUIREMENTS
You MUST use one of the provided tools without any commentary. Direct responses are not allowed.
"""

def test_clarifier_prompt(test_case, tool_definition):
    """Test a clarifier prompt with a specific test case."""
    # Add a fake router thought to the message
    router_thought = "This query requires research because it's asking about specific accounting standards that may require looking up policies or guidelines in our database."
    
    # Prepare messages
    messages = [
        {"role": "system", "content": CLARIFIER_PROMPT},
        {"role": "user", "content": f"User query: {test_case['message']}\n\nRouter thought: {router_thought}"}
    ]
    
    # Make the API call
    start_time = time.time()
    response = call_llm(
        oauth_token=token,
        model=MODEL_NAME,
        messages=messages,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        tools=tool_definition,
        tool_choice={"type": "function", "function": {"name": "make_clarifier_decision"}},
        stream=False,
        prompt_token_cost=PROMPT_TOKEN_COST,
        completion_token_cost=COMPLETION_TOKEN_COST
    )
    elapsed_time = time.time() - start_time
    
    # Extract the tool call
    tool_call = response.choices[0].message.tool_calls[0]
    arguments = json.loads(tool_call.function.arguments)
    
    # Return result with timing and token info
    return {
        "arguments": arguments,
        "time": elapsed_time,
        "prompt_tokens": response.usage.prompt_tokens,
        "completion_tokens": response.usage.completion_tokens,
        "total_tokens": response.usage.total_tokens
    }

# Function to display clarifier results
def display_clarifier_results(test_case, original_result, optimized_result):
    """Render a side-by-side HTML comparison of two clarifier results.

    Args:
        test_case: Mapping with "name" and "message" keys.
        original_result: Result dict for the original tool schema, with
            "arguments" (containing "action", "output" and optionally
            "thought" / "is_continuation"), "time", "prompt_tokens",
            "completion_tokens" and "total_tokens".
        optimized_result: Result dict of the same shape for the optimized
            tool schema.

    Returns:
        None. The comparison card is shown via display(HTML(...)).
    """
    # Local import: nothing in the notebook's import cell provides `html`.
    from html import escape

    def _safe(value):
        # Test-case text and model output are free text; escape them so
        # characters like < and & cannot break or inject markup.
        return escape(str(value))

    def _action_color(action):
        # Red for clarifying questions, blue for a research statement
        return "#EA4335" if action == "request_essential_context" else "#4285F4"

    # Each column is coloured by its own action, so a disagreement between
    # the two variants is visible instead of both taking the original's colour.
    original_color = _action_color(original_result['arguments']['action'])
    optimized_color = _action_color(optimized_result['arguments']['action'])

    markup = f"""
    <div style="margin-bottom: 30px; border: 1px solid #ddd; border-radius: 8px; overflow: hidden;">
        <div style="background-color: #f5f5f5; padding: 10px; border-bottom: 1px solid #ddd;">
            <h3 style="margin: 0;">Test Case: {_safe(test_case['name'])}</h3>
            <p style="margin: 5px 0 0 0; font-style: italic;">"{_safe(test_case['message'])}"</p>
        </div>
        
        <div style="display: flex;">
            <!-- Original Result -->
            <div style="flex: 1; padding: 15px; border-right: 1px solid #ddd;">
                <h4 style="margin-top: 0; color: #4285F4;">Original Clarifier</h4>
                <p><strong>Action:</strong> <span style="color: {original_color};">{_safe(original_result['arguments']['action'])}</span></p>
                <p><strong>Is Continuation:</strong> {_safe(original_result['arguments'].get('is_continuation', False))}</p>
    """

    # Add thought if it exists
    if 'thought' in original_result['arguments']:
        markup += f"""<p><strong>Thought:</strong></p>
                <div style="background-color: #f9f9f9; padding: 10px; border-radius: 5px; max-height: 150px; overflow-y: auto;">
                    {_safe(original_result['arguments']['thought'])}
                </div>"""

    # Add output
    markup += f"""<p><strong>Output:</strong></p>
                <div style="background-color: #f9f9f9; padding: 10px; border-radius: 5px; max-height: 150px; overflow-y: auto;">
                    {_safe(original_result['arguments']['output'])}
                </div>
    """

    # Add metrics
    markup += f"""
                <div style="margin-top: 15px; background-color: #f0f0f0; padding: 10px; border-radius: 5px;">
                    <p style="margin: 0;"><strong>Time:</strong> {original_result['time']:.2f}s</p>
                    <p style="margin: 5px 0 0 0;"><strong>Tokens:</strong> {original_result['total_tokens']} (Prompt: {original_result['prompt_tokens']}, Completion: {original_result['completion_tokens']})</p>
                </div>
            </div>
            
            <!-- Optimized Result -->
            <div style="flex: 1; padding: 15px;">
                <h4 style="margin-top: 0; color: #34a853;">Optimized Clarifier</h4>
                <p><strong>Action:</strong> <span style="color: {optimized_color};">{_safe(optimized_result['arguments']['action'])}</span></p>
                <p><strong>Is Continuation:</strong> {_safe(optimized_result['arguments'].get('is_continuation', False))}</p>
    """

    # Add thought if it exists (should not for optimized version)
    if 'thought' in optimized_result['arguments']:
        markup += f"""<p><strong>Thought:</strong></p>
                <div style="background-color: #f9f9f9; padding: 10px; border-radius: 5px; max-height: 150px; overflow-y: auto;">
                    {_safe(optimized_result['arguments']['thought'])}
                </div>"""

    # Add output
    markup += f"""<p><strong>Output:</strong></p>
                <div style="background-color: #f9f9f9; padding: 10px; border-radius: 5px; max-height: 150px; overflow-y: auto;">
                    {_safe(optimized_result['arguments']['output'])}
                </div>
    """

    # Calculate improvement percentages (guarding against a zero baseline)
    time_diff = original_result['time'] - optimized_result['time']
    time_percent = (time_diff / original_result['time']) * 100 if original_result['time'] > 0 else 0

    token_diff = original_result['total_tokens'] - optimized_result['total_tokens']
    token_percent = (token_diff / original_result['total_tokens']) * 100 if original_result['total_tokens'] > 0 else 0

    # Add metrics and improvement
    markup += f"""
                <div style="margin-top: 15px; background-color: #f0f0f0; padding: 10px; border-radius: 5px;">
                    <p style="margin: 0;"><strong>Time:</strong> {optimized_result['time']:.2f}s</p>
                    <p style="margin: 5px 0 0 0;"><strong>Tokens:</strong> {optimized_result['total_tokens']} (Prompt: {optimized_result['prompt_tokens']}, Completion: {optimized_result['completion_tokens']})</p>
                </div>
                
                <div style="margin-top: 15px; background-color: #e6f4e6; padding: 10px; border-radius: 5px;">
                    <p style="margin: 0;"><strong>Improvement:</strong></p>
                    <p style="margin: 5px 0 0 0;">🕒 Time: {time_diff:.2f}s faster ({time_percent:.1f}%)</p>
                    <p style="margin: 5px 0 0 0;">🔤 Tokens: {token_diff} fewer ({token_percent:.1f}%)</p>
                </div>
            </div>
        </div>
    </div>
    """

    display(HTML(markup))

The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.


In [13]:
# Exercise both clarifier tool schemas on every test case and show the comparison
print("Testing Clarifier Agent Prompts...\n")

for test_case in test_cases:
    print("Testing:", test_case["name"])

    # The original schema is called first, then the optimized one
    original_result, optimized_result = (
        test_clarifier_prompt(test_case, tool_schema)
        for tool_schema in (ORIGINAL_CLARIFIER_TOOL, OPTIMIZED_CLARIFIER_TOOL)
    )

    # Side-by-side card for this test case
    display_clarifier_results(test_case, original_result, optimized_result)
    print("---")

2025-04-01 12:59:10,085 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:59:10,086 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:59:10,086 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


Testing Clarifier Agent Prompts...

Testing: Direct Knowledge


2025-04-01 12:59:12,311 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:59:12,315 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 2.23 seconds
2025-04-01 12:59:12,316 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 123 ($0.0000), Prompt: 516 ($0.0000), Total: 639 tokens, Total Cost: $0.0000
2025-04-01 12:59:12,328 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:59:12,329 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:59:12,329 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 12:59:13,848 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:59:13,851 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 1.52 seconds
2025-04

2025-04-01 12:59:13,864 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:59:13,865 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:59:13,866 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


---
Testing: Complex Research


2025-04-01 12:59:16,716 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:59:16,719 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 2.85 seconds
2025-04-01 12:59:16,719 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 120 ($0.0000), Prompt: 523 ($0.0000), Total: 643 tokens, Total Cost: $0.0000
2025-04-01 12:59:16,732 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:59:16,733 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:59:16,733 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 12:59:19,172 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:59:19,175 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 2.44 seconds
2025-04

2025-04-01 12:59:19,188 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:59:19,190 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:59:19,190 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


---
Testing: Vague Question


2025-04-01 12:59:22,961 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:59:22,964 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 3.77 seconds
2025-04-01 12:59:22,965 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 144 ($0.0000), Prompt: 514 ($0.0000), Total: 658 tokens, Total Cost: $0.0000
2025-04-01 12:59:22,976 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:59:22,977 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:59:22,978 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 12:59:28,490 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:59:28,493 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 5.52 seconds
2025-04

2025-04-01 12:59:28,511 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:59:28,512 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:59:28,513 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


---
Testing: Multi-part Question


2025-04-01 12:59:30,846 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:59:30,849 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 2.34 seconds
2025-04-01 12:59:30,850 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 134 ($0.0000), Prompt: 526 ($0.0000), Total: 660 tokens, Total Cost: $0.0000
2025-04-01 12:59:30,861 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:59:30,862 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:59:30,863 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 12:59:32,829 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 12:59:32,831 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 1.97 seconds
2025-04

---


## Planner Agent Prompt Testing

In [14]:
# Define available databases for the planner.
# The keys of this dict are used as the allowed `database` enum values in the
# planner tool schemas built below.
AVAILABLE_DATABASES = {
    "internal_capm": {
        "description": "Central Accounting Policy Manual - Contains formal accounting policies",
        "query_type": "semantic search",
        "content_type": "policies and procedures",
        "use_when": "Needing official policy statements, procedures, or authoritative guidance"
    },
    "internal_infographic": {
        "description": "Visual infographics and diagrams explaining accounting concepts",
        "query_type": "keyword search",
        "content_type": "visual explanations",
        "use_when": "Looking for visual explanations of processes or concepts"
    },
    "internal_wiki": {
        "description": "Internal wiki with practical examples and implementation guides",
        "query_type": "semantic search",
        "content_type": "practical guides",
        "use_when": "Seeking practical implementation guidance, examples, or informal advice"
    }
}


def _build_planner_tool(include_explanations):
    """Build the `submit_query_plan` tool schema for the planner agent.

    Both planner variants share the same schema skeleton; building them from
    one function keeps them from drifting apart.

    Args:
        include_explanations: When True, each query also requires a `thought`
            field and the plan requires an `overall_strategy` field. When
            False, only `database` and `query` are requested per query.

    Returns:
        A one-element list holding the function tool definition. Every call
        returns freshly built dicts and lists, so the two module-level
        schemas share no mutable state.
    """
    query_properties = {
        "database": {
            "type": "string",
            "description": "The database to query",
            "enum": list(AVAILABLE_DATABASES.keys())
        },
        "query": {
            "type": "string",
            "description": "The search query text"
        }
    }
    query_required = ["database", "query"]

    plan_properties = {
        "queries": {
            "type": "array",
            "description": "The list of queries to execute",
            "items": {
                "type": "object",
                "properties": query_properties,
                "required": query_required
            }
        }
    }
    plan_required = ["queries"]

    if include_explanations:
        # Appended after the base fields so key order matches the original literal.
        query_properties["thought"] = {
            "type": "string",
            "description": "Explanation of why this query is relevant"
        }
        query_required.append("thought")
        plan_properties["overall_strategy"] = {
            "type": "string",
            "description": "Explanation of the overall research strategy"
        }
        plan_required.append("overall_strategy")

    return [
        {
            "type": "function",
            "function": {
                "name": "submit_query_plan",
                "description": "Submit a plan of database queries based on the research statement",
                "parameters": {
                    "type": "object",
                    "properties": plan_properties,
                    "required": plan_required
                }
            }
        }
    ]


# Define planner tool schemas
# ORIGINAL asks for per-query thoughts plus an overall strategy;
# OPTIMIZED asks only for the database and query text.
ORIGINAL_PLANNER_TOOL = _build_planner_tool(include_explanations=True)

OPTIMIZED_PLANNER_TOOL = _build_planner_tool(include_explanations=False)

# Define planner prompt
# System prompt sent as the first message of every planner test call, with
# both the original and the optimized tool schema.
# NOTE(review): the prompt asks for a thought process / explanation per query
# (analysis instruction 4 and the last line of OUTPUT REQUIREMENTS), but
# OPTIMIZED_PLANNER_TOOL defines no `thought` field. The optimized run is
# therefore instructed to produce explanations its schema cannot carry --
# confirm this is intended for the comparison.
PLANNER_PROMPT = """You are an expert query planning agent specializing in accounting research. You are responsible for planning database research queries based on a research statement.

# ANALYSIS INSTRUCTIONS
At the beginning of each request, you MUST:
1. Analyze the research statement
2. Determine which databases are most relevant
3. Create specific queries for each relevant database
4. Provide a clear thought process for each query

# QUERY PLANNING STRATEGY
For thorough research, consider multiple angles:
- Start with authoritative sources (CAPM)
- Include practical implementation guidance (Wiki)
- Add visual aids when helpful (Infographics)
- Use specific accounting terms and standards in queries
- Split complex questions into multiple focused queries
- Prioritize queries by relevance and authority

# CONTINUATION HANDLING
If the research statement indicates this is a continuation request:
- Reference previous research context
- Focus on filling gaps in previous research
- Avoid duplicating previously successful queries
- Target areas where more depth is needed

# OUTPUT REQUIREMENTS
You MUST use the provided tool to submit your query plan.
You should create 1-5 queries depending on the complexity of the research statement.
Each query must include a database name, specific query text, and explanation.
"""

# Create test research statements
# Each (name, statement) pair below becomes one {"name", "statement"} test case;
# the name is the display label and the statement is the text sent to the planner.
research_statements = [
    {"name": case_name, "statement": case_text}
    for case_name, case_text in (
        (
            "IFRS 15 Revenue Recognition",
            "Research IFRS 15 revenue recognition principles for software subscription services, focusing on performance obligations, contract modifications, and recognition timing.",
        ),
        (
            "Lease Accounting",
            "Research the accounting treatment for operating leases under IFRS 16 from the lessee perspective, including initial recognition, subsequent measurement, and disclosure requirements.",
        ),
        (
            "Intangible Assets",
            "Research the accounting criteria for recognizing and measuring internally generated intangible assets under IAS 38, with particular focus on development costs in the technology sector.",
        ),
        (
            "Financial Instruments",
            "Research the classification and measurement requirements for financial assets under IFRS 9, including the business model assessment and SPPI test for debt instruments.",
        ),
    )
]

def test_planner_prompt(research_statement, tool_definition):
    """Run the planner prompt for one research statement with one tool schema.

    Args:
        research_statement: Dict whose "statement" value is sent to the model
            as the user message.
        tool_definition: Tool schema list passed to `call_llm` as `tools`.
            The call forces the `submit_query_plan` function via `tool_choice`.

    Returns:
        Dict with:
            "arguments": the tool call's arguments parsed from JSON,
            "time": wall-clock seconds spent in `call_llm`,
            "prompt_tokens", "completion_tokens", "total_tokens": usage
            counters read from the response.

    Raises:
        ValueError: If the response carries no tool call.
        json.JSONDecodeError: If the tool call arguments are not valid JSON.
    """
    # Add database information
    db_lines = ["Available Databases:"]
    for db_name, db_data in AVAILABLE_DATABASES.items():
        db_lines.append(f"- {db_name}: {db_data['description']}")
        db_lines.append(f"  Use when: {db_data['use_when']}")
    db_info = "\n".join(db_lines) + "\n"

    # Prepare messages
    # The database list goes in a second system message placed after the
    # user message; this ordering is kept from the original test.
    messages = [
        {"role": "system", "content": PLANNER_PROMPT},
        {"role": "user", "content": f"Research Statement: {research_statement['statement']}"},
        {"role": "system", "content": db_info}
    ]

    # Make the API call
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    response = call_llm(
        oauth_token=token,
        model=MODEL_NAME,
        messages=messages,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        tools=tool_definition,
        tool_choice={"type": "function", "function": {"name": "submit_query_plan"}},
        stream=False,
        prompt_token_cost=PROMPT_TOKEN_COST,
        completion_token_cost=COMPLETION_TOKEN_COST
    )
    elapsed_time = time.perf_counter() - start_time

    # Extract the tool call
    # Fail with a clear message rather than an opaque TypeError/IndexError
    # when the model response contains no tool call.
    tool_calls = getattr(response.choices[0].message, "tool_calls", None)
    if not tool_calls:
        raise ValueError(
            "Planner response did not include a submit_query_plan tool call"
        )
    arguments = json.loads(tool_calls[0].function.arguments)

    # Return result with timing and token info
    usage = response.usage
    return {
        "arguments": arguments,
        "time": elapsed_time,
        "prompt_tokens": usage.prompt_tokens,
        "completion_tokens": usage.completion_tokens,
        "total_tokens": usage.total_tokens
    }

# Functions to display planner results
def _render_planner_column(result, title, title_color, column_style, footer_html=""):
    """Render one planner result (original or optimized) as an HTML column.

    Args:
        result: Dict with "arguments" (containing "queries" and optionally
            "overall_strategy"), "time", "total_tokens", "prompt_tokens" and
            "completion_tokens".
        title: Column heading text (trusted, inserted as-is).
        title_color: CSS color for the heading.
        column_style: Inline CSS for the column's outer div.
        footer_html: Trusted HTML appended after the metrics box.

    Returns:
        The column's HTML as a string. All model-generated text is escaped.
    """
    # stdlib; imported locally so this cell adds no new top-level import.
    from html import escape

    arguments = result['arguments']
    queries = arguments['queries']

    parts = [
        f'<div style="{column_style}">',
        f'<h4 style="margin-top: 0; color: {title_color};">{title}</h4>',
        f'<p><strong>Queries:</strong> {len(queries)}</p>',
    ]

    # Add overall strategy if it exists
    if 'overall_strategy' in arguments:
        strategy_text = escape(str(arguments['overall_strategy']))
        parts.append(
            '<p><strong>Overall Strategy:</strong></p>'
            '<div style="background-color: #f9f9f9; padding: 10px; border-radius: 5px; '
            'max-height: 100px; overflow-y: auto; margin-bottom: 15px;">'
            f'{strategy_text}'
            '</div>'
        )

    # Add queries inside a scrollable container
    parts.append('<div style="max-height: 350px; overflow-y: auto;">')
    for position, query in enumerate(queries, start=1):
        db_name = query['database']
        if db_name == "internal_capm":
            db_color = "#4285F4"
        elif db_name == "internal_wiki":
            db_color = "#EA4335"
        else:
            db_color = "#FBBC05"

        db_label = escape(str(db_name))
        query_text = escape(str(query['query']))
        parts.append(
            '<div style="background-color: #f9f9f9; padding: 10px; border-radius: 5px; '
            f'margin-bottom: 10px; border-left: 4px solid {db_color};">'
            f'<p style="margin: 0;"><strong>Query {position} - {db_label}</strong></p>'
            '<p style="margin: 5px 0; font-family: monospace; background-color: #f0f0f0; '
            f'padding: 5px; border-radius: 3px;">{query_text}</p>'
        )

        if 'thought' in query:
            thought_text = escape(str(query['thought']))
            parts.append(
                '<p style="margin: 5px 0 0 0; font-size: 12px; color: #666;">'
                f'<em>Thought: {thought_text}</em></p>'
            )

        parts.append('</div>')
    parts.append('</div>')  # Close the scrollable div

    # Add metrics
    elapsed = result['time']
    total_tokens = result['total_tokens']
    prompt_tokens = result['prompt_tokens']
    completion_tokens = result['completion_tokens']
    parts.append(
        '<div style="margin-top: 15px; background-color: #f0f0f0; padding: 10px; border-radius: 5px;">'
        f'<p style="margin: 0;"><strong>Time:</strong> {elapsed:.2f}s</p>'
        f'<p style="margin: 5px 0 0 0;"><strong>Tokens:</strong> {total_tokens} '
        f'(Prompt: {prompt_tokens}, Completion: {completion_tokens})</p>'
        '</div>'
    )

    parts.append(footer_html)
    parts.append('</div>')  # Close the column div
    return "\n".join(parts)


def display_planner_results(research_statement, original_result, optimized_result):
    """Display a side-by-side comparison of original and optimized planner results.

    Args:
        research_statement: Dict with "name" and "statement" keys.
        original_result: Result dict for the original tool schema.
        optimized_result: Result dict for the optimized tool schema.

    Both result dicts must carry "arguments", "time", "total_tokens",
    "prompt_tokens" and "completion_tokens". Model-generated and
    statement text is HTML-escaped before rendering, so markup in a model
    response is shown literally instead of being interpreted by the browser.
    """
    # stdlib; imported locally so this cell adds no new top-level import.
    from html import escape

    # Calculate improvement (positive diff means the optimized run did better)
    time_diff = original_result['time'] - optimized_result['time']
    time_percent = (time_diff / original_result['time']) * 100 if original_result['time'] > 0 else 0

    token_diff = original_result['total_tokens'] - optimized_result['total_tokens']
    token_percent = (token_diff / original_result['total_tokens']) * 100 if original_result['total_tokens'] > 0 else 0

    # Pick the label from the sign so a regression is not reported as
    # "-0.50s faster"; magnitudes are shown as absolute values.
    time_word = "faster" if time_diff >= 0 else "slower"
    token_word = "fewer" if token_diff >= 0 else "more"

    improvement_html = (
        '<div style="margin-top: 15px; background-color: #e6f4e6; padding: 10px; border-radius: 5px;">'
        '<p style="margin: 0;"><strong>Improvement:</strong></p>'
        f'<p style="margin: 5px 0 0 0;">🕒 Time: {abs(time_diff):.2f}s {time_word} ({abs(time_percent):.1f}%)</p>'
        f'<p style="margin: 5px 0 0 0;">🔤 Tokens: {abs(token_diff)} {token_word} ({abs(token_percent):.1f}%)</p>'
        '</div>'
    )

    original_column = _render_planner_column(
        original_result,
        title="Original Planner",
        title_color="#4285F4",
        column_style="flex: 1; padding: 15px; border-right: 1px solid #ddd;",
    )
    optimized_column = _render_planner_column(
        optimized_result,
        title="Optimized Planner",
        title_color="#34a853",
        column_style="flex: 1; padding: 15px;",
        footer_html=improvement_html,
    )

    statement_name = escape(str(research_statement['name']))
    statement_text = escape(str(research_statement['statement']))

    page = f"""
    <div style="margin-bottom: 30px; border: 1px solid #ddd; border-radius: 8px; overflow: hidden;">
        <div style="background-color: #f5f5f5; padding: 10px; border-bottom: 1px solid #ddd;">
            <h3 style="margin: 0;">Research: {statement_name}</h3>
            <p style="margin: 5px 0 0 0; font-style: italic;">Statement: "{statement_text}"</p>
        </div>

        <div style="display: flex;">
            <!-- Original Result -->
            {original_column}

            <!-- Optimized Result -->
            {optimized_column}
        </div>
    </div>
    """

    display(HTML(page))

In [15]:
# Run tests for the planner agent
print("Testing Planner Agent Prompts...\n")

for statement in research_statements:
    print(f"Testing: {statement['name']}")

    # Same statement through both schemas: original first, then optimized
    original_result, optimized_result = [
        test_planner_prompt(statement, tool_schema)
        for tool_schema in (ORIGINAL_PLANNER_TOOL, OPTIMIZED_PLANNER_TOOL)
    ]

    # Render the side-by-side comparison for this statement
    display_planner_results(statement, original_result, optimized_result)
    print("---")

2025-04-01 12:59:58,247 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 12:59:58,248 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 12:59:58,248 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


Testing Planner Agent Prompts...

Testing: IFRS 15 Revenue Recognition


2025-04-01 13:00:01,668 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 13:00:01,671 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 3.42 seconds
2025-04-01 13:00:01,672 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 217 ($0.0000), Prompt: 502 ($0.0000), Total: 719 tokens, Total Cost: $0.0000
2025-04-01 13:00:01,684 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 13:00:01,685 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 13:00:01,685 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 13:00:04,229 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 13:00:04,232 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 2.55 seconds
2025-04

2025-04-01 13:00:04,246 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 13:00:04,247 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 13:00:04,247 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


---
Testing: Lease Accounting


2025-04-01 13:00:08,061 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 13:00:08,065 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 3.82 seconds
2025-04-01 13:00:08,066 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 251 ($0.0000), Prompt: 506 ($0.0000), Total: 757 tokens, Total Cost: $0.0000
2025-04-01 13:00:08,082 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 13:00:08,083 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 13:00:08,083 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 13:00:10,064 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 13:00:10,067 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 1.98 seconds
2025-04

2025-04-01 13:00:10,081 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 13:00:10,082 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 13:00:10,082 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


---
Testing: Intangible Assets


2025-04-01 13:00:14,264 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 13:00:14,267 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 4.18 seconds
2025-04-01 13:00:14,268 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 215 ($0.0000), Prompt: 505 ($0.0000), Total: 720 tokens, Total Cost: $0.0000
2025-04-01 13:00:14,281 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 13:00:14,282 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 13:00:14,282 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 13:00:16,619 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 13:00:16,622 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 2.34 seconds
2025-04

2025-04-01 13:00:16,637 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 13:00:16,637 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 13:00:16,638 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API


---
Testing: Financial Instruments


2025-04-01 13:00:20,639 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 13:00:20,643 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 4.01 seconds
2025-04-01 13:00:20,644 - iris.src.llm_connectors.rbc_openai - INFO - Token usage - Completion: 224 ($0.0000), Prompt: 505 ($0.0000), Total: 729 tokens, Total Cost: $0.0000
2025-04-01 13:00:20,656 - iris.src.llm_connectors.rbc_openai - INFO - Using API key: sk-proj...
2025-04-01 13:00:20,657 - iris.src.llm_connectors.rbc_openai - INFO - Making non-streaming call to model: gpt-4o-mini-2024-07-18 with tools in local environment
2025-04-01 13:00:20,657 - iris.src.llm_connectors.rbc_openai - INFO - Attempt 1/3: Sending request to OpenAI API
2025-04-01 13:00:22,558 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-04-01 13:00:22,561 - iris.src.llm_connectors.rbc_openai - INFO - Received response in 1.90 seconds
2025-04

---


## Overall Optimization Summary

In [16]:
# Display summary of optimizations
# NOTE: this is a static HTML summary. The figures in the "Estimated Overall
# Improvement" section are literal text in this string; nothing in this cell
# computes them from the test results produced by the cells above.
summary_html = """
<div style="background-color: #f5f5f5; padding: 20px; border-radius: 10px; margin: 20px 0;">
    <h2 style="color: #4285F4; text-align: center;">Agent Optimization Summary</h2>
    
    <div style="margin-top: 20px;">
        <h3>Optimization Strategy:</h3>
        <p>We optimized the agent workflows by removing unnecessary thought fields and explanations from agent responses, focusing only on the essential data needed for decision-making. This approach yields several benefits:</p>
        
        <ul>
            <li><strong>Reduced Token Usage:</strong> By eliminating verbose explanations that are not shown to the user, we significantly reduced token consumption</li>
            <li><strong>Faster Response Times:</strong> Less token generation means faster responses to user queries</li>
            <li><strong>Simplified Processing:</strong> Agent outputs contain only what's needed for the next step in the workflow</li>
            <li><strong>Maintained Functionality:</strong> The core decision-making behavior is preserved despite the reduced verbosity</li>
        </ul>
    </div>
    
    <div style="margin-top: 20px;">
        <h3>Changes Made:</h3>
        
        <div style="display: flex; justify-content: space-between; margin-top: 15px;">
            <div style="flex: 1; background-color: white; padding: 15px; border-radius: 8px; margin-right: 10px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);">
                <h4 style="color: #4285F4; margin-top: 0;">Router Agent</h4>
                <p><strong>Before:</strong> Returns function_name and thought</p>
                <p><strong>After:</strong> Returns only function_name</p>
                <p><em>Benefit: Removes detailed explanation of routing decision that isn't shown to users</em></p>
            </div>
            
            <div style="flex: 1; background-color: white; padding: 15px; border-radius: 8px; margin: 0 10px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);">
                <h4 style="color: #EA4335; margin-top: 0;">Clarifier Agent</h4>
                <p><strong>Before:</strong> Returns action, thought, output, and is_continuation</p>
                <p><strong>After:</strong> Returns action, output, and is_continuation</p>
                <p><em>Benefit: Removes verbose explanation while keeping question prompts and research statements</em></p>
            </div>
            
            <div style="flex: 1; background-color: white; padding: 15px; border-radius: 8px; margin-left: 10px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);">
                <h4 style="color: #FBBC05; margin-top: 0;">Planner Agent</h4>
                <p><strong>Before:</strong> Returns queries (with thoughts) and overall_strategy</p>
                <p><strong>After:</strong> Returns only queries without thoughts</p>
                <p><em>Benefit: Removes explanations for each query and overall strategy that's not needed for query execution</em></p>
            </div>
        </div>
    </div>
    
    <div style="margin-top: 25px; text-align: center;">
        <h3 style="color: #34a853;">Estimated Overall Improvement</h3>
        <p style="font-size: 18px;">15-25% reduction in token usage and 10-20% faster response times</p>
        <p style="font-style: italic;">Exact improvements vary by query complexity and conversation context</p>
    </div>
</div>
"""

# Render the summary as rich HTML output in the notebook
display(HTML(summary_html))