# Agent Orchestrator Prototype Notebook

**Purpose**: Fast prototyping and iterative development of agent workflows

This notebook allows you to:
- Test individual agents in isolation
- Walk through complete workflows step-by-step 
- Measure performance and analyze results
- Prototype new workflow architectures
- Debug agent interactions

Based on the principles from:
- [How to refactor a Jupyter notebook](https://medium.com/data-science/how-to-refactor-a-jupyter-notebook-ed531b6a17)
- [Prototyping with Python](https://www.fuzzingbook.org/beta/html/PrototypingWithPython.html)


## Setup and Imports


In [2]:
import sys
import os
import asyncio
import json
import time
from datetime import datetime
from typing import Dict, Any, List, Optional
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML, Markdown
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation warnings raised by project code.
warnings.filterwarnings('ignore')

# Make the project importable: put the current working directory at the front
# of sys.path. This only resolves to the project root when the kernel was
# started from that directory.
project_root = os.path.abspath('.')
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Status lines so the reader can confirm the setup cell ran.
print(f"📁 Project root: {project_root}")
print(f"🐍 Python path updated")
print(f"📊 Data analysis packages loaded successfully")


📁 Project root: /Users/aq_home/1Projects/accessa/insurance_navigator
🐍 Python path updated
📊 Data analysis packages loaded successfully


## Import Project Components


In [3]:
# Import orchestrator and agents
try:
    from graph.agent_orchestrator import AgentOrchestrator, get_orchestrator, reset_orchestrator
    from agents import (
        PromptSecurityAgent,
        PatientNavigatorAgent, 
        TaskRequirementsAgent,
        ServiceAccessStrategyAgent,
        ChatCommunicatorAgent,
        RegulatoryAgent
    )
    from utils.config_manager import ConfigManager
    print("✅ All imports successful")
except ImportError as e:
    print(f"❌ Import error: {e}")
    print("Please ensure you're running from the project root directory")


🔧 Using local database: postgresql://aq_home@[host]
✅ All imports successful


## Configuration and Global Variables


In [4]:
# Global configuration shared by the cells below.
# BYPASS_SECURITY is forwarded to get_orchestrator(bypass_security=...).
BYPASS_SECURITY = True  # Set to False for production testing
TEST_USER_ID = "prototype_user_001"
DEFAULT_CONVERSATION_ID = "prototype_conv_001"

# Performance tracking.
# performance_data is appended to by record_performance(); re-running this cell
# resets both lists.
# NOTE(review): test_results is not written by any cell visible here — confirm it is still used.
performance_data = []
test_results = []

# Test scenarios: scenario name -> sample user message.
TEST_SCENARIOS = {
    "simple_qa": "What does Medicare cover?",
    "doctor_search": "I need to find a cardiologist in Seattle with Blue Cross insurance",
    "incomplete_info": "I need a doctor",
    "emergency": "I'm having chest pain and need immediate care",
    "complex_request": "I need an X-ray for my back pain, I have Medicare Part B, live in Portland Oregon"
}

# Echo the effective configuration so it is captured in the cell output.
print(f"🔧 Configuration loaded")
print(f"🛡️ Security bypass: {BYPASS_SECURITY}")
print(f"👤 Test user ID: {TEST_USER_ID}")
print(f"💬 Test scenarios: {len(TEST_SCENARIOS)} loaded")


🔧 Configuration loaded
🛡️ Security bypass: True
👤 Test user ID: prototype_user_001
💬 Test scenarios: 5 loaded


## Utility Functions for Analysis


In [5]:
def format_timestamp(timestamp=None):
    """Render a timestamp as an ``HH:MM:SS`` string.

    Args:
        timestamp: Object providing ``strftime`` (e.g. a ``datetime``). When
            ``None``, the current local time is used.

    Returns:
        The formatted time string.
    """
    moment = datetime.now() if timestamp is None else timestamp
    return moment.strftime("%H:%M:%S")

def print_section_header(title, emoji="🔬"):
    """Print a section title preceded by a blank line and underlined with ``=``.

    Args:
        title: Heading text.
        emoji: Prefix shown before the title.
    """
    heading = f"\n{emoji} {title}"
    # Underline is three characters longer than the title itself.
    underline = "=" * (3 + len(title))
    print(heading)
    print(underline)

def record_performance(test_name, start_time, end_time, success, metadata=None):
    """Append one timing record to the module-level ``performance_data`` list.

    Args:
        test_name: Label for the measured run.
        start_time: Start marker; ``end_time - start_time`` is stored as the duration.
        end_time: End marker.
        success: Outcome flag stored as-is.
        metadata: Optional extra details; a falsy value is stored as ``{}``.

    Returns:
        The record dict that was appended.
    """
    entry = dict(
        test_name=test_name,
        timestamp=datetime.now(),
        duration_seconds=end_time - start_time,
        success=success,
        metadata=metadata if metadata else {},
    )
    performance_data.append(entry)
    return entry

def display_agent_result(agent_name, result, execution_time=None):
    """Print a compact, human-readable summary of an agent result.

    Args:
        agent_name: Display name; printed upper-cased as the heading.
        result: Agent output. Dicts are printed one ``key: value`` per line,
            skipping the verbose ``raw_response`` and ``full_output`` fields;
            anything else is printed as a single value.
        execution_time: Elapsed seconds, or ``None`` to omit the timing line.
    """
    verbose_fields = ('raw_response', 'full_output')

    print(f"\n🤖 {agent_name.upper()}")
    print("-" * (len(agent_name) + 3))

    # Compare against None rather than truthiness so a measured 0.0s run is
    # still reported instead of silently dropped.
    if execution_time is not None:
        print(f"⏱️ Execution time: {execution_time:.3f}s")

    if isinstance(result, dict):
        for key, value in result.items():
            if key not in verbose_fields:
                print(f"📊 {key}: {value}")
    else:
        print(f"📝 Result: {result}")

def analyze_workflow_performance(workflow_data):
    """Summarise recorded performance entries.

    Args:
        workflow_data: Records carrying ``success`` and ``duration_seconds``
            fields, in a form accepted by ``pd.DataFrame``.

    Returns:
        The string ``"No performance data available"`` when ``workflow_data``
        is falsy; otherwise a dict with the test count, success rate in
        percent, and mean / max / min duration.
    """
    if not workflow_data:
        return "No performance data available"

    frame = pd.DataFrame(workflow_data)
    durations = frame['duration_seconds']

    return {
        'total_tests': len(frame),
        'success_rate': frame['success'].mean() * 100,
        'avg_duration': durations.mean(),
        'max_duration': durations.max(),
        'min_duration': durations.min(),
    }

# Status line confirming the utility-function cell has been executed.
print("🛠️ Utility functions loaded")


🛠️ Utility functions loaded


## Initialize Orchestrator


In [6]:
# Reset and initialize the orchestrator used by every test cell below.
# Defines the notebook-global `orchestrator`; if this cell fails, later cells
# that reference it will raise NameError.
print_section_header("Initializing Agent Orchestrator", "🚀")

try:
    # Reset any existing instance so re-running this cell starts clean
    reset_orchestrator()
    print("🔄 Orchestrator reset")
    
    # Create new instance with the security setting from the configuration cell
    orchestrator = get_orchestrator(bypass_security=BYPASS_SECURITY)
    print("✅ Orchestrator initialized")
    print(f"🛡️ Security bypass: {orchestrator.bypass_security}")
    
    # Smoke test: route a trivial message.
    # NOTE(review): this calls an underscore-prefixed method of the orchestrator;
    # it may change without notice.
    test_message = "Hello, test"
    workflow_type = orchestrator._determine_workflow_type(test_message)
    print(f"🧪 Test workflow determination: '{test_message}' → {workflow_type}")
    
except Exception as e:
    # Report the failure with a full traceback but keep the notebook running.
    print(f"❌ Orchestrator initialization failed: {e}")
    import traceback
    traceback.print_exc()


🚫 ORCHESTRATOR: Security bypass is ENABLED for all workflows



🚀 Initializing Agent Orchestrator
🔄 Orchestrator reset
✅ Orchestrator initialized
🛡️ Security bypass: True
🧪 Test workflow determination: 'Hello, test' → strategy_request


## Individual Agent Testing

Test each agent in isolation to understand its behavior and performance.


In [None]:
# Test individual agents in isolation
async def test_individual_agent(agent_name, test_message="I need to find a cardiologist", **kwargs):
    """Test a specific agent individually with appropriate inputs"""
    print_section_header(f"Testing {agent_name} Agent", "🧪")
    print(f"📝 Message: {test_message}")
    
    test_user_id = TEST_USER_ID
    test_conversation_id = f"test_{int(time.time())}"
    
    start_time = time.time()
    
    try:
        if agent_name == "prompt_security":
            result = await orchestrator.prompt_security_agent.check_prompt_security(
                test_message, test_user_id
            )
            display_data = {
                "is_safe": result.is_safe,
                "risk_score": getattr(result, 'risk_score', 'N/A'),
                "issues": getattr(result, 'issues', [])
            }
            
        elif agent_name == "patient_navigator":
            result = await orchestrator.patient_navigator_agent.analyze_request(
                test_message, test_conversation_id
            )
            display_data = {
                "intent_type": result.intent_type,
                "confidence_score": result.confidence_score,
                "meta_intent": result.analysis_details.get("meta_intent", {}),
                "service_intent": result.analysis_details.get("service_intent", {})
            }
            
        elif agent_name == "task_requirements":
            # Requires navigator analysis as input
            navigator_analysis = kwargs.get('navigator_analysis', {
                "meta_intent": {"request_type": "find_provider", "location": "Seattle"},
                "service_intent": {"specialty": "cardiology"}
            })
            result = await orchestrator.task_requirements_agent.analyze_requirements_structured(
                navigator_analysis, test_message
            )
            display_data = {
                "requirements_count": result.requirements_count,
                "documents_needed": result.documents_needed,
                "status": getattr(result, 'status', 'N/A'),
                "missing_context": getattr(result, 'missing_context', [])
            }
            
        elif agent_name == "service_access_strategy":
            # Requires patient info and policy info
            patient_info = kwargs.get('patient_info', {
                "user_id": test_user_id,
                "conversation_id": test_conversation_id,
                "intent": "find_provider",
                "original_message": test_message,
                "location": "Seattle",
                "insurance": "Medicare"
            })
            policy_info = kwargs.get('policy_info', {
                "policy_type": "Medicare",
                "plan_name": "Medicare Plan",
                "coverage_focus": "policy_navigation"
            })
            result = orchestrator.service_access_strategy_agent.develop_strategy(
                patient_info=patient_info,
                medical_need=test_message,
                policy_info=policy_info,
                location=patient_info.get("location"),
                constraints=""
            )
            display_data = {
                "recommended_service": result.get("recommended_service", "N/A"),
                "estimated_timeline": result.get("estimated_timeline", "N/A"),
                "action_plan_steps": len(result.get("action_plan", [])),
                "confidence": result.get("confidence", "N/A")
            }
            
        elif agent_name == "regulatory":
            # Requires strategy result as input
            strategy_result = kwargs.get('strategy_result', {"recommended_service": "cardiology"})
            result = await orchestrator.regulatory_agent.check_compliance(
                test_message, strategy_result
            )
            if isinstance(result, dict):
                compliance_status = result.get("status", "unknown")
                regulations_count = result.get("regulations_count", 0)
            else:
                compliance_status = getattr(result, 'status', 'unknown')
                regulations_count = getattr(result, 'regulations_count', 0)
            
            display_data = {
                "compliance_status": compliance_status,
                "regulations_checked": regulations_count
            }
            
        elif agent_name == "chat_communicator":
            # Requires full state as input
            state = kwargs.get('state', {
                "message": test_message,
                "user_id": test_user_id,
                "conversation_id": test_conversation_id,
                "workflow_type": "strategy_request",
                "metadata": {}
            })
            result = await orchestrator.chat_communicator_agent.generate_response(state)
            display_data = {
                "response_type": result.get("response_type", "N/A"),
                "response_length": len(result.get("response_text", "")),
                "confidence": result.get("confidence", "N/A")
            }
        else:
            print(f"❌ Unknown agent: {agent_name}")
            return None
            
        end_time = time.time()
        execution_time = end_time - start_time
        
        display_agent_result(agent_name.replace("_", " ").title(), display_data, execution_time)
        record_performance(agent_name, start_time, end_time, True)
        
        print(f"\n✅ {agent_name} agent test complete")
        return result
        
    except Exception as e:
        end_time = time.time()
        print(f"❌ Error testing {agent_name}: {e}")
        record_performance(agent_name, start_time, end_time, False)
        return None

# Helper functions for specific agent tests
async def test_prompt_security(message="I need to find a cardiologist"):
    """Run ``test_individual_agent`` for the ``prompt_security`` agent with ``message``."""
    outcome = await test_individual_agent("prompt_security", message)
    return outcome

async def test_patient_navigator(message="I need to find a cardiologist"):
    """Run ``test_individual_agent`` for the ``patient_navigator`` agent with ``message``."""
    outcome = await test_individual_agent("patient_navigator", message)
    return outcome

async def test_task_requirements(message="I need to find a cardiologist", navigator_analysis=None):
    """Run ``test_individual_agent`` for the ``task_requirements`` agent.

    ``navigator_analysis`` is forwarded unchanged as a keyword argument.
    """
    overrides = {"navigator_analysis": navigator_analysis}
    return await test_individual_agent("task_requirements", message, **overrides)

async def test_service_access_strategy(message="I need to find a cardiologist", patient_info=None, policy_info=None):
    """Run ``test_individual_agent`` for the ``service_access_strategy`` agent.

    ``patient_info`` and ``policy_info`` are forwarded unchanged as keyword arguments.
    """
    overrides = {"patient_info": patient_info, "policy_info": policy_info}
    return await test_individual_agent("service_access_strategy", message, **overrides)

async def test_regulatory(message="I need to find a cardiologist", strategy_result=None):
    """Run ``test_individual_agent`` for the ``regulatory`` agent.

    ``strategy_result`` is forwarded unchanged as a keyword argument.
    """
    overrides = {"strategy_result": strategy_result}
    return await test_individual_agent("regulatory", message, **overrides)

async def test_chat_communicator(message="I need to find a cardiologist", state=None):
    """Run ``test_individual_agent`` for the ``chat_communicator`` agent.

    ``state`` is forwarded unchanged as a keyword argument.
    """
    overrides = {"state": state}
    return await test_individual_agent("chat_communicator", message, **overrides)

# Test all agents with a sample message
async def test_all_agents_individually(test_message="I need to find a cardiologist"):
    """Run every per-agent test helper once and print a pass/fail summary.

    Args:
        test_message: User message passed to each agent test.

    Returns:
        Dict mapping agent name to the value its test helper returned. In the
        summary, a helper result of ``None`` counts as failed.
    """
    print_section_header("Testing All Agents Individually", "🧪")

    results = {}

    # Test each agent with appropriate inputs
    results['prompt_security'] = await test_prompt_security(test_message)
    results['patient_navigator'] = await test_patient_navigator(test_message)

    # Reuse the navigator's analysis for the requirements test when available.
    # getattr keeps the summary running if the result has no such attribute.
    navigator_result = results['patient_navigator']
    navigator_analysis = None
    if navigator_result is not None:
        navigator_analysis = getattr(navigator_result, 'analysis_details', None)

    results['task_requirements'] = await test_task_requirements(test_message, navigator_analysis)
    results['service_access_strategy'] = await test_service_access_strategy(test_message)
    results['regulatory'] = await test_regulatory(test_message)
    results['chat_communicator'] = await test_chat_communicator(test_message)

    print(f"\n🎯 Individual agent testing summary:")
    for agent_name, result in results.items():
        # Compare with None, not truthiness: an empty-but-valid result such as
        # {} is still a completed run.
        status = "✅ Success" if result is not None else "❌ Failed"
        print(f"   • {agent_name}: {status}")

    return results

# Run all individual agent tests with the default message and keep the
# per-agent results in `individual_results` (top-level await: notebook only).
print("🧪 Running Individual Agent Tests")
individual_results = await test_all_agents_individually()



🧪 Individual Agent Testing

1️⃣ Testing Prompt Security Agent

🤖 PROMPT SECURITY
------------------
⏱️ Execution time: 0.001s
📊 is_safe: True
📊 risk_score: N/A
📊 issues: []

2️⃣ Testing Patient Navigator Agent

🤖 PATIENT NAVIGATOR
--------------------
⏱️ Execution time: 7.932s
📊 intent_type: provider_search
📊 confidence_score: 0.8
📊 analysis_details: {'meta_intent': {'request_type': 'provider_search', 'summary': 'User needs to find a cardiologist but lacks essential details', 'emergency': False, 'location': None, 'insurance': None}, 'clinical_context': {'symptom': None, 'body': {'region': 'cardiovascular', 'side': None, 'subpart': None}, 'onset': None, 'duration': None}, 'service_intent': {'specialty': 'cardiology', 'service': 'provider_search', 'plan_detail_type': None}, 'metadata': {'raw_user_text': 'I need to find a cardiologist', 'user_response_created': "I can help you find a cardiologist. To provide the most accurate results, I'll need some additional information such as your lo

In [None]:
# Test complete workflows by pulling from agent_orchestrator
async def test_workflow(workflow_type, test_message="I need to find a cardiologist"):
    """Test a specific workflow from agent_orchestrator"""
    print_section_header(f"Testing {workflow_type} Workflow", "🔄")
    print(f"📝 Message: {test_message}")
    
    test_user_id = TEST_USER_ID
    test_conversation_id = f"workflow_test_{int(time.time())}"
    
    # Initialize state like agent_orchestrator does
    state = {
        "message": test_message,
        "user_id": test_user_id,
        "conversation_id": test_conversation_id,
        "intent": "",
        "workflow_type": workflow_type,
        "response_text": "",
        "metadata": {},
        "security_check_passed": False,
        "error": None
    }
    
    start_time = time.time()
    
    try:
        # Execute the appropriate compiled workflow from orchestrator
        if workflow_type == "strategy_request":
            print("🎯 Executing strategy request workflow...")
            print("   Steps: Security → Navigator → Task Requirements → Strategy → Regulatory → Chat")
            result = await orchestrator.compiled_strategy_workflow.ainvoke(state)
        elif workflow_type == "navigator_only":
            print("❓ Executing navigator-only workflow...")
            print("   Steps: Security → Navigator Q&A → Chat")
            result = await orchestrator.compiled_navigator_workflow.ainvoke(state)
        else:
            print(f"❌ Unknown workflow type: {workflow_type}")
            return None
            
        end_time = time.time()
        execution_time = end_time - start_time
        
        # Display results
        print(f"\n⏱️ Total execution time: {execution_time:.3f}s")
        print(f"✅ Workflow completed successfully")
        
        # Show final state
        print(f"\n📊 Final State:")
        print(f"   • Intent: {result.get('intent', 'N/A')}")
        print(f"   • Security passed: {result.get('security_check_passed', False)}")
        print(f"   • Error: {result.get('error') or 'None'}")
        print(f"   • Response generated: {'Yes' if result.get('response_text') else 'No'}")
        
        response_text = result.get('response_text', '')
        if response_text:
            print(f"   • Response length: {len(response_text)} characters")
            if len(response_text) > 200:
                print(f"   • Response preview: {response_text[:200]}...")
            else:
                print(f"   • Full response: {response_text}")
        
        # Show workflow-specific results
        if workflow_type == "strategy_request":
            strategy_result = result.get('strategy_result', {})
            if strategy_result:
                print(f"\n🎯 Strategy Results:")
                print(f"   • Service: {strategy_result.get('recommended_service', 'N/A')}")
                print(f"   • Timeline: {strategy_result.get('estimated_timeline', 'N/A')}")
                print(f"   • Confidence: {strategy_result.get('confidence', 'N/A')}")
        
        # Record performance
        record_performance(f"workflow_{workflow_type}", start_time, end_time, not result.get('error'))
        
        return result
        
    except Exception as e:
        end_time = time.time()
        print(f"❌ Workflow error: {e}")
        record_performance(f"workflow_{workflow_type}", start_time, end_time, False)
        return None

# Test strategy request workflow
async def test_strategy_workflow(message="I need to find a cardiologist in Seattle"):
    """Run ``test_workflow`` for the ``strategy_request`` workflow with ``message``."""
    final_state = await test_workflow("strategy_request", message)
    return final_state

# Test navigator-only workflow  
async def test_navigator_workflow(message="What does Medicare cover?"):
    """Run ``test_workflow`` for the ``navigator_only`` workflow with ``message``."""
    final_state = await test_workflow("navigator_only", message)
    return final_state

# Test workflow routing (like agent_orchestrator does)
def test_workflow_routing():
    """Test how messages are routed to workflows by agent_orchestrator"""
    print_section_header("Workflow Routing Test", "🧭")
    
    test_messages = [
        ("I need to find a cardiologist", "Strategy Request"),
        ("Where can I get an X-ray?", "Strategy Request"), 
        ("What does Medicare cover?", "Navigator Q&A"),
        ("Explain copays", "Navigator Q&A"),
        ("I need help", "Ambiguous"),
        ("Find me a doctor in Seattle", "Strategy Request"),
    ]
    
    print("📋 Testing workflow routing decisions:")
    for message, expected_category in test_messages:
        # Use the actual orchestrator method
        workflow_type = orchestrator._determine_workflow_type(message)
        
        # Map to display categories
        if workflow_type == "strategy_request":
            category = "Strategy Request"
        elif workflow_type == "navigator_only":
            category = "Navigator Q&A" 
        else:
            category = "Unknown"
            
        status = "✅" if category == expected_category else "❌"
        print(f"   {status} '{message}' → {workflow_type} ({category})")
    
    return "Routing test complete"

# Test available workflow node methods
def _summarize_callable_doc(obj):
    """Return the first sentence of ``obj``'s docstring on one line, or ``"No description"``."""
    doc = getattr(obj, '__doc__', None)
    if not doc or not doc.strip():
        return "No description"
    first_sentence = doc.strip().split('.')[0]
    # Collapse newlines and indentation from multi-line docstrings.
    return " ".join(first_sentence.split())


def inspect_workflow_nodes():
    """List the orchestrator attributes that look like workflow nodes or decisions.

    Attributes whose names end in ``_node`` or ``_decision`` are printed with
    a one-line summary taken from their docstrings.

    Returns:
        Dict with ``node_methods`` and ``decision_methods`` name lists.
    """
    print_section_header("Available Workflow Nodes", "🔍")

    attribute_names = dir(orchestrator)
    node_methods = [name for name in attribute_names if name.endswith('_node')]
    decision_methods = [name for name in attribute_names if name.endswith('_decision')]

    print("📋 Available workflow nodes:")
    for name in node_methods:
        print(f"   • {name}: {_summarize_callable_doc(getattr(orchestrator, name))}")

    print(f"\n🤔 Available decision methods:")
    for name in decision_methods:
        print(f"   • {name}: {_summarize_callable_doc(getattr(orchestrator, name))}")

    return {
        'node_methods': node_methods,
        'decision_methods': decision_methods
    }

# Run the synchronous workflow checks; results are kept in notebook globals.
print("🔄 Testing Workflows from Agent Orchestrator")
print("=" * 50)

# Test routing first (prints one line per sample message)
routing_result = test_workflow_routing()

# Inspect available nodes and decision methods on the orchestrator
workflow_inspection = inspect_workflow_nodes()


In [None]:
# Test Strategy Request Workflow end to end with a provider-search message.
# Top-level await: runs only inside a notebook / IPython kernel.
print("🎯 Testing Strategy Request Workflow")
strategy_result = await test_strategy_workflow("I need to find a cardiologist in Seattle with Aetna insurance")


In [None]:
# Test Navigator-Only Workflow end to end with an informational question.
# Top-level await: runs only inside a notebook / IPython kernel.
print("❓ Testing Navigator-Only Workflow")
navigator_result = await test_navigator_workflow("What does Medicare Part B cover?")


In [None]:
# Test Workflow Node Methods Individually
async def test_workflow_node(node_name, state, test_message="I need to find a cardiologist"):
    """Test an individual workflow node method"""
    print_section_header(f"Testing {node_name} Node", "🔧")
    
    # Ensure state has required fields
    if "message" not in state:
        state["message"] = test_message
    if "user_id" not in state:
        state["user_id"] = TEST_USER_ID
    if "conversation_id" not in state:
        state["conversation_id"] = f"node_test_{int(time.time())}"
    
    try:
        # Get the node method from orchestrator
        node_method = getattr(orchestrator, node_name)
        
        print(f"📝 Testing {node_name} with message: {test_message}")
        print(f"📊 Input state keys: {list(state.keys())}")
        
        start_time = time.time()
        result_state = await node_method(state)
        end_time = time.time()
        
        print(f"⏱️ Execution time: {end_time - start_time:.3f}s")
        print(f"📊 Output state keys: {list(result_state.keys())}")
        
        # Show key changes
        new_keys = set(result_state.keys()) - set(state.keys())
        if new_keys:
            print(f"🆕 New state keys: {list(new_keys)}")
            for key in new_keys:
                value = result_state[key]
                if isinstance(value, (str, int, float, bool)):
                    print(f"   • {key}: {value}")
                elif isinstance(value, dict):
                    print(f"   • {key}: dict with {len(value)} keys")
                elif isinstance(value, list):
                    print(f"   • {key}: list with {len(value)} items")
                else:
                    print(f"   • {key}: {type(value).__name__}")
        
        # Show error if present
        if result_state.get('error'):
            print(f"❌ Error in state: {result_state['error']}")
        
        return result_state
        
    except Exception as e:
        print(f"❌ Error testing {node_name}: {e}")
        return None

# Test individual workflow nodes
def _clone_state(state):
    """Return an independent copy of a workflow state (deep when possible)."""
    import copy

    try:
        return copy.deepcopy(state)
    except Exception:
        # Some state values may not be deep-copyable; fall back to a shallow
        # copy rather than aborting the whole node run.
        return state.copy()


async def test_all_workflow_nodes():
    """Run the orchestrator's workflow nodes one by one, chaining their states.

    The strategy chain (security → navigator → task requirements → service
    strategy → regulatory) stops at the first node that returns nothing, or
    after the security node if it does not set ``security_check_passed``. The
    navigator Q&A node and the chat response node are then exercised
    separately. Every node receives its own copy of the state, including the
    nested ``metadata`` dict, so one node's in-place changes cannot leak into
    another node's input or into an earlier recorded result.

    Returns:
        Dict mapping a short node label to the state that node returned
        (``None`` when the node test failed).
    """
    print_section_header("Testing Individual Workflow Nodes", "🔧")

    # Base state for testing
    base_state = {
        "message": "I need to find a cardiologist in Seattle",
        "user_id": TEST_USER_ID,
        "conversation_id": f"nodes_test_{int(time.time())}",
        "intent": "",
        "workflow_type": "strategy_request",
        "response_text": "",
        "metadata": {},
        "security_check_passed": False,
        "error": None
    }

    node_results = {}

    # Strategy chain: each node runs on a copy of the previous node's output.
    strategy_chain = [
        ('security_check', "_security_check_node"),
        ('navigator_analysis', "_navigator_analysis_node"),
        ('task_requirements', "_task_requirements_node"),
        ('service_strategy', "_service_strategy_node"),
        ('regulatory_check', "_regulatory_check_node"),
    ]

    state = _clone_state(base_state)
    for position, (label, node_name) in enumerate(strategy_chain):
        node_results[label] = await test_workflow_node(node_name, state)
        previous = node_results[label]
        if not previous:
            break
        # Everything after the security node requires the check to have passed.
        if position == 0 and not previous.get('security_check_passed'):
            break
        state = _clone_state(previous)

    # Test navigator Q&A node separately
    qa_state = _clone_state(base_state)
    qa_state['security_check_passed'] = True  # Assume security passed
    node_results['navigator_qa'] = await test_workflow_node("_navigator_qa_node", qa_state)

    # Test chat response node with different states
    if node_results.get('regulatory_check'):
        final_state = _clone_state(node_results['regulatory_check'])
        node_results['chat_response_strategy'] = await test_workflow_node("_chat_response_node", final_state)

    if node_results.get('navigator_qa'):
        qa_final_state = _clone_state(node_results['navigator_qa'])
        node_results['chat_response_qa'] = await test_workflow_node("_chat_response_node", qa_final_state)

    print(f"\n🎯 Node testing summary:")
    for node_name, result in node_results.items():
        status = "✅ Success" if result and not result.get('error') else "❌ Failed"
        print(f"   • {node_name}: {status}")

    return node_results

# Run all node tests and keep the per-node states in `node_test_results`
# (top-level await: runs only inside a notebook / IPython kernel).
print("🔧 Testing Individual Workflow Nodes")
node_test_results = await test_all_workflow_nodes()


In [None]:
# Workflow Definition File Analysis and Proposal
def analyze_workflow_definition_extraction():
    """Print a proposal for moving workflow definitions out of the orchestrator.

    The report lists the orchestrator's workflow-related attribute names
    (grouped by naming pattern), the pros and cons of extraction, a proposed
    file layout, an example definition rendered as JSON, and next steps.

    Returns:
        Dict with the recommendation, the proposed file paths, the grouped
        orchestrator method names and the example workflow definition.
    """
    import json

    def emit(*lines):
        # Print each argument on its own line.
        for line in lines:
            print(line)

    print_section_header("Workflow Definition File Analysis", "📁")
    emit("🔍 CURRENT WORKFLOW STRUCTURE IN AGENT_ORCHESTRATOR:", "=" * 55)

    # Every non-dunder attribute name on the orchestrator.
    orchestrator_methods = [name for name in dir(orchestrator) if not name.startswith('__')]

    def pick(predicate):
        return [name for name in orchestrator_methods if predicate(name)]

    workflow_related = {
        'routing': pick(lambda name: 'workflow' in name or 'determine' in name),
        'nodes': pick(lambda name: name.endswith('_node')),
        'decisions': pick(lambda name: name.endswith('_decision')),
        'execution': pick(lambda name: 'execute' in name or 'build' in name),
    }

    for category, methods in workflow_related.items():
        emit(f"\n📋 {category.upper()} METHODS:", *(f"   • {method}" for method in methods))

    emit(
        "\n💡 EXTRACTION BENEFITS:",
        "=" * 22,
        "✅ Pros:",
        "   • Clearer separation of concerns",
        "   • Easier workflow modification without touching orchestrator",
        "   • Better version control for workflow changes",
        "   • Easier testing of workflow definitions",
        "   • Support for multiple workflow variants",
        "   • Configuration-driven workflow management",
    )

    emit(
        "\n⚠️ EXTRACTION CHALLENGES:",
        "=" * 25,
        "❌ Cons:",
        "   • Additional abstraction layer",
        "   • Need to maintain consistency between definitions and implementation",
        "   • Potential performance overhead",
        "   • More complex debugging",
    )

    emit("\n🏗️ PROPOSED ARCHITECTURE:", "=" * 24)

    proposed_structure = {
        "graph/workflow_definitions.py": [
            "WorkflowDefinition class",
            "Standard workflow templates",
            "Node configuration",
            "Edge configuration",
            "Decision logic definitions",
        ],
        "graph/workflow_builder.py": [
            "WorkflowBuilder class",
            "Dynamic workflow construction",
            "Validation logic",
            "LangGraph integration",
        ],
        "graph/agent_orchestrator.py": [
            "Agent coordination",
            "Node implementations",
            "State management",
            "Error handling",
        ],
    }

    for file_path, components in proposed_structure.items():
        emit(f"\n📄 {file_path}:", *(f"   • {component}" for component in components))

    emit("\n📝 EXAMPLE WORKFLOW DEFINITION STRUCTURE:", "=" * 40)

    strategy_nodes = [
        {"name": "security_check", "agent": "prompt_security", "required": True},
        {"name": "navigator_analysis", "agent": "patient_navigator", "required": True},
        {"name": "task_requirements", "agent": "task_requirements", "required": True},
        {"name": "service_strategy", "agent": "service_access_strategy", "required": False},
        {"name": "regulatory_check", "agent": "regulatory", "required": False},
        {"name": "chat_response", "agent": "chat_communicator", "required": True},
    ]
    strategy_edges = [
        {"from": "security_check", "to": "navigator_analysis"},
        {"from": "navigator_analysis", "to": "task_requirements"},
        {"from": "task_requirements", "to": "service_strategy", "condition": "sufficient_info"},
        {"from": "task_requirements", "to": "chat_response", "condition": "insufficient_info"},
        {"from": "service_strategy", "to": "regulatory_check"},
        {"from": "regulatory_check", "to": "chat_response"},
    ]
    strategy_decisions = {
        "task_requirements_decision": {
            "conditions": {
                "insufficient_info": "state.get('task_result', {}).get('status') == 'insufficient_information'",
                "continue": "True",  # Default
            }
        }
    }
    example_definition = {
        "strategy_request": {
            "name": "Strategy Request Workflow",
            "description": "Complete provider/service access strategy workflow",
            "nodes": strategy_nodes,
            "edges": strategy_edges,
            "decisions": strategy_decisions,
        }
    }

    emit("```python", json.dumps(example_definition, indent=2), "```")

    emit(
        "\n🎯 RECOMMENDATION:",
        "=" * 17,
        "📊 ASSESSMENT: **EXTRACT WORKFLOWS** - Benefits outweigh complexity",
    )
    emit(
        "\n🔧 IMPLEMENTATION APPROACH:",
        "1. Create graph/workflow_definitions.py with workflow configs",
        "2. Create graph/workflow_builder.py for dynamic construction",
        "3. Refactor agent_orchestrator.py to use workflow definitions",
        "4. Add validation and testing for workflow definitions",
        "5. Migrate existing workflows to new structure",
    )
    emit(
        "\n📋 IMMEDIATE NEXT STEPS:",
        "1. Create workflow_definitions.py with current workflows",
        "2. Test workflow loading and validation",
        "3. Implement WorkflowBuilder class",
        "4. Update agent_orchestrator to use new system",
        "5. Add configuration management for workflow variants",
    )

    return {
        "recommendation": "extract_workflows",
        "proposed_files": list(proposed_structure.keys()),
        "current_methods": workflow_related,
        "example_definition": example_definition,
    }

# Run workflow definition analysis: prints the extraction proposal and keeps
# the returned summary dict in `workflow_analysis`.
workflow_analysis = analyze_workflow_definition_extraction()


## Complete Workflow Testing

Test the full orchestrator workflow with different scenarios.


In [None]:
# Test complete workflows
async def test_complete_workflow(scenario_name, message):
    """Send one message through ``orchestrator.process_message`` and report on it.

    Prints the elapsed time, workflow type, response (truncated to 300
    characters when longer) and a metadata summary, then logs a sample through
    ``record_performance`` under the name ``workflow_<scenario_name>``.

    Args:
        scenario_name: Label used in the header, the conversation id and the
            performance record name.
        message: User message handed to the orchestrator.

    Returns:
        The orchestrator result, or ``None`` when any exception was raised
        (the failure is printed with a traceback and recorded as unsuccessful).
    """
    print_section_header(f"Testing Workflow: {scenario_name}", "🔄")
    print(f"📝 Message: {message}")

    start_time = time.time()

    try:
        # Full round trip through the orchestrator
        result = await orchestrator.process_message(
            message=message,
            user_id=TEST_USER_ID,
            conversation_id=f"test_{scenario_name}_{int(time.time())}"
        )
        end_time = time.time()

        # Headline numbers
        print(f"\n⏱️ Total execution time: {end_time - start_time:.3f}s")
        print(f"🔄 Workflow type: {result.get('workflow_type', 'N/A')}")
        response_text = result.get('text', '')
        response_length = len(response_text)
        print(f"💬 Response length: {response_length} characters")

        # Long responses are shown as a 300-character preview
        if response_length > 300:
            print(f"📄 Response preview: {response_text[:300]}...")
        else:
            print(f"📄 Response: {response_text}")

        # Nested dicts are summarised by size, everything else is shown as-is
        metadata = result.get('metadata', {})
        print("\n📊 Metadata summary:")
        for key, value in metadata.items():
            shown = f"{len(value)} items" if isinstance(value, dict) else value
            print(f"   {key}: {shown}")

        # A result counts as successful when it carries no 'error' key
        success = 'error' not in result
        extra = {
            'workflow_type': result.get('workflow_type'),
            'response_length': len(result.get('text', '')),
            'metadata_keys': list(metadata.keys())
        }
        record_performance(f"workflow_{scenario_name}", start_time, end_time, success, extra)

        print(f"✅ Workflow completed successfully: {success}")
        return result

    except Exception as exc:
        end_time = time.time()
        print(f"❌ Workflow failed: {exc}")
        record_performance(f"workflow_{scenario_name}", start_time, end_time, False)
        import traceback
        traceback.print_exc()
        return None

# Test all scenarios
async def test_all_scenarios():
    """Run every entry of ``TEST_SCENARIOS`` through ``test_complete_workflow``.

    Scenarios are executed one after another, in mapping order, with a
    60-character rule printed after each.

    Returns:
        Dict mapping scenario name to whatever ``test_complete_workflow``
        returned for it.
    """
    print_section_header("Testing All Scenarios", "🎯")

    divider = "\n" + "="*60 + "\n"
    outcomes = {}
    for name, text in TEST_SCENARIOS.items():
        outcomes[name] = await test_complete_workflow(name, text)
        print(divider)

    return outcomes

# Run all scenario tests.
# Top-level `await` is used here, so this cell must run in a kernel that
# supports awaiting at cell level. `scenario_results` maps each scenario name
# to its workflow result (None for a scenario whose run raised).
scenario_results = await test_all_scenarios()


## Performance Analysis and Visualization


In [None]:
# Analyze performance data
def visualize_performance():
    """Print statistics, draw a 2x2 dashboard and show a table for ``performance_data``.

    Reads the notebook-level ``performance_data`` collection; each record must
    provide ``test_name``, ``duration_seconds``, ``success`` and ``timestamp``.
    When the collection is empty a notice is printed and nothing is drawn.

    Panels: success rate per test, mean duration per test, duration over time,
    and the overall success/failure split.

    Returns:
        None. All output is printed or displayed.
    """
    print_section_header("Performance Analysis", "📊")

    if not performance_data:
        print("❌ No performance data available")
        return

    # Convert to DataFrame
    df = pd.DataFrame(performance_data)

    # Overall statistics
    print("📈 Overall Performance Statistics:")
    analysis = analyze_workflow_performance(performance_data)
    for key, value in analysis.items():
        print(f"   {key}: {value}")

    # Create visualizations
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('Agent Orchestrator Performance Analysis', fontsize=16)

    # 1. Success rate by test
    success_by_test = df.groupby('test_name')['success'].mean()
    axes[0, 0].bar(success_by_test.index, success_by_test.values)
    axes[0, 0].set_title('Success Rate by Test')
    axes[0, 0].set_ylabel('Success Rate')
    axes[0, 0].tick_params(axis='x', rotation=45)

    # 2. Execution time by test
    time_by_test = df.groupby('test_name')['duration_seconds'].mean()
    axes[0, 1].bar(time_by_test.index, time_by_test.values)
    axes[0, 1].set_title('Average Execution Time by Test')
    axes[0, 1].set_ylabel('Time (seconds)')
    axes[0, 1].tick_params(axis='x', rotation=45)

    # 3. Timeline of execution times.
    # Parse once and reuse below, so the table also works when the records
    # carry timestamps as strings rather than datetime objects.
    timestamps = pd.to_datetime(df['timestamp'])
    axes[1, 0].plot(timestamps, df['duration_seconds'], 'o-')
    axes[1, 0].set_title('Execution Time Timeline')
    axes[1, 0].set_ylabel('Time (seconds)')
    axes[1, 0].tick_params(axis='x', rotation=45)

    # 4. Success/failure distribution.
    # value_counts() sorts by frequency and omits absent outcomes, so the
    # labels are derived from its index instead of assuming [True, False].
    success_counts = df['success'].value_counts()
    outcome_labels = ['Success' if flag else 'Failure' for flag in success_counts.index]
    axes[1, 1].pie(success_counts.values, labels=outcome_labels, autopct='%1.1f%%')
    axes[1, 1].set_title('Success/Failure Distribution')

    plt.tight_layout()
    plt.show()

    # Display detailed performance table
    print("\n📋 Detailed Performance Data:")
    display_df = df[['test_name', 'duration_seconds', 'success', 'timestamp']].copy()
    display_df['timestamp'] = timestamps.dt.strftime('%H:%M:%S')
    display_df = display_df.round({'duration_seconds': 3})
    display(display_df)

# Run performance analysis.
# Uses whatever samples are in `performance_data` at this point; with none
# recorded it only prints a "no data" notice.
visualize_performance()


## Step-by-Step Workflow Debugging

Walk through a workflow step-by-step to understand each agent's contribution.


In [None]:
# Step-by-step workflow debugging
def _print_step_banner(title):
    """Print ``title`` framed by two 50-character ``=`` rules, after a blank line."""
    print("\n" + "="*50)
    print(title)
    print("="*50)


async def _run_timed_node(node, state):
    """Await ``node(state)``, print the elapsed wall-clock time and return its result."""
    start_time = time.time()
    new_state = await node(state)
    execution_time = time.time() - start_time
    print(f"⏱️ Execution time: {execution_time:.3f}s")
    return new_state


async def debug_workflow_step_by_step(message="I need to find a cardiologist in Seattle"):
    """Drive the orchestrator's node methods one at a time and print each result.

    A fresh state dict is built by hand, the workflow type is taken from
    ``orchestrator._determine_workflow_type`` and the private node methods are
    then awaited directly, in order:

    1. security check (the function returns early if it sets ``state['error']``)
    2. for ``"strategy_request"``: navigator analysis, task requirements and,
       when ``_task_requirements_decision`` answers ``"continue"``, service
       strategy followed by the regulatory check; for any other workflow type:
       navigator Q&A
    3. chat response

    Args:
        message: User message to trace through the workflow.

    Returns:
        The state as returned by the last node that ran.
    """
    print_section_header("Step-by-Step Workflow Debug", "🔍")
    print(f"📝 Message: {message}")

    # Initialize state manually
    state = {
        "message": message,
        "user_id": TEST_USER_ID,
        "conversation_id": f"debug_{int(time.time())}",
        "intent": "",
        "workflow_type": "",
        "response_text": "",
        "metadata": {},
        "security_check_passed": False,
        "error": None
    }

    # Determine workflow type
    workflow_type = orchestrator._determine_workflow_type(message)
    state["workflow_type"] = workflow_type
    print(f"🔄 Determined workflow type: {workflow_type}")

    # Step 1: Security Check
    _print_step_banner("1️⃣ SECURITY CHECK")
    state = await _run_timed_node(orchestrator._security_check_node, state)
    print(f"✅ Security passed: {state.get('security_check_passed', False)}")
    if state.get('error'):
        # A failed security check ends the walk-through here
        print(f"❌ Error: {state['error']}")
        return state

    if workflow_type == "strategy_request":
        # Step 2: Navigator Analysis
        _print_step_banner("2️⃣ NAVIGATOR ANALYSIS")
        state = await _run_timed_node(orchestrator._navigator_analysis_node, state)
        print(f"🎯 Intent: {state.get('intent', 'N/A')}")

        # Display detailed navigator results
        navigator_result = state.get('navigator_result', {})
        if navigator_result:
            meta_intent = navigator_result.get('meta_intent', {})
            print(f"📊 Request type: {meta_intent.get('request_type', 'N/A')}")
            print(f"📍 Location: {meta_intent.get('location', 'Not specified')}")
            print(f"🏥 Insurance: {meta_intent.get('insurance', 'Not specified')}")
            print(f"🚨 Emergency: {meta_intent.get('emergency', False)}")

        navigator_metadata = state.get('metadata', {}).get('navigator_analysis', {})
        print(f"🧭 Navigator metadata: {navigator_metadata}")

        # Step 3: Task Requirements
        _print_step_banner("3️⃣ TASK REQUIREMENTS")
        state = await _run_timed_node(orchestrator._task_requirements_node, state)

        task_result = state.get('task_requirements_result', {})
        print(f"📋 Requirements identified: {task_result.get('requirements_count', 0)}")
        print(f"📄 Documents needed: {task_result.get('documents_needed', [])}")
        print(f"📊 Status: {task_result.get('status', 'N/A')}")

        missing_context = task_result.get('missing_context', [])
        if missing_context:
            print(f"❌ Missing context: {missing_context}")

        # Decision point
        decision = orchestrator._task_requirements_decision(state)
        print(f"🤔 Task requirements decision: {decision}")

        if decision == "continue":
            # Step 4: Service Strategy
            _print_step_banner("4️⃣ SERVICE STRATEGY")
            state = await _run_timed_node(orchestrator._service_strategy_node, state)

            strategy_result = state.get('strategy_result', {})
            print(f"🎯 Recommended service: {strategy_result.get('recommended_service', 'N/A')}")
            print(f"⏱️ Estimated timeline: {strategy_result.get('estimated_timeline', 'N/A')}")
            print(f"🎖️ Confidence: {strategy_result.get('confidence', 'N/A')}")

            action_plan = strategy_result.get('action_plan', [])
            if action_plan:
                print(f"📋 Action plan ({len(action_plan)} steps):")
                for i, step in enumerate(action_plan[:3], 1):  # Show first 3 steps
                    step_desc = step.get('step_description', f'Step {i}')
                    timeline = step.get('expected_timeline', 'N/A')
                    print(f"   {i}. {step_desc} (Timeline: {timeline})")
                if len(action_plan) > 3:
                    print(f"   ... and {len(action_plan) - 3} more steps")

            # Step 5: Regulatory Check
            _print_step_banner("5️⃣ REGULATORY CHECK")
            state = await _run_timed_node(orchestrator._regulatory_check_node, state)
            regulatory_metadata = state.get('metadata', {}).get('regulatory_check', {})
            print(f"⚖️ Compliance status: {regulatory_metadata.get('compliance_status', 'N/A')}")
            print(f"📊 Regulations checked: {regulatory_metadata.get('regulations_checked', 0)}")
        else:
            print(f"⏭️ Skipping strategy and regulatory steps due to: {decision}")

    else:
        # Navigator Q&A workflow
        _print_step_banner("2️⃣ NAVIGATOR Q&A")
        state = await _run_timed_node(orchestrator._navigator_qa_node, state)
        qa_metadata = state.get('metadata', {}).get('navigator_qa', {})
        print(f"❓ Question type: {qa_metadata.get('question_type', 'N/A')}")
        print(f"🎖️ Confidence: {qa_metadata.get('confidence', 'N/A')}")

    # Final step: Chat Response
    _print_step_banner("🏁 FINAL CHAT RESPONSE")
    state = await _run_timed_node(orchestrator._chat_response_node, state)

    response_text = state.get('response_text', '')
    print(f"💬 Response length: {len(response_text)} characters")

    chat_metadata = state.get('metadata', {}).get('chat_response', {})
    print(f"💬 Response type: {chat_metadata.get('response_type', 'N/A')}")
    print(f"🎖️ Response confidence: {chat_metadata.get('confidence', 'N/A')}")
    print(f"📊 Has strategy data: {chat_metadata.get('has_strategy_data', False)}")

    # Long responses get a 300-character preview followed by the full text
    if len(response_text) > 300:
        print(f"📄 Response preview: {response_text[:300]}...")
        print("\n📄 Full response:")
        print(response_text)
    else:
        print(f"📄 Full response: {response_text}")

    print("\n✅ Step-by-step debug complete")
    print(f"\n📊 FINAL STATE SUMMARY:")
    print(f"   • Workflow type: {state.get('workflow_type')}")
    print(f"   • Intent: {state.get('intent')}")
    print(f"   • Security passed: {state.get('security_check_passed')}")
    print(f"   • Error: {state.get('error') or 'None'}")
    print(f"   • Response generated: {'Yes' if response_text else 'No'}")

    return state

# Run step-by-step debug with the function's default message.
# `debug_result` holds the final state dict returned by the walk-through.
debug_result = await debug_workflow_step_by_step()


## Interactive Testing

Use this section to test custom messages and explore agent behavior interactively.


In [None]:
# Interactive testing with real agent workflows
CUSTOM_MESSAGE = "I need help finding a dermatologist in Portland, Oregon. I have Aetna insurance."

# Canned prompts: interactive_test_real() prints them as a numbered menu and
# quick_test_real() runs them by that same 1-based number. They are defined
# once so the menu and the runner cannot drift apart.
QUICK_TESTS = [
    "I need urgent care in downtown Seattle",
    "How do I apply for disability benefits?",
    "Find me a dermatologist accepting new patients",
    "What's covered under my Medicare plan?",
    "I need help filling out insurance forms"
]


async def interactive_test_real(message=None):
    """Run one message through the current workflow and the prototype, then compare.

    The message is first processed by ``test_complete_workflow`` (scenario name
    ``"interactive"``) and then by ``prototype_new_workflow_real``; both runs
    are timed and a short comparison is printed.

    Args:
        message: Message to test. Falls back to ``CUSTOM_MESSAGE`` when falsy.

    Returns:
        Dict with ``current_result``, ``prototype_result`` and a ``comparison``
        dict (``current_time``, ``prototype_time``, ``workflow_type``,
        ``prescribed_workflows``, ``sufficient_info``).

    Raises:
        NameError: If ``prototype_new_workflow_real`` has not been defined yet.
    """
    # prototype_new_workflow_real lives in a later notebook cell. Check for it
    # up front so a fresh-kernel run fails immediately with an actionable
    # message, rather than after the full current workflow has already run.
    if 'prototype_new_workflow_real' not in globals():
        raise NameError(
            "name 'prototype_new_workflow_real' is not defined - run the "
            "'Prototype New Workflows' cell before calling interactive_test_real()"
        )

    test_message = message or CUSTOM_MESSAGE
    print_section_header("Interactive Real Agent Test", "🎮")
    print(f"📝 Testing message: {test_message}")

    print("\n🚀 Or choose a quick test:")
    for i, test in enumerate(QUICK_TESTS, 1):
        print(f"   {i}. {test}")
    print("   (Use: await quick_test_real(number) to run)")

    # Run both current and new workflow analysis
    print("\n" + "="*60)
    print("CURRENT WORKFLOW ANALYSIS")
    print("="*60)

    workflow_type = orchestrator._determine_workflow_type(test_message)
    print(f"🔄 Determined workflow type: {workflow_type}")

    # Run complete current workflow
    start_time = time.time()
    current_result = await test_complete_workflow("interactive", test_message)
    current_time = time.time() - start_time
    print(f"⏱️ Current workflow time: {current_time:.3f}s")

    # Run new prototype workflow
    print("\n" + "="*60)
    print("NEW PROTOTYPE WORKFLOW")
    print("="*60)

    start_time = time.time()
    prototype_result = await prototype_new_workflow_real(test_message)
    prototype_time = time.time() - start_time
    print(f"⏱️ Prototype workflow time: {prototype_time:.3f}s")

    # Comparison
    print("\n" + "="*60)
    print("WORKFLOW COMPARISON")
    print("="*60)

    print(f"📊 Current approach: {workflow_type}")
    print(f"🆕 New approach: {prototype_result['prescribed_workflows']}")
    print(f"⏱️ Time comparison: Current {current_time:.3f}s vs New {prototype_time:.3f}s")

    doc_analysis = prototype_result['document_analysis']
    sufficient = doc_analysis['sufficient_documents']
    print(f"📄 Information sufficiency: {'✅ Complete' if sufficient else '❌ Needs clarification'}")

    if not sufficient:
        missing = doc_analysis.get('missing_context', [])
        # map(str, ...) keeps the join from failing on non-string entries
        print(f"📝 Missing information: {', '.join(map(str, missing))}")

    print(f"🎯 Next action: {prototype_result['next_action']}")

    return {
        'current_result': current_result,
        'prototype_result': prototype_result,
        'comparison': {
            'current_time': current_time,
            'prototype_time': prototype_time,
            'workflow_type': workflow_type,
            'prescribed_workflows': prototype_result['prescribed_workflows'],
            'sufficient_info': sufficient
        }
    }


async def quick_test_real(test_number):
    """Run the canned prompt with the given 1-based number from ``QUICK_TESTS``.

    Args:
        test_number: Position in the menu printed by ``interactive_test_real``.

    Returns:
        The result of ``interactive_test_real`` for that prompt, or ``None``
        (after printing a notice) when the number is out of range.
    """
    if 1 <= test_number <= len(QUICK_TESTS):
        message = QUICK_TESTS[test_number - 1]
        print(f"\n🎯 Running Quick Test {test_number}: {message}")
        return await interactive_test_real(message)
    else:
        print(f"❌ Invalid test number. Choose 1-{len(QUICK_TESTS)}")
        return None

# Helper function to test any custom message
async def test_any_message(message):
    """Convenience alias: run ``message`` through ``interactive_test_real``.

    Returns:
        Whatever ``interactive_test_real(message)`` returns.
    """
    comparison = await interactive_test_real(message)
    return comparison

# Run interactive test with default message (CUSTOM_MESSAGE).
# NOTE(review): interactive_test_real() calls prototype_new_workflow_real(),
# which is defined in the later "Prototype New Workflows" cell. Running the
# notebook top to bottom on a fresh kernel therefore raises NameError here;
# run that cell first or move its definitions above this one.
print("🧪 Running Interactive Test with Real Agent Workflows")
interactive_result = await interactive_test_real()

# Usage hints for running further scenarios by hand
print(f"\n💡 How to test more scenarios:")
print(f"   • await interactive_test_real('your custom message')")
print(f"   • await quick_test_real(1)  # Quick predefined tests")
print(f"   • await test_any_message('any message')  # Direct testing")


In [None]:
# Debug the interactive test step-by-step: replays CUSTOM_MESSAGE through the
# manual node-by-node walk-through. The returned state is not stored.
await debug_workflow_step_by_step(CUSTOM_MESSAGE)


## Workflow Type Analysis

Analyze how different message types are classified and routed.


In [None]:
# Analyze workflow routing
def analyze_workflow_routing():
    """Classify a fixed set of sample messages and summarise where they were routed.

    Each sample is passed to ``orchestrator._determine_workflow_type``; the
    outcome is printed per message and then tallied per category as the share
    of ``"strategy_request"`` and ``"navigator_only"`` results.

    Returns:
        Dict mapping category name to the list of workflow types returned for
        its messages, in message order.
    """
    print_section_header("Workflow Routing Analysis", "🧭")

    # Sample messages, grouped by the routing we would expect
    test_messages = {
        "Strategy Requests": [
            "I need to find a doctor",
            "Help me locate a cardiologist",
            "Where can I get an X-ray?",
            "I need a specialist in Seattle",
            "Find me a dentist with my insurance"
        ],
        "Q&A Requests": [
            "What is Medicare?",
            "Explain copays",
            "What's the difference between Part A and Part B?",
            "Define deductible",
            "How does Medicare Advantage work?"
        ],
        "Ambiguous": [
            "I need help",
            "Tell me about insurance",
            "I have a question",
            "Can you assist me?",
            "What should I do?"
        ]
    }

    # One result bucket per category, filled in as messages are classified
    routing_results = {category: [] for category in test_messages}

    for category, bucket in routing_results.items():
        print(f"\n📋 {category.upper()}")
        print("-" * (len(category) + 5))

        for message in test_messages[category]:
            workflow_type = orchestrator._determine_workflow_type(message)
            bucket.append(workflow_type)
            print(f"   '{message}' → {workflow_type}")

    # Per-category tallies
    print("\n📊 ROUTING SUMMARY")
    print("=" * 20)

    for category, bucket in routing_results.items():
        total = len(bucket)
        strategy_count = sum(1 for kind in bucket if kind == "strategy_request")
        navigator_count = sum(1 for kind in bucket if kind == "navigator_only")

        print(f"{category}:")
        print(f"  Strategy requests: {strategy_count}/{total} ({strategy_count/total*100:.1f}%)")
        print(f"  Navigator only: {navigator_count}/{total} ({navigator_count/total*100:.1f}%)")

    return routing_results

# Run routing analysis.
# `routing_analysis` maps each sample category to the list of workflow types
# its messages were routed to.
routing_analysis = analyze_workflow_routing()


## Prototype New Workflows

Use this section to prototype the proposed supervisor-based architecture, in which intent analysis and document-requirement analysis run first and their results decide which workflows to execute.


In [None]:
# Prototype new supervisor-based workflow using real agent outputs
async def analyze_intent_with_real_navigator(message: str):
    """Ask the patient navigator agent about ``message`` and prescribe workflows.

    The navigator's ``meta_intent.request_type`` is mapped to workflows as:

    * ``find_provider`` / ``locate_service``        -> retrieval, strategy
    * ``coverage_question`` / ``policy_question``   -> retrieval
    * request type contains "eligibility", or the message contains "qualify"
                                                    -> retrieval, eligibility
    * the message contains "form" or "application"  -> eligibility, forms
    * anything else                                 -> retrieval

    Args:
        message: Raw user message.

    Returns:
        Dict with ``workflows`` (ordered, without duplicates),
        ``navigator_analysis``, ``confidence`` and ``intent_type``. If anything
        raises, the error is printed and a fallback is returned instead:
        ``["retrieval"]``, an empty analysis, confidence ``0.5`` and intent
        type ``"unknown"``.
    """
    try:
        # Use real navigator analysis
        navigator_result = await orchestrator.patient_navigator_agent.analyze_request(
            message, f"prototype_{int(time.time())}"
        )

        analysis = navigator_result.analysis_details
        meta_intent = analysis.get("meta_intent", {})

        # `or ""` normalises an explicit None so the substring test below
        # cannot raise and silently push a valid analysis into the fallback.
        request_type = meta_intent.get("request_type", "") or ""

        # Map request types to workflows; first matching rule wins
        if request_type in ("find_provider", "locate_service"):
            workflows = ["retrieval", "strategy"]
        elif request_type in ("coverage_question", "policy_question"):
            workflows = ["retrieval"]
        elif "eligibility" in request_type or "qualify" in message.lower():
            workflows = ["retrieval", "eligibility"]
        elif "form" in message.lower() or "application" in message.lower():
            workflows = ["eligibility", "forms"]
        else:
            # Always include retrieval as baseline
            workflows = ["retrieval"]

        # Remove duplicates while preserving order
        workflows = list(dict.fromkeys(workflows))

        return {
            "workflows": workflows,
            "navigator_analysis": analysis,
            "confidence": navigator_result.confidence_score,
            "intent_type": navigator_result.intent_type
        }

    except Exception as e:
        # Deliberate best effort: the prototype keeps going on a safe default
        print(f"❌ Error in intent analysis: {e}")
        return {
            "workflows": ["retrieval"],
            "navigator_analysis": {},
            "confidence": 0.5,
            "intent_type": "unknown"
        }

async def analyze_document_requirements(message: str, workflows: list, navigator_analysis: dict):
    """Ask the task requirements agent which documents and context are needed.

    Args:
        message: Raw user message.
        workflows: Prescribed workflows; accepted but not used by this function.
        navigator_analysis: Navigator analysis passed on to the agent.

    Returns:
        Dict with ``required_documents``, ``sufficient_documents``,
        ``missing_context``, ``status`` and ``requirements_count``.
        Information counts as sufficient when the status is not
        ``"insufficient_information"`` and no context is missing. If anything
        raises, the error is printed and a fixed "error" result is returned.
    """
    try:
        # Use real task requirements analysis
        report = await orchestrator.task_requirements_agent.analyze_requirements_structured(
            navigator_analysis, message
        )

        # documents_needed is mandatory; the other two fall back to defaults
        summary = {"required_documents": report.documents_needed}
        gaps = getattr(report, 'missing_context', [])
        outcome = getattr(report, 'status', 'complete')

        # Sufficient unless the agent said otherwise or listed missing context
        summary["sufficient_documents"] = not (
            outcome == "insufficient_information" or len(gaps) != 0
        )
        summary["missing_context"] = gaps
        summary["status"] = outcome
        summary["requirements_count"] = report.requirements_count
        return summary

    except Exception as exc:
        print(f"❌ Error in document analysis: {exc}")
        return dict(
            required_documents=["insurance_card", "location"],
            sufficient_documents=False,
            missing_context=["insurance_information", "location_details"],
            status="error",
            requirements_count=0,
        )

async def prototype_new_workflow_real(message: str):
    """Walk one message through the supervisor-style prototype and narrate it.

    Two real analyses are run: ``analyze_intent_with_real_navigator`` and
    ``analyze_document_requirements``. Everything after that is descriptive:
    the routing decision, the execution plan and the expected outcomes are
    printed, but no prescribed workflow is actually executed.

    Args:
        message: Raw user message.

    Returns:
        Dict with ``prescribed_workflows``, ``document_analysis``,
        ``intent_analysis``, ``next_action`` (``"request_information"`` or
        ``"execute_workflows: [...]"``) and ``navigator_analysis``.
    """
    print_section_header("Real Agent Workflow Prototype", "🆕")
    print(f"📝 Message: {message}")

    # ---- Step 1: intent analysis via the navigator -------------------------
    print("\n1️⃣ INTENT ANALYSIS (Real Navigator)")
    print("-" * 40)
    started = time.time()
    intent_result = await analyze_intent_with_real_navigator(message)
    elapsed = time.time() - started

    prescribed_workflows = intent_result["workflows"]
    navigator_analysis = intent_result["navigator_analysis"]

    print(f"⏱️ Execution time: {elapsed:.3f}s")
    print(f"🎯 Intent type: {intent_result['intent_type']}")
    print(f"🎖️ Confidence: {intent_result['confidence']:.2f}")
    print(f"🔄 Prescribed workflows: {prescribed_workflows}")

    # Navigator insights are only shown when a meta intent came back
    meta_intent = navigator_analysis.get("meta_intent", {})
    if meta_intent:
        print(f"📊 Request type: {meta_intent.get('request_type', 'N/A')}")
        print(f"📍 Location detected: {meta_intent.get('location', 'Not specified')}")
        print(f"🏥 Insurance detected: {meta_intent.get('insurance', 'Not specified')}")

    # ---- Step 2: document analysis via task requirements -------------------
    print("\n2️⃣ DOCUMENT ANALYSIS (Real Task Requirements)")
    print("-" * 45)
    started = time.time()
    doc_analysis = await analyze_document_requirements(message, prescribed_workflows, navigator_analysis)
    elapsed = time.time() - started

    print(f"⏱️ Execution time: {elapsed:.3f}s")
    print(f"📄 Required documents: {doc_analysis['required_documents']}")
    print(f"📊 Requirements count: {doc_analysis['requirements_count']}")
    print(f"📋 Status: {doc_analysis['status']}")
    print(f"✅ Sufficient information: {doc_analysis['sufficient_documents']}")

    missing_context = doc_analysis['missing_context']
    if missing_context:
        print(f"❌ Missing context: {missing_context}")

    # ---- Step 3: routing decision ------------------------------------------
    print("\n3️⃣ WORKFLOW ROUTING DECISION")
    print("-" * 30)
    can_proceed = bool(doc_analysis['sufficient_documents'])
    if can_proceed:
        print("✅ Sufficient information - proceeding with workflows")
        next_action = f"execute_workflows: {prescribed_workflows}"
    else:
        print("🚫 Insufficient information - requesting more details")
        next_action = "request_information"
        print(f"📝 Missing information: {', '.join(missing_context)}")

    print(f"📋 Next action: {next_action}")

    # ---- Step 4: execution plan --------------------------------------------
    print("\n4️⃣ WORKFLOW EXECUTION PLAN")
    print("-" * 30)

    # Components described for each known workflow; unknown ones get a title only
    plan_details = {
        "retrieval": (
            "     📊 Information Retrieval Agent",
            "     🔍 Knowledge base search",
            "     🌐 External data sources",
        ),
        "strategy": (
            "     🎯 Service Access Strategy Agent",
            "     ⚖️ Regulatory Compliance Agent",
            "     💰 Cost estimation",
        ),
        "eligibility": (
            "     📋 Eligibility Parsing Agent",
            "     🤖 ML Application Evaluation Model",
            "     ✅ Eligibility verification",
        ),
        "forms": (
            "     📝 Form Preparation Agent",
            "     📤 Submission Planning Agent",
            "     ✍️ Form validation",
        ),
    }

    if can_proceed:
        for position, workflow in enumerate(prescribed_workflows, 1):
            print(f"   {position}. 🔄 {workflow.upper()} WORKFLOW")
            for detail in plan_details.get(workflow, ()):
                print(detail)
    else:
        print("   📨 Generate information request message")
        print(f"   🎯 Focus on: {', '.join(missing_context)}")

    # ---- Step 5: expected outcomes -----------------------------------------
    print("\n5️⃣ EXPECTED OUTCOMES")
    print("-" * 20)

    # Listed in this fixed order, regardless of the prescribed order
    outcome_lines = (
        ("retrieval", ("   📚 Comprehensive information gathered",)),
        ("strategy", (
            "   🗺️ Detailed access strategy with action steps",
            "   ⚖️ Regulatory compliance validation",
        )),
        ("eligibility", ("   📊 Eligibility assessment with ML scoring",)),
        ("forms", ("   📋 Pre-filled forms and submission timeline",)),
    )

    if can_proceed:
        for workflow, lines in outcome_lines:
            if workflow in prescribed_workflows:
                for line in lines:
                    print(line)
    else:
        print("   ❓ Clarifying questions for user")
        print("   🔄 Workflow restart with complete information")

    return {
        "prescribed_workflows": prescribed_workflows,
        "document_analysis": doc_analysis,
        "intent_analysis": intent_result,
        "next_action": next_action,
        "navigator_analysis": navigator_analysis
    }

# Test the real agent workflow prototype.
# One message per routing rule, plus a deliberately vague one.
test_messages = [
    "I need to find a cardiologist in Seattle with my Aetna insurance",
    "What forms do I need to apply for Medicare benefits?", 
    "Am I eligible for Medicare Advantage?",
    "Where can I get an X-ray covered by my insurance?",
    "I need a doctor"  # Test insufficient information case
]

print("🧪 Testing Real Agent Workflow Prototypes")
print("=" * 50)

# Results are keyed "test_1", "test_2", ... in the order of test_messages.
# The messages are processed one after another, not concurrently.
real_prototype_results = {}
for i, msg in enumerate(test_messages):
    print(f"\n{'='*80}")
    print(f"TEST {i+1}: {msg}")
    print('='*80)
    result = await prototype_new_workflow_real(msg)
    real_prototype_results[f"test_{i+1}"] = result

print("\n🎉 Real agent workflow prototyping complete!")
print("\n📊 SUMMARY:")
# One line per test: prescribed workflows, whether information was sufficient,
# and the action name (the part of next_action before any ':').
for i, (test_key, result) in enumerate(real_prototype_results.items(), 1):
    workflows = result["prescribed_workflows"]
    sufficient = result["document_analysis"]["sufficient_documents"]
    action = result["next_action"]
    print(f"{i}. {workflows} → {'Proceed' if sufficient else 'Request info'} ({action.split(':')[0]})")


## Summary and Next Steps

View a summary of all testing and plan the next development steps.


In [None]:
# Final summary and recommendations
def generate_summary():
    """Print the closing report for the notebook.

    Writes, in order: the current-architecture review, performance metrics
    (only when the notebook-level ``performance_data`` is truthy, summarised
    through ``analyze_workflow_performance``), the benefits of the new
    architecture, the phased implementation plan, immediate next steps and
    the success criteria. Everything goes to stdout; nothing is returned.
    """

    def emit(*lines):
        # One print call per line, exactly as a hand-written print sequence would do.
        for line in lines:
            print(line)

    def emit_heading(title, rule_width):
        # Section title followed by a rule of "=" characters of the given width.
        emit(title, "=" * rule_width)

    print_section_header("Testing Summary & Recommendations", "📋")

    emit_heading("🔍 CURRENT ARCHITECTURE ANALYSIS", 35)
    emit(
        "✅ Strengths:",
        "   • Security-first approach with prompt validation",
        "   • Clear workflow separation (strategy vs navigator)",
        "   • Comprehensive agent coverage",
        "   • Good error handling and state management",
    )
    emit(
        "\n⚠️ Areas for Improvement:",
        "   • Rigid workflow determination based on keywords",
        "   • Limited document requirement analysis",
        "   • Sequential agent execution (could be parallel)",
        "   • PatientNavigator doing too many responsibilities",
    )

    if performance_data:
        emit_heading("\n📊 PERFORMANCE METRICS", 22)
        # Computed after the heading is printed, matching the established output order.
        analysis = analyze_workflow_performance(performance_data)
        # (label, key in the analysis mapping, format spec, unit suffix)
        metric_rows = (
            ("Total tests run", "total_tests", "", ""),
            ("Overall success rate", "success_rate", ".1f", "%"),
            ("Average execution time", "avg_duration", ".3f", "s"),
            ("Slowest execution", "max_duration", ".3f", "s"),
            ("Fastest execution", "min_duration", ".3f", "s"),
        )
        for label, key, spec, unit in metric_rows:
            emit(f"   • {label}: {format(analysis[key], spec)}{unit}")

    emit_heading("\n🆕 NEW ARCHITECTURE BENEFITS", 30)
    emit(
        "✅ Supervisor Team Approach:",
        "   • Intent analysis separated from workflow execution",
        "   • Document requirements determined upfront",
        "   • Flexible workflow routing based on needs",
        "   • Parallel workflow execution capability",
        "   • Clearer separation of concerns",
    )

    emit_heading("\n📋 RECOMMENDED IMPLEMENTATION PLAN", 37)
    # Each entry: (phase title, ordered steps). Later titles carry a leading blank line.
    rollout_phases = (
        (
            "Phase 1 - Supervisor Agents (Week 1-2):",
            (
                "   1. Create WorkflowPrescriptionAgent",
                "   2. Create DocumentRequirementAgent",
                "   3. Implement basic workflow routing logic",
                "   4. Add placeholder agents for eligibility/forms",
            ),
        ),
        (
            "\nPhase 2 - Orchestrator Refactor (Week 3):",
            (
                "   1. Replace PatientNavigator with supervisor team",
                "   2. Implement new workflow graph structure",
                "   3. Add conditional workflow execution",
                "   4. Update state management",
            ),
        ),
        (
            "\nPhase 3 - Enhanced Workflows (Week 4-5):",
            (
                "   1. Implement parallel workflow execution",
                "   2. Add workflow synthesis step",
                "   3. Implement eligibility and forms workflows",
                "   4. Add ML model integration for application evaluation",
            ),
        ),
        (
            "\nPhase 4 - Testing & Optimization (Week 6):",
            (
                "   1. Comprehensive testing with this notebook",
                "   2. Performance optimization",
                "   3. User acceptance testing",
                "   4. Production deployment",
            ),
        ),
    )
    for phase_title, phase_steps in rollout_phases:
        emit(phase_title, *phase_steps)

    emit_heading("\n🛠️ IMMEDIATE NEXT STEPS", 24)
    emit(
        "1. Use this notebook to test current performance baseline",
        "2. Create agent skeletons in agents/workflow_prescription/",
        "3. Create agent skeletons in agents/document_requirement/",
        "4. Update agents/__init__.py to include new agents",
        "5. Begin implementing WorkflowPrescriptionAgent.prescribe_workflows()",
    )

    emit_heading("\n🎯 SUCCESS CRITERIA", 18)
    emit(
        "• ≥95% success rate for all workflow types",
        "• <2s average response time for simple queries",
        "• <5s average response time for complex workflows",
        "• Clear document requirement communication",
        "• Accurate workflow prescription (≥90% precision)",
    )
    
# Print the report. NOTE(review): generate_summary reads `performance_data` and calls
# `print_section_header` / `analyze_workflow_performance`, none of which are defined in
# this cell — presumably earlier cells provide them; confirm before running in isolation.
generate_summary()
