# Agent Orchestrator Prototype Notebook

**Purpose**: Fast prototyping and iterative development of agent workflows

This notebook allows you to:
- Test individual agents in isolation
- Walk through complete workflows step-by-step 
- Measure performance and analyze results
- Prototype new workflow architectures
- Debug agent interactions

Based on the principles from:
- [How to refactor a Jupyter notebook](https://medium.com/data-science/how-to-refactor-a-jupyter-notebook-ed531b6a17)
- [Prototyping with Python](https://www.fuzzingbook.org/beta/html/PrototypingWithPython.html)


## Setup and Imports


In [None]:
import sys
import os
import asyncio
import json
import time
from datetime import datetime
from typing import Dict, Any, List, Optional
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML, Markdown
import warnings
# Suppress every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')

# Put the current working directory at the front of the import path (once),
# so project packages resolve when the notebook is started from the repo root.
project_root = os.path.abspath(os.curdir)
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

print("📁 Project root: " + project_root)
print("🐍 Python path updated")


## Import Project Components


In [None]:
# Load the orchestrator entry points, the agent classes and the config manager.
# An ImportError is printed instead of propagated, so execution continues past
# this cell even when the project packages cannot be found.
try:
    from graph.agent_orchestrator import (
        AgentOrchestrator,
        get_orchestrator,
        reset_orchestrator,
    )
    from agents import (
        PromptSecurityAgent,
        PatientNavigatorAgent,
        TaskRequirementsAgent,
        ServiceAccessStrategyAgent,
        ChatCommunicatorAgent,
        RegulatoryAgent,
    )
    from utils.config_manager import ConfigManager
except ImportError as import_failure:
    print(f"❌ Import error: {import_failure}")
    print("Please ensure you're running from the project root directory")
else:
    print("✅ All imports successful")


## Configuration and Global Variables


In [None]:
# Notebook-wide settings
BYPASS_SECURITY = True  # Switch to False for production testing
TEST_USER_ID = "prototype_user_001"
DEFAULT_CONVERSATION_ID = "prototype_conv_001"

# Shared result stores; record_performance() appends to performance_data.
performance_data = list()
test_results = list()

# Named sample messages, keyed by scenario name
TEST_SCENARIOS = dict(
    simple_qa="What does Medicare cover?",
    doctor_search="I need to find a cardiologist in Seattle with Blue Cross insurance",
    incomplete_info="I need a doctor",
    emergency="I'm having chest pain and need immediate care",
    complex_request="I need an X-ray for my back pain, I have Medicare Part B, live in Portland Oregon",
)

print("🔧 Configuration loaded")
print(f"🛡️ Security bypass: {BYPASS_SECURITY}")
print(f"👤 Test user ID: {TEST_USER_ID}")
print(f"💬 Test scenarios: {len(TEST_SCENARIOS)} loaded")


## Utility Functions for Analysis


In [None]:
def format_timestamp(timestamp=None):
    """Render a timestamp as ``HH:MM:SS``.

    When ``timestamp`` is ``None`` the current local time from
    ``datetime.now()`` is formatted instead.
    """
    moment = datetime.now() if timestamp is None else timestamp
    return moment.strftime("%H:%M:%S")

def print_section_header(title, emoji="🔬"):
    """Print a blank line, ``<emoji> <title>``, then a rule of ``=`` characters.

    The rule is three characters longer than ``title``.
    """
    heading = f"\n{emoji} {title}"
    underline = "=" * (3 + len(title))
    print(heading, underline, sep="\n")

def record_performance(test_name, start_time, end_time, success, metadata=None):
    """Append one timing record to the module-level ``performance_data`` list.

    Args:
        test_name: Label stored under ``'test_name'``.
        start_time: Start reading; subtracted from ``end_time``.
        end_time: End reading.
        success: Outcome flag stored under ``'success'``.
        metadata: Optional extra details; a falsy value is stored as ``{}``.

    Returns:
        The record that was appended.
    """
    entry = dict(
        test_name=test_name,
        timestamp=datetime.now(),
        duration_seconds=end_time - start_time,
        success=success,
        metadata=metadata if metadata else {},
    )
    performance_data.append(entry)
    return entry

def display_agent_result(agent_name, result, execution_time=None):
    """Print a short, human-readable summary of one agent's result.

    Args:
        agent_name: Name shown (upper-cased) in the heading.
        result: Agent output. A dict is printed one ``key: value`` per line,
            skipping the verbose ``raw_response`` and ``full_output`` keys;
            anything else is printed as a single ``Result`` line.
        execution_time: Elapsed seconds, or ``None`` to omit the timing line.
    """
    verbose_fields = ('raw_response', 'full_output')

    print(f"\n🤖 {agent_name.upper()}")
    print("-" * (len(agent_name) + 3))

    # Compare against None rather than truthiness: a measured 0.0 is a real
    # timing and must still be shown.
    if execution_time is not None:
        print(f"⏱️ Execution time: {execution_time:.3f}s")

    if isinstance(result, dict):
        for key, value in result.items():
            if key not in verbose_fields:  # Skip verbose fields
                print(f"📊 {key}: {value}")
    else:
        print(f"📝 Result: {result}")

def analyze_workflow_performance(workflow_data):
    """Summarise a list of performance records.

    Args:
        workflow_data: Records carrying ``'success'`` and
            ``'duration_seconds'`` fields.

    Returns:
        The string ``"No performance data available"`` when ``workflow_data``
        is empty; otherwise a dict with the record count, the success rate as
        a percentage, and the mean, maximum and minimum duration.
    """
    if not workflow_data:
        return "No performance data available"

    frame = pd.DataFrame(workflow_data)
    durations = frame['duration_seconds']
    success_fraction = frame['success'].mean()

    return {
        'total_tests': frame.shape[0],
        'success_rate': success_fraction * 100,
        'avg_duration': durations.mean(),
        'max_duration': durations.max(),
        'min_duration': durations.min(),
    }

# Confirmation line printed once the helper definitions in this cell have run
print("🛠️ Utility functions loaded")


In [None]:
# Reset and initialize orchestrator
# Binds the module-level ``orchestrator`` that the test cells below rely on.
# Any failure is printed with a traceback instead of being raised, in which
# case ``orchestrator`` is left unbound (or keeps a value from an earlier run).
print_section_header("Initializing Agent Orchestrator", "🚀")

try:
    # Reset any existing instance
    reset_orchestrator()
    print("🔄 Orchestrator reset")
    
    # Create new instance
    orchestrator = get_orchestrator(bypass_security=BYPASS_SECURITY)
    print("✅ Orchestrator initialized")
    print(f"🛡️ Security bypass: {orchestrator.bypass_security}")
    
    # Test basic functionality
    # NOTE(review): this calls an underscore-prefixed method of the
    # orchestrator, i.e. not part of its public surface — may break on refactor.
    test_message = "Hello, test"
    workflow_type = orchestrator._determine_workflow_type(test_message)
    print(f"🧪 Test workflow determination: '{test_message}' → {workflow_type}")
    
except Exception as e:
    # Broad catch on purpose: report the problem and keep the notebook running.
    print(f"❌ Orchestrator initialization failed: {e}")
    import traceback
    traceback.print_exc()


In [None]:
# Test individual agents
async def test_individual_agents(test_message="I need to find a cardiologist"):
    """Exercise three of the orchestrator's agents one at a time.

    Runs, in order, the prompt security, patient navigator and task
    requirements agents of the module-level ``orchestrator``. Each call is
    timed, summarised with ``display_agent_result`` and logged through
    ``record_performance``. An agent that raises is reported and recorded as
    unsuccessful; the remaining agents still run.

    Args:
        test_message: User message handed to every agent.
    """
    print_section_header("Individual Agent Testing", "🧪")

    # Test data
    test_user_id = TEST_USER_ID
    test_conversation_id = f"test_{int(time.time())}"

    # 1. Test Prompt Security Agent
    print("\n1️⃣ Testing Prompt Security Agent")
    await _run_agent_check(
        "Prompt Security",
        "prompt_security",
        lambda: orchestrator.prompt_security_agent.check_prompt_security(
            test_message, test_user_id
        ),
        lambda res: {
            "is_safe": res.is_safe,
            "risk_score": getattr(res, 'risk_score', 'N/A'),
            "issues": getattr(res, 'issues', []),
        },
    )

    # 2. Test Patient Navigator Agent
    print("\n2️⃣ Testing Patient Navigator Agent")
    await _run_agent_check(
        "Patient Navigator",
        "patient_navigator",
        lambda: orchestrator.patient_navigator_agent.analyze_request(
            test_message, test_conversation_id
        ),
        lambda res: {
            "intent_type": res.intent_type,
            "confidence_score": res.confidence_score,
            "analysis_summary": _preview_text(str(res.analysis_details)),
        },
    )

    # 3. Test Task Requirements Agent
    print("\n3️⃣ Testing Task Requirements Agent")
    # Mock navigator result for task requirements
    mock_navigator_result = {
        "meta_intent": {"request_type": "find_provider", "location": "Seattle"},
        "service_intent": {"specialty": "cardiology"},
        "metadata": {"raw_user_text": test_message},
    }
    await _run_agent_check(
        "Task Requirements",
        "task_requirements",
        lambda: orchestrator.task_requirements_agent.analyze_requirements_structured(
            mock_navigator_result, test_message
        ),
        lambda res: {
            "requirements_count": res.requirements_count,
            "documents_needed": res.documents_needed,
            "status": getattr(res, 'status', 'N/A'),
        },
    )

    print("\n✅ Individual agent testing complete")


async def _run_agent_check(label, perf_key, invoke, summarize):
    """Time one agent call, display its summary and record the outcome.

    Args:
        label: Heading passed to ``display_agent_result``.
        perf_key: Test name passed to ``record_performance``.
        invoke: Zero-argument callable returning the awaitable agent call.
            It is called inside the ``try`` so attribute errors on the
            orchestrator are reported like any other agent failure.
        summarize: Callable turning the agent's result into the dict to show.
    """
    start_time = time.time()
    try:
        outcome = await invoke()
        end_time = time.time()
        display_agent_result(label, summarize(outcome), end_time - start_time)
        record_performance(perf_key, start_time, end_time, True)
    except Exception as e:
        # Deliberately broad: one failing agent must not stop the others.
        print(f"❌ Error: {e}")
        record_performance(perf_key, start_time, time.time(), False)


def _preview_text(text, limit=200):
    """Return ``text`` cut to ``limit`` characters, adding "..." only if cut."""
    if len(text) > limit:
        return text[:limit] + "..."
    return text

# Run the test (top-level ``await`` — assumes a notebook/IPython event loop)
await test_individual_agents()


In [None]:
# Test complete workflows
async def test_complete_workflow(scenario_name, message):
    """Run one message through ``orchestrator.process_message`` and report on it.

    Prints the elapsed time, workflow type, a response preview (first 300
    characters) and a metadata summary, then records a
    ``workflow_<scenario_name>`` entry via ``record_performance``. The run
    counts as successful when the result has no ``'error'`` key.

    Args:
        scenario_name: Label used in headings, the conversation id and the
            performance record.
        message: User message sent to the orchestrator.

    Returns:
        The orchestrator's result, or ``None`` if processing raised (the
        exception is printed with a traceback and recorded as a failure).
    """
    print_section_header(f"Testing Workflow: {scenario_name}", "🔄")
    print(f"📝 Message: {message}")

    start_time = time.time()

    try:
        # Process message through orchestrator
        result = await orchestrator.process_message(
            message=message,
            user_id=TEST_USER_ID,
            conversation_id=f"test_{scenario_name}_{int(time.time())}"
        )

        end_time = time.time()
        execution_time = end_time - start_time

        # ``or`` also covers a key that is present but None, which would
        # otherwise break len() / .items() and be misreported as a crash.
        response_text = result.get('text') or ''
        metadata = result.get('metadata') or {}

        # Display results
        print(f"\n⏱️ Total execution time: {execution_time:.3f}s")
        print(f"🔄 Workflow type: {result.get('workflow_type', 'N/A')}")
        print(f"💬 Response length: {len(response_text)} characters")

        # Display response (truncated)
        if len(response_text) > 300:
            print(f"📄 Response preview: {response_text[:300]}...")
        else:
            print(f"📄 Response: {response_text}")

        # Display metadata summary
        print(f"\n📊 Metadata summary:")
        for key, value in metadata.items():
            if isinstance(value, dict):
                print(f"   {key}: {len(value)} items")
            else:
                print(f"   {key}: {value}")

        # Record performance
        success = 'error' not in result
        record_performance(
            f"workflow_{scenario_name}",
            start_time,
            end_time,
            success,
            {
                'workflow_type': result.get('workflow_type'),
                'response_length': len(response_text),
                'metadata_keys': list(metadata.keys())
            }
        )

        # Do not show a check mark next to "False".
        status_icon = "✅" if success else "⚠️"
        print(f"{status_icon} Workflow completed successfully: {success}")
        return result

    except Exception as e:
        # Broad on purpose: one failing scenario must not abort a batch run.
        end_time = time.time()
        print(f"❌ Workflow failed: {e}")
        record_performance(f"workflow_{scenario_name}", start_time, end_time, False)
        import traceback
        traceback.print_exc()
        return None

# Test all scenarios
async def test_all_scenarios():
    """Run every entry of ``TEST_SCENARIOS`` through ``test_complete_workflow``.

    Scenarios run sequentially, each followed by a 60-character ``=`` divider.

    Returns:
        Dict mapping scenario name to what ``test_complete_workflow`` returned.
    """
    print_section_header("Testing All Scenarios", "🎯")

    divider = "\n" + "=" * 60 + "\n"
    outcomes = {}
    for name in TEST_SCENARIOS:
        outcomes[name] = await test_complete_workflow(name, TEST_SCENARIOS[name])
        print(divider)

    return outcomes

# Run all scenario tests and keep the per-scenario results for inspection
# (top-level ``await`` — assumes a notebook/IPython event loop)
scenario_results = await test_all_scenarios()


In [None]:
# Analyze performance data
def visualize_performance():
    """Print summary statistics and plot the collected performance data.

    Reads the module-level ``performance_data`` list. When it is empty a
    message is printed and nothing is plotted. Otherwise the function prints
    the figures from ``analyze_workflow_performance``, draws a 2x2 figure
    (success rate per test, mean duration per test, duration timeline,
    success/failure share) and displays a table of the individual records.
    """
    print_section_header("Performance Analysis", "📊")

    if not performance_data:
        print("❌ No performance data available")
        return

    # Convert to DataFrame
    df = pd.DataFrame(performance_data)

    # Overall statistics
    print("📈 Overall Performance Statistics:")
    analysis = analyze_workflow_performance(performance_data)
    for key, value in analysis.items():
        print(f"   {key}: {value}")

    # Create visualizations
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('Agent Orchestrator Performance Analysis', fontsize=16)

    # 1. Success rate by test
    success_by_test = df.groupby('test_name')['success'].mean()
    axes[0, 0].bar(success_by_test.index, success_by_test.values)
    axes[0, 0].set_title('Success Rate by Test')
    axes[0, 0].set_ylabel('Success Rate')
    axes[0, 0].tick_params(axis='x', rotation=45)

    # 2. Execution time by test
    time_by_test = df.groupby('test_name')['duration_seconds'].mean()
    axes[0, 1].bar(time_by_test.index, time_by_test.values)
    axes[0, 1].set_title('Average Execution Time by Test')
    axes[0, 1].set_ylabel('Time (seconds)')
    axes[0, 1].tick_params(axis='x', rotation=45)

    # 3. Timeline of execution times
    df['timestamp_formatted'] = pd.to_datetime(df['timestamp'])
    axes[1, 0].plot(df['timestamp_formatted'], df['duration_seconds'], 'o-')
    axes[1, 0].set_title('Execution Time Timeline')
    axes[1, 0].set_ylabel('Time (seconds)')
    axes[1, 0].tick_params(axis='x', rotation=45)

    # 4. Success/failure distribution
    # value_counts() orders by frequency and omits absent outcomes, so the
    # labels are derived from its index instead of assuming [True, False].
    success_counts = df['success'].value_counts()
    outcome_labels = ['Success' if flag else 'Failure' for flag in success_counts.index]
    axes[1, 1].pie(success_counts.values, labels=outcome_labels, autopct='%1.1f%%')
    axes[1, 1].set_title('Success/Failure Distribution')

    plt.tight_layout()
    plt.show()

    # Display detailed performance table
    print("\n📋 Detailed Performance Data:")
    display_df = df[['test_name', 'duration_seconds', 'success', 'timestamp']].copy()
    display_df['timestamp'] = display_df['timestamp'].dt.strftime('%H:%M:%S')
    display_df = display_df.round({'duration_seconds': 3})
    display(display_df)

# Run performance analysis over everything recorded by the cells above
visualize_performance()
