## Step 4.1: Environment Setup and Configuration

In [None]:
# Environment setup
import os
import sys
import time
import re
import json
import asyncio
from typing import Dict, List, Tuple, Optional
from dotenv import load_dotenv
import warnings
warnings.filterwarnings('ignore')

# Add parent directory for module imports
sys.path.append(os.path.dirname(os.getcwd()))

# Load environment variables
load_dotenv()

print("‚úÖ Environment setup complete")
print(f"Working directory: {os.getcwd()}")
print(f"Python version: {sys.version}")

In [None]:
from foundry_local import FoundryLocalManager

# Initialize Foundry local service
manager = FoundryLocalManager(alias_or_model_id=None, bootstrap=True)

# Configuration from previous labs
LOCAL_ENDPOINT = manager.service_uri
LOCAL_MODEL_NAME = os.environ.get("LOCAL_MODEL_NAME", "phi-3.5-mini")

print(f"Local service: {LOCAL_ENDPOINT}")
print(f"Local endpoint: {manager.endpoint}")
print(f"Local model alias: {LOCAL_MODEL_NAME}")

In [None]:
# Azure AI Foundry and Agent Framework configuration
AZURE_AI_PROJECT_ENDPOINT = os.environ.get("AZURE_AI_FOUNDRY_PROJECT_ENDPOINT")
AZURE_AI_MODEL_DEPLOYMENT_NAME = os.environ.get("AZURE_DEPLOYMENT_NAME", "gpt-4o-mini")

# Azure OpenAI Direct Configuration (fallback)
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_KEY = os.environ.get("AZURE_OPENAI_KEY")
AZURE_OPENAI_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")

print("üîß Configuration loaded:")
print(f"   Local endpoint: {LOCAL_ENDPOINT}")
print(f"   Local model: {LOCAL_MODEL_NAME}")
print(f"   Azure AI Project: {AZURE_AI_PROJECT_ENDPOINT}")
print(f"   Model deployment: {AZURE_AI_MODEL_DEPLOYMENT_NAME}")
print(f"   Azure OpenAI endpoint: {AZURE_OPENAI_ENDPOINT}")

# Verify required configuration
config_complete = all([
    LOCAL_ENDPOINT, LOCAL_MODEL_NAME,
    AZURE_AI_PROJECT_ENDPOINT or AZURE_OPENAI_ENDPOINT
])

if config_complete:
    print("\n‚úÖ All required configuration available")
else:
    print("\n‚ùå Missing configuration. Please check your .env file.")

## Step 4.2: Initialize Model Clients and Agent Framework

In [None]:
# Import required libraries
from openai import OpenAI, AzureOpenAI

# Try to import Agent Framework
try:
    from agent_framework import ChatAgent, HostedCodeInterpreterTool
    from agent_framework.azure import AzureAIAgentClient
    from azure.ai.projects.aio import AIProjectClient
    from azure.identity.aio import DefaultAzureCredential
    from pydantic import Field
    
    agent_framework_available = True
    print("‚úÖ Agent Framework imported successfully")
except ImportError as e:
    agent_framework_available = False
    print(f"‚ö†Ô∏è Agent Framework not available: {e}")
    print("   Install with: pip install agent-framework-azure-ai")

# Initialize local client (Foundry Local)
try:
    local_client = OpenAI(
        base_url=f"{LOCAL_ENDPOINT}/v1",
        api_key="not-needed"
    )
    local_available = True
    print(f"‚úÖ Local client initialized: {LOCAL_MODEL_NAME}")
except Exception as e:
    local_available = False
    print(f"‚ùå Local client failed: {e}")

# Initialize direct Azure OpenAI client as fallback
try:
    azure_client = AzureOpenAI(
        api_key=AZURE_OPENAI_KEY,
        api_version=AZURE_OPENAI_API_VERSION,
        azure_endpoint=AZURE_OPENAI_ENDPOINT
    )
    azure_available = True
    print("‚úÖ Azure OpenAI client initialized (fallback)")
except Exception as e:
    azure_available = False
    print(f"‚ùå Azure OpenAI client failed: {e}")

# Check Agent Framework availability.
# Coerce to a real bool: a bare `a and b` would bind the endpoint URL string
# (or None) to this flag, and that value is later stored and reported as a
# capability by downstream code.
use_agent_framework = bool(agent_framework_available and AZURE_AI_PROJECT_ENDPOINT)

# Summarize which routing targets can be used in this session.
print(f"\nüéØ Available routing targets:")
print(f"   Local Model (Foundry Local): {'‚úÖ' if local_available else '‚ùå'}")
print(f"   Agent Framework: {'‚úÖ' if use_agent_framework else '‚ùå'}")
print(f"   Direct Azure OpenAI: {'‚úÖ' if azure_available else '‚ùå'}")

# Report the cloud path that will be preferred (Agent Framework first).
if use_agent_framework:
    print("\nüéâ Agent Framework ready for complex cloud processing!")
elif azure_available:
    print("\n‚úÖ Direct Azure OpenAI ready for cloud processing!")
else:
    print("\n‚ö†Ô∏è No cloud AI services available - check configuration")

## Step 4.3: Create Agent Framework Helper Functions

Create helper functions for cloud queries: an async Agent Framework helper and a synchronous direct Azure OpenAI fallback.

In [None]:
async def create_ephemeral_agent(prompt: str, instructions: str = None) -> tuple:
    """
    Run a single prompt through a short-lived Agent Framework agent.

    The credential and the agent are both opened with ``async with`` so they
    are released as soon as the query finishes.

    Args:
        prompt: Text sent to the agent.
        instructions: Optional system instructions; a built-in default is
            used when this is empty or None.

    Returns:
        A ``(text, elapsed_seconds, success)`` tuple. On any failure the text
        holds an error description, the elapsed time is 0 and success is False.
    """
    if not use_agent_framework:
        return "Agent Framework not available", 0, False

    try:
        started = time.time()

        fallback_instructions = (
            "You are an intelligent AI assistant in a hybrid local-cloud system.\n"
            "You handle complex queries requiring advanced reasoning, analysis, and comprehensive responses.\n"
            "Provide clear, well-structured answers that demonstrate deep understanding."
        )
        system_prompt = instructions if instructions else fallback_instructions

        # Nested context managers: the credential outlives the agent.
        async with DefaultAzureCredential() as credential:
            agent_client = AzureAIAgentClient(
                project_endpoint=AZURE_AI_PROJECT_ENDPOINT,
                model_deployment_name=AZURE_AI_MODEL_DEPLOYMENT_NAME,
                async_credential=credential
            )
            async with agent_client.create_agent(
                name="HybridRouterAgent",
                instructions=system_prompt
            ) as agent:
                reply = await agent.run(prompt)
                elapsed = time.time() - started

                return reply.text, elapsed, True

    except Exception as exc:
        detail = str(exc)
        print(f"‚ùå Agent Framework error: {detail}")

        # Print at most one troubleshooting hint, chosen by the first
        # keyword found in the error text.
        lowered = detail.lower()
        hint = None
        if "authentication" in lowered:
            hint = "üí° Authentication issue. Try: az login"
        elif "endpoint" in lowered:
            hint = "üí° Check AZURE_AI_FOUNDRY_PROJECT_ENDPOINT in .env"
        elif "model" in lowered:
            hint = f"üí° Model '{AZURE_AI_MODEL_DEPLOYMENT_NAME}' may not be available"
        if hint is not None:
            print(hint)

        return f"Agent error: {detail}", 0, False


def query_with_direct_openai(prompt: str, max_tokens: int = 500) -> tuple:
    """
    Send a prompt straight to Azure OpenAI chat completions.

    Used as the fallback path when the Agent Framework cannot be used.

    Args:
        prompt: User message content.
        max_tokens: Upper bound on generated tokens.

    Returns:
        A ``(text, elapsed_seconds, success)`` tuple. When the client is
        unavailable or the call raises, text is an error description,
        elapsed is 0 and success is False.
    """
    if not azure_available:
        return "Azure OpenAI not available", 0, False

    try:
        started = time.time()

        completion = azure_client.chat.completions.create(
            model=AZURE_AI_MODEL_DEPLOYMENT_NAME,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens
        )

        # Stop the clock before touching the payload, as the timing only
        # covers the request itself.
        finished = time.time()
        answer = completion.choices[0].message.content
        return answer, finished - started, True

    except Exception as exc:
        return "Direct OpenAI error: " + str(exc), 0, False


print("‚úÖ Agent Framework helper functions created")
print("   - create_ephemeral_agent() [async]")
print("   - query_with_direct_openai() [sync fallback]")

## Step 4.4: Implement Query Analysis for Intelligent Routing

Create a sophisticated query analyzer to determine the optimal routing target.

In [None]:
# Patterns that mark a query as trivially simple (routed to the local model).
# Word boundaries keep "hi" from matching "history" and "no" from matching
# "nothing"; the end-anchored patterns tolerate trailing punctuation so that
# "What is Python?" and "help me!" are recognized.
_SIMPLE_QUERY_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'^(hi|hello|hey|good morning|good afternoon)\b',
        r'\b(what is|what are|define|meaning of)\s+\w+[\s?!.]*$',
        r'^(yes|no|ok|okay|thanks|thank you)\b',
        r'\b(help|assist|support)\s*(me)?[\s?!.]*$',
    )
)

# Keywords hinting that a query needs a more capable (cloud) model.
_COMPLEXITY_KEYWORDS = {
    'reasoning': (
        'analyze', 'evaluate', 'compare', 'assess', 'determine',
        'explain why', 'how does', 'what if', 'consider',
        'reasoning', 'strategy', 'approach', 'methodology',
    ),
    'creative': (
        'create', 'design', 'write', 'compose', 'generate',
        'brainstorm', 'imagine', 'story', 'creative', 'innovative',
    ),
    'technical': (
        'code', 'program', 'algorithm', 'function', 'debug',
        'implement', 'architecture', 'system', 'technical', 'develop',
    ),
}

_QUESTION_WORDS = ('why', 'how', 'what if', 'compare', 'analyze')

# Keywords are matched at the START of a word only: inflections still count
# ("considering", "systems", "development") but accidental mid-word hits do
# not ('story' in "history", 'how' in "show", 'code' in "barcode").
_KEYWORD_PATTERNS = {
    category: tuple(re.compile(r'\b' + re.escape(keyword)) for keyword in keywords)
    for category, keywords in _COMPLEXITY_KEYWORDS.items()
}
_QUESTION_PATTERNS = tuple(
    re.compile(r'\b' + re.escape(word)) for word in _QUESTION_WORDS
)


def analyze_query_complexity(query: str) -> Dict:
    """
    Score a query's complexity and pick a routing target.

    The score (capped at 10) combines a length-based base score, a cap of 2
    on that base when the query matches a "simple" pattern, up to +4 for each
    of the reasoning/creative/technical keyword categories, and +2 when more
    than one question word is present.

    Routing reads the module-level flags ``local_available``,
    ``use_agent_framework`` and ``azure_available``: scores <= 3 go to the
    local model when it is available, otherwise the Agent Framework is
    preferred, then direct Azure OpenAI, then local as a last resort.

    Args:
        query: Raw user query text.

    Returns:
        Dict with keys ``query``, ``word_count``, ``char_count``,
        ``complexity_score``, ``is_simple``, ``reasoning_indicators``,
        ``creative_indicators``, ``technical_indicators``, ``target``
        ('local', 'agent' or 'azure') and ``reason``.
    """
    query_lower = query.lower()
    word_count = len(query.split())
    char_count = len(query)

    # Surrounding whitespace must not defeat the ^/$ anchors.
    normalized = query_lower.strip()
    is_simple = any(pattern.search(normalized) for pattern in _SIMPLE_QUERY_PATTERNS)

    # Number of distinct keywords hit per category.
    indicators = {
        category: sum(1 for pattern in patterns if pattern.search(query_lower))
        for category, patterns in _KEYWORD_PATTERNS.items()
    }
    reasoning_indicators = indicators['reasoning']
    creative_indicators = indicators['creative']
    technical_indicators = indicators['technical']

    # Length-based base score.
    if word_count < 5:
        complexity_score = 1
    elif word_count < 15:
        complexity_score = 3
    elif word_count < 30:
        complexity_score = 5
    else:
        complexity_score = 7

    # Simple queries cap the base score only; keyword bonuses still apply.
    if is_simple:
        complexity_score = min(complexity_score, 2)

    # Each category contributes 2 points per keyword, at most 4.
    for count in (reasoning_indicators, creative_indicators, technical_indicators):
        complexity_score += min(count * 2, 4)

    # Several question words together suggest a multi-part question.
    question_complexity = sum(
        1 for pattern in _QUESTION_PATTERNS if pattern.search(query_lower)
    )
    if question_complexity > 1:
        complexity_score += 2

    complexity_score = min(complexity_score, 10)

    # Determine routing target
    if complexity_score <= 3 and local_available:
        target = 'local'
        reason = f"Simple query (score: {complexity_score}/10) - routing to fast local model"
    elif complexity_score >= 7 and use_agent_framework:
        target = 'agent'
        reason = f"Complex query (score: {complexity_score}/10) - routing to Agent Framework"
    elif use_agent_framework:
        target = 'agent'
        reason = f"Medium complexity (score: {complexity_score}/10) - using Agent Framework"
    elif azure_available:
        target = 'azure'
        reason = f"Cloud routing (score: {complexity_score}/10) - using Azure OpenAI"
    else:
        target = 'local'
        reason = f"Fallback to local model (score: {complexity_score}/10)"

    return {
        'query': query,
        'word_count': word_count,
        'char_count': char_count,
        'complexity_score': complexity_score,
        'is_simple': is_simple,
        'reasoning_indicators': reasoning_indicators,
        'creative_indicators': creative_indicators,
        'technical_indicators': technical_indicators,
        'target': target,
        'reason': reason
    }

print("‚úÖ Query complexity analyzer implemented")
print("   Analyzes: length, patterns, complexity indicators")
print("   Routes: local (simple) ‚Üí agent (complex) ‚Üí azure (fallback)")

## Step 4.5: Create Query Processing Functions

Implement the core query processing functions for each routing target.

In [None]:
def query_local_model(prompt: str, max_tokens: int = 200) -> Tuple[str, float, bool]:
    """
    Ask the Foundry Local model for a chat completion.

    Args:
        prompt: User message content.
        max_tokens: Upper bound on generated tokens.

    Returns:
        ``(text, elapsed_seconds, success)``. If the local client is not
        available or the request raises, text describes the problem,
        elapsed is 0 and success is False.
    """
    if not local_available:
        return "Local model not available", 0, False

    try:
        started = time.time()
        completion = local_client.chat.completions.create(
            model=LOCAL_MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=0.7
        )
        finished = time.time()

        return completion.choices[0].message.content, finished - started, True
    except Exception as exc:
        return "Local model error: " + str(exc), 0, False


async def query_agent_framework(prompt: str) -> Tuple[str, float, bool]:
    """
    Answer a prompt through the Agent Framework.

    Delegates to ``create_ephemeral_agent`` when the framework is enabled;
    otherwise returns an "unavailable" result without attempting a call.

    Returns:
        ``(text, elapsed_seconds, success)``.
    """
    if use_agent_framework:
        return await create_ephemeral_agent(prompt)

    return "Agent Framework not available", 0, False


def query_azure_direct(prompt: str, max_tokens: int = 500) -> Tuple[str, float, bool]:
    """
    Query Azure OpenAI directly.

    Thin wrapper that forwards both arguments to
    ``query_with_direct_openai`` and returns its result unchanged.
    """
    outcome = query_with_direct_openai(prompt, max_tokens)
    return outcome


print("‚úÖ Query processing functions created")
print("   - query_local_model() [sync]")
print("   - query_agent_framework() [async]")
print("   - query_azure_direct() [sync]")

## Step 4.6: Implement Unified Hybrid Routing System

Create the main routing function with intelligent fallback chains.

In [None]:
async def answer_with_hybrid_routing(
    user_query: str, 
    show_reasoning: bool = False,
    force_target: str = None
) -> Tuple[str, float, str, bool]:
    """
    Main hybrid routing function with three-tier fallback system.

    The primary target is always attempted. If it fails, the remaining tiers
    of its chain are tried in order, skipping tiers that are not available,
    until one succeeds or the chain is exhausted:

        local -> agent -> azure
        agent -> azure -> local
        azure -> agent -> local

    Args:
        user_query: The user's query
        show_reasoning: Whether to print the routing analysis and append the
            routing reason to a successful response
        force_target: Force routing to specific target ('local', 'agent', 'azure')

    Returns:
        (formatted_response, response_time, source, success) where source is
        the primary target name, or '<tier>-fallback' when a fallback tier
        produced the final result, and response_time is the time reported by
        the last tier attempted.
    """
    # Analyze query for routing decision
    analysis = analyze_query_complexity(user_query)

    # Determine target
    if force_target:
        target = force_target
        reason = f"Forced routing to {force_target}"
    else:
        target = analysis['target']
        reason = analysis['reason']

    if show_reasoning:
        print(f"\nüìä Query Analysis:")
        print(f"   Complexity Score: {analysis['complexity_score']}/10")
        print(f"   Target: {target.upper()}")
        print(f"   Reasoning: {reason}")

    # Ordered attempt chain for each primary target.
    fallback_chains = {
        'local': ('local', 'agent', 'azure'),
        'agent': ('agent', 'azure', 'local'),
        'azure': ('azure', 'agent', 'local'),
    }
    display_names = {'local': 'Local', 'agent': 'Agent Framework', 'azure': 'Azure'}
    routing_banners = {
        'local': "üè† Routing to LOCAL model...",
        'agent': "ü§ñ Routing to AGENT FRAMEWORK...",
        'azure': "‚òÅÔ∏è Routing to AZURE OpenAI...",
    }
    tier_available = {
        'local': local_available,
        'agent': use_agent_framework,
        'azure': azure_available,
    }

    async def run_tier(tier: str) -> Tuple[str, float, bool]:
        """Invoke the query function for one tier (only the agent tier is async)."""
        if tier == 'local':
            return query_local_model(user_query)
        if tier == 'agent':
            return await query_agent_framework(user_query)
        return query_azure_direct(user_query)

    response = ""
    response_time = 0
    success = False
    actual_source = target

    chain = fallback_chains.get(target)
    if chain is None:
        # Unknown target (e.g. a mistyped force_target): report it instead of
        # returning an empty error message.
        response = f"Unknown routing target '{target}' (expected 'local', 'agent' or 'azure')"
    else:
        if show_reasoning:
            print(routing_banners[target])
        response, response_time, success = await run_tier(target)

        # Walk the rest of the chain until a tier succeeds. Unlike a plain
        # if/elif, a failed first fallback does not stop the search.
        last_tried = target
        for tier in chain[1:]:
            if success:
                break
            if not tier_available[tier]:
                continue
            print(f"üîÑ {display_names[last_tried]} failed, trying {display_names[tier]}...")
            response, response_time, success = await run_tier(tier)
            actual_source = f"{tier}-fallback"
            last_tried = tier

    # Format response with source indication
    if success:
        source_tags = {
            'local': '[LOCAL]',
            'agent': '[AGENT-FRAMEWORK]',
            'azure': '[AZURE]',
            'agent-fallback': '[AGENT*]',
            'azure-fallback': '[AZURE*]',
            'local-fallback': '[LOCAL*]'
        }

        source_tag = source_tags.get(actual_source, f'[{actual_source.upper()}]')

        if show_reasoning:
            formatted_response = f"{source_tag} {response}\n\n[Routing: {reason}]"
        else:
            formatted_response = f"{source_tag} {response}"
    else:
        formatted_response = f"[ERROR] All routing options failed: {response}"

    return formatted_response, response_time, actual_source, success

print("‚úÖ Hybrid three-tier routing system implemented")
print("üéØ Ready for Local ‚Üí Agent Framework ‚Üí Azure fallback chain!")

## Step 4.7: Test the Hybrid Routing System

Test various query types to demonstrate intelligent routing.

In [None]:
# Test scenarios for hybrid routing system
test_queries = [
    # Should route to LOCAL
    {
        'query': "Hello!",
        'expected': 'local',
        'category': 'Simple Greeting'
    },
    {
        'query': "What's 25 + 17?",
        'expected': 'local',
        'category': 'Simple Math'
    },
    {
        'query': "What is Python?",
        'expected': 'local',
        'category': 'Simple Definition'
    },
    
    # Should route to AGENT FRAMEWORK
    {
        'query': "Analyze the strategic implications of hybrid AI architectures in enterprise environments, considering security, cost, and performance trade-offs.",
        'expected': 'agent',
        'category': 'Complex Analysis'
    },
    {
        'query': "Write a creative story about an AI system that learns to understand human emotions through a hybrid local-cloud architecture.",
        'expected': 'agent',
        'category': 'Creative Writing'
    },
    {
        'query': "Design a scalable microservices architecture for deploying machine learning models with intelligent routing between edge and cloud.",
        'expected': 'agent',
        'category': 'Technical Design'
    },
    
    # Edge cases
    {
        'query': "Compare the advantages and disadvantages of three different database systems for a high-traffic web application.",
        'expected': 'agent',
        'category': 'Comparative Analysis'
    }
]

print("üß™ Testing Hybrid Routing System with Agent Framework")
print("=" * 70)

routing_stats = {'local': 0, 'agent': 0, 'azure': 0, 'fallback': 0, 'errors': 0}
total_time = 0

for i, test in enumerate(test_queries, 1):
    print(f"\n{'='*70}")
    print(f"Test {i}/{len(test_queries)}: {test['category']}")
    print(f"{'='*70}")
    print(f"Query: {test['query'][:100]}{'...' if len(test['query']) > 100 else ''}")
    print(f"Expected Route: {test['expected'].upper()}")
    
    # Run query with routing
    response, response_time, source, success = await answer_with_hybrid_routing(
        test['query'],
        show_reasoning=True
    )
    
    total_time += response_time
    
    # Track statistics
    if 'fallback' in source:
        routing_stats['fallback'] += 1
    elif source in routing_stats:
        routing_stats[source] += 1
    
    if not success:
        routing_stats['errors'] += 1
    
    print(f"\nüìù Response ({response_time:.3f}s):")
    print("-" * 70)
    # Show preview of response
    response_lines = response.split('\n')
    preview_lines = response_lines[:3] if len(response_lines) > 3 else response_lines
    for line in preview_lines:
        print(line[:100] + ('...' if len(line) > 100 else ''))
    if len(response_lines) > 3:
        print(f"... ({len(response_lines) - 3} more lines)")
    print("-" * 70)
    
    # Verify routing
    route_correct = test['expected'] in source
    print(f"‚úÖ Routing {'Correct' if route_correct else 'Different'}: {source.upper()}")

# Summary statistics
print(f"\n{'='*70}")
print("üìä ROUTING STATISTICS")
print(f"{'='*70}")

# A route counter is incremented for every query, including ones that
# ultimately failed, so successes are derived from the error count rather
# than by summing the route counters.
total_queries = len(test_queries)
successful_routes = total_queries - routing_stats['errors']
average_time = total_time / total_queries if total_queries else 0.0

print(f"Total Queries: {total_queries}")
print(f"Successful Routes: {successful_routes}")
print(f"Total Time: {total_time:.3f}s")
print(f"Average Time: {average_time:.3f}s")
print()

# Per-bucket breakdown (route targets, fallbacks and errors).
for target, count in routing_stats.items():
    if count > 0:
        percentage = (count / total_queries) * 100
        print(f"   {target.upper()}: {count} ({percentage:.1f}%)")

if successful_routes > 0:
    success_rate = (successful_routes / total_queries) * 100
    print(f"\n‚úÖ Success Rate: {success_rate:.1f}%")

print(f"\nüéâ Hybrid routing with Agent Framework testing complete!")

## Step 4.8: Test Fallback Mechanisms

Test the fallback chains to ensure system resilience.

In [None]:
# Test fallback scenarios
print("üß™ Testing Fallback Mechanisms")
print("=" * 70)

fallback_tests = [
    {
        'query': "Explain quantum computing",
        'force_target': 'local',
        'description': 'Force complex query to local (will fallback if needed)'
    },
    {
        'query': "Hi there",
        'force_target': 'agent',
        'description': 'Force simple query to Agent Framework (expensive but works)'
    }
]

for i, test in enumerate(fallback_tests, 1):
    print(f"\nFallback Test {i}: {test['description']}")
    print("-" * 70)
    print(f"Query: {test['query']}")
    print(f"Forced Target: {test['force_target'].upper()}")
    
    response, response_time, source, success = await answer_with_hybrid_routing(
        test['query'],
        show_reasoning=True,
        force_target=test['force_target']
    )
    
    print(f"\n‚úÖ Response received via: {source.upper()}")
    print(f"‚è±Ô∏è Time: {response_time:.3f}s")
    print(f"üìù Response preview: {response[:150]}...")

print(f"\n{'='*70}")
print("‚úÖ Fallback testing complete - system is resilient!")

## Step 4.9: Performance Analysis and Recommendations

Analyze performance characteristics and provide recommendations.

In [None]:
print("üìä Hybrid Routing System Analysis with Agent Framework")
print("=" * 70)

# Local Model analysis
if local_available:
    print("\nüè† Local Model (Foundry Local) - Strengths:")
    local_strengths = [
        "‚ö° Sub-second response times for simple queries",
        "üîí Complete data privacy (no cloud transmission)",
        "üí∞ Zero API costs for high-volume requests",
        "üîå Works offline without internet dependency",
        "üìä Ideal for simple Q&A and basic tasks"
    ]
    for strength in local_strengths:
        print(f"   {strength}")
    
    print("\nü§î Local Model - Considerations:")
    local_considerations = [
        "‚ö†Ô∏è Limited reasoning capabilities for complex tasks",
        "‚ö†Ô∏è Smaller context window",
        "‚ö†Ô∏è May struggle with specialized domain knowledge",
        "‚ö†Ô∏è Requires local compute resources"
    ]
    for consideration in local_considerations:
        print(f"   {consideration}")

# Agent Framework analysis
if use_agent_framework:
    print("\nü§ñ Azure Agent Framework - Strengths:")
    agent_strengths = [
        "‚úÖ Advanced reasoning and analysis capabilities",
        "‚úÖ Modern async/await patterns for efficiency",
        "‚úÖ Automatic agent lifecycle management",
        "‚úÖ Built-in function tool support",
        "‚úÖ Native streaming capabilities",
        "‚úÖ Strong typing with Pydantic",
        "‚úÖ Simplified API with sensible defaults",
        "‚úÖ Enterprise-grade with Azure integration"
    ]
    for strength in agent_strengths:
        print(f"   {strength}")
    
    print("\nü§î Agent Framework - Considerations:")
    agent_considerations = [
        "‚ö†Ô∏è Higher latency than local (network + processing)",
        "‚ö†Ô∏è Requires Azure authentication (az login)",
        "‚ö†Ô∏è API costs for token usage",
        "‚ö†Ô∏è Requires internet connectivity"
    ]
    for consideration in agent_considerations:
        print(f"   {consideration}")

# Direct OpenAI analysis
if azure_available:
    print("\n‚òÅÔ∏è Direct Azure OpenAI - Strengths:")
    direct_strengths = [
        "‚úÖ Reliable fallback option",
        "‚úÖ Simple synchronous API",
        "‚úÖ Wide compatibility",
        "‚úÖ Good for batch processing"
    ]
    for strength in direct_strengths:
        print(f"   {strength}")

# Routing recommendations
print("\nüéØ Intelligent Routing Recommendations:")
print("-" * 70)

recommendations = {
    "Route to Local Models for:": [
        "‚ö° Simple queries (greetings, basic Q&A)",
        "üîí Privacy-sensitive data processing",
        "üí∞ High-volume, cost-sensitive requests",
        "üì± Offline or edge computing scenarios",
        "üéØ Response time critical applications (<100ms)"
    ],
    "Route to Agent Framework for:": [
        "üß† Complex reasoning and analysis",
        "üìä Strategic planning and business analysis",
        "üé® Creative content generation",
        "üíª Technical design and architecture",
        "üîß Tasks requiring function tools",
        "üåä Real-time streaming requirements",
        "üí¨ Multi-turn complex conversations"
    ],
    "Route to Direct Azure OpenAI for:": [
        "üîÑ Fallback when Agent Framework unavailable",
        "üìù Simple document processing",
        "‚öôÔ∏è Batch processing scenarios",
        "üîß Legacy system compatibility"
    ]
}

for category, items in recommendations.items():
    print(f"\n{category}")
    for item in items:
        print(f"   {item}")

# Cost optimization insights
print("\nüí∞ Cost Optimization Strategy:")
print("-" * 70)
print("1. Route 80% of simple queries to local models (zero cost)")
print("2. Use Agent Framework for 15% complex queries (high value)")
print("3. Reserve direct API for 5% fallback scenarios")
print("4. Estimated cost reduction: 75-85% vs all-cloud approach")

print("\n" + "=" * 70)
print("üéâ Analysis complete - hybrid system optimizes cost, performance, and capabilities!")

## Step 4.10: Create Production-Ready Hybrid Router Class

Package everything into a reusable class for integration.

In [None]:
class HybridAgentRouter:
    """
    Production-ready hybrid router using Agent Framework.
    Intelligently routes between local models, Agent Framework, and Azure OpenAI.

    Wraps ``answer_with_hybrid_routing`` and keeps running counters of where
    queries were answered, how many needed a fallback, and how many failed.
    """

    def __init__(self):
        # Snapshot the module-level availability flags as real booleans.
        # ``use_agent_framework`` may hold the project endpoint string (or
        # None) rather than True/False; coercing keeps the endpoint URL out
        # of get_capabilities() output.
        self.local_available = bool(local_available)
        self.agent_framework_available = bool(use_agent_framework)
        self.azure_available = bool(azure_available)

        # Routing statistics
        self.stats = {
            'total_queries': 0,
            'local_routes': 0,
            'agent_routes': 0,
            'azure_routes': 0,
            'fallbacks': 0,
            'errors': 0,
            'total_time': 0.0
        }

    async def query(
        self, 
        prompt: str, 
        force_target: Optional[str] = None,
        show_reasoning: bool = False
    ) -> Dict:
        """
        Query the hybrid system with intelligent routing.

        Args:
            prompt: User query
            force_target: Force routing to 'local', 'agent', or 'azure'
            show_reasoning: Include routing analysis in response

        Returns:
            Dictionary with keys ``response``, ``source``, ``response_time``,
            ``success`` and ``query``.
        """
        self.stats['total_queries'] += 1

        response, response_time, source, success = await answer_with_hybrid_routing(
            prompt,
            show_reasoning=show_reasoning,
            force_target=force_target
        )

        # Update statistics
        self.stats['total_time'] += response_time
        if 'fallback' in source:
            self.stats['fallbacks'] += 1

        if success:
            # Substring match so that '<tier>-fallback' sources are credited
            # to the tier that actually answered.
            if 'local' in source:
                self.stats['local_routes'] += 1
            elif 'agent' in source:
                self.stats['agent_routes'] += 1
            elif 'azure' in source:
                self.stats['azure_routes'] += 1
        else:
            self.stats['errors'] += 1

        return {
            'response': response,
            'source': source,
            'response_time': response_time,
            'success': success,
            'query': prompt
        }

    def get_statistics(self) -> Dict:
        """
        Get routing statistics and performance metrics.

        Returns:
            ``{'message': ...}`` when no query has been processed yet;
            otherwise the raw counters plus ``success_rate``,
            ``average_response_time`` and per-target percentages (all
            percentages are relative to the total query count).
        """
        total = self.stats['total_queries']
        if total == 0:
            return {'message': 'No queries processed yet'}

        avg_time = self.stats['total_time'] / total
        success_rate = ((total - self.stats['errors']) / total) * 100

        return {
            'total_queries': total,
            'local_routes': self.stats['local_routes'],
            'agent_routes': self.stats['agent_routes'],
            'azure_routes': self.stats['azure_routes'],
            'fallbacks': self.stats['fallbacks'],
            'errors': self.stats['errors'],
            'success_rate': success_rate,
            'average_response_time': avg_time,
            'local_percentage': (self.stats['local_routes'] / total) * 100,
            'agent_percentage': (self.stats['agent_routes'] / total) * 100,
            'azure_percentage': (self.stats['azure_routes'] / total) * 100
        }

    def get_capabilities(self) -> Dict:
        """
        Get information about available routing targets.

        Returns:
            Availability booleans for each target, plus the configured local
            and cloud model names (None when the matching target is
            unavailable).
        """
        cloud_available = self.agent_framework_available or self.azure_available
        return {
            'local_model': self.local_available,
            'agent_framework': self.agent_framework_available,
            'azure_openai': self.azure_available,
            'local_model_name': LOCAL_MODEL_NAME if self.local_available else None,
            'azure_model': AZURE_AI_MODEL_DEPLOYMENT_NAME if cloud_available else None
        }

# Initialize the production router
hybrid_router = HybridAgentRouter()

print("‚úÖ HybridAgentRouter initialized and ready")
print("\nüìä Available Methods:")
print("   - await hybrid_router.query(prompt) - Main query method")
print("   - hybrid_router.get_statistics() - Get performance stats")
print("   - hybrid_router.get_capabilities() - Get available targets")

# Test the router
print("\nüß™ Testing HybridAgentRouter...")
test_result = await hybrid_router.query(
    "Explain the benefits of hybrid AI systems",
    show_reasoning=False
)

print(f"‚úÖ Router test successful!")
print(f"   Source: {test_result['source']}")
print(f"   Time: {test_result['response_time']:.3f}s")
print(f"   Response: {test_result['response'][:100]}...")

# Show capabilities
print(f"\nüìä Router Capabilities:")
capabilities = hybrid_router.get_capabilities()
for key, value in capabilities.items():
    print(f"   {key}: {value}")

## Step 4.11: Interactive Testing

Try the router with your own queries!

In [None]:
# Interactive testing section
print("üéÆ Interactive Hybrid Router Testing")
print("=" * 70)
print("Try different queries to see intelligent routing in action!")
print()

# Example queries to try
example_queries = [
    "What is 2+2?",  # Local
    "Analyze the business case for implementing AI in healthcare",  # Agent
    "Write a haiku about technology",  # Agent
    "Hello",  # Local
    "Design a cloud-native architecture for a social media platform"  # Agent
]

print("üìù Example queries to try:")
for i, q in enumerate(example_queries, 1):
    print(f"   {i}. {q}")

print("\n" + "=" * 70)

# Test a few automatically
for query in example_queries[:3]:
    print(f"\nüîç Testing: '{query}'")
    result = await hybrid_router.query(query, show_reasoning=True)
    print()

# Show final statistics
print("\nüìä Final Routing Statistics:")
print("=" * 70)
stats = hybrid_router.get_statistics()
for key, value in stats.items():
    if isinstance(value, float):
        print(f"   {key.replace('_', ' ').title()}: {value:.2f}")
    else:
        print(f"   {key.replace('_', ' ').title()}: {value}")

## 🎉 Lab 4 Complete!

### What You've Accomplished:
- ✅ **Agent Framework Integration**: Successfully integrated Microsoft Agent Framework for cloud AI
- ✅ **Intelligent Routing**: Implemented smart three-tier routing (Local → Agent → Azure)
- ✅ **Query Analysis**: Created sophisticated complexity analyzer for routing decisions
- ✅ **Fallback Chains**: Built robust error handling with automatic fallbacks
- ✅ **Production Ready**: Packaged as HybridAgentRouter class for easy integration
- ✅ **Performance Optimized**: Balanced cost, speed, and capability

### Key Features:

🏠 **Local Processing**: Fast, private, cost-effective for simple queries

🤖 **Agent Framework**: Modern async patterns for complex reasoning and analysis

☁️ **Azure Fallback**: Reliable fallback ensuring system resilience

⚡ **Smart Routing**: Automatic complexity analysis and optimal target selection

📊 **Telemetry**: Built-in statistics and performance tracking

### Routing Strategy:

**Complexity Score 0-3**: Route to Local
- Greetings, simple Q&A, basic math
- <100ms response time
- Zero cost

**Complexity Score 4-10**: Route to Agent Framework
- Analysis, design, creative tasks
- Advanced reasoning capabilities
- Async/await efficiency

**Fallback Chain**: Primary → Secondary → Tertiary
- Ensures high availability
- Graceful degradation
- Transparent to user

### Agent Framework Benefits:

✨ **Modern API**: Clean async/await patterns

🔧 **Lifecycle Management**: Automatic agent creation and cleanup

🎯 **Type Safety**: Pydantic models for configuration

🌊 **Streaming**: Native support for real-time responses

🔌 **Extensible**: Easy tool integration

### Production Considerations:

🔐 **Authentication**: Uses `DefaultAzureCredential`, which can pick up Azure CLI credentials (`az login`)

📊 **Monitoring**: Built-in telemetry and statistics

💰 **Cost Optimization**: 75-85% cost reduction vs all-cloud

⚡ **Performance**: <100ms local, <2s cloud

🛡️ **Resilience**: Multiple fallback layers

### Next Steps:

- Integrate HybridAgentRouter into your applications
- Add custom tools to Agent Framework for domain-specific tasks
- Implement streaming responses for real-time user feedback
- Add caching layer for frequently asked questions
- Deploy with monitoring and alerting

**Your hybrid AI routing system with Agent Framework is production-ready!** 🚀

*This lab demonstrates the modern Microsoft approach to building intelligent hybrid AI systems, combining the best of local models and cloud AI services through the Agent Framework.*