# Lab 7: Frontend Chat Interface Integration (Streamlit)

**Purpose:** Build a user-friendly web interface for the hybrid chatbot using Streamlit, allowing stakeholders to interact with the system and see the routing decisions in action.

## Overview

In this lab, we'll:
- Create a Streamlit web application for the chatbot
- Integrate all previous lab components into a cohesive UI
- Display routing transparency and conversation history
- Add real-time performance monitoring
- Test the complete hybrid system through the web interface

## Step 7.1: Import Dependencies and Load Configurations

In [None]:
import os
import sys
import time
# import pickle
import streamlit as st
from datetime import datetime
from openai import OpenAI, AzureOpenAI

from azure.ai.agents.models import CodeInterpreterTool
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from dotenv import find_dotenv, load_dotenv

# Add parent directory to path for module imports
# NOTE: '__file__' is passed as a literal string, not the __file__ variable.
# abspath() therefore resolves it against the current working directory, and
# the two dirname() calls yield the *parent* of the working directory.
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath('__file__')))
# Guard against adding the same entry again when the cell is re-run.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Load environment variables
# Called without arguments, so the .env location is left to python-dotenv.
load_dotenv()

print("✅ Dependencies loaded successfully")

In [None]:
from foundry_local import FoundryLocalManager

# Initialize and optionally bootstrap with a model
# NOTE(review): alias_or_model_id=None requests no specific model here; what
# bootstrap=True actually starts is defined by the foundry_local SDK — confirm.
manager = FoundryLocalManager(alias_or_model_id=None, bootstrap=True)

# Service URI as reported by the manager (printed below as "Local service").
LOCAL_ENDPOINT = manager.service_uri
# Both lookups index os.environ directly, so a missing variable raises
# KeyError here rather than falling back to a default.
LOCAL_MODEL_ALIAS = os.environ["LOCAL_MODEL_NAME"]
AZURE_OPENAI_API_VERSION = os.environ["AZURE_OPENAI_API_VERSION"]

print(f"Local service: {LOCAL_ENDPOINT}")
# manager.endpoint is a separate attribute from manager.service_uri; both are
# printed so they can be compared.
print(f"Local endpoint: {manager.endpoint}")
print(f"Local model alias: {LOCAL_MODEL_ALIAS}")

In [None]:


# List models in cache
local_models = manager.list_cached_models()
print(f"Models in cache: {local_models}")
# Indexing [0] on an empty cache would raise IndexError, so only report the
# first alias when at least one model has been cached.
if local_models:
    print(f"Model Alias: {local_models[0].alias}")
else:
    print("Model Alias: (no models in cache)")

## Step 7.2: Create Core Routing Functions

Let's recreate the essential functions from previous labs for the Streamlit app:

In [None]:
def analyze_query_characteristics(query):
    """Analyze query characteristics for routing decisions.

    Args:
        query: The raw user message (str).

    Returns:
        dict with the keys:
            'length': number of characters in the raw query.
            'word_count': number of whitespace-separated tokens.
            'has_complex_keywords': True when any complex-task phrase occurs
                as a substring of the lowercased query.
            'is_greeting': True when the query starts with a greeting word.
            'is_simple_question': True when the query starts with
                "what is", "who is" or "where is".
            'is_calculation': True when the query contains an arithmetic
                expression (e.g. "25 * 4") or a form of calculate/compute.
    """
    import re

    query_lower = query.lower().strip()

    # Phrases that indicate a task needing deeper analysis (substring match).
    complex_keywords = (
        'summarize', 'analyze', 'explain in detail', 'comprehensive',
        'business plan', 'strategy', 'compare', 'evaluate', 'assess',
    )

    # The trailing \b keeps prefixes from matching inside longer words:
    # without it "history ..." counts as the greeting "hi" and "what island"
    # counts as "what is".
    greeting_patterns = (r"^(hi|hello|hey|good morning)\b",)
    simple_patterns = (r"^what is\b", r"^who is\b", r"^where is\b")
    # Arithmetic like "3 + 4", or the verbs calculate/compute in their common
    # inflections. Word boundaries stop "computer" from being treated as a
    # calculation request.
    calc_patterns = (
        r"\d+\s*[+\-*/]\s*\d+",
        r"\b(?:calculat|comput)(?:e|es|ed|ing)\b",
    )

    return {
        'length': len(query),
        'word_count': len(query.split()),
        'has_complex_keywords': any(kw in query_lower for kw in complex_keywords),
        # re.match anchors at the start of the string; re.search scans all of it.
        'is_greeting': any(re.match(p, query_lower) for p in greeting_patterns),
        'is_simple_question': any(re.match(p, query_lower) for p in simple_patterns),
        'is_calculation': any(re.search(p, query_lower) for p in calc_patterns),
    }

def route_query(query, analysis=None):
    """Determine routing based on query analysis.

    Args:
        query: The raw user message. Only used to compute ``analysis`` when
            none is supplied.
        analysis: Optional dict as produced by
            ``analyze_query_characteristics``; computed from ``query`` if None.

    Returns:
        A ``(target, reason)`` tuple where ``target`` is ``'local'`` or
        ``'cloud'`` and ``reason`` is a short human-readable explanation.
    """
    if analysis is None:
        analysis = analyze_query_characteristics(query)

    word_count = analysis['word_count']

    # Route to LOCAL for simple tasks
    if analysis['is_greeting']:
        return 'local', 'Simple greeting - fast local response'

    if analysis['is_calculation']:
        return 'local', 'Mathematical calculation - local processing'

    if analysis['is_simple_question'] and word_count <= 10:
        return 'local', 'Simple factual question - local efficient'

    # Route to CLOUD for complex tasks. This has to run before the
    # short-query shortcut below: otherwise a terse but complex request such
    # as "Compare local vs cloud computing" (5 words) is sent to the local
    # model purely because of its length.
    if analysis['has_complex_keywords']:
        return 'cloud', 'Complex analysis keywords - requires cloud'

    if word_count <= 5:
        return 'local', 'Very short query - likely simple'

    if word_count > 20:
        return 'cloud', 'Long query - sophisticated processing needed'

    # Default routing for everything in between (6-20 words, no keywords)
    if word_count <= 15:
        return 'local', 'Default local for moderate queries'
    return 'cloud', 'Default cloud for longer queries'

print("✅ Routing functions created")

## Step 7.3: Initialize Model Clients

In [None]:
def initialize_clients():
    """Build the OpenAI-compatible clients for the local and Azure backends.

    Returns:
        dict that always has the keys 'local' and 'azure' (client object or
        None). 'local_model' / 'azure_model' are added only for the backends
        that were set up successfully. Failures are printed, never raised.
    """
    registry = {'local': None, 'azure': None}

    # --- Local backend -----------------------------------------------------
    try:
        endpoint = os.environ.get("LOCAL_MODEL_ENDPOINT", "http://localhost:59413")
        # The model name is pinned to the identifier the local server reports.
        model_name = "Phi-3.5-mini-instruct-generic-cpu"

        registry['local'] = OpenAI(base_url=f"{endpoint}/v1", api_key="not-needed")
        registry['local_model'] = model_name
        print(f"✅ Local client initialized: {model_name}")
    except Exception as e:
        print(f"❌ Local client failed: {e}")

    # --- Azure backend -----------------------------------------------------
    try:
        # Direct indexing: a missing variable raises KeyError, which is
        # reported by the handler below.
        azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
        azure_key = os.environ["AZURE_OPENAI_KEY"]
        deployment = os.environ["AZURE_DEPLOYMENT_NAME"]
        api_version = os.environ["AZURE_OPENAI_API_VERSION"]

        # Variables that are present but empty count as incomplete.
        if not all((azure_endpoint, azure_key, deployment, api_version)):
            print("❌ Azure configuration incomplete")
        else:
            registry['azure'] = AzureOpenAI(
                api_key=azure_key,
                api_version=api_version,
                azure_endpoint=azure_endpoint,
            )
            registry['azure_model'] = deployment
            print(f"✅ Azure client initialized: {deployment}")
    except Exception as e:
        print(f"❌ Azure client failed: {e}")

    return registry

# Module-level client registry read by route_and_respond
clients = initialize_clients()

## Step 7.4: Create the Main Chat Function

In [None]:
def route_and_respond(user_message, chat_history=None):
    """Route message and generate response with telemetry.

    The routed backend is tried first. If its client is missing *or the call
    itself fails* (for example because the local server is not running), the
    other backend is tried and its answer is tagged as a fallback.

    Args:
        user_message: The new user message (str).
        chat_history: Optional list of prior ``{"role", "content"}`` dicts.
            It is not mutated.

    Returns:
        ``(content, response_time, source_tag, reason)`` where
        ``response_time`` is wall-clock seconds including any failed attempt,
        ``source_tag`` is "Local", "Cloud", "Local (Fallback)",
        "Cloud (Fallback)" or "Error", and ``reason`` is the routing
        explanation. On failure ``content`` starts with "Error:" and
        ``response_time`` is 0.
    """
    if chat_history is None:
        chat_history = []

    # Analyze and route
    analysis = analyze_query_characteristics(user_message)
    target, reason = route_query(user_message, analysis)

    # Prepare messages (concatenation builds a new list)
    messages = chat_history + [{"role": "user", "content": user_message}]

    # route target -> (client key, model key, display label, max_tokens)
    backends = {
        'local': ('local', 'local_model', 'Local', 200),
        'cloud': ('azure', 'azure_model', 'Cloud', 400),
    }
    # Routed backend first, the other one as fallback.
    attempt_order = ['local', 'cloud'] if target == 'local' else ['cloud', 'local']

    start_time = time.time()
    last_error = None

    for name in attempt_order:
        client_key, model_key, label, max_tokens = backends[name]
        client = clients.get(client_key)
        if not client:
            continue

        try:
            response = client.chat.completions.create(
                model=clients[model_key],
                messages=messages,
                max_tokens=max_tokens,
                temperature=0.7
            )
            content = response.choices[0].message.content
        except Exception as e:
            # Broad on purpose: this is the boundary to the model services,
            # and any failure should move on to the next backend.
            last_error = e
            continue

        response_time = time.time() - start_time
        source_tag = label if name == target else f"{label} (Fallback)"
        # content may be None; callers slice and render it as text.
        return content or "", response_time, source_tag, reason

    if last_error is not None:
        return f"Error: {str(last_error)}", 0, "Error", reason
    return "Error: No models available", 0, "Error", reason

print("✅ Chat function created")

## Step 7.5: Create the Streamlit Application Code

In [None]:
# Create the Streamlit app code
# The template is a raw triple-single-quoted string: quotes and regex
# backslashes inside it are written exactly as they should appear in app.py.
streamlit_app_code = r'''
import os
import sys
import time
import streamlit as st
from datetime import datetime
from dotenv import load_dotenv
from openai import OpenAI, AzureOpenAI
import re

# Page configuration
st.set_page_config(
    page_title="Hybrid AI Chatbot",
    page_icon="🤖",
    layout="wide"
)

# Load environment variables
load_dotenv()

# Initialize session state
if "messages" not in st.session_state:
    st.session_state.messages = []
if "routing_stats" not in st.session_state:
    st.session_state.routing_stats = {"local": 0, "cloud": 0}

def analyze_query_characteristics(query):
    """Analyze query characteristics for routing decisions."""
    analysis = {
        'length': len(query),
        'word_count': len(query.split()),
        'has_complex_keywords': False,
        'is_greeting': False,
        'is_simple_question': False,
        'is_calculation': False
    }
    
    query_lower = query.lower().strip()
    
    # Complex keywords
    complex_keywords = [
        'summarize', 'analyze', 'explain in detail', 'comprehensive',
        'business plan', 'strategy', 'compare', 'evaluate', 'assess'
    ]
    
    # Check patterns (word boundaries keep "history" from matching "hi"
    # and "computer" from matching "compute")
    greeting_patterns = [r'^(hi|hello|hey|good morning)\b']
    simple_patterns = [r'^what is\b', r'^who is\b', r'^where is\b']
    calc_patterns = [r'\d+\s*[+\-*/]\s*\d+', r'\b(?:calculat|comput)(?:e|es|ed|ing)\b']
    
    # Analyze
    for keyword in complex_keywords:
        if keyword in query_lower:
            analysis['has_complex_keywords'] = True
            break
    
    for pattern in greeting_patterns:
        if re.match(pattern, query_lower):
            analysis['is_greeting'] = True
            break
    
    for pattern in simple_patterns:
        if re.match(pattern, query_lower):
            analysis['is_simple_question'] = True
            break
    
    for pattern in calc_patterns:
        if re.search(pattern, query_lower):
            analysis['is_calculation'] = True
            break
    
    return analysis

def route_query(query, analysis=None):
    """Determine routing based on query analysis."""
    if analysis is None:
        analysis = analyze_query_characteristics(query)
    
    # Route to LOCAL for simple tasks
    if analysis['is_greeting']:
        return 'local', 'Simple greeting - fast local response'
    
    if analysis['is_calculation']:
        return 'local', 'Mathematical calculation - local processing'
    
    if analysis['is_simple_question'] and analysis['word_count'] <= 10:
        return 'local', 'Simple factual question - local efficient'
    
    # Route to CLOUD for complex tasks (checked before the short-query
    # shortcut so terse complex requests are not sent local)
    if analysis['has_complex_keywords']:
        return 'cloud', 'Complex analysis keywords - requires cloud'
    
    if analysis['word_count'] <= 5:
        return 'local', 'Very short query - likely simple'
    
    if analysis['word_count'] > 20:
        return 'cloud', 'Long query - sophisticated processing needed'
    
    # Default routing
    if analysis['word_count'] <= 15:
        return 'local', 'Default local for moderate queries'
    else:
        return 'cloud', 'Default cloud for longer queries'

@st.cache_resource
def initialize_clients():
    """Initialize both local and Azure OpenAI clients."""
    clients = {'local': None, 'azure': None}
    
    # Local client setup
    try:
        LOCAL_ENDPOINT = os.environ.get("LOCAL_MODEL_ENDPOINT", "http://localhost:52329")
        LOCAL_MODEL = os.environ.get("LOCAL_MODEL_NAME", "phi-3.5-mini")
        
        clients['local'] = OpenAI(
            base_url=f"{LOCAL_ENDPOINT}/v1",
            api_key="not-needed"
        )
        clients['local_model'] = LOCAL_MODEL
    except Exception as e:
        st.error(f"Local client failed: {e}")
    
    # Azure client setup
    try:
        AZURE_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT')
        AZURE_KEY = os.getenv('AZURE_OPENAI_KEY')
        AZURE_DEPLOYMENT = os.getenv('AZURE_DEPLOYMENT_NAME')
        AZURE_VERSION = os.getenv('AZURE_OPENAI_API_VERSION')
        
        if all([AZURE_ENDPOINT, AZURE_KEY, AZURE_DEPLOYMENT, AZURE_VERSION]):
            clients['azure'] = AzureOpenAI(
                api_key=AZURE_KEY,
                api_version=AZURE_VERSION,
                azure_endpoint=AZURE_ENDPOINT
            )
            clients['azure_model'] = AZURE_DEPLOYMENT
    except Exception as e:
        st.error(f"Azure client failed: {e}")
    
    return clients

def route_and_respond(user_message, chat_history=None):
    """Route message and generate response with telemetry."""
    if chat_history is None:
        chat_history = []
    
    # Analyze and route
    analysis = analyze_query_characteristics(user_message)
    target, reason = route_query(user_message, analysis)
    
    # Prepare messages
    messages = chat_history + [{'role': 'user', 'content': user_message}]
    
    start_time = time.time()
    
    try:
        if target == "local" and clients['local']:
            response = clients['local'].chat.completions.create(
                model=clients['local_model'],
                messages=messages,
                max_tokens=200,
                temperature=0.7
            )
            source_tag = "Local"
            
        elif target == "cloud" and clients['azure']:
            response = clients['azure'].chat.completions.create(
                model=clients['azure_model'],
                messages=messages,
                max_tokens=400,
                temperature=0.7
            )
            source_tag = "Cloud"
            
        else:
            # Fallback logic
            if clients['azure']:
                response = clients['azure'].chat.completions.create(
                    model=clients['azure_model'],
                    messages=messages,
                    max_tokens=400,
                    temperature=0.7
                )
                source_tag = "Cloud (Fallback)"
            elif clients['local']:
                response = clients['local'].chat.completions.create(
                    model=clients['local_model'],
                    messages=messages,
                    max_tokens=200,
                    temperature=0.7
                )
                source_tag = "Local (Fallback)"
            else:
                return "Error: No models available", 0, "Error", reason
        
        end_time = time.time()
        response_time = end_time - start_time
        
        content = response.choices[0].message.content
        
        return content, response_time, source_tag, reason
        
    except Exception as e:
        return f"Error: {str(e)}", 0, "Error", reason

# Initialize clients
clients = initialize_clients()

# App header
st.title("🤖 Hybrid AI Chatbot")
st.markdown("**Intelligent routing between local and cloud models**")

# Sidebar with stats and info
with st.sidebar:
    st.header("📊 System Status")
    
    # Model availability
    st.subheader("Model Availability")
    if clients['local']:
        st.success("🟢 Local Model: Ready")
    else:
        st.error("🔴 Local Model: Unavailable")
    
    if clients['azure']:
        st.success("🟢 Cloud Model: Ready")
    else:
        st.error("🔴 Cloud Model: Unavailable")
    
    # Routing statistics
    st.subheader("Routing Statistics")
    total_queries = st.session_state.routing_stats['local'] + st.session_state.routing_stats['cloud']
    if total_queries > 0:
        local_pct = (st.session_state.routing_stats['local'] / total_queries) * 100
        cloud_pct = (st.session_state.routing_stats['cloud'] / total_queries) * 100
        st.metric("Local Queries", st.session_state.routing_stats['local'], f"{local_pct:.1f}%")
        st.metric("Cloud Queries", st.session_state.routing_stats['cloud'], f"{cloud_pct:.1f}%")
    else:
        st.info("No queries processed yet")
    
    # Clear conversation
    if st.button("🗑️ Clear Conversation"):
        st.session_state.messages = []
        st.session_state.routing_stats = {'local': 0, 'cloud': 0}
        st.rerun()

# Main chat interface
st.subheader("💬 Chat Interface")

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message['role']):
        if message['role'] == 'assistant':
            # Show response with source and timing
            st.markdown(message['content'])
            if 'source' in message and 'time' in message:
                st.caption(f"Source: {message['source']} | Response time: {message['time']:.3f}s")
        else:
            st.markdown(message['content'])

# Chat input. The widget is always rendered; a prompt queued by one of the
# example buttons on the previous run takes priority over typed text.
typed_prompt = st.chat_input("Ask me anything...")
prompt = st.session_state.pop('next_input', None) or typed_prompt
if prompt:
    # Add user message to chat
    st.session_state.messages.append({'role': 'user', 'content': prompt})
    
    # Display user message
    with st.chat_message('user'):
        st.markdown(prompt)
    
    # Generate and display assistant response
    with st.chat_message('assistant'):
        with st.spinner('Thinking...'):
            # Convert messages to format expected by route_and_respond
            chat_history = [{'role': msg['role'], 'content': msg['content']} 
                          for msg in st.session_state.messages[:-1] 
                          if msg['role'] != 'assistant' or 'content' in msg]
            
            response, response_time, source, reason = route_and_respond(prompt, chat_history)
            
            # Display response
            st.markdown(response)
            st.caption(f"Source: {source} | Response time: {response_time:.3f}s")
            
            # Update routing statistics
            if source.lower().startswith('local'):
                st.session_state.routing_stats['local'] += 1
            elif source.lower().startswith('cloud'):
                st.session_state.routing_stats['cloud'] += 1
            
            # Add assistant response to chat
            st.session_state.messages.append({
                'role': 'assistant', 
                'content': response,
                'source': source,
                'time': response_time,
                'reason': reason
            })

# Example queries
st.subheader("💡 Try These Examples")
col1, col2 = st.columns(2)

with col1:
    st.markdown("**Simple Queries (Local)**")
    example_simple = [
        "Hi there!",
        "What is 15 + 27?",
        "What is the capital of France?"
    ]
    for example in example_simple:
        if st.button(f"💬 {example}", key=f"simple_{example}"):
            # Queue the example and rerun so the chat handler above picks it up
            st.session_state['next_input'] = example
            st.rerun()

with col2:
    st.markdown("**Complex Queries (Cloud)**")
    example_complex = [
        "Analyze the benefits of hybrid AI",
        "Write a summary of machine learning",
        "Compare local vs cloud computing"
    ]
    for example in example_complex:
        if st.button(f"☁️ {example}", key=f"complex_{example}"):
            # Queue the example and rerun so the chat handler above picks it up
            st.session_state['next_input'] = example
            st.rerun()

# Footer
st.markdown("---")
st.markdown("**Hybrid AI Chatbot** - Demonstrating intelligent routing between local and cloud models")
'''

# # Write the Streamlit app to file
# '__file__' is a literal string here, so app_path points at app.py in the
# parent of the current working directory.
app_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath('__file__'))), 'app.py')
# NOTE(review): the write below is commented out, so no file is produced by
# this cell even though the message that follows says "created" — confirm
# whether app.py is expected to exist already before re-enabling it.
# with open(app_path, 'w', encoding='utf-8') as f:
#     f.write(streamlit_app_code)

print(f"✅ Streamlit app created: {app_path}")
print("To run the app, use: streamlit run app.py")

## Step 7.6: Test the Application Setup

In [None]:
# Smoke-test the routing + response pipeline with one query per route type
test_queries = [
    "Hello!",
    "What is 25 * 4?",
    "Analyze the benefits of hybrid AI systems",
]

print("🧪 Testing the chat system:")
print("=" * 50)

for query in test_queries:
    print(f"\nUser: {query}")

    response, response_time, source, reason = route_and_respond(query)

    # Show at most 100 characters of the reply; mark truncation with "..."
    preview = response[:100]
    suffix = '...' if len(response) > 100 else ''
    print(f"Assistant ({source}): {preview}{suffix}")
    print(f"⏱️ Time: {response_time:.3f}s | Reason: {reason}")
    print("-" * 30)

print("\n✅ Chat system test completed!")

## Step 7.7: Launch Instructions and Usage Guide

In [None]:
# Usage guide, grouped as (heading, lines) so every section prints the same way
usage_sections = [
    ("\n📂 Files Created:", [
        "   - app.py (Streamlit application)",
        "   - lab7_frontend_chat_interface.ipynb (this notebook)",
    ]),
    ("\n🔧 To Launch the Web Interface:", [
        "   1. Open a terminal in the workshop directory",
        "   2. Run: streamlit run app.py",
        "   3. The app will open in your browser automatically",
    ]),
    ("\n💬 Using the Chat Interface:", [
        "   • Type simple questions (greetings, math) → Fast local responses",
        "   • Ask complex queries (analysis, summaries) → Cloud processing",
        "   • Watch the sidebar for routing statistics",
        "   • See response times and source transparency",
    ]),
    ("\n🎯 Demo Scenarios for Stakeholders:", [
        "   1. Start with: 'Hello!' → Shows instant local response",
        "   2. Ask: 'What is 15 + 27?' → Math handled locally",
        "   3. Try: 'Analyze the benefits of hybrid AI' → Complex cloud processing",
        "   4. Follow up: 'Summarize that in one sentence' → Context maintained",
    ]),
    ("\n📊 Features Demonstrated:", [
        "   ✅ Intelligent routing between local and cloud",
        "   ✅ Transparent source indication",
        "   ✅ Performance monitoring and statistics",
        "   ✅ Conversation continuity across models",
        "   ✅ Real-time response time tracking",
        "   ✅ Fallback mechanisms for reliability",
    ]),
]

print("🚀 Lab 7 Complete! Here's how to use your Hybrid AI Chatbot:")
print("=" * 60)

for heading, lines in usage_sections:
    print(heading)
    for line in lines:
        print(line)

print("\n🎉 Your hybrid AI chatbot is ready for demonstration!")
print("   Use this to show stakeholders how local + cloud AI can work together seamlessly.")

## 🎉 Lab 7 Complete!

### What You've Accomplished:
- ✅ Created a professional Streamlit web interface for the hybrid chatbot
- ✅ Integrated all previous lab components into a cohesive user experience
- ✅ Added real-time routing statistics and performance monitoring
- ✅ Implemented transparent source indication for user trust
- ✅ Built example scenarios for stakeholder demonstrations
- ✅ Created a production-ready frontend for the hybrid AI system

### Key Features of the Web Interface:
1. **Clean Chat Interface**: Familiar chat UI with message history
2. **Routing Transparency**: Clear indication of local vs cloud responses
3. **Performance Metrics**: Real-time response times and statistics
4. **System Status**: Live monitoring of model availability
5. **Example Queries**: Guided demonstrations for different complexity levels
6. **Conversation Management**: Clear history and restart functionality

### Demonstration Value:
**For Stakeholders**: The web interface provides a tangible way to experience the hybrid AI system's benefits:
- **Speed**: Instant responses for simple queries (local)
- **Quality**: Sophisticated answers for complex tasks (cloud)
- **Transparency**: Clear source indication builds trust
- **Reliability**: Fallback mechanisms ensure system availability
- **Intelligence**: Smart routing optimizes user experience

### Workshop Conclusion:
You now have a complete **hybrid LLM chatbot system** that demonstrates:
- Intelligent routing between on-device and cloud models
- Seamless user experience with transparent processing
- Performance optimization for both speed and quality
- Production-ready architecture with monitoring and fallbacks

**Next Steps**: Use this system to gather feedback, refine routing logic, and explore additional capabilities like device-specific functions or enhanced local model fine-tuning.

The hybrid AI future is here – fast, smart, and transparent! 🚀

## 🔧 Troubleshooting: Fixed Streamlit App

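If you're getting a 400 error, the cause is usually either the Azure OpenAI configuration or a mismatch between the configured local model name and the model the local server actually exposes. A fixed version of the app (`app_fixed.py`) adds better error handling and debugging output; the cells below copy it over `app.py` and then check the local endpoint and model name.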

In [None]:
# Copy the fixed Streamlit app to replace the original
import shutil

# Replace app.py with the fixed variant and summarise what changed
fix_summary = [
    "✅ Fixed Streamlit app copied to app.py",
    "\n🔧 Key fixes applied:",
    "   - Better error handling for Azure client",
    "   - Debug information in sidebar",
    "   - Improved environment variable checking",
    "   - More detailed error messages",
    "\n🚀 Try running: streamlit run app.py",
    "   Check the sidebar for debug information if errors occur",
]

try:
    # Both paths are relative, so they resolve against the working directory.
    shutil.copy('app_fixed.py', 'app.py')
    for line in fix_summary:
        print(line)
except Exception as e:
    print(f"❌ Failed to copy fixed app: {e}")
    print("   You can manually run: streamlit run app_fixed.py")

In [None]:
# Debug: Check environment variables and local endpoint
import os

print("🔍 Debugging local model configuration:")
# Show the two settings the local client depends on
for var_name in ("LOCAL_MODEL_ENDPOINT", "LOCAL_MODEL_NAME"):
    print(f"{var_name}: {os.environ.get(var_name, 'Not set')}")

# Probe the local server directly, bypassing the OpenAI client
try:
    # Imported here so a missing 'requests' package is reported by the
    # handler below instead of aborting the cell.
    import requests
    local_endpoint = os.environ.get("LOCAL_MODEL_ENDPOINT", "http://localhost:59413")
    print(f"\n🧪 Testing local endpoint: {local_endpoint}")

    # 1) health endpoint
    health_response = requests.get(f"{local_endpoint}/health", timeout=5)
    print(f"Health check status: {health_response.status_code}")

    # 2) models endpoint; list the model ids when the call succeeds
    models_response = requests.get(f"{local_endpoint}/v1/models", timeout=5)
    print(f"Models endpoint status: {models_response.status_code}")
    if models_response.status_code == 200:
        models_data = models_response.json()
        model_ids = [entry['id'] for entry in models_data.get('data', [])]
        print(f"Available models: {model_ids}")

except Exception as e:
    print(f"❌ Local endpoint test failed: {e}")
    print("   This explains the 400 error - local model server may not be running")

In [None]:
# Rebuild the client registry with the model name the local server reports
print("\n🔧 Fixing local model configuration...")

def fix_initialize_clients():
    """Recreate the local and Azure clients using the corrected model name.

    Returns:
        dict that always has the keys 'local' and 'azure' (client object or
        None); 'local_model' / 'azure_model' are added for each backend that
        was set up successfully. Failures are printed, never raised.
    """
    registry = {'local': None, 'azure': None}

    # --- Local backend, pinned to the server's model identifier ---
    try:
        endpoint = os.environ.get("LOCAL_MODEL_ENDPOINT", "http://localhost:59413")
        model_name = "Phi-3.5-mini-instruct-generic-cpu"

        registry['local'] = OpenAI(base_url=f"{endpoint}/v1", api_key="not-needed")
        registry['local_model'] = model_name
        print(f"✅ Local client fixed with model: {model_name}")
    except Exception as e:
        print(f"❌ Local client failed: {e}")

    # --- Azure backend ---
    try:
        # Direct indexing: a missing variable raises KeyError, which is
        # reported by the handler below.
        azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
        azure_key = os.environ["AZURE_OPENAI_KEY"]
        deployment = os.environ["AZURE_DEPLOYMENT_NAME"]
        api_version = os.environ["AZURE_OPENAI_API_VERSION"]

        # Variables that are present but empty count as incomplete.
        if not all((azure_endpoint, azure_key, deployment, api_version)):
            print("❌ Azure configuration incomplete")
        else:
            registry['azure'] = AzureOpenAI(
                api_key=azure_key,
                api_version=api_version,
                azure_endpoint=azure_endpoint,
            )
            registry['azure_model'] = deployment
            print(f"✅ Azure client confirmed: {deployment}")
    except Exception as e:
        print(f"❌ Azure client failed: {e}")

    return registry

# Rebind the module-level registry that route_and_respond reads
clients = fix_initialize_clients()
print(f"\n📊 Updated clients: {clients.keys()}")
print(f"Local model: {clients.get('local_model', 'Not available')}")
print(f"Azure model: {clients.get('azure_model', 'Not available')}")

In [None]:
# Re-run the smoke test against the rebuilt client registry
test_queries_fixed = [
    "Hello!",
    "What is 25 * 4?",
    "Analyze the benefits of hybrid AI systems",
]

print("🧪 Testing FIXED chat system:")
print("=" * 50)

for query in test_queries_fixed:
    print(f"\nUser: {query}")

    response, response_time, source, reason = route_and_respond(query)

    # Show at most 100 characters of the reply; mark truncation with "..."
    preview = response[:100]
    suffix = '...' if len(response) > 100 else ''
    print(f"Assistant ({source}): {preview}{suffix}")
    print(f"⏱️ Time: {response_time:.3f}s | Reason: {reason}")
    print("-" * 30)

print("\n✅ Fixed chat system test completed!")