# FM-038 Phase 2: Interactive Chat Flow Debugging Notebook

**Date:** 2025-10-09  
**Priority:** P0 - Critical Investigation  
**Status:** Phase 2 - Interactive Debugging  

---

## Overview

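This notebook provides an interactive environment for debugging the complete chat flow from authentication through response generation. Each cell focuses on one aspect of the system and can be re-run on its own, but cells from Cell 3 onward rely on state created by earlier cells (imports, metrics, HTTP session, access token), so run Cells 1–4 in order first.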

### Investigation Scope

1. **Authentication Flow** - Login, JWT tokens, session management
2. **Request Processing** - Endpoint handling, validation, parsing
3. **Agent Orchestration** - Agent selection, function calls, coordination
4. **RAG Operations** - Embedding generation, database queries, similarity
5. **Response Generation** - LLM processing, formatting, delivery

### Key Features

- **Function Input/Output Analysis** - Detailed logging of all function parameters and returns
- **Performance Profiling** - Timing analysis for bottleneck identification
- **Error Detection** - Silent failure identification and error propagation tracking
- **Rich Visualization** - Charts and graphs for data analysis

---

## Prerequisites

- Phase 1 investigation script completed
- Access to production environment
- Valid test user credentials

---


## Cell 1: Setup and Environment Configuration

This cell:
- Imports required dependencies
- Loads environment variables
- Configures comprehensive logging
- Sets up test credentials
- Initializes data structures


In [None]:
import asyncio
import html
import json
import logging
import os
import sys
import time
import traceback
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, Any, List, Optional

import aiohttp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from dotenv import load_dotenv
from IPython.display import display, HTML, Markdown

# Plot appearance: seaborn grid theme and a wide default figure size
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

# Read settings from the production dotenv file
load_dotenv('.env.production')

# Route DEBUG-and-above log records to stdout so they appear in cell output
stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    format='%(asctime)s [%(levelname)8s] %(name)s - %(message)s',
    level=logging.DEBUG,
    handlers=[stdout_handler],
)

logger = logging.getLogger(__name__)

# Test credentials.
# SECURITY NOTE(review): the literals below are credentials committed to the
# notebook. They are kept only as fallbacks so existing runs keep working;
# prefer supplying TEST_USER_EMAIL / TEST_USER_PASSWORD / TEST_USER_ID through
# the environment, then rotate the password and delete the literals.
TEST_USER_EMAIL = os.getenv("TEST_USER_EMAIL", "sendaqmail@gmail.com")
TEST_USER_PASSWORD = os.getenv("TEST_USER_PASSWORD", "xasdez-katjuc-zyttI2")
TEST_USER_ID = os.getenv("TEST_USER_ID", "cae3b3ec-b355-4509-bd4e-0f7da8cb2858")

# API configuration: candidate base URLs, probed in this order.
_candidate_urls = [
    os.getenv("PRODUCTION_API_URL", ""),
    "https://insurance-navigator-3o99.onrender.com",
    "http://localhost:8000",
]

# Normalise each entry (surrounding whitespace, trailing slash), drop empty
# strings, and de-duplicate while keeping order so the same host is not probed
# twice when PRODUCTION_API_URL matches one of the hard-coded entries.
_normalized_urls = (url.strip().rstrip('/') for url in _candidate_urls)
API_BASE_URLS = list(dict.fromkeys(url for url in _normalized_urls if url))

# Render a summary of the loaded configuration (the password is deliberately
# not shown)
endpoint_count = len(API_BASE_URLS)
config_table_html = f"""
<table style="width:100%; border: 1px solid #ddd;">
    <tr><th style="text-align:left; background-color:#f2f2f2;">Configuration</th><th style="text-align:left; background-color:#f2f2f2;">Value</th></tr>
    <tr><td>Test User Email</td><td>{TEST_USER_EMAIL}</td></tr>
    <tr><td>Test User ID</td><td>{TEST_USER_ID}</td></tr>
    <tr><td>API Endpoints</td><td>{endpoint_count} configured</td></tr>
    <tr><td>Logging Level</td><td>DEBUG</td></tr>
</table>
"""

display(Markdown("### ✅ Configuration Loaded Successfully"))
display(HTML(config_table_html))

print("\n✅ Environment setup complete!")


## Cell 2: Data Classes and Helper Functions

Define data structures for tracking function calls, metrics, and investigation results.


In [None]:
@dataclass
class FunctionCall:
    """Record of one traced call: what went in, what came out, how long it took."""
    name: str
    start_time: float
    end_time: Optional[float] = None
    inputs: Dict[str, Any] = field(default_factory=dict)
    outputs: Any = None
    error: Optional[str] = None
    duration_ms: Optional[float] = None

    def complete(self, outputs: Any = None, error: Optional[str] = None):
        """Close the record: stamp the end time and store the result.

        ``duration_ms`` is derived from the wall-clock difference between now
        and ``start_time``; ``outputs`` and ``error`` are stored as given.
        """
        finished_at = time.time()
        elapsed_seconds = finished_at - self.start_time
        self.end_time = finished_at
        self.duration_ms = elapsed_seconds * 1000
        self.outputs, self.error = outputs, error

@dataclass
class InvestigationMetrics:
    """Aggregate counters and traced calls for one investigation run.

    The three request counters are plain integers that the notebook cells
    update directly. ``function_calls`` keeps every traced call in the order it
    was added, and ``start_time`` defaults to the moment the instance is
    created.
    """
    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    # The element type is quoted so defining this class does not require
    # FunctionCall to be resolvable at class-creation time.
    function_calls: List["FunctionCall"] = field(default_factory=list)
    start_time: float = field(default_factory=time.time)

    def add_function_call(self, func_call: "FunctionCall"):
        """Append ``func_call`` to the list of traced calls."""
        self.function_calls.append(func_call)

    def get_summary(self) -> Dict[str, Any]:
        """Return a snapshot of the run as a plain dictionary.

        Returns:
            A dict with the elapsed seconds since ``start_time``, the three
            request counters, the number of traced calls, and the mean
            duration in milliseconds of the traced calls that have a
            duration. The mean is 0 when no traced call has a duration.
        """
        total_duration = time.time() - self.start_time

        # Average only over calls that actually carry a duration. Dividing by
        # the count of *all* calls would understate the mean whenever a call
        # was recorded without being completed.
        timed_durations = [
            fc.duration_ms
            for fc in self.function_calls
            if fc.duration_ms is not None
        ]
        if timed_durations:
            average_duration = sum(timed_durations) / len(timed_durations)
        else:
            average_duration = 0

        return {
            "total_duration_seconds": total_duration,
            "total_requests": self.total_requests,
            "successful_requests": self.successful_requests,
            "failed_requests": self.failed_requests,
            "function_calls": len(self.function_calls),
            "average_function_duration_ms": average_duration,
        }

# Notebook-wide state shared by the cells below
metrics = InvestigationMetrics()

# Populated later: the HTTP session and base URL by endpoint discovery, the
# access token and user record by authentication
session = access_token = user_data = api_base_url = None

print("✅ Data structures initialized!")


## Cell 3: API Endpoint Discovery

Find a working API endpoint from the configured list. This tests connectivity to each endpoint's health check.


In [None]:
async def find_working_api_endpoint() -> Optional[str]:
    """Probe the configured base URLs and select the first healthy one.

    Sends ``GET {base_url}/health`` to each entry of ``API_BASE_URLS`` in
    order (10 s timeout per probe) and stops at the first HTTP 200. A table of
    the probe results is displayed whether or not an endpoint was found.

    Side effects:
        Rebinds the module-level ``session`` to a fresh
        ``aiohttp.ClientSession`` (closing a still-open previous one first) and
        sets the module-level ``api_base_url`` to the selected URL, or to
        ``None`` when no endpoint answered with 200.

    Returns:
        The selected base URL, or ``None`` if no endpoint is usable.
    """
    global api_base_url, session

    display(Markdown("### 🔍 Testing API Endpoints"))

    # Re-running this cell must not leak the previous session's connections.
    if session is not None and not session.closed:
        await session.close()

    # Forget any endpoint selected by an earlier run; otherwise a stale value
    # would be reported as active when every probe fails this time.
    api_base_url = None

    timeout = aiohttp.ClientTimeout(total=120)
    session = aiohttp.ClientSession(timeout=timeout)

    results = []

    for base_url in API_BASE_URLS:
        logger.info(f"Testing endpoint: {base_url}")
        try:
            health_url = f"{base_url}/health"
            start = time.time()
            # The per-request timeout overrides the session's 120 s default.
            async with session.get(health_url, timeout=aiohttp.ClientTimeout(total=10)) as response:
                duration = (time.time() - start) * 1000
                if response.status == 200:
                    logger.info(f"✅ Found working endpoint: {base_url}")
                    api_base_url = base_url
                    results.append({
                        'endpoint': base_url,
                        'status': '✅ Working',
                        'response_time': f"{duration:.2f}ms"
                    })
                    break
                else:
                    logger.warning(f"⚠️  Endpoint responded with status {response.status}")
                    results.append({
                        'endpoint': base_url,
                        'status': f'⚠️  Status {response.status}',
                        'response_time': f"{duration:.2f}ms"
                    })
        except Exception as e:
            # Broad on purpose: any failure (DNS, TLS, timeout, ...) just means
            # this candidate is unusable and the next one should be tried.
            logger.warning(f"❌ Endpoint not accessible: {str(e)}")
            results.append({
                'endpoint': base_url,
                # The table is rendered with escape=False, so the exception
                # text has to be escaped here.
                'status': f'❌ Error: {html.escape(str(e)[:50])}',
                'response_time': 'N/A'
            })

    # Display results table
    df = pd.DataFrame(results)
    display(HTML(df.to_html(index=False, escape=False)))

    if api_base_url:
        display(Markdown(f"### ✅ Active API Endpoint: `{api_base_url}`"))
        return api_base_url
    else:
        display(Markdown("### ❌ No working API endpoint found!"))
        return None

# Run endpoint discovery. The bare top-level `await` relies on the notebook
# kernel's support for awaiting at cell level.
await find_working_api_endpoint()


## Cell 4: Authentication Flow Analysis

**Step 1: Authentication**

This cell:
- Authenticates with the test user credentials
- Obtains JWT access token
- Logs authentication details
- Tracks authentication performance

**Function Analysis:**
- **Input:** Email, password
- **Output:** JWT token, user data
- **Side Effects:** Session state updated


In [None]:
async def authenticate() -> bool:
    """Log in with the test credentials and store the resulting access token.

    POSTs the test e-mail and password as JSON to ``{api_base_url}/login`` on
    the shared ``session``. The attempt is recorded in ``metrics`` as a
    ``FunctionCall`` (with the password redacted) and the outcome is rendered
    as an HTML panel.

    Side effects:
        On a 200 response, rebinds the module-level ``access_token`` and
        ``user_data`` from the response body.

    Returns:
        ``True`` when the server answered 200 and the body contained an access
        token; ``False`` for a non-200 status, a 200 without a token, or any
        exception raised while logging in.
    """
    global access_token, user_data, metrics

    display(Markdown("### 🔐 Authentication Flow"))

    func_call = FunctionCall(
        name="authenticate",
        start_time=time.time(),
        inputs={
            "email": TEST_USER_EMAIL,
            # Never store the real password in the tracked inputs.
            "password": "***REDACTED***"
        }
    )

    try:
        login_url = f"{api_base_url}/login"
        logger.info(f"🔐 Attempting login at: {login_url}")

        payload = {
            "email": TEST_USER_EMAIL,
            "password": TEST_USER_PASSWORD
        }

        async with session.post(login_url, json=payload) as response:
            response_text = await response.text()

            if response.status == 200:
                auth_data = json.loads(response_text)
                access_token = auth_data.get("access_token")
                # "user" may be absent or JSON null; normalise both to {} so
                # the lookups below cannot raise AttributeError.
                user_data = auth_data.get("user") or {}

                if access_token:
                    func_call.complete(outputs={
                        "success": True,
                        "user_id": user_data.get('id'),
                        "token_present": True
                    })
                    metrics.add_function_call(func_call)

                    # Server-supplied values are escaped before being placed
                    # into the HTML panel.
                    display(HTML(f"""
                    <div style="padding: 15px; background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 5px;">
                        <h4 style="color: #155724; margin-top: 0;">✅ Authentication Successful</h4>
                        <table style="width:100%;">
                            <tr><td><strong>User ID:</strong></td><td>{html.escape(str(user_data.get('id')))}</td></tr>
                            <tr><td><strong>Email:</strong></td><td>{html.escape(str(user_data.get('email')))}</td></tr>
                            <tr><td><strong>Token Length:</strong></td><td>{len(access_token)} characters</td></tr>
                            <tr><td><strong>Duration:</strong></td><td>{func_call.duration_ms:.2f}ms</td></tr>
                        </table>
                    </div>
                    """))
                    return True

                # A 200 without a token cannot authorise the chat requests, so
                # report it as a failure instead of a silent success.
                error_detail = "Response contained no access_token"
            else:
                error_detail = response_text

            error_msg = f"Login failed with status {response.status}: {error_detail}"
            func_call.complete(error=error_msg)
            metrics.add_function_call(func_call)

            display(HTML(f"""
            <div style="padding: 15px; background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 5px;">
                <h4 style="color: #721c24; margin-top: 0;">❌ Authentication Failed</h4>
                <p><strong>Status:</strong> {response.status}</p>
                <p><strong>Error:</strong> {html.escape(error_detail[:200])}</p>
            </div>
            """))
            return False

    except Exception as e:
        # Notebook boundary: record and display the failure rather than let the
        # exception escape; the caller decides whether to stop.
        error_msg = f"Authentication exception: {str(e)}"
        func_call.complete(error=error_msg)
        metrics.add_function_call(func_call)

        display(HTML(f"""
        <div style="padding: 15px; background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 5px;">
            <h4 style="color: #721c24; margin-top: 0;">❌ Authentication Exception</h4>
            <pre>{html.escape(traceback.format_exc())}</pre>
        </div>
        """))
        return False

# Run authentication and stop here on failure: the chat cells below send the
# token obtained by authenticate() in their Authorization header.
auth_success = await authenticate()
if not auth_success:
    raise RuntimeError("Authentication failed - cannot proceed with chat tests")


## Cell 5: Single Chat Request Analysis

**Step 2: Chat Request**

This cell sends a single test message and analyzes the complete request/response flow.

**Function Analysis:**
- **Input:** Message text, conversation ID, user context
- **Output:** Response text, metadata
- **Side Effects:** RAG operations, agent function calls


In [None]:
async def send_chat_message(message: str, conversation_id: str = "") -> Optional[Dict[str, Any]]:
    """Send one chat message to ``{api_base_url}/chat`` and render the outcome.

    The request is POSTed as JSON on the shared ``session`` with the current
    ``access_token`` as a Bearer token. Each attempt is recorded in ``metrics``
    as a ``FunctionCall`` and bumps either ``successful_requests`` or
    ``failed_requests``; ``total_requests`` is left to the caller.

    Args:
        message: Text to send. Only the first 200 characters are stored in the
            tracked inputs; the full text is sent to the API.
        conversation_id: Conversation to continue; empty string by default.

    Returns:
        The decoded JSON response body on HTTP 200, otherwise ``None`` (non-200
        status or any exception while sending/decoding).
    """
    global metrics

    # Only add an ellipsis when the preview really was cut short.
    preview = message[:80] + ("..." if len(message) > 80 else "")
    display(Markdown(f"### 💬 Chat Request: `{preview}`"))

    func_call = FunctionCall(
        name="send_chat_message",
        start_time=time.time(),
        inputs={
            "message": message[:200],
            "message_length": len(message),
            "conversation_id": conversation_id,
            "user_id": user_data.get('id') if user_data else None
        }
    )

    try:
        chat_url = f"{api_base_url}/chat"

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {access_token}",
            "Accept": "application/json"
        }

        payload = {
            "message": message,
            "conversation_id": conversation_id,
            "user_language": "en",
            "context": {}
        }

        request_start = time.time()

        async with session.post(chat_url, json=payload, headers=headers) as response:
            # Taken before the body is read below, so it excludes body download.
            request_duration = (time.time() - request_start) * 1000
            response_text = await response.text()

            if response.status == 200:
                response_data = json.loads(response_text)
                # "text" may be missing or JSON null; treat both as an empty
                # reply instead of failing on len(None).
                reply_text = str(response_data.get('text') or '')
                reply_preview = reply_text[:500] + ('...' if len(reply_text) > 500 else '')

                func_call.complete(outputs={
                    "success": True,
                    "response_length": len(reply_text),
                    "request_duration_ms": request_duration
                })
                metrics.add_function_call(func_call)
                metrics.successful_requests += 1

                # Server-supplied text is escaped so markup in the reply is
                # shown literally rather than interpreted by the notebook.
                display(HTML(f"""
                <div style="padding: 15px; background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 5px;">
                    <h4 style="color: #155724; margin-top: 0;">✅ Chat Request Successful</h4>
                    <table style="width:100%;">
                        <tr><td><strong>Request Duration:</strong></td><td>{request_duration:.2f}ms</td></tr>
                        <tr><td><strong>Response Length:</strong></td><td>{len(reply_text)} characters</td></tr>
                        <tr><td><strong>Conversation ID:</strong></td><td>{html.escape(str(response_data.get('conversation_id', 'N/A')))}</td></tr>
                    </table>
                    <h5>Response Preview:</h5>
                    <div style="padding: 10px; background-color: white; border-radius: 3px; margin-top: 10px;">
                        {html.escape(reply_preview)}
                    </div>
                </div>
                """))
                return response_data
            else:
                error_msg = f"Chat request failed with status {response.status}: {response_text}"
                func_call.complete(error=error_msg)
                metrics.add_function_call(func_call)
                metrics.failed_requests += 1

                display(HTML(f"""
                <div style="padding: 15px; background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 5px;">
                    <h4 style="color: #721c24; margin-top: 0;">❌ Chat Request Failed</h4>
                    <p><strong>Status:</strong> {response.status}</p>
                    <p><strong>Error:</strong> {html.escape(response_text[:200])}</p>
                </div>
                """))
                return None

    except Exception as e:
        # Notebook boundary: record and display the failure so the remaining
        # test messages can still run.
        error_msg = f"Chat request exception: {str(e)}"
        func_call.complete(error=error_msg)
        metrics.add_function_call(func_call)
        metrics.failed_requests += 1

        display(HTML(f"""
        <div style="padding: 15px; background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 5px;">
            <h4 style="color: #721c24; margin-top: 0;">❌ Chat Request Exception</h4>
            <pre>{html.escape(traceback.format_exc())}</pre>
        </div>
        """))
        return None

# Test with a single message.
# total_requests is bumped here by the caller; send_chat_message() itself only
# updates the successful/failed counters.
test_message = "What mental health services are covered under my insurance plan?"
metrics.total_requests += 1
response = await send_chat_message(test_message)


## Cell 6: Multiple Chat Requests

Send multiple test messages to analyze consistency and performance patterns.


In [None]:
display(Markdown("### 📊 Multiple Chat Request Analysis"))

test_messages = [
    "Does my policy cover ambulance services?",
    "What are the copay requirements for primary care visits?"
]

responses = []

for i, message in enumerate(test_messages, 1):
    display(Markdown(f"#### Test Message {i}/{len(test_messages)}"))
    metrics.total_requests += 1
    response = await send_chat_message(message)
    responses.append(response)
    
    if i < len(test_messages):
        logger.info("⏳ Waiting 5 seconds before next request...")
        await asyncio.sleep(5)

display(Markdown("### ✅ All test messages completed!"))


## Cell 7: Function Call Timeline Visualization

Visualize the timeline of function calls with duration analysis.


In [None]:
display(Markdown("### 📈 Function Call Timeline Analysis"))

if not metrics.function_calls:
    display(Markdown("⚠️ No function calls recorded yet."))
else:
    # One table row per traced call; a missing duration is shown as 0
    df_funcs = pd.DataFrame([
        {
            'Function': fc.name,
            'Duration (ms)': fc.duration_ms or 0,
            'Status': '❌ Error' if fc.error else '✅ Success',
            'Has Error': bool(fc.error),
        }
        for fc in metrics.function_calls
    ])

    display(Markdown("#### Function Call Summary"))
    display(HTML(df_funcs.to_html(index=False, escape=False)))

    # Left panel: per-call durations; right panel: success/error split
    fig, (duration_ax, outcome_ax) = plt.subplots(1, 2, figsize=(15, 5))

    bar_positions = range(len(df_funcs))
    bar_colors = ['red' if failed else 'green' for failed in df_funcs['Has Error']]
    bar_labels = [f"{n}. {name}" for n, name in enumerate(df_funcs['Function'], 1)]

    duration_ax.barh(bar_positions, df_funcs['Duration (ms)'], color=bar_colors, alpha=0.7)
    duration_ax.set_yticks(bar_positions)
    duration_ax.set_yticklabels(bar_labels)
    duration_ax.set_xlabel('Duration (ms)')
    duration_ax.set_title('Function Call Durations')
    duration_ax.grid(True, alpha=0.3)

    success_count = sum(1 for fc in metrics.function_calls if not fc.error)
    error_count = len(metrics.function_calls) - success_count
    outcome_ax.pie([success_count, error_count], labels=['Success', 'Error'],
                   autopct='%1.1f%%', colors=['green', 'red'], startangle=90)
    outcome_ax.set_title('Function Call Success Rate')

    plt.tight_layout()
    plt.show()


## Cell 8: Performance Metrics Analysis

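Analyze overall system performance and identify bottlenecks. Note: the code cell for this analysis appears at the very end of the notebook, after the Summary section; run it once the chat request cells have completed.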


## Cell 9: Production Log Analysis

Guidance for analyzing production logs to understand RAG operations and backend behavior.


In [None]:
# Static guidance panel: what to look for in the Render production logs
log_analysis_guide_html = """
<div style="padding: 15px; background-color: #d1ecf1; border: 1px solid #bee5eb; border-radius: 5px;">
    <h4 style="color: #0c5460; margin-top: 0;">📝 How to Analyze Production Logs</h4>
    <p>To complete the investigation, check production logs in the Render dashboard:</p>
    <ol>
        <li><strong>Service ID:</strong> srv-d0v2nqvdiees73cejf0g</li>
        <li><strong>Look for:</strong>
            <ul>
                <li><code>RAG Operation Started</code> logs with operation IDs</li>
                <li><code>RAG Operation SUCCESS/FAILED</code> with duration and chunk counts</li>
                <li><code>Chunks:X/Y</code> format showing retrieved vs available chunks</li>
                <li><code>CHECKPOINT A-H</code> logs showing execution flow</li>
                <li><code>PRE-EMBEDDING</code> and <code>POST-EMBEDDING</code> logs</li>
            </ul>
        </li>
        <li><strong>Key Questions:</strong>
            <ul>
                <li>Are embeddings being generated successfully?</li>
                <li>Are database queries returning results?</li>
                <li>What are the actual similarity scores?</li>
                <li>Are there any silent failures or errors?</li>
            </ul>
        </li>
    </ol>
</div>
"""

display(Markdown("### 📊 Production Log Analysis"))
display(HTML(log_analysis_guide_html))


## Cell 10: Function Input/Output Deep Dive

Detailed analysis of function inputs and outputs for debugging.


In [None]:
display(Markdown("### 🔬 Function Input/Output Analysis"))

if metrics.function_calls:
    # One section per traced call: heading, inputs table, then outputs or error
    for i, fc in enumerate(metrics.function_calls, 1):
        status_icon = "✅" if not fc.error else "❌"
        # Compare against None so a genuine 0.0 ms duration is still shown.
        duration_str = f"{fc.duration_ms:.2f}ms" if fc.duration_ms is not None else "N/A"

        display(Markdown(f"#### {i}. {status_icon} `{fc.name}` - {duration_str}"))

        # Inputs
        if fc.inputs:
            display(Markdown("**Inputs:**"))
            # Values are truncated to 200 characters and HTML-escaped: they can
            # contain arbitrary message text.
            input_rows = "".join(
                f"<tr><td style='padding: 5px;'><strong>{html.escape(str(key))}</strong></td>"
                f"<td style='padding: 5px;'>{html.escape(str(value)[:200])}</td></tr>"
                for key, value in fc.inputs.items()
            )
            display(HTML(f"<table style='width:100%; border: 1px solid #ddd;'>{input_rows}</table>"))

        # Outputs (only shown for calls that did not fail)
        if fc.outputs and not fc.error:
            display(Markdown("**Outputs:**"))
            output_str = html.escape(str(fc.outputs)[:500])
            display(HTML(f"<pre style='background-color: #f5f5f5; padding: 10px; border-radius: 5px;'>{output_str}</pre>"))

        # Errors (may embed raw server responses, hence the escaping)
        if fc.error:
            display(HTML(f"""
            <div style="padding: 10px; background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 5px; margin: 10px 0;">
                <strong style="color: #721c24;">Error:</strong> {html.escape(fc.error[:200])}
            </div>
            """))

        display(Markdown("---"))
else:
    display(Markdown("⚠️ No function calls recorded yet."))


## Cell 11: Save Investigation Report

Save the complete investigation results to a JSON file for further analysis.


In [None]:
display(Markdown("### 💾 Saving Investigation Report"))

# Take a single clock reading so the filename and the timestamp stored inside
# the report always refer to the same instant.
generated_at = datetime.now()
timestamp = generated_at.strftime("%Y%m%d_%H%M%S")
report_file = f"chat_flow_investigation_report_{timestamp}.json"

report = {
    "timestamp": generated_at.isoformat(),
    "test_user": TEST_USER_EMAIL,
    "test_user_id": TEST_USER_ID,
    "api_base_url": api_base_url,
    "metrics": metrics.get_summary(),
    "function_calls": [
        {
            "name": fc.name,
            "duration_ms": fc.duration_ms,
            "inputs": fc.inputs,
            # Outputs are stored as a truncated string, not as structured data
            "outputs": str(fc.outputs)[:500] if fc.outputs else None,
            "error": fc.error
        }
        for fc in metrics.function_calls
    ]
}

# Explicit encoding so the written file does not depend on the platform default
with open(report_file, 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=2)

display(HTML(f"""
<div style="padding: 15px; background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 5px;">
    <h4 style="color: #155724; margin-top: 0;">✅ Report Saved Successfully</h4>
    <p><strong>File:</strong> <code>{report_file}</code></p>
    <p>This report contains all investigation data and can be analyzed further.</p>
</div>
"""))


## Cell 12: Cleanup

Close connections and clean up resources.


In [None]:
display(Markdown("### 🧹 Cleanup"))

if session:
    await session.close()
    logger.info("✅ HTTP session closed")

display(Markdown("### ✅ Investigation Complete!"))

display(HTML("""
<div style="padding: 15px; background-color: #d1ecf1; border: 1px solid #bee5eb; border-radius: 5px;">
    <h4 style="color: #0c5460; margin-top: 0;">📝 Next Steps</h4>
    <ol>
        <li>Review the investigation report JSON file</li>
        <li>Check production logs in Render dashboard</li>
        <li>Look for RAG operation logs and CHECKPOINT entries</li>
        <li>Proceed to Phase 3: Analysis & Documentation</li>
    </ol>
</div>
"""))


---

## Summary

This notebook provides a complete interactive environment for debugging the chat flow. Each cell can be:
- **Executed individually** for focused analysis (once Cells 1–4 have created the session and access token)
- **Re-run** to test specific scenarios
- **Modified** to add custom debugging logic
- **Extended** with additional analysis cells

### Key Capabilities

- ✅ Authentication flow analysis
- ✅ Chat request processing
- ✅ Function input/output tracking
- ✅ Performance profiling
- ✅ Error detection and analysis
- ✅ Rich visualization
- ✅ Comprehensive reporting

### Troubleshooting

If issues occur:
1. Check authentication is successful (Cell 4)
2. Verify API endpoint is accessible (Cell 3)
3. Review function call errors (Cell 10)
4. Analyze performance metrics (Cell 8)
5. Check production logs for backend issues (Cell 9)

---

**Phase:** 2 - Interactive Debugging Notebook  
**Status:** ✅ Complete  
**Next Phase:** Phase 3 - Analysis & Documentation  
**Date:** 2025-10-09


In [None]:
display(Markdown("### 📊 Performance Metrics Summary"))

summary = metrics.get_summary()

# Display metrics in a formatted table
display(HTML(f"""
<table style="width:100%; border: 1px solid #ddd;">
    <tr style="background-color: #f2f2f2;">
        <th colspan="2" style="text-align:left; padding: 10px;">Investigation Metrics</th>
    </tr>
    <tr><td style="padding: 8px;"><strong>Total Duration:</strong></td><td>{summary['total_duration_seconds']:.2f}s</td></tr>
    <tr><td style="padding: 8px;"><strong>Total Requests:</strong></td><td>{summary['total_requests']}</td></tr>
    <tr style="background-color: #d4edda;"><td style="padding: 8px;"><strong>Successful Requests:</strong></td><td>{summary['successful_requests']}</td></tr>
    <tr style="background-color: #f8d7da;"><td style="padding: 8px;"><strong>Failed Requests:</strong></td><td>{summary['failed_requests']}</td></tr>
    <tr><td style="padding: 8px;"><strong>Function Calls:</strong></td><td>{summary['function_calls']}</td></tr>
    <tr><td style="padding: 8px;"><strong>Avg Function Duration:</strong></td><td>{summary['average_function_duration_ms']:.2f}ms</td></tr>
</table>
"""))

# Only calls that carry a duration can be plotted.
durations = [fc.duration_ms for fc in metrics.function_calls if fc.duration_ms is not None]

# Guard on the filtered list, not on function_calls: calls may exist while none
# of them has a duration, which would make the average below divide by zero.
if durations:
    fig, ax = plt.subplots(figsize=(12, 6))

    ax.plot(range(len(durations)), durations, marker='o', linewidth=2, markersize=8)
    ax.set_xlabel('Function Call Sequence')
    ax.set_ylabel('Duration (ms)')
    ax.set_title('Function Call Performance Over Time')
    ax.grid(True, alpha=0.3)

    # Add average line
    avg_duration = sum(durations) / len(durations)
    ax.axhline(y=avg_duration, color='r', linestyle='--', label=f'Average: {avg_duration:.2f}ms')
    ax.legend()

    plt.tight_layout()
    plt.show()

# Key observations
display(Markdown("### 🔍 Key Observations"))

observations = []
if summary['failed_requests'] > 0:
    observations.append(f"⚠️ **{summary['failed_requests']} request(s) failed** - Requires investigation")
if summary['successful_requests'] > 0:
    observations.append(f"✅ **{summary['successful_requests']} request(s) succeeded** - System is functional")
if summary['average_function_duration_ms'] > 5000:
    observations.append(f"⚠️ **Average function duration is high** ({summary['average_function_duration_ms']:.2f}ms) - Performance bottleneck detected")

# Judge authentication by the most recent attempt, so a successful re-run of
# the authentication cell is not overridden by an earlier failure.
auth_calls = [fc for fc in metrics.function_calls if fc.name == "authenticate"]
if auth_calls and auth_calls[-1].error:
    observations.append("❌ **Authentication failed** - Cannot proceed with chat tests")

chat_calls = [fc for fc in metrics.function_calls if fc.name == "send_chat_message"]
successful_chats = [fc for fc in chat_calls if not fc.error]
if successful_chats:
    observations.append(f"✅ **{len(successful_chats)} chat request(s) completed** - Ready for RAG analysis")

for obs in observations:
    display(Markdown(f"- {obs}"))