In [1]:
import logging
import os
import pathlib
import sys
import time
import json
import asyncio
from typing import List, Dict, Any, Optional, Union
from dataclasses import dataclass
from datetime import datetime

from dotenv import load_dotenv
from nest_asyncio import apply

# Bootstrap for the rest of the notebook. Order matters: environment variables
# and sys.path must be set up before the `src.*` imports further down run.
# Load environment variables (python-dotenv)
load_dotenv()
# nest_asyncio.apply(): lets asyncio code run inside the notebook's already-running event loop
apply()

# Make the project importable: both entries are derived from the current
# working directory, so this assumes the kernel was started in a direct
# subdirectory of the project root — TODO confirm for other launch locations.
sys.path.append(str(pathlib.Path().absolute().parent))
sys.path.append(str(pathlib.Path().absolute().parent / "src"))

# Configure root logging (INFO level, "time - logger - level - message" lines)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger("chat_tool_calling_demo")

# Import project components; these only resolve after the sys.path changes above
from src.utils.llm_pipeline import LLMWithTools
from src.tools.retrieve import retrieve_documents, RetrieveRequest
from src.tools.search_product import search_products, ProductSearchRequest

# Echo the imported objects so a failed or stale import is visible in the cell output
print("Successfully imported all components")
print(f"LLMWithTools class: {LLMWithTools}")
print(f"Retrieve function: {retrieve_documents}")
print(f"Search products function: {search_products}")
print("✅ Updated with streaming functionality!")

  from .autonotebook import tqdm as notebook_tqdm


{"asctime": "2025-10-31 15:18:05,122", "name": "pinecone_index_client", "levelname": "INFO", "message": "Initialized PineconeIndexClient for dense index 'curator-pommeline' (dim: 768, metric: dotproduct)"}


2025-10-31 15:18:05,122 - pinecone_index_client - INFO - Initialized PineconeIndexClient for dense index 'curator-pommeline' (dim: 768, metric: dotproduct)


{"asctime": "2025-10-31 15:18:05,134", "name": "pinecone_vector_store", "levelname": "INFO", "message": "Connected to Pinecone Index container: {'namespaces': {'curator-pommeline-7b1a7bbb': {'vectorCount': 0}, 'curator-pommeline-12fa085f': {'vectorCount': 0}, 'curator-pommeline-f03bab83': {'vectorCount': 0}, 'curator-pommeline': {'vectorCount': 1118}, 'curator-pommeline-a9b4d456': {'vectorCount': 0}, 'pommeline': {'vectorCount': 0}, '': {'vectorCount': 0}}, 'dimension': 768, 'indexFullness': 0.0, 'totalVectorCount': 1118}"}


2025-10-31 15:18:05,134 - pinecone_vector_store - INFO - Connected to Pinecone Index container: {'namespaces': {'curator-pommeline-7b1a7bbb': {'vectorCount': 0}, 'curator-pommeline-12fa085f': {'vectorCount': 0}, 'curator-pommeline-f03bab83': {'vectorCount': 0}, 'curator-pommeline': {'vectorCount': 1118}, 'curator-pommeline-a9b4d456': {'vectorCount': 0}, 'pommeline': {'vectorCount': 0}, '': {'vectorCount': 0}}, 'dimension': 768, 'indexFullness': 0.0, 'totalVectorCount': 1118}


{"asctime": "2025-10-31 15:18:05,136", "name": "cache", "levelname": "INFO", "message": "Started cache cleanup task with 300s interval"}


2025-10-31 15:18:05,136 - cache - INFO - Started cache cleanup task with 300s interval


Successfully imported all components
LLMWithTools class: <class 'src.utils.llm_pipeline.LLMWithTools'>
Retrieve function: <function retrieve_documents at 0x14c5cc8b0>
Search products function: <function search_products at 0x14c5cd3f0>
✅ Updated with streaming functionality!


In [2]:
import traceback

from src.tools.retrieve import retrieve_documents
from src.tools.search_product import search_products

# Tool Function Definitions and Wrappers
def retrieve_knowledge_tool(query: str, top_k: int = 5, search_mode: str = "hybrid", similarity_threshold: float = 0.15) -> Dict[str, Any]:
    """
    LLM-facing wrapper around ``retrieve_documents``.

    Arguments are coerced to the expected Python types before the call, and a
    ``None`` value is replaced by that parameter's default. Any exception is
    reported in the returned payload rather than propagated.

    Args:
        query: Search query for the knowledge base
        top_k: Maximum number of documents to return (default: 5)
        search_mode: "semantic", "keyword", or "hybrid" (default: "hybrid")
        similarity_threshold: Minimum similarity threshold (default: 0.15)

    Returns:
        On success, a dict with "query", "results" (one dict per document with
        "content", "source_file", "score", "metadata"), "total_results" and
        "search_metadata". On failure, a dict with "query", an empty "results"
        list, "total_results" of 0 and the "error" message.
    """
    try:
        # Normalise loosely typed arguments; None means "use the default"
        if not isinstance(query, str):
            query = "" if query is None else str(query)

        if not isinstance(top_k, int):
            top_k = 5 if top_k is None else int(top_k)

        if not isinstance(search_mode, str):
            search_mode = "hybrid" if search_mode is None else str(search_mode)

        if not isinstance(similarity_threshold, (int, float)):
            similarity_threshold = 0.15 if similarity_threshold is None else float(similarity_threshold)

        # The backend takes the query as a plain string plus keyword options
        response = retrieve_documents(
            query=query,
            top_k=top_k,
            search_mode=search_mode,
            similarity_threshold=similarity_threshold,
            include_scores=True,
        )

        # Flatten result objects into plain dicts for the LLM
        documents = [
            {
                "content": hit.content,
                "source_file": hit.source_file,
                "score": hit.score,
                "metadata": hit.metadata,
            }
            for hit in response.results
        ]

        return {
            "query": query,
            "results": documents,
            "total_results": len(documents),
            "search_metadata": response.search_metadata,
        }

    except Exception as exc:
        # Best-effort: report the failure in the payload instead of raising
        print(f"Error in retrieve_knowledge_tool: {exc}")
        traceback.print_exc(limit=3)
        return {
            "query": query,
            "results": [],
            "total_results": 0,
            "error": str(exc),
        }

def search_products_tool(query: str, category: Optional[str] = None, min_price: Optional[float] = None, max_price: Optional[float] = None,
                        brand: Optional[str] = None, limit: int = 10, sort_by: str = "relevance") -> Dict[str, Any]:
    """
    Tool function for searching product inventory.

    Arguments are coerced to the expected Python types before being forwarded
    to ``search_products``. An explicit ``None`` for ``limit`` or ``sort_by``
    (e.g. a JSON ``null`` from the model) falls back to the documented
    default; ``None`` for the optional filters means "no filter" and is
    passed through unchanged. Any exception is reported in the returned
    payload rather than propagated.

    Args:
        query: Product search query (string)
        category: Filter by product category (string, optional)
        min_price: Minimum price filter (number, optional)
        max_price: Maximum price filter (number, optional)
        brand: Filter by brand (string, optional)
        limit: Maximum number of results (default: 10)
        sort_by: Sort order - "relevance", "price_low", "price_high", "rating" (default: "relevance")

    Returns:
        On success, a dict with "query", "products" (one dict per product),
        "total_results", "filters_applied" and "search_metadata". On failure,
        a dict with "query", an empty "products" list, "total_results" of 0
        and the "error" message.
    """
    try:
        # Ensure parameters are of correct type
        if not isinstance(query, str):
            query = "" if query is None else str(query)

        if category is not None and not isinstance(category, str):
            category = str(category)

        if brand is not None and not isinstance(brand, str):
            brand = str(brand)

        # None falls back to the default; previously that fallback sat behind
        # an "is not None" guard and was unreachable, so None was forwarded.
        if limit is None:
            limit = 10
        elif not isinstance(limit, int):
            limit = int(limit)

        if sort_by is None:
            sort_by = "relevance"
        elif not isinstance(sort_by, str):
            sort_by = str(sort_by)

        if min_price is not None and not isinstance(min_price, (int, float)):
            min_price = float(min_price)

        if max_price is not None and not isinstance(max_price, (int, float)):
            max_price = float(max_price)

        # Call search_products directly with string query + kwargs
        response = search_products(
            query=query,
            category=category,
            min_price=min_price,
            max_price=max_price,
            brand=brand,
            limit=limit,
            sort_by=sort_by
        )

        # Format results for LLM consumption
        products = [
            {
                "id": product.id,
                "name": product.name,
                "description": product.description,
                "price": product.price,
                "brand": product.brand,
                "category": product.category,
                "availability": product.availability,
                "rating": product.rating,
                "specifications": product.specifications
            }
            for product in response.products
        ]

        return {
            "query": query,
            "products": products,
            "total_results": len(products),
            "filters_applied": response.filters_applied,
            "search_metadata": response.search_metadata
        }

    except Exception as e:
        # Best-effort: report the failure in the payload instead of raising
        print(f"Error in search_products_tool: {e}")
        traceback.print_exc(limit=3)
        return {
            "query": query,
            "products": [],
            "total_results": 0,
            "error": str(e)
        }

# Smoke-test both wrappers with direct calls (no LLM involved)
print("Testing tool functions...")

# Knowledge-base retrieval wrapper
try:
    print("Testing retrieve_knowledge_tool...")
    retrieve_result = retrieve_knowledge_tool("iPhone features", top_k=3)
    print(f"Retrieve tool test: Found {retrieve_result['total_results']} documents")
    if not retrieve_result['results']:
        print("No documents found - this may be expected if no data is ingested")
    else:
        # Preview only the first 100 characters of the top hit
        print(f"First result: {retrieve_result['results'][0]['content'][:100]}...")
except Exception as e:
    print(f"Retrieve tool test failed: {e}")

# Product inventory search wrapper
try:
    print("Testing search_products_tool...")
    search_result = search_products_tool("iPhone", limit=3)
    print(f"Search products tool test: Found {search_result['total_results']} products")
    if not search_result['products']:
        print("No products found - this may be expected if no product data is available")
    else:
        print(f"First product: {search_result['products'][0]['name']} - ${search_result['products'][0]['price']}")
except Exception as e:
    print(f"Search products tool test failed: {e}")

print("\nTool functions are defined (results may vary based on available data).")

Testing tool functions...
Testing retrieve_knowledge_tool...
{"asctime": "2025-10-31 15:19:40,657", "name": "embedder", "levelname": "INFO", "message": "Loading embedding model: google/embeddinggemma-300m"}


2025-10-31 15:19:40,657 - embedder - INFO - Loading embedding model: google/embeddinggemma-300m
2025-10-31 15:19:40,659 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: google/embeddinggemma-300m
2025-10-31 15:19:47,761 - sentence_transformers.SentenceTransformer - INFO - 14 prompts are loaded, with the keys: ['query', 'document', 'BitextMining', 'Clustering', 'Classification', 'InstructionRetrieval', 'MultilabelClassification', 'PairClassification', 'Reranking', 'Retrieval', 'Retrieval-query', 'Retrieval-document', 'STS', 'Summarization']


{"asctime": "2025-10-31 15:19:48,820", "name": "embedder", "levelname": "INFO", "message": "Model loaded successfully. Embedding dimension: 768"}


2025-10-31 15:19:48,820 - embedder - INFO - Model loaded successfully. Embedding dimension: 768


{"asctime": "2025-10-31 15:19:48,964", "name": "bm25_vectorizer", "levelname": "INFO", "message": "Initialized BM25Vectorizer with k1=1.2, b=0.75, fixed_dim=768"}


2025-10-31 15:19:48,964 - bm25_vectorizer - INFO - Initialized BM25Vectorizer with k1=1.2, b=0.75, fixed_dim=768


{"asctime": "2025-10-31 15:19:48,965", "name": "bm25_vectorizer", "levelname": "INFO", "message": "BM25Vectorizer loaded from data/models/bm25_44344f0d.pkl"}


2025-10-31 15:19:48,965 - bm25_vectorizer - INFO - BM25Vectorizer loaded from data/models/bm25_44344f0d.pkl


Retrieve tool test: Found 3 documents
First result: ## All in the family.

### All the must‑haves. All on iPhone.

[Image: Three iPhone 17 devices showi...
Testing search_products_tool...
{"asctime": "2025-10-31 15:19:48,975", "name": "search_product_tool", "levelname": "INFO", "message": "Searching products for query: 'iPhone'"}


2025-10-31 15:19:48,975 - search_product_tool - INFO - Searching products for query: 'iPhone'


{"asctime": "2025-10-31 15:19:49,140", "name": "search_product_tool", "levelname": "INFO", "message": "Found 3 products"}


2025-10-31 15:19:49,140 - search_product_tool - INFO - Found 3 products


Search products tool test: Found 3 products
First product: iPhone 16 Pro - $966.2508157309719

Tool functions are defined (results may vary based on available data).


In [3]:
# Load System Instructions
system_prompt = ""
# Decode explicitly as UTF-8 so the prompt text does not depend on the
# platform's locale default encoding.
with open("../prompts/system_instructions.txt", "r", encoding="utf-8") as f:
    system_prompt = f.read()

print("System prompt loaded successfully")
print(f"System prompt length: {len(system_prompt)} characters")

# Define tool schemas for LLM
# Each entry describes one callable tool: its name, a description the model
# uses to decide when to call it, and a JSON-Schema-style "parameters" object.
# Defaults listed here are kept in sync with the wrapper functions' signatures.
tools_schema = [
    {
        "name": "retrieve_knowledge",
        "description": "Search the knowledge base for product information, policies, and general information. Use this when you need factual information about products, return policies, or general knowledge.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query for the knowledge base"
                },
                "top_k": {
                    "type": "integer",
                    "description": "Maximum number of documents to return (default: 5)",
                    # Was 6, contradicting the description above and the
                    # wrapper's own default of 5.
                    "default": 5
                },
                "search_mode": {
                    "type": "string",
                    "description": "Search mode: 'semantic' for conceptual search, 'keyword' for exact term matching, 'hybrid' for both (default: 'hybrid')",
                    "enum": ["semantic", "keyword", "hybrid"],
                    "default": "hybrid"
                },
                "similarity_threshold": {
                    "type": "number",
                    "description": "Minimum similarity threshold (default: 0.15)",
                    "default": 0.15
                }
            },
            "required": ["query"]
        }
    },
    {
        "name": "search_products",
        "description": "Search the product inventory for specific items with pricing and availability. Use this when looking for specific products to buy or compare.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Product search query"
                },
                "category": {
                    "type": "string",
                    "description": "Filter by product category (e.g., 'Smartphones', 'Laptops', 'Audio')"
                },
                "min_price": {
                    "type": "number",
                    "description": "Minimum price filter"
                },
                "max_price": {
                    "type": "number",
                    "description": "Maximum price filter"
                },
                "brand": {
                    "type": "string",
                    "description": "Filter by brand (e.g., 'Apple', 'Samsung', 'Sony')"
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of results (default: 10)",
                    "default": 10
                },
                "sort_by": {
                    "type": "string",
                    "description": "Sort order: 'relevance', 'price_low', 'price_high', 'rating'",
                    "enum": ["relevance", "price_low", "price_high", "rating"],
                    "default": "relevance"
                }
            },
            "required": ["query"]
        }
    }
]

# Summarise the registered schemas (description truncated to 80 characters)
print(f"Tool schemas defined: {len(tools_schema)} tools")
for tool in tools_schema:
    print(f"  - {tool['name']}: {tool['description'][:80]}...")

System prompt loaded successfully
System prompt length: 3530 characters
Tool schemas defined: 2 tools
  - retrieve_knowledge: Search the knowledge base for product information, policies, and general informa...
  - search_products: Search the product inventory for specific items with pricing and availability. U...


In [4]:
# Initialize LLM with Tools (Streaming Version)

print("Initializing LLM with Tools (STREAMING)...")
print("Using the real LLMWithTools class with streaming and tool calling capabilities...")

# Build the tool-enabled LLM client; stream=True turns on streaming
llm_with_tools = LLMWithTools(
    model="glm-4.5-air",
    system_prompt=system_prompt,
    tools=tools_schema,
    tool_choice="auto",
    stream=True,
    max_timeout_per_request=60,
)

# Bind each schema name to the Python wrapper that implements it
for _tool_name, _tool_fn in (
    ("retrieve_knowledge", retrieve_knowledge_tool),
    ("search_products", search_products_tool),
):
    llm_with_tools.register_function(_tool_name, _tool_fn)

print("LLM with Tools (STREAMING) initialized successfully!")
print(f"Model: {llm_with_tools.model}")
print(f"Streaming enabled: {llm_with_tools.stream}")
print(f"Tools registered: {list(llm_with_tools.available_functions.keys())}")

# Banner for the single-message streaming test that follows
print("\n" + "=" * 50)
print("TESTING SINGLE MESSAGE WITH STREAMING TOOL CALLING")
print("=" * 50)

# Sample user prompts for the streaming test
test_queries = [
    "What iPhones do you have available under $1000?",
    "Tell me about the iPhone 16 Pro features",
    "Show me products from Apple",
    "What's the difference between the 128GB and 256GB models?",
]

async def test_streaming_with_tools():
    """
    Stream every prompt in ``test_queries`` through ``llm_with_tools``.

    Content chunks are echoed as they arrive, tool events are printed as
    bracketed status lines, and a latency summary follows each prompt. An
    "error" chunk stops the current stream; an exception is printed with a
    short traceback and the loop continues with the next prompt.
    """
    for test_no, prompt in enumerate(test_queries, 1):
        print(f"\nTest {test_no}: {prompt}")
        print("-" * 40)

        started_at = time.time()
        first_token_at = None
        streamed_text = []
        tool_events = []

        try:
            stream = llm_with_tools.generate_with_tool_execution_stream(
                user_prompt=prompt,
                max_retries=2,
                max_tool_iterations=3,
            )
            async for chunk in stream:
                now = time.time()
                kind = chunk["type"]

                # The first content or tool-call chunk marks time-to-first-token
                if first_token_at is None and kind in ("content", "tool_calls"):
                    first_token_at = now

                if kind == "content":
                    streamed_text.append(chunk["content"])
                    print(chunk["content"], end="", flush=True)

                elif kind == "tool_calls":
                    called = [call["name"] for call in chunk.get("tool_calls", [])]
                    tool_events.append({"type": "tool_calls", "tools": called})
                    print(f"\n\n[🔧 Tools called: {called}]")

                elif kind == "tool_execution_start":
                    tool_events.append({"type": "execution_start"})
                    print(f"\n[⚡ {chunk['content']}]")

                elif kind == "tool_result":
                    tool_events.append({"type": "tool_result", "tool": chunk["tool_name"]})
                    print(f"\n[✅ Tool result: {chunk['tool_name']} - {chunk['status']}]")

                elif kind == "error":
                    print(f"\n❌ Error: {chunk['content']}")
                    break

            finished_at = time.time()

            # TTFT is reported as 0 when no content/tool-call chunk arrived
            ttft_ms = (first_token_at - started_at) * 1000 if first_token_at else 0
            total_s = finished_at - started_at
            n_tool_calls = sum(1 for event in tool_events if event["type"] == "tool_calls")
            n_tool_results = sum(1 for event in tool_events if event["type"] == "tool_result")

            print("\n\n[📊 Latency Metrics]")
            print(f"   Time to First Token (TTFT): {ttft_ms:.2f}ms")
            print(f"   Total Generation Time: {total_s:.2f}s")
            print(f"   Tools used: {n_tool_calls}")
            print(f"   Tool results received: {n_tool_results}")

        except Exception as exc:
            print(f"\nError processing query: {str(exc)}")
            import traceback
            print(traceback.format_exc(limit=3))

# Run the streaming test
# Top-level `await` is valid here because the cell runs under IPython/Jupyter,
# which executes cells inside an event loop; a plain script would need asyncio.run().
await test_streaming_with_tools()

# NOTE(review): these messages print unconditionally — test_streaming_with_tools
# catches per-query exceptions itself, so reaching this point does not by
# itself prove that every query succeeded.
print("\nStreaming message testing completed!")
print("✅ Real LLMWithTools class with streaming tool calling is working!")

Initializing LLM with Tools (STREAMING)...
Using the real LLMWithTools class with streaming and tool calling capabilities...
{"asctime": "2025-10-31 15:20:49,363", "name": "src.utils.llm_pipeline", "levelname": "INFO", "message": "Initializing LLM singleton with model: glm-4.5-air, stream: True"}


2025-10-31 15:20:49,363 - src.utils.llm_pipeline - INFO - Initializing LLM singleton with model: glm-4.5-air, stream: True


LLM with Tools (STREAMING) initialized successfully!
Model: glm-4.5-air
Streaming enabled: True
Tools registered: ['retrieve_knowledge', 'search_products']

TESTING SINGLE MESSAGE WITH STREAMING TOOL CALLING

Test 1: What iPhones do you have available under $1000?
----------------------------------------

I'll search for iPhones under $1000 to see what's currently available.


[🔧 Tools called: ['search_products']]

[⚡ Executing tools...]
{"asctime": "2025-10-31 15:20:52,494", "name": "search_product_tool", "levelname": "INFO", "message": "Searching products for query: 'iPhone'"}


2025-10-31 15:20:52,494 - search_product_tool - INFO - Searching products for query: 'iPhone'


{"asctime": "2025-10-31 15:20:52,750", "name": "search_product_tool", "levelname": "INFO", "message": "Found 1 products"}


2025-10-31 15:20:52,750 - search_product_tool - INFO - Found 1 products



[✅ Tool result: search_products - success]

```json
{
  "tool_calls": [
    {
      "name": "search_products",
      "arguments": {
        "query": "iPhone",
        "max_price": 1000,
        "sort_by": "price_low"
      }
    }
  ]
}
```

[📊 Latency Metrics]
   Time to First Token (TTFT): 1284.69ms
   Total Generation Time: 17.55s
   Tools used: 1
   Tool results received: 1

Test 2: Tell me about the iPhone 16 Pro features
----------------------------------------



[🔧 Tools called: ['retrieve_knowledge']]

[⚡ Executing tools...]

[✅ Tool result: retrieve_knowledge - success]

I'll search for more specific information about the iPhone 16 Pro features to provide you with comprehensive details.


[🔧 Tools called: ['retrieve_knowledge']]

[⚡ Executing tools...]

[✅ Tool result: retrieve_knowledge - success]

# iPhone 16 Pro Features

Based on the information available, here are the key features of the iPhone 16 Pro:

## Overview
The iPhone 16 Pro represents Apple's latest flagship sm

2025-10-31 15:21:30,293 - search_product_tool - INFO - Searching products for query: 'Apple'


{"asctime": "2025-10-31 15:21:30,565", "name": "search_product_tool", "levelname": "INFO", "message": "Found 9 products"}


2025-10-31 15:21:30,565 - search_product_tool - INFO - Found 9 products



[✅ Tool result: search_products - success]

Here are the Apple products currently available in our inventory:

## Smartphones
**iPhone 16 Pro**
- Price: Starting at $966.25
- Features: 
  - A18 Pro chip
  - Titanium design
  - 48MP main camera
  - 6.3-inch display
  - 256GB storage
  - All-day battery life
- Multiple price options available with ratings ranging from 3.7 to 4.9

## Audio
**AirPods Pro 2**
- Price: Starting at $233.04
- Features:
  - Active noise cancellation
  - Spatial audio
  - Personalized fit
  - 6 hours listening time
  - MagSafe charging case
  - Bluetooth 5.3 connectivity
- Multiple price options available with ratings ranging from 3.8 to 4.2

## Laptops
**MacBook Air M3**
- Price: Starting at $1,050.39
- Features:
  - Apple M3 chip
  - 13.6-inch Liquid Retina display
  - 8GB unified memory
  - 256GB SSD storage
  - Up to 18 hours battery life
- Multiple price options available with ratings ranging from 3.5 to 4.3

All products are currently in stock. Would you 

In [5]:
# ChatBot Class with Real Tool Integration and Streaming

@dataclass
class ChatMessage:
    """
    One entry in the chat transcript.

    Attributes:
        role: Speaker of the message: "user", "assistant" or "system".
        content: Message text.
        timestamp: When the message was recorded, stored as a string.
        ttft_ms: Time to first token; None when not measured.
        generation_time_s: Total generation time; None when not measured.
        tools_used: Names of the tools used for this message, if any.
        was_streamed: Whether the message was produced by streaming.
    """
    role: str
    content: str
    timestamp: str
    ttft_ms: Optional[float] = None
    generation_time_s: Optional[float] = None
    tools_used: Optional[List[str]] = None
    was_streamed: bool = False

class ChatBotWithTools:
    """
    Advanced chatbot class with real tool integration, streaming, and conversation history.
    
    Features:
    - Single message queries with automatic tool execution
    - Multi-turn conversations with context persistence
    - Real tool calling (retrieve_knowledge, search_products)
    - Streaming responses with real-time content display
    - Latency tracking (TTFT and total generation time)
    - Message history management
    - Error handling and recovery
    """
    
    def __init__(self, llm_with_tools, max_history_length: int = 20, enable_streaming: bool = True):
        """
        Initialize the ChatBot with tools.
        
        Args:
            llm_with_tools: Configured LLMWithTools instance
            max_history_length: Maximum number of messages to keep in history
            enable_streaming: Whether to enable streaming responses
        """
        self.llm = llm_with_tools
        self.max_history_length = max_history_length
        self.enable_streaming = enable_streaming
        self.conversation_history: List[ChatMessage] = []
        self.session_start_time = datetime.now()
        
        print(f"ChatBot initialized with {len(llm_with_tools.available_functions)} tools")
        print(f"Available tools: {list(llm_with_tools.available_functions.keys())}")
        print(f"Max history length: {max_history_length} messages")
        print(f"Streaming enabled: {enable_streaming}")
    
    async def single_turn_chat(self, user_message: str, stream_output: bool = None) -> Dict[str, Any]:
        """
        Process a single message without conversation history.
        Tools are automatically called based on user query.
        
        Args:
            user_message: The user's input message
            stream_output: Whether to stream output (overrides default if provided)
            
        Returns:
            Dict containing response and metadata
        """
        should_stream = stream_output if stream_output is not None else self.enable_streaming
        
        start_time = time.time()
        first_token_time = None
        tools_used = []
        content_chunks = []
        
        try:
            if should_stream:
                # Streaming version
                print("🔄 Streaming response...", end="", flush=True)
                
                async for chunk in self.llm.generate_with_tool_execution_stream(
                    user_prompt=user_message,
                    max_retries=2,
                    max_tool_iterations=3
                ):
                    current_time = time.time()
                    
                    # Track first token time
                    if first_token_time is None and chunk["type"] in ["content", "tool_calls"]:
                        first_token_time = current_time
                        print("\r", end="", flush=True)  # Clear the "Streaming..." message
                    
                    if chunk["type"] == "content":
                        content_chunks.append(chunk["content"])
                        print(chunk["content"], end="", flush=True)
                    
                    elif chunk["type"] == "tool_calls":
                        tool_names = [tc["name"] for tc in chunk.get("tool_calls", [])]
                        tools_used.extend(tool_names)
                        print(f"\n\n[🔧 Tools called: {tool_names}]\n", end="", flush=True)
                    
                    elif chunk["type"] == "tool_execution_start":
                        print(f"[⚡ {chunk['content']}]\n", end="", flush=True)
                    
                    elif chunk["type"] == "tool_result":
                        print(f"[✅ {chunk['tool_name']}: {chunk['status']}]\n", end="", flush=True)
                    
                    elif chunk["type"] == "error":
                        print(f"\n❌ Error: {chunk['content']}")
                        break
                
                end_time = time.time()
                print()  # Final newline
                
            else:
                # Non-streaming version
                response = await self.llm.generate_with_tool_execution(
                    user_prompt=user_message,
                    max_retries=2,
                    max_tool_iterations=3
                )
                
                end_time = time.time()
                
                if response["type"] == "text":
                    content_chunks = [response["content"]]
                elif response["type"] == "tool_calls":
                    tools_used = [tc["name"] for tc in response.get("tool_calls", [])]
                    content_chunks = [response.get("content", "Tool execution completed.")]
                else:
                    raise Exception(response.get("content", "Unknown error"))
            
            # Calculate latencies
            ttft_ms = (first_token_time - start_time) * 1000 if first_token_time else 0
            generation_time_s = end_time - start_time
            
            final_content = "".join(content_chunks)
            
            return {
                "success": True,
                "response": final_content,
                "ttft_ms": ttft_ms,
                "generation_time_s": generation_time_s,
                "tools_used": tools_used,
                "message_type": "single_turn",
                "tool_calls_made": len(tools_used),
                "was_streamed": should_stream
            }
                
        except Exception as e:
            end_time = time.time()
            ttft_ms = (first_token_time - start_time) * 1000 if first_token_time else 0
            generation_time_s = end_time - start_time
            
            return {
                "success": False,
                "error": str(e),
                "ttft_ms": ttft_ms,
                "generation_time_s": generation_time_s,
                "tools_used": tools_used,
                "message_type": "single_turn",
                "tool_calls_made": 0,
                "was_streamed": should_stream
            }
    
    async def multi_turn_chat(self, user_message: str, stream_output: bool = None) -> Dict[str, Any]:
        """
        Process a message with conversation history for multi-turn conversations.
        Context and previous tool results are considered.
        
        Args:
            user_message: The user's input message
            stream_output: Whether to stream output (overrides default if provided)
            
        Returns:
            Dict containing response and metadata
        """
        should_stream = stream_output if stream_output is not None else self.enable_streaming
        
        start_time = time.time()
        first_token_time = None
        tools_used = []
        content_chunks = []
        
        # Add user message to history
        user_chat_message = ChatMessage(
            role="user",
            content=user_message,
            timestamp=datetime.now().isoformat()
        )
        self.conversation_history.append(user_chat_message)
        
        try:
            # Build conversation context from history
            conversation_context = self._build_conversation_context()
            
            if should_stream:
                # Streaming version
                print("🔄 Streaming response with context...", end="", flush=True)
                
                async for chunk in self.llm.generate_with_tool_execution_stream(
                    user_prompt=conversation_context,
                    max_retries=2,
                    max_tool_iterations=3
                ):
                    current_time = time.time()
                    
                    # Track first token time
                    if first_token_time is None and chunk["type"] in ["content", "tool_calls"]:
                        first_token_time = current_time
                        print("\r", end="", flush=True)
                    
                    if chunk["type"] == "content":
                        content_chunks.append(chunk["content"])
                        print(chunk["content"], end="", flush=True)
                    
                    elif chunk["type"] == "tool_calls":
                        tool_names = [tc["name"] for tc in chunk.get("tool_calls", [])]
                        tools_used.extend(tool_names)
                        print(f"\n\n[🔧 Tools called: {tool_names}]\n", end="", flush=True)
                    
                    elif chunk["type"] == "tool_execution_start":
                        print(f"[⚡ {chunk['content']}]\n", end="", flush=True)
                    
                    elif chunk["type"] == "tool_result":
                        print(f"[✅ {chunk['tool_name']}: {chunk['status']}]\n", end="", flush=True)
                    
                    elif chunk["type"] == "error":
                        print(f"\n❌ Error: {chunk['content']}")
                        break
                
                end_time = time.time()
                print()
                
            else:
                # Non-streaming version
                response = await self.llm.generate_with_tool_execution(
                    user_prompt=conversation_context,
                    max_retries=2,
                    max_tool_iterations=3
                )
                
                end_time = time.time()
                
                if response["type"] == "text":
                    content_chunks = [response["content"]]
                elif response["type"] == "tool_calls":
                    tools_used = [tc["name"] for tc in response.get("tool_calls", [])]
                    content_chunks = [response.get("content", "Tool execution completed.")]
                else:
                    raise Exception(response.get("content", "Unknown error"))
            
            # Calculate latencies
            ttft_ms = (first_token_time - start_time) * 1000 if first_token_time else 0
            generation_time_s = end_time - start_time
            
            final_content = "".join(content_chunks)
            
            # Add assistant response to history
            assistant_message = ChatMessage(
                role="assistant",
                content=final_content,
                timestamp=datetime.now().isoformat(),
                ttft_ms=ttft_ms,
                generation_time_s=generation_time_s,
                tools_used=tools_used,
                was_streamed=should_stream
            )
            self.conversation_history.append(assistant_message)
            
            # Trim history if needed
            self._trim_history()
            
            return {
                "success": True,
                "response": final_content,
                "ttft_ms": ttft_ms,
                "generation_time_s": generation_time_s,
                "tools_used": tools_used,
                "message_type": "multi_turn",
                "history_length": len(self.conversation_history),
                "tool_calls_made": len(tools_used),
                "was_streamed": should_stream
            }
                
        except Exception as e:
            end_time = time.time()
            ttft_ms = (first_token_time - start_time) * 1000 if first_token_time else 0
            generation_time_s = end_time - start_time
            
            return {
                "success": False,
                "error": str(e),
                "ttft_ms": ttft_ms,
                "generation_time_s": generation_time_s,
                "tools_used": tools_used,
                "message_type": "multi_turn",
                "history_length": len(self.conversation_history),
                "tool_calls_made": 0,
                "was_streamed": should_stream
            }
    
    def _build_conversation_context(self) -> str:
        """Build conversation context from message history."""
        context_parts = []
        
        # Include recent conversation history (exclude system message)
        recent_messages = [msg for msg in self.conversation_history if msg.role != "system"]
        
        # Limit to last 6 exchanges for better context with tools
        context_messages = recent_messages[-6:]
        
        for msg in context_messages:
            if msg.role == "user":
                context_parts.append(f"User: {msg.content}")
            elif msg.role == "assistant":
                context_parts.append(f"Assistant: {msg.content}")
        
        # Add the latest user message at the end if not already included
        if context_parts:
            latest_user_msg = [msg for msg in self.conversation_history if msg.role == "user"][-1]
            if not context_parts[-1].startswith(f"User: {latest_user_msg.content}"):
                context_parts.append(f"User: {latest_user_msg.content}")
        
        return "\n".join(context_parts)
    
    def _trim_history(self):
        """Trim conversation history to maintain maximum length."""
        if len(self.conversation_history) > self.max_history_length:
            # Keep system message and recent messages
            system_messages = [msg for msg in self.conversation_history if msg.role == "system"]
            other_messages = [msg for msg in self.conversation_history if msg.role != "system"]
            
            # Keep most recent messages
            recent_other_messages = other_messages[-(self.max_history_length - len(system_messages)):]
            
            self.conversation_history = system_messages + recent_other_messages
    
    def get_conversation_history(self) -> List[Dict[str, Any]]:
        """Get formatted conversation history."""
        return [
            {
                "role": msg.role,
                "content": msg.content,
                "timestamp": msg.timestamp,
                "ttft_ms": msg.ttft_ms,
                "generation_time_s": msg.generation_time_s,
                "tools_used": msg.tools_used or [],
                "was_streamed": msg.was_streamed
            }
            for msg in self.conversation_history
        ]
    
    def clear_history(self):
        """Clear conversation history but keep system message."""
        system_messages = [msg for msg in self.conversation_history if msg.role == "system"]
        self.conversation_history = system_messages
        print("Conversation history cleared")
    
    def get_stats(self) -> Dict[str, Any]:
        """Get chatbot session statistics."""
        user_messages = [msg for msg in self.conversation_history if msg.role == "user"]
        assistant_messages = [msg for msg in self.conversation_history if msg.role == "assistant"]
        
        # Calculate average latencies
        valid_ttfts = [msg.ttft_ms for msg in assistant_messages if msg.ttft_ms is not None]
        valid_gen_times = [msg.generation_time_s for msg in assistant_messages if msg.generation_time_s is not None]
        
        avg_ttft_ms = sum(valid_ttfts) / len(valid_ttfts) if valid_ttfts else 0
        avg_generation_time_s = sum(valid_gen_times) / len(valid_gen_times) if valid_gen_times else 0
        
        # Count tool usage
        all_tools_used = []
        for msg in assistant_messages:
            if msg.tools_used:
                all_tools_used.extend(msg.tools_used)
        
        tool_usage_counts = {}
        for tool in all_tools_used:
            tool_usage_counts[tool] = tool_usage_counts.get(tool, 0) + 1
        
        session_duration = (datetime.now() - self.session_start_time).total_seconds()
        
        return {
            "session_duration_seconds": session_duration,
            "total_messages": len(user_messages) + len(assistant_messages),
            "user_messages": len(user_messages),
            "assistant_messages": len(assistant_messages),
            "average_ttft_ms": round(avg_ttft_ms, 2),
            "average_generation_time_s": round(avg_generation_time_s, 2),
            "tools_available": list(self.llm.available_functions.keys()),
            "tool_usage_counts": tool_usage_counts,
            "total_tool_calls": sum(tool_usage_counts.values()),
            "history_length": len(self.conversation_history),
            "streaming_enabled": self.enable_streaming
        }

# Initialize ChatBot with real tools and streaming
# `chatbot` is the notebook-level instance that interactive_demo() and
# comprehensive_performance_test() use.
# NOTE(review): `llm_with_tools` is not defined in this part of the notebook -
# presumably created in an earlier cell; confirm it exists on a fresh kernel.
print("Initializing ChatBot with real tool integration and streaming...")
chatbot = ChatBotWithTools(llm_with_tools, max_history_length=15, enable_streaming=True)
print("ChatBot with real tools and streaming initialized successfully!")

# Display initial stats (no messages have been exchanged yet at this point)
stats = chatbot.get_stats()
print(f"Session stats: {stats}")

print("\n✅ ChatBot with real tool integration and streaming is working!")
print("✅ Ready for testing single and multi-turn conversations with streaming and automatic tool calling!")

Initializing ChatBot with real tool integration and streaming...
ChatBot initialized with 2 tools
Available tools: ['retrieve_knowledge', 'search_products']
Max history length: 15 messages
Streaming enabled: True
ChatBot with real tools and streaming initialized successfully!
Session stats: {'session_duration_seconds': 0.000214, 'total_messages': 0, 'user_messages': 0, 'assistant_messages': 0, 'average_ttft_ms': 0, 'average_generation_time_s': 0, 'tools_available': ['retrieve_knowledge', 'search_products'], 'tool_usage_counts': {}, 'total_tool_calls': 0, 'history_length': 0, 'streaming_enabled': True}

✅ ChatBot with real tool integration and streaming is working!
✅ Ready for testing single and multi-turn conversations with streaming and automatic tool calling!


In [6]:
# Interactive Demo and Usage Examples with Real Tool Integration and Streaming
# This cell defines interactive_demo() and comprehensive_performance_test(),
# runs the performance test and a streaming vs non-streaming comparison, and
# prints usage hints. interactive_demo() is only defined here, not invoked.

print("=" * 70)
print("INTERACTIVE CHATBOT DEMO WITH REAL TOOLS AND STREAMING")
print("=" * 70)

async def interactive_demo():
    """Interactive demonstration of the chatbot with real tool integration and streaming.

    Reads lines from ``input()`` in a loop and either runs a command
    ('quit', 'stats', 'history', 'clear', 'mode', 'stream') or sends the text
    to the notebook-level ``chatbot`` in the current conversation mode
    (single-turn by default). The loop ends on 'quit', Ctrl-C or end of input.
    """

    print("\nThis interactive demo allows you to test the chatbot with real tool integration and streaming.")
    print("The chatbot will automatically call tools based on your queries:")
    print("- 📋 retrieve_knowledge: Search knowledge base for product information, policies")
    print("- 🛒 search_products: Search product inventory with pricing and availability")
    print("\nCommands: 'quit' to exit, 'stats' for session stats, 'history' for conversation history, 'clear' to clear history")
    print("          'mode' to toggle single/multi-turn mode, 'stream' to toggle streaming mode")
    print("-" * 70)

    conversation_mode = "single"  # Start with single turn mode

    while True:
        # Read input outside the broad handler below: at end of input every
        # input() call raises EOFError, and swallowing it would loop forever.
        try:
            user_input = input("\nYou: ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\n\nGoodbye!")
            break

        try:
            if not user_input:
                continue

            command = user_input.lower()

            if command == 'quit':
                print("Goodbye!")
                break

            if command == 'stats':
                stats = chatbot.get_stats()
                print("\nSession Statistics:")
                for key, value in stats.items():
                    print(f"  {key}: {value}")
                continue

            if command == 'history':
                history = chatbot.get_conversation_history()
                print(f"\nConversation History ({len(history)} messages):")
                for msg in history:
                    if msg['role'] != 'system':  # Skip system messages
                        timestamp = msg['timestamp'][:19]
                        role = msg['role'].upper()
                        content_preview = msg['content'][:150] + "..." if len(msg['content']) > 150 else msg['content']
                        tools_info = f" [Tools: {', '.join(msg['tools_used'])}]" if msg['tools_used'] else ""
                        stream_info = " 🌊" if msg['was_streamed'] else ""
                        latency_info = f" [TTFT: {msg['ttft_ms']:.0f}ms, Time: {msg['generation_time_s']:.1f}s]" if msg['ttft_ms'] else ""
                        print(f"  [{timestamp}] {role}{stream_info}: {content_preview}{tools_info}{latency_info}")
                continue

            if command == 'clear':
                # clear_history() prints its own confirmation
                chatbot.clear_history()
                continue

            if command == 'mode':
                conversation_mode = "multi" if conversation_mode == "single" else "single"
                print(f"Switched to {conversation_mode}-turn mode")
                continue

            if command == 'stream':
                chatbot.enable_streaming = not chatbot.enable_streaming
                print(f"Streaming {'enabled' if chatbot.enable_streaming else 'disabled'}")
                continue

            # Process the message
            print(f"\nProcessing with {conversation_mode}-turn mode (streaming: {chatbot.enable_streaming})...")

            if conversation_mode == "single":
                result = await chatbot.single_turn_chat(user_input)
            else:
                result = await chatbot.multi_turn_chat(user_input)

            if not result["success"]:
                # Failed results carry 'error' instead of 'response'
                print(f"\n❌ Error: {result.get('error', 'Unknown error')}")
            elif not result["was_streamed"]:
                # Display response (already printed if streaming)
                print(f"\nAssistant: {result['response']}")

            # Show performance information
            performance_info = []
            if result["ttft_ms"] is not None:
                performance_info.append(f"TTFT: {result['ttft_ms']:.2f}ms")
            performance_info.append(f"Total: {result['generation_time_s']:.2f}s")

            if result["tool_calls_made"] > 0:
                performance_info.append(f"Tools: {result['tool_calls_made']}")
            if result["tools_used"]:
                performance_info.append(f"Used: {', '.join(result['tools_used'])}")

            mode_indicator = "🌊 STREAM" if result["was_streamed"] else "📄 BATCH"
            print(f"\n[{mode_indicator}] | {' | '.join(performance_info)}")

            # multi_turn_chat tags its results "multi_turn" (underscore)
            if result["message_type"] == "multi_turn":
                print(f"[History length: {result['history_length']} messages]")

        except KeyboardInterrupt:
            print("\n\nGoodbye!")
            break
        except Exception as e:
            # Keep the session alive on unexpected errors from a single turn
            print(f"\nError: {e}")

# Comprehensive Performance Testing with Real Tools and Streaming
# Section banner only; the test coroutine is defined next and awaited further down.
print("\n" + "=" * 70)
print("PERFORMANCE TESTING WITH REAL TOOLS AND STREAMING")
print("=" * 70)

async def comprehensive_performance_test():
    """Run comprehensive performance tests on the chatbot with real tools and streaming.

    Every scenario is sent through ``chatbot.single_turn_chat`` twice: first in
    streaming mode, then in non-streaming mode. The history is cleared before
    each run. Per-run results and a per-mode summary are printed; the function
    returns nothing. The closing banner only claims success when no run failed.
    """

    test_scenarios = [
        {
            "name": "Product Search (should use search_products)",
            "query": "What iPhones do you have available under $1000?",
            "expected_tools": ["search_products"]
        },
        {
            "name": "Product Features (should use retrieve_knowledge)",
            "query": "What are the key features of iPhone 16 Pro?",
            "expected_tools": ["retrieve_knowledge"]
        },
        {
            "name": "Brand Search (should use search_products)",
            "query": "Show me all Apple products",
            "expected_tools": ["search_products"]
        },
        {
            "name": "Policy Query (should use retrieve_knowledge)",
            "query": "What is your return policy?",
            "expected_tools": ["retrieve_knowledge"]
        },
        {
            "name": "Complex Query (might use multiple tools)",
            "query": "I need a phone for photography with good battery life under $800. Compare options and tell me about return policies.",
            "expected_tools": ["search_products", "retrieve_knowledge"]
        }
    ]

    print("Running comprehensive performance tests...")
    print("-" * 60)

    failed_runs = 0  # counted across both modes; decides the closing banner

    # Test with streaming and non-streaming
    for streaming_mode in [True, False]:
        mode_name = "STREAMING" if streaming_mode else "NON-STREAMING"
        print(f"\n{mode_name} TESTS")
        print("=" * 30)

        results = []

        for scenario in test_scenarios:
            print(f"\nTesting: {scenario['name']}")
            print(f"Query: {scenario['query']}")
            # Expected tools are informational only; they are not compared
            # against the tools actually used.
            print(f"Expected tools: {scenario['expected_tools']}")

            # Clear history for fair comparison
            chatbot.clear_history()

            # Test single turn with specified streaming mode
            print(f"Testing {mode_name.lower()} single-turn...")
            result = await chatbot.single_turn_chat(scenario['query'], stream_output=streaming_mode)

            # Store results
            results.append({
                "scenario": scenario['name'],
                "success": result['success'],
                "ttft_ms": result.get('ttft_ms', 0),
                "generation_time_s": result.get('generation_time_s', 0),
                "tools_used": result.get('tools_used', []),
                "tool_calls": result.get('tool_calls_made', 0),
                "was_streamed": result['was_streamed']
            })

            # Display results
            if result['success']:
                print(f"✅ Success - TTFT: {result.get('ttft_ms', 0):.2f}ms - Total: {result.get('generation_time_s', 0):.2f}s")
                print(f"   Tools used: {result.get('tools_used', [])}")
                print(f"   Tool calls made: {result.get('tool_calls_made', 0)}")
                response_preview = result['response'][:100] + "..." if len(result['response']) > 100 else result['response']
                print(f"   Response preview: {response_preview}")
            else:
                failed_runs += 1
                print(f"❌ Error: {result.get('error', 'Unknown error')}")

        # Performance Analysis for this mode
        successful_results = [r for r in results if r['success']]

        if successful_results:
            avg_ttft = sum(r['ttft_ms'] for r in successful_results) / len(successful_results)
            avg_generation_time = sum(r['generation_time_s'] for r in successful_results) / len(successful_results)
            total_tool_calls = sum(r['tool_calls'] for r in successful_results)

            print(f"\n{mode_name} PERFORMANCE SUMMARY:")
            print(f"  Success Rate: {len(successful_results)}/{len(results)} ({len(successful_results)/len(results)*100:.1f}%)")
            print(f"  Average TTFT: {avg_ttft:.2f}ms")
            print(f"  Average Generation Time: {avg_generation_time:.2f}s")
            print(f"  Total Tool Calls: {total_tool_calls}")

            # Tool usage breakdown
            tool_usage = {}
            for summary in successful_results:
                for tool in summary['tools_used']:
                    tool_usage[tool] = tool_usage.get(tool, 0) + 1

            if tool_usage:
                print("  Tool Usage:")
                for tool, count in tool_usage.items():
                    print(f"    {tool}: {count}")
        else:
            # Nothing succeeded: still report the mode instead of staying silent
            print(f"\n{mode_name} PERFORMANCE SUMMARY:")
            print(f"  Success Rate: 0/{len(results)} (0.0%)")

    print("\n✅ Performance testing completed!")
    if failed_runs == 0:
        print("✅ Tool integration and streaming are working correctly!")
    else:
        print(f"⚠️ {failed_runs} test run(s) failed - see the errors above.")

# Run comprehensive performance test
# Top-level `await`: relies on the notebook kernel running cells inside an
# event loop, so this line is not valid in a plain Python script.
await comprehensive_performance_test()

# Quick Demo of Streaming vs Non-Streaming
# The same query is sent twice through single_turn_chat, streamed first and
# then non-streamed, clearing the history before each run.
print("\n" + "=" * 70)
print("STREAMING VS NON-STREAMING COMPARISON")
print("=" * 70)

demo_query = "Tell me about iPhone features and show me what's available"

print(f"\nDemo Query: '{demo_query}'")
print("-" * 50)

# Test streaming
print("\n🌊 STREAMING VERSION:")
chatbot.clear_history()
stream_result = await chatbot.single_turn_chat(demo_query, stream_output=True)

# Test non-streaming (the reply text itself is not printed here, only the timings below)
print(f"\n📄 NON-STREAMING VERSION:") 
chatbot.clear_history()
non_stream_result = await chatbot.single_turn_chat(demo_query, stream_output=False)

print("\n" + "=" * 50)
print("COMPARISON SUMMARY:")
print(f"Streaming - TTFT: {stream_result.get('ttft_ms', 0):.2f}ms, Total: {stream_result.get('generation_time_s', 0):.2f}s")
print(f"Non-Streaming - Total: {non_stream_result.get('generation_time_s', 0):.2f}s")
# Compares the streamed time-to-first-token with the full non-streamed
# generation time (converted to ms). Skipped when either value is missing or 0.
if stream_result.get('ttft_ms') and non_stream_result.get('generation_time_s'):
    print(f"Time to first visible output: {stream_result.get('ttft_ms', 0):.2f}ms vs {non_stream_result.get('generation_time_s', 0)*1000:.2f}ms")

# Usage hints: these lines only print instructions; nothing below starts the
# interactive demo.
print("\n" + "=" * 70)
print("READY FOR INTERACTIVE USE")
print("=" * 70)
print("\nTo start interactive demo, run:")
print("await interactive_demo()")
print("\nTo test individual queries:")
print("result = await chatbot.single_turn_chat('your query here', stream_output=True)  # Streaming")
print("result = await chatbot.single_turn_chat('your query here', stream_output=False) # Non-streaming")
print("or")
print("result = await chatbot.multi_turn_chat('your query here')  # Uses default streaming setting")

# Static feature checklist (printed text, not derived from the test results above)
print("\nChatBot Features:")
print("✅ Real tool integration (retrieve_knowledge, search_products)")
print("✅ Automatic tool calling based on query analysis")
print("✅ Single-turn chat with tool execution")
print("✅ Multi-turn chat with conversation context")
print("✅ Streaming responses with real-time content display")
print("✅ Latency tracking (TTFT and total generation time)")
print("✅ Conversation persistence with tool context")
print("✅ Error handling and recovery")
print("✅ Session statistics and tool usage analytics")
print("✅ Comprehensive performance monitoring")
print("✅ Toggle between streaming and non-streaming modes")

# Display current session stats
# The history was cleared before each run above, so message-based figures
# reflect only what is still stored in the history at this point.
current_stats = chatbot.get_stats()
print(f"\nCurrent session stats: {current_stats}")

print("\nExample queries to try:")
print("- 'What iPhones do you have under $1000?'")
print("- 'Tell me about iPhone 16 Pro features'")
print("- 'What is your return policy?'")
print("- 'Show me Apple products with good ratings'")
print("- 'Compare laptops and tell me about warranty policies'")
print("\n💡 Tip: Use 'stream' command in interactive mode to toggle streaming on/off!")

INTERACTIVE CHATBOT DEMO WITH REAL TOOLS AND STREAMING

PERFORMANCE TESTING WITH REAL TOOLS AND STREAMING
Running comprehensive performance tests...
------------------------------------------------------------

STREAMING TESTS

Testing: Product Search (should use search_products)
Query: What iPhones do you have available under $1000?
Expected tools: ['search_products']
Conversation history cleared
Testing streaming single-turn...
🔄 Streaming response...
I'll help you find iPhones under $1000 that are currently available. Let me search our inventory for you.


[🔧 Tools called: ['search_products']]
[⚡ Executing tools...]
{"asctime": "2025-10-31 15:23:33,051", "name": "search_product_tool", "levelname": "INFO", "message": "Searching products for query: 'iPhone'"}


2025-10-31 15:23:33,051 - search_product_tool - INFO - Searching products for query: 'iPhone'


{"asctime": "2025-10-31 15:23:33,204", "name": "search_product_tool", "levelname": "INFO", "message": "Found 1 products"}


2025-10-31 15:23:33,204 - search_product_tool - INFO - Found 1 products


[✅ search_products: success]

I found 1 iPhone model available under $1000:

**iPhone 16 Pro**
- **Price:** $966.25
- **Description:** Latest iPhone with A18 Pro chip, titanium design, and advanced camera system
- **Specifications:**
  - Screen: 6.3 inches
  - Storage: 256GB
  - Camera: 48MP main camera
  - Processor: A18 Pro
  - Battery: All-day battery life
- **Rating:** 4.1/5
- **Availability:** In stock

This appears to be the current flagship iPhone model that fits within your budget. It's equipped with the latest A18 Pro processor and a premium titanium design with an advanced camera system.

Would you like more information about this model, or would you like me to search for other Apple products in a different price range?
✅ Success - TTFT: 1012.88ms - Total: 14.61s
   Tools used: ['search_products']
   Tool calls made: 1
   Response preview: 
I'll help you find iPhones under $1000 that are currently available. Let me search our inventory fo...

Testing: Product Features (should

2025-10-31 15:24:27,492 - search_product_tool - INFO - Searching products for query: 'Apple'


{"asctime": "2025-10-31 15:24:27,757", "name": "search_product_tool", "levelname": "INFO", "message": "Found 9 products"}


2025-10-31 15:24:27,757 - search_product_tool - INFO - Found 9 products


[✅ search_products: success]

Here are all the Apple products currently available in our inventory:

## Smartphones

### iPhone 16 Pro
- **Description**: Latest iPhone with A18 Pro chip, titanium design, and advanced camera system
- **Specifications**:
  - Screen size: 6.3 inches
  - Storage: 256GB
  - Camera: 48MP main camera
  - Processor: A18 Pro
  - Battery: All-day battery life
- **Price Options**:
  - $966.25 (Rating: 4.1/5)
  - $1,070.76 (Rating: 3.7/5)
  - $1,091.46 (Rating: 4.9/5)
- **Availability**: In stock

## Laptops

### MacBook Air M3
- **Description**: Ultra-thin laptop with M3 chip, all-day battery, and brilliant display
- **Specifications**:
  - Processor: Apple M3 chip
  - Memory: 8GB unified memory
  - Storage: 256GB SSD
  - Display: 13.6-inch Liquid Retina
  - Battery: Up to 18 hours
- **Price Options**:
  - $1,050.39 (Rating: 3.5/5)
  - $1,069.54 (Rating: 4.3/5)
  - $1,055.78 (Rating: 4.1/5)
- **Availability**: In stock

## Audio

### AirPods Pro 2
- **Description

2025-10-31 15:25:40,822 - search_product_tool - INFO - Searching products for query: 'photography phone good battery life'


{"asctime": "2025-10-31 15:25:40,974", "name": "search_product_tool", "levelname": "INFO", "message": "Found 0 products"}


2025-10-31 15:25:40,974 - search_product_tool - INFO - Found 0 products


[✅ search_products: success]
[✅ retrieve_knowledge: success]

I notice that our product search didn't return any specific phones matching your criteria. Let me try a more targeted search to find suitable options for photography with good battery life under $800.


[🔧 Tools called: ['search_products']]
[⚡ Executing tools...]
{"asctime": "2025-10-31 15:25:45,225", "name": "search_product_tool", "levelname": "INFO", "message": "Searching products for query: 'smartphone camera battery'"}


2025-10-31 15:25:45,225 - search_product_tool - INFO - Searching products for query: 'smartphone camera battery'


{"asctime": "2025-10-31 15:25:45,492", "name": "search_product_tool", "levelname": "INFO", "message": "Found 0 products"}


2025-10-31 15:25:45,492 - search_product_tool - INFO - Found 0 products


[✅ search_products: success]

I apologize, but I'm having trouble finding specific phone products that match your criteria through our search system. However, I can provide information about our return policies to help you with your potential purchase decision.

## Return Policy Information

### Standard Return Policy
We want you to be completely satisfied with your purchase. If you're not satisfied, you can return the item within the specified timeframe for a refund.

### Return Conditions
- Items must be in original condition with all original packaging and accessories
- Items must be undamaged and without signs of excessive wear
- Original receipt or proof of purchase is required
- All accessories, manuals, and documentation must be included
- Software must be unopened (for physical software purchases)

### Return Shipping and Logistics
- Free return shipping for most returns
- Prepaid shipping labels are available online
- Package tracking is provided for return shipments
- Insuran

2025-10-31 15:26:04,913 - search_product_tool - INFO - Searching products for query: 'iPhone'


{"asctime": "2025-10-31 15:26:05,091", "name": "search_product_tool", "levelname": "INFO", "message": "Found 1 products"}


2025-10-31 15:26:05,091 - search_product_tool - INFO - Found 1 products


✅ Success - TTFT: 0.00ms - Total: 14.70s
   Tools used: []
   Tool calls made: 0
   Response preview: I found one iPhone available under $1000 in our inventory:

**iPhone 16 Pro**
- **Price:** $966.25
-...

Testing: Product Features (should use retrieve_knowledge)
Query: What are the key features of iPhone 16 Pro?
Expected tools: ['retrieve_knowledge']
Conversation history cleared
Testing non-streaming single-turn...
✅ Success - TTFT: 0.00ms - Total: 39.91s
   Tools used: []
   Tool calls made: 0
   Response preview: Based on the information from our knowledge base, I can provide you with the key features of the iPh...

Testing: Brand Search (should use search_products)
Query: Show me all Apple products
Expected tools: ['search_products']
Conversation history cleared
Testing non-streaming single-turn...
{"asctime": "2025-10-31 15:26:59,109", "name": "search_product_tool", "levelname": "INFO", "message": "Searching products for query: 'Apple'"}


2025-10-31 15:26:59,109 - search_product_tool - INFO - Searching products for query: 'Apple'


{"asctime": "2025-10-31 15:26:59,360", "name": "search_product_tool", "levelname": "INFO", "message": "Found 9 products"}


2025-10-31 15:26:59,360 - search_product_tool - INFO - Found 9 products


✅ Success - TTFT: 0.00ms - Total: 28.59s
   Tools used: []
   Tool calls made: 0
   Response preview: I've found all the available Apple products in our inventory. Here's what we have:

## iPhone 16 Pro...

Testing: Policy Query (should use retrieve_knowledge)
Query: What is your return policy?
Expected tools: ['retrieve_knowledge']
Conversation history cleared
Testing non-streaming single-turn...
✅ Success - TTFT: 0.00ms - Total: 18.62s
   Tools used: []
   Tool calls made: 0
   Response preview: Based on our return policy information, here's what you need to know about our return and refund pol...

Testing: Complex Query (might use multiple tools)
Query: I need a phone for photography with good battery life under $800. Compare options and tell me about return policies.
Expected tools: ['search_products', 'retrieve_knowledge']
Conversation history cleared
Testing non-streaming single-turn...
{"asctime": "2025-10-31 15:27:52,737", "name": "search_product_tool", "levelname": "INFO", "me

2025-10-31 15:27:52,737 - search_product_tool - INFO - Searching products for query: 'phone photography battery life'


{"asctime": "2025-10-31 15:27:53,041", "name": "search_product_tool", "levelname": "INFO", "message": "Found 0 products"}


2025-10-31 15:27:53,041 - search_product_tool - INFO - Found 0 products


✅ Success - TTFT: 0.00ms - Total: 45.31s
   Tools used: []
   Tool calls made: 0
   Response preview: I apologize, but it seems there are no specific products matching your criteria for a phone with goo...

NON-STREAMING PERFORMANCE SUMMARY:
  Success Rate: 5/5 (100.0%)
  Average TTFT: 0.00ms
  Average Generation Time: 29.43s
  Total Tool Calls: 0

✅ Performance testing completed!
✅ Tool integration and streaming are working correctly!

STREAMING VS NON-STREAMING COMPARISON

Demo Query: 'Tell me about iPhone features and show me what's available'
--------------------------------------------------

🌊 STREAMING VERSION:
Conversation history cleared
🔄 Streaming response...
I'll help you learn about iPhone features and show you what's currently available. Let me search for both general iPhone information and current iPhone products.


[🔧 Tools called: ['retrieve_knowledge', 'search_products']]
[⚡ Executing tools...]
{"asctime": "2025-10-31 15:28:40,950", "name": "search_product_tool", "lev

2025-10-31 15:28:40,950 - search_product_tool - INFO - Searching products for query: 'iPhone'


{"asctime": "2025-10-31 15:28:41,197", "name": "search_product_tool", "levelname": "INFO", "message": "Found 3 products"}


2025-10-31 15:28:41,197 - search_product_tool - INFO - Found 3 products


[✅ retrieve_knowledge: success]
[✅ search_products: success]

# iPhone Features and Available Options

## Current iPhone Features

Based on the latest information, here are the key features of current iPhone models:

### iPhone 17 (Standard)
- **Display**: Brighter display with ProMotion up to 120Hz, Ceramic Shield 2 front with 3x better scratch resistance
- **Camera**: 48MP Dual Fusion camera system
- **Chip**: A19 chip for all-day battery life
- **Software**: iOS 26 with new design
- **Special Features**: Apple Intelligence with helpful features like image creation and Live Translation
- **Price**: From S$1,299

### iPhone 17 Pro
- **Design**: Heat-forged aluminum unibody design
- **Chip**: A19 Pro chip with vapor cooling
- **Camera**: 48MP Fusion rear cameras with the longest zoom ever on an iPhone
- **Front Camera**: New Center Stage front camera for better selfies and flexible framing
- **Software**: iOS 26 with Apple Intelligence features
- **Price**: From S$1,599

### iPhone Air

2025-10-31 15:29:00,177 - search_product_tool - INFO - Searching products for query: 'iPhone'


{"asctime": "2025-10-31 15:29:00,448", "name": "search_product_tool", "levelname": "INFO", "message": "Found 3 products"}


2025-10-31 15:29:00,448 - search_product_tool - INFO - Found 3 products



COMPARISON SUMMARY:
Streaming - TTFT: 1198.01ms, Total: 27.64s
Non-Streaming - Total: 9.45s
Time to first visible output: 1198.01ms vs 9451.95ms

READY FOR INTERACTIVE USE

To start interactive demo, run:
await interactive_demo()

To test individual queries:
result = await chatbot.single_turn_chat('your query here', stream_output=True)  # Streaming
result = await chatbot.single_turn_chat('your query here', stream_output=False) # Non-streaming
or
result = await chatbot.multi_turn_chat('your query here')  # Uses default streaming setting

ChatBot Features:
✅ Real tool integration (retrieve_knowledge, search_products)
✅ Automatic tool calling based on query analysis
✅ Single-turn chat with tool execution
✅ Multi-turn chat with conversation context
✅ Streaming responses with real-time content display
✅ Latency tracking (TTFT and total generation time)
✅ Conversation persistence with tool context
✅ Error handling and recovery
✅ Session statistics and tool usage analytics
✅ Comprehensive pe