In [1]:
import logging
import os
import pathlib
import sys
import time
import json
import asyncio
from typing import List, Dict, Any, Optional, Union
from dataclasses import dataclass
from datetime import datetime

from dotenv import load_dotenv
from nest_asyncio import apply

# Load environment variables from a local .env file, then patch asyncio via
# nest_asyncio so coroutines can be driven from inside the notebook kernel's
# already-running event loop.
load_dotenv()
apply()

# Make the repository root and its "src" directory importable. Both paths are
# derived from the current working directory, so the notebook is expected to be
# run from its own folder. Membership is checked first so that re-running this
# cell does not keep growing sys.path with duplicate entries.
_project_root = pathlib.Path().absolute().parent
for _import_dir in (_project_root, _project_root / "src"):
    if str(_import_dir) not in sys.path:
        sys.path.append(str(_import_dir))

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger("chat_tool_calling_demo")

# Import required components (must come after the sys.path changes above)
from src.utils.llm_pipeline import LLMWithTools
from src.tools.retrieve import retrieve_documents, RetrieveRequest
from src.tools.search_product import search_products, ProductSearchRequest

print("Successfully imported all components")
print(f"LLMWithTools class: {LLMWithTools}")
print(f"Retrieve function: {retrieve_documents}")
print(f"Search products function: {search_products}")

  from .autonotebook import tqdm as notebook_tqdm


{"asctime": "2025-10-30 13:35:29,772", "name": "pinecone_index_client", "levelname": "INFO", "message": "Initialized PineconeIndexClient for dense index 'curator-pommeline' (dim: 768, metric: dotproduct)"}


2025-10-30 13:35:29,772 - pinecone_index_client - INFO - Initialized PineconeIndexClient for dense index 'curator-pommeline' (dim: 768, metric: dotproduct)


{"asctime": "2025-10-30 13:35:29,785", "name": "pinecone_vector_store", "levelname": "INFO", "message": "Connected to Pinecone Index container: {'namespaces': {'curator-pommeline-7b1a7bbb': {'vectorCount': 0}, 'pommeline': {'vectorCount': 0}, 'curator-pommeline-a9b4d456': {'vectorCount': 0}, 'curator-pommeline': {'vectorCount': 212}, 'curator-pommeline-12fa085f': {'vectorCount': 0}, 'curator-pommeline-f03bab83': {'vectorCount': 0}, '': {'vectorCount': 0}}, 'dimension': 768, 'indexFullness': 0.0, 'totalVectorCount': 212}"}


2025-10-30 13:35:29,785 - pinecone_vector_store - INFO - Connected to Pinecone Index container: {'namespaces': {'curator-pommeline-7b1a7bbb': {'vectorCount': 0}, 'pommeline': {'vectorCount': 0}, 'curator-pommeline-a9b4d456': {'vectorCount': 0}, 'curator-pommeline': {'vectorCount': 212}, 'curator-pommeline-12fa085f': {'vectorCount': 0}, 'curator-pommeline-f03bab83': {'vectorCount': 0}, '': {'vectorCount': 0}}, 'dimension': 768, 'indexFullness': 0.0, 'totalVectorCount': 212}


{"asctime": "2025-10-30 13:35:29,787", "name": "cache", "levelname": "INFO", "message": "Started cache cleanup task with 300s interval"}


2025-10-30 13:35:29,787 - cache - INFO - Started cache cleanup task with 300s interval


Successfully imported all components
LLMWithTools class: <class 'src.utils.llm_pipeline.LLMWithTools'>
Retrieve function: <function retrieve_documents at 0x177dc0040>
Search products function: <function search_products at 0x177dc0b80>


In [2]:
# Tool Function Definitions and Wrappers (Fixed Version - Correct API Usage + Clean Logging)

def retrieve_knowledge_tool(query: str, top_k: int = 5, search_mode: str = "hybrid", similarity_threshold: float = 0.15) -> Dict[str, Any]:
    """
    Look up knowledge-base documents and package the hits for the LLM.

    Arguments of the wrong type are coerced before the lookup, and any
    exception is converted into an error payload instead of propagating.

    Args:
        query: Search text; non-strings are stringified, None becomes "".
        top_k: Maximum number of documents to return (default: 5).
        search_mode: "semantic", "keyword", or "hybrid" (default: "hybrid").
        similarity_threshold: Minimum similarity threshold (default: 0.15).

    Returns:
        On success a dict with "query", "results" (one dict per document with
        "content", "source_file", "score", "metadata"), "total_results" and
        "search_metadata". On failure a dict with "query", an empty "results"
        list, "total_results" of 0 and "error" holding the exception text.
    """
    try:
        # Deferred import: resolved when the tool is invoked, not when it is defined
        from src.tools.retrieve import retrieve_documents

        # Normalise each argument; None falls back to the documented default
        if query is None:
            query = ""
        elif not isinstance(query, str):
            query = str(query)

        if top_k is None:
            top_k = 5
        elif not isinstance(top_k, int):
            top_k = int(top_k)

        if search_mode is None:
            search_mode = "hybrid"
        elif not isinstance(search_mode, str):
            search_mode = str(search_mode)

        if similarity_threshold is None:
            similarity_threshold = 0.15
        elif not isinstance(similarity_threshold, (int, float)):
            similarity_threshold = float(similarity_threshold)

        # The backend takes the plain query string plus keyword options
        search_response = retrieve_documents(
            query=query,
            top_k=top_k,
            search_mode=search_mode,
            similarity_threshold=similarity_threshold,
            include_scores=True,
        )

        # Flatten the hit objects into plain dicts for LLM consumption
        documents = [
            {
                "content": hit.content,
                "source_file": hit.source_file,
                "score": hit.score,
                "metadata": hit.metadata,
            }
            for hit in search_response.results
        ]

        return {
            "query": query,
            "results": documents,
            "total_results": len(documents),
            "search_metadata": search_response.search_metadata,
        }

    except Exception as exc:
        import traceback
        print(f"Error in retrieve_knowledge_tool: {exc}")
        traceback.print_exc(limit=3)
        return {
            "query": query,
            "results": [],
            "total_results": 0,
            "error": str(exc),
        }

def search_products_tool(query: str, category: str = None, min_price: float = None, max_price: float = None,
                        brand: str = None, limit: int = 10, sort_by: str = "relevance") -> Dict[str, Any]:
    """
    Tool function for searching product inventory.

    Arguments of the wrong type are coerced before the search, and any
    exception is converted into an error payload instead of propagating.

    Args:
        query: Product search query; non-strings are stringified, None becomes "".
        category: Filter by product category (string, optional).
        min_price: Minimum price filter (number, optional).
        max_price: Maximum price filter (number, optional).
        brand: Filter by brand (string, optional).
        limit: Maximum number of results (default: 10). An explicit None is
            replaced by the default.
        sort_by: Sort order - "relevance", "price_low", "price_high", "rating"
            (default: "relevance"). An explicit None is replaced by the default.

    Returns:
        On success a dict with "query", "products" (one dict per product),
        "total_results", "filters_applied" and "search_metadata". On failure a
        dict with "query", an empty "products" list, "total_results" of 0 and
        "error" holding the exception text.
    """
    try:
        # Deferred import: resolved when the tool is invoked, not when it is defined
        from src.tools.search_product import search_products

        # Ensure parameters are of correct type
        if query is None:
            query = ""
        elif not isinstance(query, str):
            query = str(query)

        # Optional filters: None means "no filter" and is passed through untouched
        if category is not None and not isinstance(category, str):
            category = str(category)

        if brand is not None and not isinstance(brand, str):
            brand = str(brand)

        if min_price is not None and not isinstance(min_price, (int, float)):
            min_price = float(min_price)

        if max_price is not None and not isinstance(max_price, (int, float)):
            max_price = float(max_price)

        # Parameters with defaults: a caller (e.g. an LLM emitting JSON null)
        # may pass None explicitly, which must fall back to the default rather
        # than being forwarded to the search backend.
        if limit is None:
            limit = 10
        elif not isinstance(limit, int):
            limit = int(limit)

        if sort_by is None:
            sort_by = "relevance"
        elif not isinstance(sort_by, str):
            sort_by = str(sort_by)

        # Call search_products directly with string query + kwargs
        response = search_products(
            query=query,
            category=category,
            min_price=min_price,
            max_price=max_price,
            brand=brand,
            limit=limit,
            sort_by=sort_by
        )

        # Format results for LLM consumption
        products = [
            {
                "id": product.id,
                "name": product.name,
                "description": product.description,
                "price": product.price,
                "brand": product.brand,
                "category": product.category,
                "availability": product.availability,
                "rating": product.rating,
                "specifications": product.specifications
            }
            for product in response.products
        ]

        return {
            "query": query,
            "products": products,
            "total_results": len(products),
            "filters_applied": response.filters_applied,
            "search_metadata": response.search_metadata
        }

    except Exception as e:
        import traceback
        print(f"Error in search_products_tool: {e}")
        traceback.print_exc(limit=3)
        return {
            "query": query,
            "products": [],
            "total_results": 0,
            "error": str(e)
        }

# Smoke-test both tool wrappers with direct calls.
# The wrappers catch their own exceptions and report them through an "error"
# key, so that key has to be inspected here; otherwise a backend failure would
# be indistinguishable from "no data ingested".
print("Testing tool functions...")

# Test retrieve tool
try:
    print("Testing retrieve_knowledge_tool...")
    retrieve_result = retrieve_knowledge_tool("iPhone features", top_k=3)
    if "error" in retrieve_result:
        print(f"Retrieve tool test failed: {retrieve_result['error']}")
    else:
        print(f"Retrieve tool test: Found {retrieve_result['total_results']} documents")
        if retrieve_result['results']:
            print(f"First result: {retrieve_result['results'][0]['content'][:100]}...")
        else:
            print("No documents found - this may be expected if no data is ingested")
except Exception as e:
    print(f"Retrieve tool test failed: {e}")

# Test search products tool
try:
    print("Testing search_products_tool...")
    search_result = search_products_tool("iPhone", limit=3)
    if "error" in search_result:
        print(f"Search products tool test failed: {search_result['error']}")
    else:
        print(f"Search products tool test: Found {search_result['total_results']} products")
        if search_result['products']:
            first_product = search_result['products'][0]
            price = first_product['price']
            # Show prices with two decimals; fall back to the raw value if it is not numeric
            price_text = f"{price:.2f}" if isinstance(price, (int, float)) else str(price)
            print(f"First product: {first_product['name']} - ${price_text}")
        else:
            print("No products found - this may be expected if no product data is available")
except Exception as e:
    print(f"Search products tool test failed: {e}")

print("\nTool functions are defined (results may vary based on available data).")

Testing tool functions...
Testing retrieve_knowledge_tool...
{"asctime": "2025-10-30 13:35:29,897", "name": "embedder", "levelname": "INFO", "message": "Loading embedding model: google/embeddinggemma-300m"}


2025-10-30 13:35:29,897 - embedder - INFO - Loading embedding model: google/embeddinggemma-300m
2025-10-30 13:35:29,899 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: google/embeddinggemma-300m
2025-10-30 13:35:38,050 - sentence_transformers.SentenceTransformer - INFO - 14 prompts are loaded, with the keys: ['query', 'document', 'BitextMining', 'Clustering', 'Classification', 'InstructionRetrieval', 'MultilabelClassification', 'PairClassification', 'Reranking', 'Retrieval', 'Retrieval-query', 'Retrieval-document', 'STS', 'Summarization']


{"asctime": "2025-10-30 13:35:39,004", "name": "embedder", "levelname": "INFO", "message": "Model loaded successfully. Embedding dimension: 768"}


2025-10-30 13:35:39,004 - embedder - INFO - Model loaded successfully. Embedding dimension: 768


{"asctime": "2025-10-30 13:35:39,131", "name": "bm25_vectorizer", "levelname": "ERROR", "message": "Failed to load BM25Vectorizer: [Errno 2] No such file or directory: 'data/models/bm25_d0a4624a.pkl'"}


2025-10-30 13:35:39,131 - bm25_vectorizer - ERROR - Failed to load BM25Vectorizer: [Errno 2] No such file or directory: 'data/models/bm25_d0a4624a.pkl'










Retrieve tool test: Found 3 documents
First result: ### Hearing
- **Mono Audio** for balanced listening
- **Live Listen** with Made for iPhone hearing a...
Testing search_products_tool...
{"asctime": "2025-10-30 13:35:39,137", "name": "search_product_tool", "levelname": "INFO", "message": "Searching products for query: 'iPhone'"}


2025-10-30 13:35:39,137 - search_product_tool - INFO - Searching products for query: 'iPhone'


{"asctime": "2025-10-30 13:35:39,274", "name": "search_product_tool", "levelname": "INFO", "message": "Found 3 products"}


2025-10-30 13:35:39,274 - search_product_tool - INFO - Found 3 products


Search products tool test: Found 3 products
First product: iPhone 16 Pro - $1059.690750260071

Tool functions are defined (results may vary based on available data).


In [3]:
# Load System Instructions
# The path is relative to the notebook's working directory. The encoding is
# given explicitly so the decoded prompt does not depend on the platform's
# locale default. If the file is missing the cell fails here, rather than
# leaving an empty prompt behind for later cells to use.
with open("../prompts/system_instructions.txt", "r", encoding="utf-8") as f:
    system_prompt = f.read()

print("System prompt loaded successfully")
print(f"System prompt length: {len(system_prompt)} characters")

# Tool schemas handed to the LLM: one entry per tool, each with a name, a
# natural-language description and a JSON-Schema style "parameters" object.
tools_schema = [
    # --- knowledge-base retrieval ---
    {
        "name": "retrieve_knowledge",
        "description": "Search the knowledge base for product information, policies, and general information. Use this when you need factual information about products, return policies, or general knowledge.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query for the knowledge base"},
                "top_k": {"type": "integer", "description": "Maximum number of documents to return (default: 5)", "default": 5},
                "search_mode": {
                    "type": "string",
                    "description": "Search mode: 'semantic' for conceptual search, 'keyword' for exact term matching, 'hybrid' for both (default: 'hybrid')",
                    "enum": ["semantic", "keyword", "hybrid"],
                    "default": "hybrid",
                },
                "similarity_threshold": {"type": "number", "description": "Minimum similarity threshold (default: 0.15)", "default": 0.15},
            },
            "required": ["query"],
        },
    },
    # --- product inventory search ---
    {
        "name": "search_products",
        "description": "Search the product inventory for specific items with pricing and availability. Use this when looking for specific products to buy or compare.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Product search query"},
                "category": {"type": "string", "description": "Filter by product category (e.g., 'Smartphones', 'Laptops', 'Audio')"},
                "min_price": {"type": "number", "description": "Minimum price filter"},
                "max_price": {"type": "number", "description": "Maximum price filter"},
                "brand": {"type": "string", "description": "Filter by brand (e.g., 'Apple', 'Samsung', 'Sony')"},
                "limit": {"type": "integer", "description": "Maximum number of results (default: 10)", "default": 10},
                "sort_by": {
                    "type": "string",
                    "description": "Sort order: 'relevance', 'price_low', 'price_high', 'rating'",
                    "enum": ["relevance", "price_low", "price_high", "rating"],
                    "default": "relevance",
                },
            },
            "required": ["query"],
        },
    },
]

# Short overview: tool name plus the first 80 characters of its description
print(f"Tool schemas defined: {len(tools_schema)} tools")
for tool in tools_schema:
    print(f"  - {tool['name']}: {tool['description'][:80]}...")

System prompt loaded successfully
System prompt length: 3530 characters
Tool schemas defined: 2 tools
  - retrieve_knowledge: Search the knowledge base for product information, policies, and general informa...
  - search_products: Search the product inventory for specific items with pricing and availability. U...


In [4]:
# Initialize LLM with Tools (Using Real LLMWithTools Class)

print("Initializing LLM with Tools...")
print("Using the real LLMWithTools class with tool calling capabilities...")

# Create LLM with tools instance using the real class
llm_with_tools = LLMWithTools(
    system_prompt=system_prompt,
    model="glm-4.5-air",
    tools=tools_schema,
    tool_choice="auto",
    max_timeout_per_request=60
)

# Register real tool functions
llm_with_tools.register_function("retrieve_knowledge", retrieve_knowledge_tool)
llm_with_tools.register_function("search_products", search_products_tool)

print("LLM with Tools initialized successfully!")
print(f"Model: {llm_with_tools.model}")
print(f"Tools registered: {list(llm_with_tools.available_functions.keys())}")

# Test single message with tools
print("\n" + "="*50)
print("TESTING SINGLE MESSAGE WITH TOOL CALLING")
print("="*50)

test_queries = [
    "What iPhones do you have available under $1000?",
    "Tell me about the iPhone 16 Pro features",
    "Show me products from Apple",
    "What's the difference between the 128GB and 256GB models?"
]

for i, query in enumerate(test_queries, 1):
    print(f"\nTest {i}: {query}")
    print("-" * 40)
    
    start_time = time.time()
    
    try:
        # Generate response with tool execution
        response = await llm_with_tools.generate_with_tool_execution(
            user_prompt=query,
            max_retries=2,
            max_tool_iterations=3
        )
        
        end_time = time.time()
        latency = (end_time - start_time) * 1000  # Convert to milliseconds
        
        if response["type"] == "text":
            print(f"Response: {response['content'][:300]}...")
            print(f"Latency: {latency:.2f}ms")
        elif response["type"] == "tool_calls":
            print(f"Tool calls requested: {[tc['name'] for tc in response.get('tool_calls', [])]}")
            print(f"Response preview: {response.get('content', '')[:200]}...")
            print(f"Latency: {latency:.2f}ms")
        elif response["type"] == "error":
            print(f"Error: {response['content']}")
            print(f"Latency: {latency:.2f}ms")
        
    except Exception as e:
        print(f"Error processing query: {str(e)}")
        import traceback
        print(traceback.format_exc(limit=3))
        latency = (time.time() - start_time) * 1000
        print(f"Failed latency: {latency:.2f}ms")

print("\nSingle message testing completed!")
print("✅ Real LLMWithTools class with tool calling is working!")

Initializing LLM with Tools...
Using the real LLMWithTools class with tool calling capabilities...
{"asctime": "2025-10-30 13:35:39,298", "name": "src.utils.llm_pipeline", "levelname": "INFO", "message": "Initializing LLM singleton with model: glm-4.5-air"}


2025-10-30 13:35:39,298 - src.utils.llm_pipeline - INFO - Initializing LLM singleton with model: glm-4.5-air


LLM with Tools initialized successfully!
Model: glm-4.5-air
Tools registered: ['retrieve_knowledge', 'search_products']

TESTING SINGLE MESSAGE WITH TOOL CALLING

Test 1: What iPhones do you have available under $1000?
----------------------------------------
{"asctime": "2025-10-30 13:35:41,859", "name": "search_product_tool", "levelname": "INFO", "message": "Searching products for query: 'iPhone'"}


2025-10-30 13:35:41,859 - search_product_tool - INFO - Searching products for query: 'iPhone'


{"asctime": "2025-10-30 13:35:42,052", "name": "search_product_tool", "levelname": "INFO", "message": "Found 1 products"}


2025-10-30 13:35:42,052 - search_product_tool - INFO - Found 1 products


Response: Based on the search results, I found one iPhone available under $1000:

**iPhone 16 Pro** - $968.13
- **Brand:** Apple
- **Category:** Smartphones
- **Status:** In Stock
- **Rating:** 4.2/5

**Key Specifications:**
- Screen size: 6.3 inches
- Storage: 256GB
- Camera: 48MP main camera
- Processor: A1...
Latency: 5938.23ms

Test 2: Tell me about the iPhone 16 Pro features
----------------------------------------




Response: Based on the information in our knowledge base, here are the key features of the iPhone 16 Pro:

## Design and Display
- **6.3-inch Super Retina XDR display** with ProMotion technology
- **Titanium construction** for enhanced durability and reduced weight
- **Ceramic Shield front** for improved drop...
Latency: 7287.92ms

Test 3: Show me products from Apple
----------------------------------------
{"asctime": "2025-10-30 13:35:53,817", "name": "search_product_tool", "levelname": "INFO", "message": "Searching products for query: 'Apple'"}


2025-10-30 13:35:53,817 - search_product_tool - INFO - Searching products for query: 'Apple'


{"asctime": "2025-10-30 13:35:54,058", "name": "search_product_tool", "levelname": "INFO", "message": "Found 9 products"}


2025-10-30 13:35:54,058 - search_product_tool - INFO - Found 9 products


Response: Here are the Apple products currently available in our inventory:

## Smartphones

### iPhone 16 Pro
- **Price:** $968.13 - $1,059.69
- **Description:** Latest iPhone with A18 Pro chip, titanium design, and advanced camera system
- **Specifications:**
  - 6.3 inches screen size
  - 256GB storage
  -...
Latency: 7509.69ms

Test 4: What's the difference between the 128GB and 256GB models?
----------------------------------------
Response: I need more information about which specific product you're asking about. Could you please let me know which device or product you're comparing? For example, are you asking about:

- An iPhone model?
- An Android phone?
- A laptop?
- A tablet?
- Another storage device?

Once you specify the product,...
Latency: 3162.08ms

Single message testing completed!
✅ Real LLMWithTools class with tool calling is working!


In [None]:
# ChatBot Class with Real Tool Integration

@dataclass
class ChatMessage:
    """
    One entry in the chat transcript.

    Attributes:
        role: Who produced the message - "user", "assistant" or "system".
        content: Text of the message.
        timestamp: Creation time, stored as a string.
        latency_ms: Milliseconds taken to produce the message, when recorded.
        tools_used: Names of the tools associated with the message, when recorded.
    """
    role: str
    content: str
    timestamp: str
    # The two optional fields default to None rather than 0 / [] so that
    # "not recorded" stays distinguishable from "recorded as empty".
    latency_ms: Optional[float] = None
    tools_used: Optional[List[str]] = None

class ChatBotWithTools:
    """
    Chatbot wrapper around an LLM client with tool execution and optional history.

    Features:
    - Single message queries (no history is read or written)
    - Multi-turn conversations whose recent history is replayed to the LLM as
      a "User: ... / Assistant: ..." transcript
    - Latency measurement for every request
    - Bounded message history
    - Exceptions are converted into error result dicts instead of propagating

    The wrapped client must provide an async
    ``generate_with_tool_execution(user_prompt=..., max_retries=..., max_tool_iterations=...)``
    method returning a dict with a "type" key, and an ``available_functions`` mapping.
    """

    def __init__(self, llm_with_tools, max_history_length: int = 20, max_context_messages: int = 6):
        """
        Initialize the ChatBot with tools.

        Args:
            llm_with_tools: Configured LLMWithTools instance
            max_history_length: Maximum number of messages to keep in history
            max_context_messages: Number of most recent non-system messages
                replayed to the LLM on each multi-turn request (default: 6)
        """
        self.llm = llm_with_tools
        self.max_history_length = max_history_length
        self.max_context_messages = max_context_messages
        self.conversation_history: List[ChatMessage] = []
        self.session_start_time = datetime.now()

        print(f"ChatBot initialized with {len(llm_with_tools.available_functions)} tools")
        print(f"Available tools: {list(llm_with_tools.available_functions.keys())}")
        print(f"Max history length: {max_history_length} messages")

    @staticmethod
    def _last_n(items: List[Any], count: int) -> List[Any]:
        """Return the final `count` items, or an empty list when `count` is not positive."""
        # A plain items[-count:] is wrong at the boundary: items[-0:] is the
        # whole list and a negative count would slice from the front.
        return items[-count:] if count > 0 else []

    @staticmethod
    def _interpret_response(response: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce a raw LLM response to {"success", "text", "tools_used"}."""
        response_type = response["type"]

        if response_type == "text":
            # NOTE(review): for text responses this class has no information
            # about which tools ran during generation, so tools_used stays
            # empty - confirm whether the client exposes that elsewhere.
            return {"success": True, "text": response["content"], "tools_used": []}

        if response_type == "tool_calls":
            # `or` (rather than a .get() default) also covers keys that are
            # present but set to None.
            tools_used = [tc["name"] for tc in (response.get("tool_calls") or [])]
            text = response.get("content") or "Tool execution completed."
            return {"success": True, "text": text, "tools_used": tools_used}

        # Any other type (including "error") is reported as a failure
        return {
            "success": False,
            "text": response.get("content") or "Unknown error",
            "tools_used": []
        }

    def _build_result(self, success: bool, text: str, latency_ms: float,
                      tools_used: List[str], message_type: str) -> Dict[str, Any]:
        """Assemble the result dict shared by the single- and multi-turn entry points."""
        text_key = "response" if success else "error"
        result: Dict[str, Any] = {
            "success": success,
            text_key: text,
            "latency_ms": latency_ms,
            "tools_used": tools_used,
            "message_type": message_type,
        }
        # Only multi-turn results report the size of the stored history
        if message_type == "multi_turn":
            result["history_length"] = len(self.conversation_history)
        result["tool_calls_made"] = len(tools_used) if success else 0
        return result

    async def single_turn_chat(self, user_message: str) -> Dict[str, Any]:
        """
        Process a single message without conversation history.

        Args:
            user_message: The user's input message

        Returns:
            Dict with "success", "response" (or "error" on failure),
            "latency_ms", "tools_used", "message_type" ("single_turn") and
            "tool_calls_made".
        """
        start_time = time.time()
        tools_used: List[str] = []

        try:
            response = await self.llm.generate_with_tool_execution(
                user_prompt=user_message,
                max_retries=2,
                max_tool_iterations=3
            )
            latency_ms = (time.time() - start_time) * 1000

            outcome = self._interpret_response(response)
            tools_used = outcome["tools_used"]
            return self._build_result(outcome["success"], outcome["text"], latency_ms,
                                      tools_used, "single_turn")

        except Exception as e:
            latency_ms = (time.time() - start_time) * 1000
            return self._build_result(False, str(e), latency_ms, tools_used, "single_turn")

    async def multi_turn_chat(self, user_message: str) -> Dict[str, Any]:
        """
        Process a message using the stored conversation history as context.

        The user message is appended to the history before the request; the
        assistant reply is appended only when the request succeeds.

        Args:
            user_message: The user's input message

        Returns:
            Dict with "success", "response" (or "error" on failure),
            "latency_ms", "tools_used", "message_type" ("multi_turn"),
            "history_length" and "tool_calls_made".
        """
        start_time = time.time()
        tools_used: List[str] = []

        # Add user message to history
        self.conversation_history.append(ChatMessage(
            role="user",
            content=user_message,
            timestamp=datetime.now().isoformat()
        ))

        try:
            # The whole recent transcript is sent as a single prompt string
            conversation_context = self._build_conversation_context()

            response = await self.llm.generate_with_tool_execution(
                user_prompt=conversation_context,
                max_retries=2,
                max_tool_iterations=3
            )
            latency_ms = (time.time() - start_time) * 1000

            outcome = self._interpret_response(response)
            tools_used = outcome["tools_used"]

            if outcome["success"]:
                # Add assistant response to history, then enforce the size cap
                self.conversation_history.append(ChatMessage(
                    role="assistant",
                    content=outcome["text"],
                    timestamp=datetime.now().isoformat(),
                    latency_ms=latency_ms,
                    tools_used=tools_used
                ))
                self._trim_history()

            return self._build_result(outcome["success"], outcome["text"], latency_ms,
                                      tools_used, "multi_turn")

        except Exception as e:
            latency_ms = (time.time() - start_time) * 1000
            return self._build_result(False, str(e), latency_ms, tools_used, "multi_turn")

    def _build_conversation_context(self) -> str:
        """Render the most recent non-system messages as a newline-joined transcript."""
        # Include recent conversation history (exclude system messages)
        recent_messages = [msg for msg in self.conversation_history if msg.role != "system"]

        # Limit to the last `max_context_messages` messages (not exchanges)
        context_messages = self._last_n(recent_messages, self.max_context_messages)

        context_parts = []
        for msg in context_messages:
            if msg.role == "user":
                context_parts.append(f"User: {msg.content}")
            elif msg.role == "assistant":
                context_parts.append(f"Assistant: {msg.content}")

        # Make sure the transcript ends with the latest user message. The
        # history may contain no user message at all, in which case there is
        # nothing to append.
        user_messages = [msg for msg in self.conversation_history if msg.role == "user"]
        if user_messages:
            latest_user_line = f"User: {user_messages[-1].content}"
            if not context_parts or not context_parts[-1].startswith(latest_user_line):
                context_parts.append(latest_user_line)

        return "\n".join(context_parts)

    def _trim_history(self):
        """Trim conversation history to maintain maximum length."""
        if len(self.conversation_history) > self.max_history_length:
            # System messages are always kept; the remaining budget goes to
            # the most recent other messages.
            system_messages = [msg for msg in self.conversation_history if msg.role == "system"]
            other_messages = [msg for msg in self.conversation_history if msg.role != "system"]

            remaining_budget = self.max_history_length - len(system_messages)
            self.conversation_history = system_messages + self._last_n(other_messages, remaining_budget)

    def get_conversation_history(self) -> List[Dict[str, Any]]:
        """Get the conversation history as a list of plain dicts."""
        return [
            {
                "role": msg.role,
                "content": msg.content,
                "timestamp": msg.timestamp,
                "latency_ms": msg.latency_ms,
                "tools_used": msg.tools_used or []
            }
            for msg in self.conversation_history
        ]

    def clear_history(self):
        """Clear conversation history but keep system messages."""
        system_messages = [msg for msg in self.conversation_history if msg.role == "system"]
        self.conversation_history = system_messages
        print("Conversation history cleared")

    def get_stats(self) -> Dict[str, Any]:
        """Get chatbot session statistics computed from the stored history."""
        user_messages = [msg for msg in self.conversation_history if msg.role == "user"]
        assistant_messages = [msg for msg in self.conversation_history if msg.role == "assistant"]

        # `is not None` keeps a legitimately recorded 0.0 latency in the average
        total_latencies = [msg.latency_ms for msg in assistant_messages if msg.latency_ms is not None]
        avg_latency = sum(total_latencies) / len(total_latencies) if total_latencies else 0

        # Count tool usage across all assistant messages
        tool_usage_counts: Dict[str, int] = {}
        for msg in assistant_messages:
            for tool in (msg.tools_used or []):
                tool_usage_counts[tool] = tool_usage_counts.get(tool, 0) + 1

        session_duration = (datetime.now() - self.session_start_time).total_seconds()

        return {
            "session_duration_seconds": session_duration,
            "total_messages": len(user_messages) + len(assistant_messages),
            "user_messages": len(user_messages),
            "assistant_messages": len(assistant_messages),
            "average_latency_ms": round(avg_latency, 2),
            "tools_available": list(self.llm.available_functions.keys()),
            "tool_usage_counts": tool_usage_counts,
            "total_tool_calls": sum(tool_usage_counts.values()),
            "history_length": len(self.conversation_history)
        }

# Build the notebook-wide chatbot around the LLM client created earlier,
# capping the stored history at 15 messages
print("Initializing ChatBot with real tool integration...")
chatbot = ChatBotWithTools(llm_with_tools=llm_with_tools, max_history_length=15)
print("ChatBot with real tools initialized successfully!")

# Snapshot of the session statistics right after construction
stats = chatbot.get_stats()
print(f"Session stats: {stats}")

print(
    "\n✅ ChatBot with real tool integration is working!",
    "✅ Ready for testing single and multi-turn conversations with automatic tool calling!",
    sep="\n",
)

In [None]:
# Interactive Demo and Usage Examples with Real Tool Integration

# Section banner: title framed by two 60-character rules
print("=" * 60, "INTERACTIVE CHATBOT DEMO WITH REAL TOOLS", "=" * 60, sep="\n")

async def interactive_demo():
    """Run a read-eval-print chat loop against the module-level ``chatbot``.

    Lines are read with ``input()`` until the user types ``quit``, presses
    Ctrl-C, or stdin reaches end-of-file. Commands are matched
    case-insensitively:

    - ``quit``: leave the loop.
    - ``stats``: print each key/value from ``chatbot.get_stats()``.
    - ``history``: print a one-line preview of every non-system message
      returned by ``chatbot.get_conversation_history()``.
    - ``clear``: call ``chatbot.clear_history()``.
    - ``mode``: toggle between single-turn and multi-turn dispatch.

    Any other non-empty line is awaited through ``chatbot.single_turn_chat``
    or ``chatbot.multi_turn_chat`` depending on the current mode (the loop
    starts in single-turn mode) and the outcome is printed.

    Returns:
        None. Everything is reported via ``print``.
    """
    print("\nThis interactive demo allows you to test the chatbot with real tool integration.")
    print("The chatbot will automatically call tools based on your queries:")
    print("- 📋 retrieve_knowledge: Search knowledge base for product information, policies")
    print("- 🛒 search_products: Search product inventory with pricing and availability")
    # The 'mode' command is handled below, so it is advertised here as well.
    print(
        "\nCommands: 'quit' to exit, 'stats' for session stats, "
        "'history' for conversation history, 'clear' to clear history, "
        "'mode' to toggle single/multi-turn mode"
    )
    print("-" * 60)

    conversation_mode = "single"  # Start with single turn mode

    while True:
        try:
            user_input = input("\nYou: ").strip()
            if not user_input:
                continue

            command = user_input.lower()

            if command == 'quit':
                print("Goodbye!")
                break

            if command == 'stats':
                print("\nSession Statistics:")
                for key, value in chatbot.get_stats().items():
                    print(f"  {key}: {value}")
                continue

            if command == 'history':
                history = chatbot.get_conversation_history()
                print(f"\nConversation History ({len(history)} messages):")
                for msg in history:
                    if msg['role'] == 'system':  # Skip system messages
                        continue
                    # Keep the first 19 characters of the timestamp
                    # (presumably an ISO string trimmed to seconds; confirm
                    # against get_conversation_history()).
                    timestamp = msg['timestamp'][:19]
                    role = msg['role'].upper()
                    content = msg['content']
                    content_preview = content[:150] + "..." if len(content) > 150 else content
                    tools = msg.get('tools_used')
                    tools_info = f" [Tools: {', '.join(tools)}]" if tools else ""
                    print(f"  [{timestamp}] {role}: {content_preview}{tools_info}")
                continue

            if command == 'clear':
                chatbot.clear_history()
                print("Conversation history cleared!")
                continue

            if command == 'mode':
                conversation_mode = "multi" if conversation_mode == "single" else "single"
                print(f"Switched to {conversation_mode}-turn mode")
                continue

            # Anything else is a chat message: dispatch according to the mode.
            print(f"\nProcessing with {conversation_mode}-turn mode...")
            started = time.perf_counter()

            if conversation_mode == "single":
                result = await chatbot.single_turn_chat(user_input)
            else:
                result = await chatbot.multi_turn_chat(user_input)

            elapsed_ms = (time.perf_counter() - started) * 1000

            # Prefer the latency reported by the chatbot; fall back to the
            # wall-clock time measured here when the result omits it.
            latency_ms = result.get('latency_ms')
            if latency_ms is None:
                latency_ms = elapsed_ms

            if result["success"]:
                print(f"\nAssistant: {result['response']}")

                # Show tool usage information
                tool_info = []
                tool_calls_made = result.get("tool_calls_made") or 0
                if tool_calls_made > 0:
                    tool_info.append(f"Tools called: {tool_calls_made}")
                tools_used = result.get("tools_used")
                if tools_used:
                    tool_info.append(f"Tools used: {', '.join(tools_used)}")

                latency_info = f"Latency: {latency_ms:.2f}ms"
                if tool_info:
                    latency_info += f" | {', '.join(tool_info)}"

                print(f"\n[{latency_info}]")

                if result.get("message_type") == "multi-turn":
                    print(f"[History length: {result['history_length']} messages]")
            else:
                print(f"\nError: {result.get('error', 'unknown error')}")
                print(f"[Latency: {latency_ms:.2f}ms]")

        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin is exhausted/closed; treating it like any
            # other error would make the loop retry input() forever.
            print("\n\nGoodbye!")
            break
        except Exception as e:
            # Best-effort REPL: report the problem and keep the session alive.
            print(f"\nError: {e}")

# Comprehensive performance testing with real tools.
# Section banner: blank line, rule, title, rule.
print(
    "\n" + "=" * 60,
    "PERFORMANCE TESTING WITH REAL TOOL INTEGRATION",
    "=" * 60,
    sep="\n",
)

async def _timed_chat(chat_fn, scenario):
    """Await ``chat_fn(scenario['query'])`` and flatten the outcome into a record.

    The record carries the wall-clock latency in milliseconds measured around
    the call, the chatbot's ``success`` flag, the tools it reports using, and
    which of the scenario's ``expected_tools`` were not among them.
    """
    started = time.perf_counter()
    result = await chat_fn(scenario['query'])
    latency_ms = (time.perf_counter() - started) * 1000

    tools_used = result.get('tools_used') or []
    return {
        "scenario": scenario['name'],
        "latency": latency_ms,
        "success": result['success'],
        "tools_used": tools_used,
        "tool_calls": result.get('tool_calls_made') or 0,
        "missing_tools": [tool for tool in scenario['expected_tools'] if tool not in tools_used],
        "response": result.get('response') or "",
        "error": result.get('error'),
    }


def _print_mode_result(label, record, show_preview=False):
    """Print the per-scenario outcome of one mode ("Single-turn"/"Multi-turn")."""
    icon = "✅" if record["success"] else "❌"
    print(f"{icon} {label}: {record['latency']:.2f}ms - Success: {record['success']}")

    if not record["success"]:
        if record["error"]:
            print(f"   Error: {record['error']}")
        return

    print(f"   Tools used: {record['tools_used']}")
    print(f"   Tool calls made: {record['tool_calls']}")
    if record["missing_tools"]:
        print(f"   ⚠️ Expected tools not used: {record['missing_tools']}")
    if show_preview:
        text = record["response"]
        # Only mark the preview as truncated when it actually was.
        preview = text[:100] + "..." if len(text) > 100 else text
        print(f"   Response preview: {preview}")


def _count_tools(records):
    """Count, per tool name, how many records list that tool in ``tools_used``."""
    counts = {}
    for record in records:
        for tool in record['tools_used']:
            counts[tool] = counts.get(tool, 0) + 1
    return counts


async def comprehensive_performance_test():
    """Run a fixed set of queries through both chat modes and print a report.

    For every scenario the query is sent to ``chatbot.single_turn_chat`` and,
    after ``chatbot.clear_history()``, to ``chatbot.multi_turn_chat``. Each
    call is timed with a wall clock. The report covers per-scenario outcomes,
    success rates, average latencies, the signed multi-vs-single latency
    difference, and tool-usage counts.

    A run is marked ✅ only when the chatbot reports ``success``; the final
    "working correctly" line is printed only when every run succeeded and
    used all of its scenario's ``expected_tools``.

    Returns:
        None. Everything is reported via ``print``.
    """
    test_scenarios = [
        {
            "name": "Product Search (should use search_products)",
            "query": "What iPhones do you have available under $1000?",
            "expected_tools": ["search_products"]
        },
        {
            "name": "Product Features (should use retrieve_knowledge)",
            "query": "What are the key features of iPhone 16 Pro?",
            "expected_tools": ["retrieve_knowledge"]
        },
        {
            "name": "Brand Search (should use search_products)",
            "query": "Show me all Apple products",
            "expected_tools": ["search_products"]
        },
        {
            "name": "Policy Query (should use retrieve_knowledge)",
            "query": "What is your return policy?",
            "expected_tools": ["retrieve_knowledge"]
        },
        {
            "name": "Complex Query (might use multiple tools)",
            "query": "I need a phone for photography with good battery life under $800. Compare options and tell me about return policies.",
            "expected_tools": ["search_products", "retrieve_knowledge"]
        }
    ]

    print("Running comprehensive performance tests...")
    print("-" * 50)

    single_turn_results = []
    multi_turn_results = []

    for scenario in test_scenarios:
        print(f"\nTesting: {scenario['name']}")
        print(f"Query: {scenario['query']}")
        print(f"Expected tools: {scenario['expected_tools']}")

        print("Testing single-turn...")
        single_record = await _timed_chat(chatbot.single_turn_chat, scenario)

        # Clear history first so the multi-turn run starts from a clean slate.
        chatbot.clear_history()
        print("Testing multi-turn...")
        multi_record = await _timed_chat(chatbot.multi_turn_chat, scenario)

        single_turn_results.append(single_record)
        multi_turn_results.append(multi_record)

        _print_mode_result("Single-turn", single_record, show_preview=True)
        _print_mode_result("Multi-turn", multi_record)

    # Performance Analysis
    print("\n" + "=" * 50)
    print("PERFORMANCE ANALYSIS")
    print("=" * 50)

    def _mean_latency(records):
        return sum(r['latency'] for r in records) / len(records) if records else 0.0

    def _rate(part, whole):
        return part / whole * 100 if whole else 0.0

    avg_single_latency = _mean_latency(single_turn_results)
    avg_multi_latency = _mean_latency(multi_turn_results)

    successful_single = sum(1 for r in single_turn_results if r['success'])
    successful_multi = sum(1 for r in multi_turn_results if r['success'])

    total_tool_calls_single = sum(r['tool_calls'] for r in single_turn_results)
    total_tool_calls_multi = sum(r['tool_calls'] for r in multi_turn_results)

    print("Success Rate:")
    print(f"  Single-turn: {successful_single}/{len(single_turn_results)} ({_rate(successful_single, len(single_turn_results)):.1f}%)")
    print(f"  Multi-turn: {successful_multi}/{len(multi_turn_results)} ({_rate(successful_multi, len(multi_turn_results)):.1f}%)")

    # Signed difference: positive means multi-turn was slower than single-turn.
    overhead_ms = avg_multi_latency - avg_single_latency
    overhead_pct = overhead_ms / avg_single_latency * 100 if avg_single_latency else 0.0

    print("\nAverage Latency:")
    print(f"  Single-turn: {avg_single_latency:.2f}ms")
    print(f"  Multi-turn: {avg_multi_latency:.2f}ms")
    print(f"  Overhead (multi vs single): {overhead_ms:+.2f}ms ({overhead_pct:+.1f}%)")

    print("\nTool Usage:")
    print(f"  Total tool calls (single-turn): {total_tool_calls_single}")
    print(f"  Total tool calls (multi-turn): {total_tool_calls_multi}")

    # Tool usage breakdown
    tool_usage_single = _count_tools(single_turn_results)
    tool_usage_multi = _count_tools(multi_turn_results)

    print("\nTool Usage Breakdown:")
    # Sorted so the report order is stable from run to run.
    for tool in sorted(set(tool_usage_single) | set(tool_usage_multi)):
        single_count = tool_usage_single.get(tool, 0)
        multi_count = tool_usage_multi.get(tool, 0)
        print(f"  {tool}: {single_count} (single) / {multi_count} (multi)")

    all_records = single_turn_results + multi_turn_results
    failed_runs = sum(1 for r in all_records if not r['success'])
    mismatched_runs = sum(1 for r in all_records if r['success'] and r['missing_tools'])

    print("\n✅ Performance testing completed!")
    if failed_runs == 0 and mismatched_runs == 0:
        print("✅ Tool integration is working correctly!")
    else:
        print(
            f"⚠️ Tool integration needs attention: {failed_runs} failed run(s), "
            f"{mismatched_runs} run(s) missing expected tools"
        )

# Run comprehensive performance test
await comprehensive_performance_test()

print("\n" + "=" * 60)
print("READY FOR INTERACTIVE USE")
print("=" * 60)
print("\nTo start interactive demo, run:")
print("await interactive_demo()")
print("\nTo test individual queries:")
print("result = await chatbot.single_turn_chat('your query here')")
print("or")
print("result = await chatbot.multi_turn_chat('your query here')")

print("\nChatBot Features:")
print("✅ Real tool integration (retrieve_knowledge, search_products)")
print("✅ Automatic tool calling based on query analysis")
print("✅ Single-turn chat with tool execution")
print("✅ Multi-turn chat with conversation context")
print("✅ Latency tracking and optimization")
print("✅ Conversation persistence with tool context")
print("✅ Error handling and recovery")
print("✅ Session statistics and tool usage analytics")
print("✅ Comprehensive performance monitoring")

# Display current session stats
current_stats = chatbot.get_stats()
print(f"\nCurrent session stats: {current_stats}")

print("\nExample queries to try:")
print("- 'What iPhones do you have under $1000?'")
print("- 'Tell me about iPhone 16 Pro features'")
print("- 'What is your return policy?'")
print("- 'Show me Apple products with good ratings'")
print("- 'Compare laptops and tell me about warranty policies'")