### 03.03. Setting up Indexes

In [2]:
#Install prerequisite packages
!pip install python-dotenv==1.0.0

!pip install llama-index==0.10.59
!pip install llama-index-llms-openai==0.1.27
!pip install llama-index-embeddings-openai==0.1.11
!pip install llama-index-llms-azure-openai==0.1.10
!pip install llama-index-embeddings-azure-openai==0.1.11

!pip install llama-index-llms-openai-like
!pip install llama-index-embeddings-huggingface
!pip install sentence-transformers


Collecting llama-index-llms-openai-like
  Downloading llama_index_llms_openai_like-0.4.0-py3-none-any.whl.metadata (1.1 kB)
Collecting llama-index-core<0.13,>=0.12.0 (from llama-index-llms-openai-like)
  Downloading llama_index_core-0.12.43-py3-none-any.whl.metadata (2.5 kB)
Collecting llama-index-llms-openai<0.5,>=0.4.0 (from llama-index-llms-openai-like)
  Downloading llama_index_llms_openai-0.4.7-py3-none-any.whl.metadata (3.0 kB)
Collecting transformers<5,>=4.37.0 (from llama-index-llms-openai-like)
  Downloading transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting aiosqlite (from llama-index-core<0.13,>=0.12.0->llama-index-llms-openai-like)
  Downloading aiosqlite-0.21.0-py3-none-any.whl.metadata (4.3 kB)
Collecting banks<3,>=2.0.0 (from llama-index-core<0.13,>=0.12.0->llama-index-llms-openai-like)
  Downloading banks-2.1.2-py3-none-any.whl.metadata (12 kB)
Collecting filetype<2,>=1.2.0 (from llama-index-core<0.13,>=0.12.0->llama-index-llms-openai-like)
  

In [25]:
from llama_index.llms.openai_like import OpenAILike
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
import nest_asyncio

# Patch asyncio so event loops can be nested. Presumably required because the
# notebook kernel already runs a loop -- confirm before removing.
nest_asyncio.apply()

def setup_local_models(llm_model=None, embedding_model=None):
    """
    Configure the global LlamaIndex ``Settings`` with a local LLM and an embedding model.

    Both arguments accept either a menu key ("1", "2", ...) or a model name.
    When an argument is None the user is prompted interactively; an
    unrecognised interactive answer falls back to the first menu entry.

    Args:
        llm_model (str | None): Menu key or name of a model served by LM Studio.
            A name that is not in the menu is passed to the server unchanged.
        embedding_model (str | None): Menu key or name of the embedding model.
            "text-embedding-ada-002" is requested from the local server; any
            other value selects the HuggingFace all-MiniLM-L6-v2 model.

    Returns:
        tuple: ``(Settings.llm, Settings.embed_model)`` after configuration.
    """

    # Available LLM models (make sure these match what you have in LM Studio)
    available_llm_models = {
        "1": "deepseek-coder-33b-instruct",
        "2": "open_gpt4_8x7b_v0.2",
        "3": "llama-3-groq-8b-tool-use"
    }

    # Available embedding models
    available_embedding_models = {
        "1": "all-MiniLM-L6-v2",
        "2": "text-embedding-ada-002"
    }

    default_llm_model = available_llm_models["1"]
    default_embedding_model = available_embedding_models["1"]

    # Local server configuration
    local_llm_url = "http://127.0.0.1:1234/v1"

    # Choose LLM model
    if llm_model is None:
        print("Available LLM models:")
        for key, model in available_llm_models.items():
            print(f"{key}. {model}")

        choice = input("Choose LLM model (1-3): ").strip()
        llm_model = available_llm_models.get(choice, default_llm_model)
    else:
        # Accept a menu key here too, the same way embedding_model does;
        # anything that is not a key is treated as a model name.
        llm_model = available_llm_models.get(str(llm_model).strip(), llm_model)

    print(f"Selected LLM: {llm_model}")

    # Setup the LLM
    Settings.llm = OpenAILike(
        model=llm_model,
        api_base=local_llm_url,
        api_key="lm-studio",
        is_local=True,
        temperature=0.1,
        max_tokens=2048,
    )

    # Choose embedding model
    if embedding_model is None:
        print("\nAvailable embedding models:")
        for key, model in available_embedding_models.items():
            print(f"{key}. {model}")

        choice = input("Choose embedding model (1-2): ").strip()
        embedding_model = available_embedding_models.get(choice, default_embedding_model)
    else:
        # Normalise a menu key to the model name it stands for.
        embedding_model = available_embedding_models.get(
            str(embedding_model).strip(), embedding_model
        )

    # Setup embedding model based on choice
    if embedding_model == "text-embedding-ada-002":
        print("Selected embedding: text-embedding-ada-002 (via local server)")
        # Try to use OpenAI-compatible embeddings through local server
        Settings.embed_model = OpenAIEmbedding(
            model="text-embedding-ada-002",
            api_base=local_llm_url,
            api_key="lm-studio",
        )
    else:
        # Every other value (including unknown names) uses the HuggingFace model.
        print("Selected embedding: all-MiniLM-L6-v2 (HuggingFace)")
        Settings.embed_model = HuggingFaceEmbedding(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

    print("\n✅ Setup complete!")
    print(f"LLM: {Settings.llm.model}")
    print(f"Embedding: {type(Settings.embed_model).__name__}")

    return Settings.llm, Settings.embed_model

# Quick setup functions for faster configuration
def setup_fast_config():
    """Apply the speed-oriented preset and return what setup_local_models returns."""
    print("Setting up fast configuration...")
    preset = {
        "llm_model": "llama-3-groq-8b-tool-use",  # usually faster than deepseek
        "embedding_model": "1",                   # HuggingFace MiniLM
    }
    return setup_local_models(**preset)

def setup_quality_config():
    """Apply the quality-oriented preset and return what setup_local_models returns."""
    print("Setting up quality configuration...")
    preset = {
        "llm_model": "deepseek-coder-33b-instruct",  # higher quality but slower
        "embedding_model": "2",                      # Ada-002 if available locally
    }
    return setup_local_models(**preset)

def setup_balanced_config():
    """Apply the balanced speed/quality preset and return what setup_local_models returns."""
    print("Setting up balanced configuration...")
    preset = {
        "llm_model": "open_gpt4_8x7b_v0.2",  # balance of speed and quality
        "embedding_model": "1",              # fast embeddings
    }
    return setup_local_models(**preset)

# Interactive setup
menu_lines = (
    "=== Local Model Configuration ===",
    "Choose setup mode:",
    "1. Interactive (choose each model)",
    "2. Fast (optimized for speed)",
    "3. Quality (optimized for quality)",
    "4. Balanced (speed + quality)",
)
for menu_line in menu_lines:
    print(menu_line)

mode = input("Enter your choice (1-4): ").strip()

# Modes 2-4 map to a preset; any other answer (including "1") runs the
# fully interactive setup.
preset_by_mode = {
    "2": setup_fast_config,
    "3": setup_quality_config,
    "4": setup_balanced_config,
}
preset_by_mode.get(mode, setup_local_models)()

=== Local Model Configuration ===
Choose setup mode:
1. Interactive (choose each model)
2. Fast (optimized for speed)
3. Quality (optimized for quality)
4. Balanced (speed + quality)
Setting up balanced configuration...
Selected LLM: open_gpt4_8x7b_v0.2
Selected embedding: all-MiniLM-L6-v2 (HuggingFace)

✅ Setup complete!
LLM: open_gpt4_8x7b_v0.2
Embedding: HuggingFaceEmbedding


# Setting up Vector Indexing

In [26]:
import os
import json
import numpy as np
from datetime import datetime

def inspect_vector_indexes(aeroflow_index=None, ecosprint_index=None,
                           aeroflow_nodes=None, ecosprint_nodes=None):
    """
    Print an inspection report for the AeroFlow and EcoSprint vector indexes.

    The report lists the working directory, the contents of a few conventional
    on-disk storage directories, and then a per-index analysis (delegated to
    inspect_single_index) for every index that was supplied together with a
    non-empty node list.

    Args:
        aeroflow_index: AeroFlow index object, or None to skip that section.
        ecosprint_index: EcoSprint index object, or None to skip that section.
        aeroflow_nodes: Nodes the AeroFlow index was built from.
        ecosprint_nodes: Nodes the EcoSprint index was built from.

    Returns:
        None. Everything is written to stdout.
    """

    print("="*80)
    print("🔍 VECTOR INDEX INSPECTION REPORT")
    print("="*80)
    print(f"Generated at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    # 1. Check where indexes are stored
    print("📂 INDEX STORAGE LOCATIONS")
    print("-" * 40)

    current_dir = os.getcwd()
    print(f"Current working directory: {current_dir}")

    # Check for common LlamaIndex storage directories
    storage_dirs = ['./storage', './index_storage', './vector_store']
    for storage_dir in storage_dirs:
        # isdir rather than exists: a regular file with one of these names
        # would otherwise make os.listdir raise NotADirectoryError.
        if os.path.isdir(storage_dir):
            print(f"Found storage directory: {storage_dir}")
            for item in sorted(os.listdir(storage_dir)):
                print(f"  - {item}")
        else:
            print(f"No storage found at: {storage_dir}")

    print("\n💡 Note: By default, VectorStoreIndex stores data in memory.")
    print("   To persist indexes, you'd need to use index.storage_context.persist()")

    # 2./3. Per-index analysis, skipped when the index or its nodes are missing
    sections = (
        ("AeroFlow", "🚁 AEROFLOW INDEX ANALYSIS", aeroflow_index, aeroflow_nodes),
        ("EcoSprint", "🏃 ECOSPRINT INDEX ANALYSIS", ecosprint_index, ecosprint_nodes),
    )
    for name, banner, index, nodes in sections:
        if index and nodes:
            print("\n" + "="*60)
            print(banner)
            print("="*60)
            inspect_single_index(name, index, nodes)

def inspect_single_index(name, index, nodes):
    """
    Print a detailed report for one vector index and the nodes it was built from.

    Args:
        name: Label used in the report headings.
        index: Object exposing a ``vector_store`` attribute.
        nodes: Sequence of nodes exposing ``text`` and ``node_id`` (and
            optionally ``metadata`` / ``embedding``). None is treated as empty.

    Returns:
        None. Everything is written to stdout.
    """
    nodes = nodes or []

    print(f"\n📊 {name} INDEX OVERVIEW")
    print("-" * 30)

    # Basic stats
    print(f"Total nodes: {len(nodes)}")

    # Vector store info
    vector_store = index.vector_store
    print(f"Vector store type: {type(vector_store).__name__}")

    # Vector store stats are best-effort: they rely on optional / private
    # attributes, so a failure is reported instead of aborting the report.
    try:
        if hasattr(vector_store, 'client'):
            print(f"Vector store client: {type(vector_store.client).__name__}")
        if hasattr(vector_store, '_data'):
            if hasattr(vector_store._data, 'embedding_dict'):
                print(f"Stored vectors: {len(vector_store._data.embedding_dict)}")
    except Exception as exc:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        print(f"Could not read vector store stats: {exc}")

    # Node analysis
    print(f"\n📝 {name} NODE CONTENT ANALYSIS")
    print("-" * 35)

    if nodes:
        # Text length statistics
        text_lengths = [len(node.text) for node in nodes]
        print(f"Average chunk size: {np.mean(text_lengths):.0f} characters")
        print(f"Min chunk size: {min(text_lengths)} characters")
        print(f"Max chunk size: {max(text_lengths)} characters")

        # Show first few chunks
        print(f"\n📄 SAMPLE CHUNKS FROM {name}")
        print("-" * 30)

        for i, node in enumerate(nodes[:3], 1):  # Show first 3 chunks
            text = node.text
            print(f"\n--- Chunk {i} ---")
            print(f"Node ID: {node.node_id}")
            print(f"Length: {len(text)} characters")

            # Show metadata if available
            if hasattr(node, 'metadata') and node.metadata:
                print(f"Metadata: {node.metadata}")

            # Show first 200 chars of content
            preview = text[:200] + "..." if len(text) > 200 else text
            print(f"Content preview:\n{preview}")

            # Check if node has embedding
            if hasattr(node, 'embedding') and node.embedding:
                print(f"Embedding dimension: {len(node.embedding)}")
            else:
                print("Embedding: Not stored in node")

def _write_chunks_file(filename, title, nodes):
    """Write one UTF-8 text report listing every node in ``nodes`` under ``title``."""
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"{title}\n")
        f.write("=" * 50 + "\n\n")

        for i, node in enumerate(nodes, 1):
            f.write(f"CHUNK {i}\n")
            f.write("-" * 20 + "\n")
            f.write(f"Node ID: {node.node_id}\n")
            f.write(f"Length: {len(node.text)} characters\n")
            if hasattr(node, 'metadata') and node.metadata:
                f.write(f"Metadata: {node.metadata}\n")
            f.write(f"Content:\n{node.text}\n\n")
            f.write("=" * 50 + "\n\n")


def save_index_contents_to_files(aeroflow_nodes=None, ecosprint_nodes=None):
    """
    Dump the full text of each node list to a timestamped file in the working directory.

    One file per non-empty node list is created, named
    ``aeroflow_chunks_<timestamp>.txt`` / ``ecosprint_chunks_<timestamp>.txt``.
    A node list that is None or empty is skipped.

    Args:
        aeroflow_nodes: Nodes exposing ``node_id`` and ``text`` (optionally ``metadata``).
        ecosprint_nodes: Same, for the EcoSprint document.

    Returns:
        None. File names are reported on stdout.
    """

    print("\n💾 SAVING INDEX CONTENTS TO FILES")
    print("-" * 40)

    # One timestamp for the whole call so both files share the same suffix.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    exports = (
        ("AeroFlow", "aeroflow", "AEROFLOW DOCUMENT CHUNKS", aeroflow_nodes),
        ("EcoSprint", "ecosprint", "ECOSPRINT DOCUMENT CHUNKS", ecosprint_nodes),
    )
    for label, stem, title, nodes in exports:
        if not nodes:
            continue
        filename = f"{stem}_chunks_{timestamp}.txt"
        _write_chunks_file(filename, title, nodes)
        print(f"✅ {label} chunks saved to: {filename}")

def analyze_embedding_vectors(index, sample_size=5):
    """
    Print summary statistics for up to ``sample_size`` embeddings held by ``index``.

    The embeddings are read from ``index.vector_store._data.embedding_dict``;
    when that attribute chain is missing or empty a short notice is printed
    instead. Any exception raised along the way is reported, not propagated.
    """

    print(f"\n🧮 EMBEDDING VECTOR ANALYSIS")
    print("-" * 35)

    try:
        store = index.vector_store

        if not (hasattr(store, '_data') and hasattr(store._data, 'embedding_dict')):
            print("Cannot access embedding vectors (may be stored differently)")
            return

        vectors_by_id = store._data.embedding_dict
        if not vectors_by_id:
            print("Embedding dictionary is empty")
            return

        # Take the first few vectors as the sample
        samples = list(vectors_by_id.values())[:sample_size]
        if not samples:
            print("No embedding vectors found")
            return

        # Dimension is taken from the first sampled vector
        print(f"Embedding dimension: {len(samples[0])}")

        # Basic statistics over all sampled values
        matrix = np.array(samples)
        print(f"Sample size: {len(samples)}")
        summary = (
            ("Mean value", np.mean),
            ("Std deviation", np.std),
            ("Min value", np.min),
            ("Max value", np.max),
        )
        for label, reducer in summary:
            print(f"{label}: {reducer(matrix):.6f}")

    except Exception as e:
        print(f"Error analyzing embeddings: {e}")

# Usage function
def run_full_inspection():
    """
    Run the complete inspection against the indexes defined in the notebook globals.

    Looks up ``aeroflow_index``, ``ecosprint_index``, ``aeroflow_nodes`` and
    ``ecosprint_nodes`` by name. A name that is undefined, or defined as None
    (as the indexing cell does when a PDF is missing), is treated as absent.
    The function then prints the index report, writes the chunk files and
    prints the embedding statistics for each available index.
    """
    scope = globals()
    aero_index = scope.get('aeroflow_index')
    eco_index = scope.get('ecosprint_index')
    aero_nodes = scope.get('aeroflow_nodes')
    eco_nodes = scope.get('ecosprint_nodes')

    try:
        inspect_vector_indexes(
            aeroflow_index=aero_index,
            ecosprint_index=eco_index,
            aeroflow_nodes=aero_nodes,
            ecosprint_nodes=eco_nodes
        )

        # Save contents to files
        save_index_contents_to_files(
            aeroflow_nodes=aero_nodes,
            ecosprint_nodes=eco_nodes
        )

        # Analyze embeddings. Check the value, not just that the name exists:
        # the name can be bound to None when its source file was not found.
        if aero_index is not None:
            print("\n🚁 AEROFLOW EMBEDDINGS")
            analyze_embedding_vectors(aero_index)

        if eco_index is not None:
            print("\n🏃 ECOSPRINT EMBEDDINGS")
            analyze_embedding_vectors(eco_index)

    except NameError as e:
        print(f"❌ Error: {e}")
        print("Make sure to run this after creating your indexes!")

# Confirm the inspection helpers are defined and show how to invoke them.
for banner_line in (
    "🔍 Vector Index Inspector loaded!",
    "After creating your indexes, run: run_full_inspection()",
):
    print(banner_line)

🔍 Vector Index Inspector loaded!
After creating your indexes, run: run_full_inspection()


In [29]:
# Create indexes for vector search
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex
import os

def _build_pdf_index(label, pdf_path, node_splitter):
    """Load one PDF, split it into nodes and build a vector index plus query engine.

    Args:
        label: Product name used in the progress messages.
        pdf_path: Path of the PDF to index.
        node_splitter: Splitter providing ``get_nodes_from_documents``.

    Returns:
        tuple: ``(documents, nodes, index, query_engine)``; all four are None
        when ``pdf_path`` does not exist.
    """
    if not os.path.exists(pdf_path):
        print(f"Warning: {pdf_path} not found!")
        return None, None, None, None

    documents = SimpleDirectoryReader(input_files=[pdf_path]).load_data()
    print(f"Loaded {len(documents)} {label} documents")

    # Read documents into nodes
    nodes = node_splitter.get_nodes_from_documents(documents)
    print(f"Created {len(nodes)} {label} nodes")

    # Create a vector store
    print(f"Creating {label} vector index...")
    index = VectorStoreIndex(nodes)

    # Create a query engine
    query_engine = index.as_query_engine()
    print(f"{label} query engine ready!")

    return documents, nodes, index, query_engine


# Initialize the splitter
splitter = SentenceSplitter(chunk_size=1024)

#-------------------------------------------------------------------
# Setup AeroFlow document index
#-------------------------------------------------------------------
print("Loading AeroFlow documents...")
aeroflow_file = "AeroFlow_Specification_Document.pdf"
(aeroflow_documents, aeroflow_nodes,
 aeroflow_index, aeroflow_query_engine) = _build_pdf_index("AeroFlow", aeroflow_file, splitter)

#-------------------------------------------------------------------
# Setup EcoSprint document index
#-------------------------------------------------------------------
print("\nLoading EcoSprint documents...")
ecosprint_file = "EcoSprint_Specification_Document.pdf"
(ecosprint_documents, ecosprint_nodes,
 ecosprint_index, ecosprint_query_engine) = _build_pdf_index("EcoSprint", ecosprint_file, splitter)

print("\nVector indexing complete!")
# Only claim readiness for the engines that were actually built.
missing_engines = [
    label
    for label, engine in (("AeroFlow", aeroflow_query_engine),
                          ("EcoSprint", ecosprint_query_engine))
    if engine is None
]
if not missing_engines:
    print("Both query engines are ready to use with your local LLM setup.")
else:
    print(f"⚠️ No query engine was created for: {', '.join(missing_engines)}")

# Inspect what was just built
print("\n" + "="*60)
print("🔍 RUNNING INDEX INSPECTION...")
print("="*60)

# Run the inspection
inspect_vector_indexes(
    aeroflow_index=aeroflow_index,
    ecosprint_index=ecosprint_index,
    aeroflow_nodes=aeroflow_nodes,
    ecosprint_nodes=ecosprint_nodes
)

# Save detailed contents to files
save_index_contents_to_files(
    aeroflow_nodes=aeroflow_nodes,
    ecosprint_nodes=ecosprint_nodes
)

# Analyze embeddings
if aeroflow_index is not None:
    print("\n🚁 AEROFLOW EMBEDDINGS")
    analyze_embedding_vectors(aeroflow_index)

if ecosprint_index is not None:
    print("\n🏃 ECOSPRINT EMBEDDINGS")
    analyze_embedding_vectors(ecosprint_index)

Loading AeroFlow documents...
Loaded 2 AeroFlow documents
Created 2 AeroFlow nodes
Creating AeroFlow vector index...
AeroFlow query engine ready!

Loading EcoSprint documents...
Loaded 2 EcoSprint documents
Created 2 EcoSprint nodes
Creating EcoSprint vector index...
EcoSprint query engine ready!

Vector indexing complete!
Both query engines are ready to use with your local LLM setup.

🔍 RUNNING INDEX INSPECTION...
🔍 VECTOR INDEX INSPECTION REPORT
Generated at: 2025-06-18 23:21:30

📂 INDEX STORAGE LOCATIONS
----------------------------------------
Current working directory: /Users/jarotball/Setups/agentic-ai-for-developers-concepts-and-applications-for-enterprises-3913172
No storage found at: ./storage
No storage found at: ./index_storage
No storage found at: ./vector_store

💡 Note: By default, VectorStoreIndex stores data in memory.
   To persist indexes, you'd need to use index.storage_context.persist()

🚁 AEROFLOW INDEX ANALYSIS

📊 AeroFlow INDEX OVERVIEW
---------------------------

In [30]:
# Produce the full report again through the helper, which looks the indexes
# and node lists up in the notebook globals.
run_full_inspection()

🔍 VECTOR INDEX INSPECTION REPORT
Generated at: 2025-06-18 23:21:48

📂 INDEX STORAGE LOCATIONS
----------------------------------------
Current working directory: /Users/jarotball/Setups/agentic-ai-for-developers-concepts-and-applications-for-enterprises-3913172
No storage found at: ./storage
No storage found at: ./index_storage
No storage found at: ./vector_store

💡 Note: By default, VectorStoreIndex stores data in memory.
   To persist indexes, you'd need to use index.storage_context.persist()

🚁 AEROFLOW INDEX ANALYSIS

📊 AeroFlow INDEX OVERVIEW
------------------------------
Total nodes: 2
Vector store type: SimpleVectorStore
Vector store client: NoneType
Stored vectors: 2

📝 AeroFlow NODE CONTENT ANALYSIS
-----------------------------------
Average chunk size: 1239 characters
Min chunk size: 262 characters
Max chunk size: 2216 characters

📄 SAMPLE CHUNKS FROM AeroFlow
------------------------------

--- Chunk 1 ---
Node ID: 96adfe61-c901-4161-aa79-9372d5c4a8bf
Length: 2216 characte

### 03.04. Setup the Agentic Router

In [51]:
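# # Fixes for Router JSON Errors -- earlier attempt, left disabled (fully commented out) for reference; the following cell defines the routers that are actually used.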
# from llama_index.core.tools import QueryEngineTool
# from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
# from llama_index.core.selectors import LLMSingleSelector, PydanticSingleSelector
# from llama_index.core import Settings
#
# print("Setting up Agentic Router...")
#
# # Create a query engine Tool for AeroFlow
# aeroflow_tool = QueryEngineTool.from_defaults(
#     query_engine=aeroflow_query_engine,
#     name="AeroFlow_specifications",
#     description=(
#         "Use this tool for questions about AeroFlow vehicle specifications, including: "
#         "design details, features, technology components, maintenance procedures, "
#         "warranty information, performance metrics, and technical specifications. "
#         "This covers all AeroFlow-related documentation."
#     ),
# )
#
# # Create a query engine Tool for EcoSprint
# ecosprint_tool = QueryEngineTool.from_defaults(
#     query_engine=ecosprint_query_engine,
#     name="EcoSprint_specifications",
#     description=(
#         "Use this tool for questions about EcoSprint vehicle specifications, including: "
#         "design details, features, technology components, maintenance procedures, "
#         "warranty information, performance metrics, and technical specifications. "
#         "This covers all EcoSprint-related documentation."
#     ),
# )
#
# # Create a Router Agent with improved selector configuration
# print("Creating router with local LLM selector...")
# router_agent = RouterQueryEngine(
#     selector=LLMSingleSelector.from_defaults(
#         llm=Settings.llm  # Explicitly use our local LLM
#     ),
#     query_engine_tools=[
#         aeroflow_tool,
#         ecosprint_tool,
#     ],
#     verbose=True  # Shows which tool is selected for each query
# )
#
# print("Agentic Router setup complete!")
# print(f"Available tools: {[tool.metadata.name for tool in [aeroflow_tool, ecosprint_tool]]}")
#


Setting up Agentic Router...
Creating router with local LLM selector...


ValueError: Model name open_gpt4_8x7b_v0.2 does not support function calling API. 

In [53]:
# Working Router Solutions for Local LLMs
from llama_index.core.tools import QueryEngineTool
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core import Settings

print("Setting up Agentic Router...")


def _make_specification_tool(vehicle, engine):
    """Wrap ``engine`` as a QueryEngineTool named ``<vehicle>_specifications``."""
    description = (
        f"Use this tool for questions about {vehicle} vehicle specifications, including: "
        "design details, features, technology components, maintenance procedures, "
        "warranty information, performance metrics, and technical specifications. "
        f"This covers all {vehicle}-related documentation."
    )
    return QueryEngineTool.from_defaults(
        query_engine=engine,
        name=f"{vehicle}_specifications",
        description=description,
    )


# One tool per product document; both share the same description template.
aeroflow_tool = _make_specification_tool("AeroFlow", aeroflow_query_engine)
ecosprint_tool = _make_specification_tool("EcoSprint", ecosprint_query_engine)

# ============================================================================
# SOLUTION 1: LLMSingleSelector with a conservative routing LLM (temperature 0, short output)
# ============================================================================

def create_robust_llm_router():
    """
    Build a RouterQueryEngine whose selector uses a dedicated routing LLM.

    The routing LLM reuses the model, endpoint and key of ``Settings.llm`` but
    with temperature 0 and a 200-token cap. Returns the router, or None when
    constructing the router raises.
    """

    from llama_index.llms.openai_like import OpenAILike

    base_llm = Settings.llm
    routing_options = dict(
        model=base_llm.model,
        api_base=base_llm.api_base,
        api_key=base_llm.api_key,
        is_local=True,
        temperature=0.0,   # most deterministic
        max_tokens=200,    # short responses for JSON
        request_timeout=30.0,
    )
    routing_llm = OpenAILike(**routing_options)

    try:
        selector = LLMSingleSelector.from_defaults(llm=routing_llm)
        router_agent = RouterQueryEngine(
            selector=selector,
            query_engine_tools=[aeroflow_tool, ecosprint_tool],
            verbose=True,
        )
        print("✅ LLM Router created successfully!")
        return router_agent

    except Exception as e:
        print(f"❌ LLM Router failed: {e}")
        return None

# ============================================================================
# SOLUTION 2: Simple Keyword-Based Router (Most Reliable)
# ============================================================================

class SimpleSmartRouter:
    """Keyword-scoring router that forwards each query to one of two engines."""

    def __init__(self, aeroflow_engine, ecosprint_engine):
        self.aeroflow_engine = aeroflow_engine
        self.ecosprint_engine = ecosprint_engine
        self.query_count = 0

    def query(self, query_str):
        """
        Score the query for each product and delegate to the winning engine.

        A product name in the query is worth 10 points and each related
        keyword (substring match) 2 points. On a tie, comparison-style queries
        go to AeroFlow; otherwise the engines alternate by query number.
        """
        self.query_count += 1
        text = query_str.lower()

        print(f"\n🎯 Router Decision #{self.query_count}")
        print(f"Query: '{query_str}'")

        # Product -> contextual keywords; dict order fixes the print order.
        hints = {
            "AeroFlow": ("aero", "flow", "aerodynamic"),
            "EcoSprint": ("eco", "sprint", "environment", "green"),
        }
        scores = dict.fromkeys(hints, 0)

        # Explicit product mentions
        for vehicle in hints:
            if vehicle.lower() in text:
                scores[vehicle] += 10
                print(f"   + {vehicle} explicitly mentioned (+10)")

        # Contextual keywords
        for vehicle, words in hints.items():
            for word in words:
                if word in text:
                    scores[vehicle] += 2
                    print(f"   + {vehicle} keyword '{word}' found (+2)")

        aero_total = scores["AeroFlow"]
        eco_total = scores["EcoSprint"]

        # Clear winner
        if aero_total > eco_total:
            print(f"   🚁 Routing to AeroFlow (score: {aero_total} vs {eco_total})")
            return self.aeroflow_engine.query(query_str)
        if eco_total > aero_total:
            print(f"   🏃 Routing to EcoSprint (score: {eco_total} vs {aero_total})")
            return self.ecosprint_engine.query(query_str)

        # Tie, or no keywords at all
        print(f"   🤔 Ambiguous query (both scored {aero_total})")

        comparison_markers = ("better", "compare", "vs", "versus", "which", "best")
        if any(marker in text for marker in comparison_markers):
            print("   📊 Comparison query detected - using AeroFlow for baseline")
            return self.aeroflow_engine.query(query_str)

        # Alternate between the engines based on how many queries were seen
        if self.query_count % 2 == 1:
            print("   🎲 Defaulting to AeroFlow (odd query number)")
            return self.aeroflow_engine.query(query_str)

        print("   🎲 Defaulting to EcoSprint (even query number)")
        return self.ecosprint_engine.query(query_str)

def create_simple_router():
    """Return a SimpleSmartRouter over the two global query engines, or None if either is missing."""
    if not (aeroflow_query_engine and ecosprint_query_engine):
        print("❌ Query engines not available")
        return None
    return SimpleSmartRouter(aeroflow_query_engine, ecosprint_query_engine)

# ============================================================================
# SOLUTION 3: Hybrid Router with Fallback
# ============================================================================

class HybridRouter:
    """Router that tries the LLM router first and falls back to the keyword router.

    Every exception raised by the LLM router counts as a failure. Once
    ``max_llm_failures`` failures have accumulated, the LLM router is no longer
    tried and all queries go straight to the keyword router. The counter is
    cumulative; a later success does not reset it.
    """

    def __init__(self, llm_router, keyword_router, max_llm_failures=3):
        """
        Args:
            llm_router: Object with ``query(str)``; may be None to disable LLM routing.
            keyword_router: Object with ``query(str)`` used as the fallback.
            max_llm_failures: Number of LLM-router failures tolerated before it
                is bypassed (default 3).
        """
        self.llm_router = llm_router
        self.keyword_router = keyword_router
        self.max_llm_failures = max_llm_failures
        self.llm_failures = 0

    def query(self, query_str):
        """Answer ``query_str`` via the LLM router when possible, else the keyword router."""

        if self.llm_router is not None and self.llm_failures < self.max_llm_failures:
            try:
                print("🧠 Trying LLM router...")
                response = self.llm_router.query(query_str)
                print("✅ LLM router succeeded!")
                return response

            except Exception as e:
                # Deliberately broad: any routing failure falls through to
                # the keyword router below.
                self.llm_failures += 1
                print(f"❌ LLM router failed ({self.llm_failures}/{self.max_llm_failures}): {e}")
                print("🔄 Falling back to keyword router...")

        # Use keyword router
        return self.keyword_router.query(query_str)

# ============================================================================
# MAIN SETUP
# ============================================================================

def setup_best_available_router():
    """
    Build the most capable router the current setup allows.

    Returns a HybridRouter when both the LLM router and the keyword router can
    be created, the keyword router alone when only it is available, and None
    when no keyword router could be created.
    """

    print("🚀 Setting up best available router...")

    # Both factories are always invoked, LLM router first.
    llm_router = create_robust_llm_router()
    keyword_router = create_simple_router()

    if not keyword_router:
        print("❌ No router could be created")
        return None

    if not llm_router:
        print("✅ Using keyword router (reliable fallback)")
        return keyword_router

    print("✅ Creating hybrid router (LLM + keyword fallback)")
    return HybridRouter(llm_router, keyword_router)

# Build the router, then smoke-test it with one ambiguous comparison query.
print("Creating router with best available method...")
router_agent = setup_best_available_router()

if not router_agent:
    print("❌ Router setup failed!")
else:
    print("✅ Router setup complete!")
    print(f"Available tools: AeroFlow_specifications, EcoSprint_specifications")

    # Test the problematic query
    print("\n🧪 Testing the problematic query...")
    try:
        test_query = "Which vehicle has better performance?"
        response = router_agent.query(test_query)
        print(f"\n✅ Success! Response preview:")
        response_text = str(response)
        # Show at most the first 200 characters of the answer
        if len(response_text) > 200:
            preview = response_text[:200] + "..."
        else:
            preview = response_text
        print(preview)
    except Exception as e:
        print(f"❌ Test failed: {e}")

Setting up Agentic Router...
Creating router with best available method...
🚀 Setting up best available router...
✅ LLM Router created successfully!
✅ Creating hybrid router (LLM + keyword fallback)
✅ Router setup complete!
Available tools: AeroFlow_specifications, EcoSprint_specifications

🧪 Testing the problematic query...
🧠 Trying LLM router...
❌ LLM router failed (1/3): Got invalid JSON object. Error: Expecting ',' delimiter: line 6 column 1 (char 424) while parsing a flow sequence
  in "<unicode string>", line 1, column 1:
    [
    ^
expected ',' or ']', but got '}'
  in "<unicode string>", line 6, column 1:
    }
    ^. Got JSON string: [
    {
        "choice": 2,
        "reason": "The question asks to compare the performance of two vehicles. Since this tool is specifically designed to provide information about EcoSprint vehicle specifications and their performance metrics, it would be more relevant for answering such a question compared to the tool for AeroFlow which does not 

In [54]:
# Helper functions for testing
def test_various_queries():
    """Test the router with various queries"""

    if not router_agent:
        print("❌ No router available")
        return

    test_queries = [
        "What colors are available for AeroFlow?",           # Clear AeroFlow
        "Tell me about EcoSprint battery life",              # Clear EcoSprint
        "Which vehicle has better performance?",             # Ambiguous comparison
        "What are the maintenance requirements?",            # Generic
        "Compare AeroFlow and EcoSprint features",          # Explicit comparison
        "What is the range of the eco-friendly vehicle?",   # Contextual EcoSprint
    ]

    print("\n🧪 TESTING VARIOUS QUERIES")
    print("="*50)

    for i, query in enumerate(test_queries, 1):
        print(f"\n--- Test {i} ---")
        try:
            response = router_agent.query(query)
            print(f"✅ Success! Length: {len(str(response))} chars")
        except Exception as e:
            print(f"❌ Failed: {e}")

# Advertise the helper, then run it right away so the cell shows its results.
print("\n💡 To test various queries: test_various_queries()")

test_various_queries()


💡 To test various queries: test_various_queries()

🧪 TESTING VARIOUS QUERIES

--- Test 1 ---
🧠 Trying LLM router...
[1;3;38;5;200mSelecting query engine 0: The question is about AeroFlow vehicle specifications, which are covered in option 1. The specific query is about the available colors, which would likely be found under 'design details' or a similar category..
[0m✅ LLM router succeeded!
✅ Success! Length: 90 chars

--- Test 2 ---
🧠 Trying LLM router...
[1;3;38;5;200mSelecting query engine 1: The question 'Tell me about EcoSprint battery life' is specifically asking for information about the EcoSprint vehicle. According to the given options, this falls under choice 2 which is related to EcoSprint-specific documentation..
[0m✅ LLM router succeeded!
✅ Success! Length: 222 chars

--- Test 3 ---
🧠 Trying LLM router...
❌ LLM router failed (2/3): Got invalid JSON object. Error: Expecting ',' delimiter: line 6 column 1 (char 424) while parsing a flow sequence
  in "<unicode string>", 

In [55]:
# Comprehensive Router Testing
def test_router_intelligence():
    """Exercise ``router_agent`` with grouped sample queries and print a scorecard.

    Every query is sent to the global ``router_agent``; it counts as
    successful when ``query()`` returns without raising. For each success the
    response text is scanned case-insensitively for the two vehicle names to
    label which document it appears to draw on, and a preview of at most 100
    characters is printed. A pass-rate summary closes the report.
    """
    print("🧪 COMPREHENSIVE ROUTER INTELLIGENCE TEST")
    print("=" * 60)

    # (category heading, queries belonging to that category)
    query_groups = [
        (
            "🎯 Explicit Vehicle Mentions",
            [
                "What colors are available for AeroFlow?",
                "Tell me about EcoSprint's battery specifications",
                "How do I maintain my AeroFlow vehicle?",
                "What is EcoSprint's top speed?",
            ],
        ),
        (
            "🤔 Ambiguous/Comparison Queries",
            [
                "Which vehicle has better performance?",
                "What are the available color options?",
                "Compare the two electric vehicles",
                "Which one is more environmentally friendly?",
            ],
        ),
        (
            "🔍 Contextual Keywords",
            [
                "Tell me about the eco-friendly features",  # Should favor EcoSprint
                "What about aerodynamic design?",           # Should favor AeroFlow
                "How green is this vehicle?",               # Should favor EcoSprint
                "What about the flow dynamics?",            # Should favor AeroFlow
            ],
        ),
        (
            "📊 Technical Specifications",
            [
                "What is the battery capacity?",
                "How long does charging take?",
                "What safety features are included?",
                "What is the warranty coverage?",
            ],
        ),
    ]

    # Line printed for each (mentions AeroFlow, mentions EcoSprint) combination.
    mention_labels = {
        (True, False): "   🚁 Routed to: AeroFlow (correct detection)",
        (False, True): "   🏃 Routed to: EcoSprint (correct detection)",
        (True, True): "   🔄 Both mentioned (comparison or general info)",
        (False, False): "   📄 General response",
    }

    attempted = 0
    passed = 0

    for heading, questions in query_groups:
        print(f"\n{heading}")
        print("-" * 50)

        for position, question in enumerate(questions, 1):
            attempted += 1
            print(f"\n{position}. Testing: '{question}'")

            try:
                answer = router_agent.query(question)
                # Counted as soon as the query returns, before any formatting.
                passed += 1

                answer_text = str(answer)
                answer_lower = answer_text.lower()

                print(f"   ✅ Success! ({len(answer_text)} chars)")

                # The label is inferred from the response text only.
                mentions = ("aeroflow" in answer_lower, "ecosprint" in answer_lower)
                print(mention_labels[mentions])

                if len(answer_text) > 100:
                    snippet = answer_text[:100] + "..."
                else:
                    snippet = answer_text
                print(f"   Preview: {snippet}")

            except Exception as err:
                print(f"   ❌ Failed: {err}")

    print("\n" + "=" * 60)
    print("📊 TEST SUMMARY")
    print("=" * 60)
    pass_rate = passed / attempted * 100
    print(f"✅ Successful: {passed}/{attempted} ({pass_rate:.1f}%)")
    print(f"❌ Failed: {attempted - passed}/{attempted}")

    # Verdict thresholds: above 80% is excellent, above 60% is adequate.
    if passed > attempted * 0.8:
        print("🎉 Excellent performance! Router is working very well.")
    elif passed > attempted * 0.6:
        print("👍 Good performance! Router is working adequately.")
    else:
        print("⚠️  Router needs improvement.")

def analyze_router_behavior():
    """Print the type of ``router_agent`` and, when present, its hybrid-router state.

    Reports ``llm_router`` availability and ``llm_failures`` if the router has
    an ``llm_router`` attribute, and the keyword router's ``query_count`` if
    the router has a ``keyword_router`` exposing one.

    NOTE: a later cell defines another function with this same name; once
    that cell runs, this definition is no longer the one being called.
    """
    print("\n🔍 ROUTER BEHAVIOR ANALYSIS")
    print("=" * 50)

    print("Current Router Setup:")
    print(f"  Type: {type(router_agent).__name__}")

    # Sentinel distinguishes "attribute missing" from "attribute is None".
    absent = object()

    llm_router = getattr(router_agent, "llm_router", absent)
    if llm_router is not absent:
        availability = "Available" if llm_router else "Not Available"
        print(f"  LLM Router: {availability}")
        print(f"  LLM Failures: {router_agent.llm_failures}/3")

    keyword_router = getattr(router_agent, "keyword_router", absent)
    if keyword_router is not absent:
        print("  Keyword Router: Available")
        processed = getattr(keyword_router, "query_count", absent)
        if processed is not absent:
            print(f"  Queries Processed: {processed}")

def show_routing_tips():
    """Print a short checklist of query-phrasing tips for better routing."""
    print("\n💡 ROUTING OPTIMIZATION TIPS")
    print("=" * 50)

    # heading -> advice; dicts keep insertion order, so tips print as listed.
    advice_by_heading = {
        "🎯 Be Specific": "Include 'AeroFlow' or 'EcoSprint' in your queries",
        "🔍 Use Keywords": "Words like 'eco', 'green' favor EcoSprint; 'aero', 'flow' favor AeroFlow",
        "📊 For Comparisons": "Ask specific comparisons like 'AeroFlow vs EcoSprint battery life'",
        "🔄 If Wrong Tool": "Rephrase with explicit vehicle name",
        "🧪 Test Edge Cases": "Try ambiguous queries to see router intelligence",
    }

    for heading in advice_by_heading:
        print(heading + ": " + advice_by_heading[heading])

def demo_smart_queries():
    """Print vague-versus-specific query pairs showing how to phrase routable questions."""
    print("\n🎓 SMART QUERY EXAMPLES")
    print("=" * 50)

    # Each entry pairs a weak phrasing with its improved form, as (label, query).
    query_pairs = [
        (("❌ Generic", "What is the battery life?"),
         ("🎯 Specific", "What is AeroFlow's battery life?")),
        (("❌ Ambiguous", "Which is better?"),
         ("🎯 Clear", "Which has better range, AeroFlow or EcoSprint?")),
        (("❌ Vague", "Tell me about colors"),
         ("🎯 Targeted", "What color options does EcoSprint offer?")),
        (("❌ Unclear", "How do I charge it?"),
         ("🎯 Explicit", "How do I charge the AeroFlow vehicle?")),
    ]

    for weak, improved in query_pairs:
        for label, text in (weak, improved):
            print(f"{label}: '{text}'")
        # Blank line separates one pair from the next.
        print()

# Run the full query-category test, then print diagnostics and phrasing guidance.
test_router_intelligence()
analyze_router_behavior()
show_routing_tips()
demo_smart_queries()

# Closing recap; printed unconditionally, whatever the test results were.
print("\n🚀 Your router is working! The hybrid approach gives you:")
print("  ✅ Reliable routing (keyword fallback)")
print("  ✅ Smart decisions (comparison detection)")
print("  ✅ Clear feedback (shows decision process)")
print("  ✅ Automatic fallback (when LLM fails)")

🧪 COMPREHENSIVE ROUTER INTELLIGENCE TEST

🎯 Explicit Vehicle Mentions
--------------------------------------------------

1. Testing: 'What colors are available for AeroFlow?'
🧠 Trying LLM router...
[1;3;38;5;200mSelecting query engine 0: The question is about AeroFlow vehicle specifications, which are covered in option 1. The specific query is about the available colors, which would likely be found under 'design details' or a similar category..
[0m✅ LLM router succeeded!
   ✅ Success! (90 chars)
   🚁 Routed to: AeroFlow (correct detection)
   Preview:  The AeroFlow is available in colors such as Coastal Blue, Sunset Orange, and Pearl White.

2. Testing: 'Tell me about EcoSprint's battery specifications'
🧠 Trying LLM router...
[1;3;38;5;200mSelecting query engine 1: The question 'Tell me about EcoSprint's battery specifications' is asking for information specifically related to the EcoSprint vehicle. Therefore, choice 2, which is about EcoSprint vehicle specifications including tech

In [56]:
def test_router_comprehensive():
    """Run five fixed routing scenarios through ``router_agent`` and summarize them.

    For each scenario the description, query and expected tool are printed,
    the query is sent to the global ``router_agent``, and a preview of the
    response is shown (first 200 characters, with an ellipsis only when the
    text was actually truncated). The expected tool is informational only:
    this function does not check which tool handled the query, so "success"
    means solely that ``query()`` did not raise.

    Returns:
        list[dict]: one record per scenario, in order. Every record has
        ``test`` (1-based index), ``query`` and ``success``; successful
        records add ``response`` (the response as a string) and failed ones
        add ``error`` (the exception message).
    """

    print("\n" + "="*70)
    print("🧪 COMPREHENSIVE ROUTER TESTING")
    print("="*70)

    # Test cases with clear expected routing
    test_cases = [
        {
            "query": "What are the key features of AeroFlow?",
            "expected_tool": "AeroFlow_specifications",
            "description": "Should route to AeroFlow tool"
        },
        {
            "query": "Tell me about EcoSprint's battery specifications",
            "expected_tool": "EcoSprint_specifications",
            "description": "Should route to EcoSprint tool"
        },
        {
            "query": "What is the warranty coverage for AeroFlow vehicles?",
            "expected_tool": "AeroFlow_specifications",
            "description": "AeroFlow warranty question"
        },
        {
            "query": "How do I maintain an EcoSprint?",
            "expected_tool": "EcoSprint_specifications",
            "description": "EcoSprint maintenance question"
        },
        {
            "query": "Compare the design features of both vehicles",
            "expected_tool": "Either (ambiguous)",
            "description": "Ambiguous query - could go to either"
        }
    ]

    results = []

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n--- Test {i}: {test_case['description']} ---")
        print(f"Query: '{test_case['query']}'")
        print(f"Expected routing: {test_case['expected_tool']}")

        try:
            response = router_agent.query(test_case['query'])
            response_text = str(response)

            # Append the ellipsis only when the text was cut, so a short
            # answer is not presented as if it had been truncated.
            if len(response_text) > 200:
                preview = response_text[:200] + "..."
            else:
                preview = response_text
            print(f"Response preview: {preview}")

            results.append({
                "test": i,
                "query": test_case['query'],
                "success": True,
                "response": response_text
            })

        except Exception as e:
            print(f"❌ Test failed: {e}")
            results.append({
                "test": i,
                "query": test_case['query'],
                "success": False,
                "error": str(e)
            })

    # Summary
    print("\n" + "="*50)
    print("📊 TEST SUMMARY")
    print("="*50)

    successful_tests = [r for r in results if r["success"]]
    print(f"✅ Successful tests: {len(successful_tests)}/{len(results)}")

    if len(successful_tests) < len(results):
        failed_tests = [r for r in results if not r["success"]]
        print(f"❌ Failed tests: {len(failed_tests)}")
        for test in failed_tests:
            print(f"   - Test {test['test']}: {test['error']}")

    return results

def test_simple_routing():
    """Send four unambiguous queries to ``router_agent`` and sanity-check each reply.

    For every query the reply length and a preview (at most 200 characters)
    are printed, followed by a rough relevance verdict: the reply is judged
    "likely correct" when it mentions the vehicle named in the query,
    otherwise it is classified by length only. Exceptions are reported per
    query and do not stop the run. Returns nothing.
    """
    print("\n" + "=" * 50)
    print("🔧 SIMPLE ROUTING TEST")
    print("=" * 50)

    # query -> routing expectation (printed for the reader, never asserted)
    expectations = {
        "What are the key features of AeroFlow?": "Should route to AeroFlow",
        "What are the specifications of EcoSprint?": "Should route to EcoSprint",
        "Tell me about AeroFlow's battery": "Should route to AeroFlow",
        "How do I maintain EcoSprint?": "Should route to EcoSprint",
    }

    # Checked in this order: (lower-case vehicle name, verdict when both the
    # query and the reply mention it).
    vehicle_verdicts = (
        ("aeroflow", "✅ Response mentions AeroFlow - likely correct routing"),
        ("ecosprint", "✅ Response mentions EcoSprint - likely correct routing"),
    )

    for question, expected in expectations.items():
        print(f"\n--- Testing: '{question}' ---")
        print(f"Expected: {expected}")

        try:
            print("Router is thinking...")
            reply_text = str(router_agent.query(question))

            print(f"✅ Got response! Length: {len(reply_text)} characters")

            shown = reply_text if len(reply_text) <= 200 else reply_text[:200] + "..."
            print(f"Response preview: {shown}")

            question_lower = question.lower()
            reply_lower = reply_text.lower()

            verdict = None
            for vehicle, message in vehicle_verdicts:
                if vehicle in question_lower and vehicle in reply_lower:
                    verdict = message
                    break
            if verdict is None:
                # No vehicle match: fall back to judging by reply length.
                if len(reply_text) > 50:
                    verdict = "ℹ️  Got substantial response - router seems to be working"
                else:
                    verdict = "⚠️  Short response - might need investigation"
            print(verdict)

        except Exception as err:
            print(f"❌ Error: {err}")

        print("-" * 40)

def analyze_router_behavior():
    """Print a best-effort introspection report for the global ``router_agent``.

    Reports the router's class name, its selector type (looked up through the
    ``_selector`` or ``selector`` attribute), the registered tools (through
    ``query_engine_tools``) and the router's public attribute names.
    Attributes that are absent are reported as not accessible rather than
    treated as errors.

    NOTE: an earlier cell defines a function with this same name; once this
    cell runs, this definition is the one bound to ``analyze_router_behavior``.
    """

    print("\n" + "="*50)
    print("🔍 ROUTER BEHAVIOR ANALYSIS")
    print("="*50)

    print(f"Router type: {type(router_agent).__name__}")

    # Try to access selector safely
    try:
        if hasattr(router_agent, '_selector'):
            print(f"Selector type: {type(router_agent._selector).__name__}")
        elif hasattr(router_agent, 'selector'):
            print(f"Selector type: {type(router_agent.selector).__name__}")
        else:
            print("Selector type: Not directly accessible")
    except Exception:
        # Catch ordinary errors only: a bare ``except`` would also swallow
        # KeyboardInterrupt/SystemExit and make the cell impossible to stop.
        print("Selector type: Could not determine")

    # Check tools
    try:
        if hasattr(router_agent, 'query_engine_tools'):
            print(f"Number of tools: {len(router_agent.query_engine_tools)}")

            print("\nTool details:")
            for i, tool in enumerate(router_agent.query_engine_tools):
                print(f"  {i}: {tool.metadata.name}")
                # Only the first 100 characters of the description are shown.
                print(f"     Description: {tool.metadata.description[:100]}...")
        else:
            print("Tools: Not directly accessible")
    except Exception as e:
        print(f"Error accessing tools: {e}")

    # Show router attributes
    print(f"\nRouter attributes: {[attr for attr in dir(router_agent) if not attr.startswith('_')]}")

# Choose which test to run
def run_router_tests(test_type="simple"):
    """Dispatch to one of the router test helpers by name.

    Args:
        test_type: "simple", "comprehensive", or "analyze". Any other value
            prints the list of valid choices instead of running a test.
    """
    # Lambdas defer the name lookup until the chosen helper is invoked, so
    # only the selected helper needs to exist.
    runners = (
        ("simple", lambda: test_simple_routing()),
        ("comprehensive", lambda: test_router_comprehensive()),
        ("analyze", lambda: analyze_router_behavior()),
    )

    for name, run in runners:
        if test_type == name:
            run()
            return

    print("Available test types: 'simple', 'comprehensive', 'analyze'")


In [57]:
# Start with the configuration report, then list the available test entry points.
analyze_router_behavior()

print(
    "\n💡 To test the router, run:\n"
    "   run_router_tests('simple')      # Basic functionality test\n"
    "   run_router_tests('comprehensive') # Detailed routing test\n"
    "   run_router_tests('analyze')     # Router configuration analysis"
)


🔍 ROUTER BEHAVIOR ANALYSIS
Router type: HybridRouter
Selector type: Not directly accessible
Tools: Not directly accessible

Router attributes: ['keyword_router', 'llm_failures', 'llm_router', 'query']

💡 To test the router, run:
   run_router_tests('simple')      # Basic functionality test
   run_router_tests('comprehensive') # Detailed routing test
   run_router_tests('analyze')     # Router configuration analysis


In [58]:
# Run the detailed routing scenarios (dispatches to test_router_comprehensive).
run_router_tests('comprehensive')


🧪 COMPREHENSIVE ROUTER TESTING

--- Test 1: Should route to AeroFlow tool ---
Query: 'What are the key features of AeroFlow?'
Expected routing: AeroFlow_specifications

🎯 Router Decision #16
Query: 'What are the key features of AeroFlow?'
   + AeroFlow explicitly mentioned (+10)
   + AeroFlow keyword 'aero' found (+2)
   + AeroFlow keyword 'flow' found (+2)
   🚁 Routing to AeroFlow (score: 14 vs 0)
Response preview: 
The key features of AeroFlow include a sleek, modern exterior design with a spacious interior layout, available in colors like Coastal Blue, Sunset Orange, and Pearl White. The interior boasts a vers...

--- Test 2: Should route to EcoSprint tool ---
Query: 'Tell me about EcoSprint's battery specifications'
Expected routing: EcoSprint_specifications

🎯 Router Decision #17
Query: 'Tell me about EcoSprint's battery specifications'
   + EcoSprint explicitly mentioned (+10)
   + EcoSprint keyword 'eco' found (+2)
   + EcoSprint keyword 'sprint' found (+2)
   🏃 Routing to EcoS

### 03.05. Route with Agentic AI

In [19]:
# Ask the router which colors are available for AeroFlow
response = router_agent.query("What colors are available for AeroFlow?")
print("\nResponse: ",str(response))

[1;3;38;5;200mSelecting query engine 0: The question is about AeroFlow vehicle specifications, which are covered in option 1. The specific query is about the available colors, which would likely be found under 'design details' or a similar category..
[0m
Response:   The AeroFlow is available in colors such as Coastal Blue, Sunset Orange, and Pearl White.


In [20]:
# Ask the same color question about EcoSprint
response = router_agent.query("What colors are available for EcoSprint?")
print("\nResponse: ",str(response))

[1;3;38;5;200mSelecting query engine 1: The question 'What colors are available for EcoSprint?' is asking about specifications related to the EcoSprint vehicle. According to the given options, using this tool for questions about EcoSprint vehicle specifications, including design details, features, technology components, maintenance procedures, warranty information, performance metrics, and technical specifications (option 2) is most relevant..
[0m
Response:   The EcoSprint is available in Midnight Black, Ocean Blue, and Pearl White.


In [21]:
# Inspect Indexes and Vector Stores

In [23]:
# Print the vector index inspection report.
# NOTE(review): run_full_inspection is not defined in this cell — presumably an earlier cell defines it; confirm it runs before this one.
run_full_inspection()

🔍 VECTOR INDEX INSPECTION REPORT
Generated at: 2025-06-18 23:12:06

📂 INDEX STORAGE LOCATIONS
----------------------------------------
Current working directory: /Users/jarotball/Setups/agentic-ai-for-developers-concepts-and-applications-for-enterprises-3913172
No storage found at: ./storage
No storage found at: ./index_storage
No storage found at: ./vector_store

💡 Note: By default, VectorStoreIndex stores data in memory.
   To persist indexes, you'd need to use index.storage_context.persist()

🚁 AEROFLOW INDEX ANALYSIS

📊 AeroFlow INDEX OVERVIEW
------------------------------
Total nodes: 2
Vector store type: SimpleVectorStore
Vector store client: NoneType
Stored vectors: 2

📝 AeroFlow NODE CONTENT ANALYSIS
-----------------------------------
Average chunk size: 1239 characters
Min chunk size: 262 characters
Max chunk size: 2216 characters

📄 SAMPLE CHUNKS FROM AeroFlow
------------------------------

--- Chunk 1 ---
Node ID: 6bb81dce-7322-4d1e-98e3-35e6b0006e23
Length: 2216 characte

# Demonstrate the usage of the router in a real-world scenario

In [43]:
# Understanding Router Decision Process
"""
How the Router Agent Decides Which Tool to Choose
================================================

The LLMSingleSelector follows this process:

1. RECEIVES USER QUERY
   └── "What colors are available for AeroFlow?"

2. ANALYZES AVAILABLE TOOLS
   ├── Tool 0: AeroFlow_specifications
   │   └── Description: "Use this tool for questions about AeroFlow vehicle
   │       specifications, including: design details, features, technology
   │       components, maintenance procedures, warranty information,
   │       performance metrics, and technical specifications."
   │
   └── Tool 1: EcoSprint_specifications
       └── Description: "Use this tool for questions about EcoSprint vehicle
           specifications, including: design details, features, technology
           components, maintenance procedures, warranty information,
           performance metrics, and technical specifications."

3. LLM REASONING PROCESS
   The LLM (DeepSeek Coder) analyzes:
   ├── Query contains "AeroFlow" → Strong match with Tool 0
   ├── Query about "colors" → Likely in design/features section
   ├── Tool 0 description mentions "design details, features"
   └── Tool 1 is about EcoSprint, not AeroFlow

4. DECISION
   └── Selects Tool 0 (AeroFlow_specifications)

5. EXECUTION
   └── Queries the AeroFlow vector index for color information
"""

def demonstrate_router_decision_process():
    """Print, for several sample queries, the expected routing and the reasoning behind it.

    Purely illustrative: nothing is sent to the router; the expectations are
    hard-coded in this function.
    """
    print("🧠 ROUTER DECISION PROCESS DEMONSTRATION")
    print("=" * 60)

    # (query, key words, expected tool, reasoning)
    scenarios = [
        (
            "What colors are available for AeroFlow?",
            ["AeroFlow", "colors"],
            "AeroFlow_specifications",
            "Query mentions 'AeroFlow' specifically, and colors are part of design features",
        ),
        (
            "Tell me about EcoSprint battery life",
            ["EcoSprint", "battery"],
            "EcoSprint_specifications",
            "Query mentions 'EcoSprint' specifically, battery is a technical specification",
        ),
        (
            "Which vehicle has better performance?",
            ["vehicle", "performance"],
            "Either (ambiguous)",
            "Doesn't mention specific vehicle, LLM must choose based on context",
        ),
        (
            "How do I maintain my electric vehicle?",
            ["maintain", "electric vehicle"],
            "Either (ambiguous)",
            "Generic question, could apply to both vehicles",
        ),
    ]

    print("📋 ROUTING ANALYSIS:")
    print("-" * 40)

    for number, (question, key_words, expected_tool, reasoning) in enumerate(scenarios, start=1):
        print(f"\n{number}. Query: '{question}'")
        # key_words stays a list so it prints in list form, e.g. ['a', 'b'].
        print(f"   Key words: {key_words}")
        print(f"   Expected routing: {expected_tool}")
        print(f"   Reasoning: {reasoning}")

def show_actual_router_prompt():
    """Print an illustrative routing prompt and a sample of the LLM's reasoning.

    Both texts are hard-coded approximations written into this function;
    nothing is read from the router or sent to the LLM.
    """
    # Approximation of the prompt the selector LLM is given.
    illustrative_prompt = """
You are a query router. Given a user query and a list of tools, select the most appropriate tool.

Query: "What colors are available for AeroFlow?"

Available Tools:
0. AeroFlow_specifications
   Description: Use this tool for questions about AeroFlow vehicle specifications, including: design details, features, technology components, maintenance procedures, warranty information, performance metrics, and technical specifications. This covers all AeroFlow-related documentation.

1. EcoSprint_specifications
   Description: Use this tool for questions about EcoSprint vehicle specifications, including: design details, features, technology components, maintenance procedures, warranty information, performance metrics, and technical specifications. This covers all EcoSprint-related documentation.

Select the most appropriate tool and explain your reasoning.
"""

    # Example of the kind of justification the LLM might produce.
    illustrative_reasoning = """
    The query asks about "colors available for AeroFlow", which specifically mentions AeroFlow.

    Tool 0 (AeroFlow_specifications) is designed for AeroFlow-related questions and includes
    "design details" and "features" in its description. Colors would fall under design details.

    Tool 1 (EcoSprint_specifications) is for EcoSprint-related questions, which doesn't match
    this query about AeroFlow.

    Therefore, I select Tool 0.
    """

    print("\n🔍 ACTUAL LLM PROMPT FOR ROUTING")
    print("=" * 50)
    print(illustrative_prompt)

    print("\n💭 LLM REASONING OUTPUT:")
    print("-" * 30)
    print(illustrative_reasoning)

def test_router_with_explanation(query):
    """
    Test the router and show the decision process
    """

    print(f"\n🧪 TESTING ROUTER DECISION")
    print("="*50)
    print(f"Query: '{query}'")
    print()

    # Capture the verbose output to see reasoning
    import io
    import sys
    from contextlib import redirect_stdout

    print("🤔 Router is thinking...")

    # The verbose=True in your router will show the reasoning
    # Let's capture and display it more clearly

    f = io.StringIO()
    try:
        with redirect_stdout(f):
            response = router_agent.query(query)

        # Get the captured output
        verbose_output = f.getvalue()

        print("💭 LLM Reasoning Process:")
        print("-" * 30)

        # Parse and display the reasoning
        lines = verbose_output.split('\n')
        for line in lines:
            if line.strip():
                if "Selecting query engine" in line:
                    print(f"🎯 {line}")
                else:
                    print(f"   {line}")

        print(f"\n✅ Final Response:")
        print(f"   Length: {len(str(response))} characters")
        preview = str(response)[:200] + "..." if len(str(response)) > 200 else str(response)
        print(f"   Preview: {preview}")

        return response

    except Exception as e:
        print(f"❌ Error during routing: {e}")
        return None

# Key factors that influence routing decisions
def explain_routing_factors():
    """Print a table of the factors that sway routing decisions, followed by usage tips."""
    print("\n🎯 KEY FACTORS IN ROUTING DECISIONS")
    print("=" * 50)

    # Left-aligned, fixed-width columns shared by the header and every row.
    format_row = "{:<20} {:<40} {:<15}".format

    # (factor, description, influence)
    table_rows = [
        ("Exact Name Match", "Query contains 'AeroFlow' or 'EcoSprint'", "🔥 Very High"),
        ("Keyword Relevance", "Technical terms match tool descriptions", "🔴 High"),
        ("Context Clues", "Related terms suggest specific domain", "🟡 Medium"),
        ("Tool Descriptions", "How well query aligns with tool purpose", "🟡 Medium"),
        ("LLM Training", "Model's understanding of relationships", "🔵 Background"),
    ]

    print(format_row("Factor", "Description", "Influence"))
    print("-" * 75)
    for row in table_rows:
        print(format_row(*row))

    print("\n💡 Pro Tips for Better Routing:")
    pro_tips = (
        "Use specific vehicle names in your queries",
        "Include relevant technical terms",
        "Be specific rather than generic",
        "Test edge cases to understand behavior",
    )
    for tip in pro_tips:
        print(f"• {tip}")

# Run demonstrations
if __name__ == "__main__":
    # Illustrative walkthrough: none of these three calls queries the router.
    demonstrate_router_decision_process()
    show_actual_router_prompt()
    explain_routing_factors()

    # Point the reader at the helper that does query the live router.
    print("\n🚀 To test with your actual router:")
    print('   test_router_with_explanation("What colors are available for AeroFlow?")')

🧠 ROUTER DECISION PROCESS DEMONSTRATION
📋 ROUTING ANALYSIS:
----------------------------------------

1. Query: 'What colors are available for AeroFlow?'
   Key words: ['AeroFlow', 'colors']
   Expected routing: AeroFlow_specifications
   Reasoning: Query mentions 'AeroFlow' specifically, and colors are part of design features

2. Query: 'Tell me about EcoSprint battery life'
   Key words: ['EcoSprint', 'battery']
   Expected routing: EcoSprint_specifications
   Reasoning: Query mentions 'EcoSprint' specifically, battery is a technical specification

3. Query: 'Which vehicle has better performance?'
   Key words: ['vehicle', 'performance']
   Expected routing: Either (ambiguous)
   Reasoning: Doesn't mention specific vehicle, LLM must choose based on context

4. Query: 'How do I maintain my electric vehicle?'
   Key words: ['maintain', 'electric vehicle']
   Expected routing: Either (ambiguous)
   Reasoning: Generic question, could apply to both vehicles

🔍 ACTUAL LLM PROMPT FOR ROUTIN

In [49]:
# Route one AeroFlow query and display the router's captured output; the returned response is shown as the cell result.
test_router_with_explanation("What colors are available for AeroFlow?")


🧪 TESTING ROUTER DECISION
Query: 'What colors are available for AeroFlow?'

🤔 Router is thinking...
💭 LLM Reasoning Process:
------------------------------
🎯 [1;3;38;5;200mSelecting query engine 0: The question asks about AeroFlow vehicle specifications, which is covered in option 1. The color availability falls under the 'design details' or 'features' category of a vehicle specification..
   [0m

✅ Final Response:
   Length: 90 characters
   Preview:  The AeroFlow is available in colors such as Coastal Blue, Sunset Orange, and Pearl White.


Response(response=' The AeroFlow is available in colors such as Coastal Blue, Sunset Orange, and Pearl White.', source_nodes=[NodeWithScore(node=TextNode(id_='96adfe61-c901-4161-aa79-9372d5c4a8bf', embedding=None, metadata={'page_label': '1', 'file_name': 'AeroFlow_Specification_Document.pdf', 'file_path': 'AeroFlow_Specification_Document.pdf', 'file_type': 'application/pdf', 'file_size': 36333, 'creation_date': '2025-06-19', 'last_modified_date': '2025-06-19'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='8f494123-5c9a-421f-84a4-9ea486e54dbc', node_type='4', metadata={'page_label': '1', 'file_name': 'AeroFlow_Specification_Document.pdf', 'file_path': 'AeroFlow_Specification_Document.pdf', 'file_type': '