# Vector Tool Integration Demo

This notebook demonstrates how to integrate the vector retrieval tool with agents for document-aware responses.

## 🎯 What We'll Demonstrate

1. **Vector Retrieval Tool Setup** - Connect to Supabase and retrieve document vectors
2. **Document-Aware Agent Creation** - Build agents that use vector context
3. **Practical Examples** - Healthcare scenarios with real document context
4. **Performance Patterns** - Efficient vector search and content extraction


In [1]:
# Setup and imports
import os
import sys
import json
import logging
from pathlib import Path
from typing import List, Dict, Any, Optional
from pydantic import BaseModel, Field

# Add project root to path
project_root = Path('.').resolve().parent.parent
sys.path.insert(0, str(project_root))

# Import our utilities and vector tool
from langgraph_utils import create_agent, ExampleAgentOutput
from agents.common.vector_retrieval_tool import (
    VectorRetrievalTool, 
    VectorFilter, 
    VectorResult,
    get_document_vectors,
    get_document_text
)

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

print("🔧 Vector Tool Integration Demo Setup Complete")


🔧 Using local database: postgresql://aq_home@[host]
🔧 Vector Tool Integration Demo Setup Complete


## 1. 🔍 Vector Tool Basic Usage


In [2]:
# 1.1 Initialize vector tool with Supabase connection
print("=== 🔍 Vector Tool Basic Usage ===")

# Create vector tool instance with Supabase connection
vector_tool = VectorRetrievalTool(force_supabase=True)
print("✅ Vector tool initialized with Supabase connection")

# Test data from our successful Supabase test
test_user_id = "d64bfbbe-ff7f-4b51-b220-a0fa20756d9d"
test_document_id = "d64bfbbe-ff7f-4b51-b220-a0fa20756d9d"

print(f"📋 Using test data:")
print(f"   User ID: {test_user_id}")
print(f"   Document ID: {test_document_id}")


=== 🔍 Vector Tool Basic Usage ===
✅ Vector tool initialized with Supabase connection
📋 Using test data:
   User ID: d64bfbbe-ff7f-4b51-b220-a0fa20756d9d
   Document ID: d64bfbbe-ff7f-4b51-b220-a0fa20756d9d


In [4]:
# 1.2 Retrieve vectors using the tool
print("\n🔍 Retrieving document vectors from Supabase...")

try:
    # Create filter for user documents
    filter_criteria = VectorFilter(
        user_id=test_user_id,
        document_id=test_document_id
    )
    
    # Retrieve vectors
    vectors = await vector_tool.get_vectors_by_filter(filter_criteria)
    
    print(f"✅ Retrieved {len(vectors)} vector chunks")
    print(f"📊 Vector dimensions: {len(vectors[0].content_embedding) if vectors else 'N/A'}")
    
    # Show first chunk info
    if vectors:
        first_chunk = vectors[0]
        print(f"📄 First chunk preview:")
        print(f"   Chunk index: {first_chunk.chunk_index}")
        print(f"   Content: {first_chunk.chunk_text[:100] if first_chunk.chunk_text else '[ENCRYPTED]'}...")
    
except Exception as e:
    print(f"❌ Vector retrieval failed: {str(e)}")
    vectors = []  # Set empty for mock mode


2025-06-23 18:42:56,570 - INFO - Executing query: 
                SELECT 
                    id,
                    encrypted_chunk_text,
                    encrypted_chunk_metadata,
                    content_embedding,
                    chunk_index,
                    document_source_type,
                    user_id,
                    document_record_id,
                    regulatory_document_id,
                    encryption_key_id
                FROM document_vectors
                WHERE is_active = $1 AND user_id = $2 AND document_id = $3
                ORDER BY chunk_index ASC
                LIMIT $4
            
2025-06-23 18:42:56,571 - INFO - With parameters: [True, UUID('d64bfbbe-ff7f-4b51-b220-a0fa20756d9d'), UUID('d64bfbbe-ff7f-4b51-b220-a0fa20756d9d'), 1000]



🔍 Retrieving document vectors from Supabase...


2025-06-23 18:42:57,904 - INFO - Retrieved 187 vector results


✅ Retrieved 187 vector chunks
📊 Vector dimensions: 19232
📄 First chunk preview:
   Chunk index: 0
   Content: [Encrypted content]...


## 2. 🤖 Document-Aware Agent Creation


In [5]:
# 2.1 Define schema for document-aware responses
print("=== 🤖 Creating Document-Aware Agent ===")

class DocumentAwareResponse(BaseModel):
    """Schema for document-aware agent responses"""
    response: str = Field(description="Main response using document context")
    document_citations: List[str] = Field(description="Document sections referenced")
    confidence: float = Field(description="Confidence score", ge=0.0, le=1.0)
    context_quality: str = Field(description="Quality of document context: excellent, good, limited, none")
    suggestions: List[str] = Field(description="Additional suggestions based on documents")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Response metadata")

print("✅ Document-aware schema defined")


=== 🤖 Creating Document-Aware Agent ===
✅ Document-aware schema defined


In [8]:
# 2.2 Create document-aware agent function
def create_document_aware_agent(user_id: str, document_id: Optional[str] = None):
    """Create an agent that uses document context for responses"""
    
    async def document_aware_agent(user_input: str) -> DocumentAwareResponse:
        """Agent that retrieves document context and provides informed responses"""
        
        # Step 1: Retrieve document context
        document_context = "No documents available for context."
        context_quality = "none"
        citations = []
        
        try:
            # Create filter for user's documents
            filter_criteria = VectorFilter(user_id=user_id)
            if document_id:
                filter_criteria.document_id = document_id
            
                                      # Retrieve relevant vectors
            vectors = await vector_tool.get_vectors_by_filter(filter_criteria)
            
            if vectors:
                # Extract text content
                document_text = vector_tool.get_text_content(vectors)
                document_summary = vector_tool.get_metadata_summary(vectors)
                
                context_quality = "excellent" if len(vectors) > 50 else "good" if len(vectors) > 10 else "limited"
                citations = [f"Document section {v.chunk_index}" for v in vectors[:3]]
            
        except Exception as e:
            logger.error(f"Failed to retrieve document context: {e}")
            context_quality = "none"
        
        # Step 2: Generate response with context (mock for demo)
        if context_quality != "none":
            response_text = f"Based on your uploaded documents, I can provide specific guidance for: {user_input}. Your policy includes relevant coverage options that I can reference."
            suggestions = [
                "Review your specific policy benefits section",
                "Check provider network details in your documents",
                "Consider the copay structure outlined in your plan"
            ]
            confidence = 0.9
        else:
            response_text = f"I can provide general guidance for: {user_input}. For personalized advice, please upload your insurance documents."
            suggestions = [
                "Upload your insurance policy documents",
                "Contact your insurance provider directly",
                "Review general healthcare options"
            ]
            confidence = 0.6
        
        return DocumentAwareResponse(
            response=response_text,
            document_citations=citations,
            confidence=confidence,
            context_quality=context_quality,
            suggestions=suggestions,
            metadata={
                "user_id": user_id,
                "document_id": document_id,
                "vectors_found": len(vectors) if 'vectors' in locals() else 0,
                "query_type": "document_aware"
            }
        )
    
    return document_aware_agent

print("✅ Document-aware agent factory created")


✅ Document-aware agent factory created


## 3. 🧪 Testing Document-Aware Responses


In [9]:
# 3.1 Test with user who has documents
print("=== 🧪 Testing Document-Aware Agent ===")

# Create agent for user with documents
agent_with_docs = create_document_aware_agent(
    user_id=test_user_id, 
    document_id=test_document_id
)

# Test queries
test_queries = [
    "I need to find a cardiologist. What does my insurance cover?",
    "What are my prescription drug benefits?",
    "Do I need a referral to see a specialist?"
]

print(f"\n🔍 Testing with user who has documents:")
for i, query in enumerate(test_queries, 1):
    print(f"\n--- Test {i}: {query} ---")
    
    try:
        result = await agent_with_docs(query)
        
        print(f"📝 Response: {result.response}")
        print(f"📚 Citations: {result.document_citations}")
        print(f"🎯 Confidence: {result.confidence}")
        print(f"📊 Context Quality: {result.context_quality}")
        print(f"💡 Suggestions: {result.suggestions[0] if result.suggestions else 'None'}")
        
    except Exception as e:
        print(f"❌ Test failed: {str(e)}")


2025-06-23 18:43:35,920 - INFO - Executing query: 
                SELECT 
                    id,
                    encrypted_chunk_text,
                    encrypted_chunk_metadata,
                    content_embedding,
                    chunk_index,
                    document_source_type,
                    user_id,
                    document_record_id,
                    regulatory_document_id,
                    encryption_key_id
                FROM document_vectors
                WHERE is_active = $1 AND user_id = $2 AND document_id = $3
                ORDER BY chunk_index ASC
                LIMIT $4
            
2025-06-23 18:43:35,920 - INFO - With parameters: [True, UUID('d64bfbbe-ff7f-4b51-b220-a0fa20756d9d'), UUID('d64bfbbe-ff7f-4b51-b220-a0fa20756d9d'), 1000]


=== 🧪 Testing Document-Aware Agent ===

🔍 Testing with user who has documents:

--- Test 1: I need to find a cardiologist. What does my insurance cover? ---


2025-06-23 18:43:36,980 - INFO - Retrieved 187 vector results
2025-06-23 18:43:36,993 - INFO - Executing query: 
                SELECT 
                    id,
                    encrypted_chunk_text,
                    encrypted_chunk_metadata,
                    content_embedding,
                    chunk_index,
                    document_source_type,
                    user_id,
                    document_record_id,
                    regulatory_document_id,
                    encryption_key_id
                FROM document_vectors
                WHERE is_active = $1 AND user_id = $2 AND document_id = $3
                ORDER BY chunk_index ASC
                LIMIT $4
            
2025-06-23 18:43:36,994 - INFO - With parameters: [True, UUID('d64bfbbe-ff7f-4b51-b220-a0fa20756d9d'), UUID('d64bfbbe-ff7f-4b51-b220-a0fa20756d9d'), 1000]


📝 Response: Based on your uploaded documents, I can provide specific guidance for: I need to find a cardiologist. What does my insurance cover?. Your policy includes relevant coverage options that I can reference.
📚 Citations: ['Document section 0', 'Document section 0', 'Document section 0']
🎯 Confidence: 0.9
📊 Context Quality: excellent
💡 Suggestions: Review your specific policy benefits section

--- Test 2: What are my prescription drug benefits? ---


2025-06-23 18:43:38,016 - INFO - Retrieved 187 vector results
2025-06-23 18:43:38,027 - INFO - Executing query: 
                SELECT 
                    id,
                    encrypted_chunk_text,
                    encrypted_chunk_metadata,
                    content_embedding,
                    chunk_index,
                    document_source_type,
                    user_id,
                    document_record_id,
                    regulatory_document_id,
                    encryption_key_id
                FROM document_vectors
                WHERE is_active = $1 AND user_id = $2 AND document_id = $3
                ORDER BY chunk_index ASC
                LIMIT $4
            
2025-06-23 18:43:38,027 - INFO - With parameters: [True, UUID('d64bfbbe-ff7f-4b51-b220-a0fa20756d9d'), UUID('d64bfbbe-ff7f-4b51-b220-a0fa20756d9d'), 1000]


📝 Response: Based on your uploaded documents, I can provide specific guidance for: What are my prescription drug benefits?. Your policy includes relevant coverage options that I can reference.
📚 Citations: ['Document section 0', 'Document section 0', 'Document section 0']
🎯 Confidence: 0.9
📊 Context Quality: excellent
💡 Suggestions: Review your specific policy benefits section

--- Test 3: Do I need a referral to see a specialist? ---


2025-06-23 18:43:38,388 - INFO - Retrieved 187 vector results


📝 Response: Based on your uploaded documents, I can provide specific guidance for: Do I need a referral to see a specialist?. Your policy includes relevant coverage options that I can reference.
📚 Citations: ['Document section 0', 'Document section 0', 'Document section 0']
🎯 Confidence: 0.9
📊 Context Quality: excellent
💡 Suggestions: Review your specific policy benefits section


In [None]:
# 3.2 Test with user who has no documents
print(f"\n🔍 Testing with user who has no documents:")

# Create agent for user without documents
agent_no_docs = create_document_aware_agent(user_id="no-documents-user-id")

# Test same query
test_query = "I need to find a cardiologist. What does my insurance cover?"
print(f"\n--- Query: {test_query} ---")

try:
    result = await agent_no_docs(test_query)
    
    print(f"📝 Response: {result.response}")
    print(f"📚 Citations: {result.document_citations}")
    print(f"🎯 Confidence: {result.confidence}")
    print(f"📊 Context Quality: {result.context_quality}")
    print(f"💡 Suggestions: {result.suggestions}")
    
except Exception as e:
    print(f"❌ Test failed: {str(e)}")
