# Test Retrieval

In [4]:
import sys
import os
import json
from pprint import pprint
sys.path.append('..')

from langchain_core.messages import HumanMessage
from app.core.config import settings
from app.services.document_service import document_service
from app.core.logging import get_logger
from app.services.rag_service_agentic import RAGServiceAgentic

In [5]:
rag_service_agentic = RAGServiceAgentic()

query = "What are the best practices for building a recursive function?"
thread_id = "002"

# retrieve content from pinecone
initial_state = {
    "messages": [HumanMessage(content=query)],
    "user_id": "5afbada6-cd83-4b4d-85cf-de9537c2d89e"
}

config = {"configurable": {"thread_id": thread_id}}

result = await rag_service_agentic.graph.ainvoke(initial_state, config=config)

2025-06-10 21:06:33 [info     ] Initialized Agentic RAG Service
2025-06-10 21:06:35 [info     ] Retrieved documents            num_docs=4 user_id=5afbada6-cd83-4b4d-85cf-de9537c2d89e


In [31]:
# Query the user's vector store directly, using the `query` defined in an
# earlier cell, so the raw search results can be inspected.
user_id = "5afbada6-cd83-4b4d-85cf-de9537c2d89e"
user_vector_store = document_service._get_vector_store(user_id)
retrieved_docs = user_vector_store.similarity_search(query, k=4)

# Each result exposes `id`, `metadata` and `page_content`.
# Notes kept from the original cell: default to page_content; get id or
# metadata upon request.
# Debugging aids (uncomment as needed):
# print(dir(retrieved_docs[0]))  # list every attribute of a document
# print(retrieved_docs[0])       # dump one whole document

# Visual separator: blank line, a space-padded rule, two trailing newlines.
print("\n " + "_" * 50 + " \n")

# One report per document: identifiers, selected metadata, then the chunk text.
for i, doc in enumerate(retrieved_docs):
    print(
        f"\n=== Document {i+1} ===",
        f"Doc ID: {doc.id}",
        f"ID: {doc.metadata.get('id', 'N/A')}",
        f"Source: {doc.metadata.get('source', 'N/A')}",
        f"Page: {doc.metadata.get('page', 'N/A')}",
        "\nContent:",
        doc.page_content,
        "\n" + "=" * 50,
        sep="\n",
    )

{'creationdate': '2020-05-12T09:19:17+03:00', 'creator': 'PyPDF', 'moddate': '2020-05-12T09:24:22+03:00', 'page': 440.0, 'page_label': '417', 'producer': 'PyPDF', 'source': '/tmp/tmpm4y371jw.pdf', 'start_index': 848.0, 'total_pages': 469.0}

 __________________________________________________ 


=== Document 1 ===
Doc ID: Brett Slatkin - Effective Python_ 90 Specific Ways to Write Better Python-Addison-Wesley Professional (2019).pdf_1002
ID: N/A
Source: /tmp/tmpm4y371jw.pdf
Page: 440.0

Content:
Behaviors in TestCase Subclasses” for how to avoid that). However, 
these downsides are often better than the alternative of restructuring 
your entire program.
Things to Remember
✦ Circular dependencies happen when two modules must call into 
each other at import time. They can cause your program to crash at 
startup.
✦ The best way to break a circular dependency is by refactoring 
mutual dependencies into a separate module at the bottom of the 
dependency tree.
✦ Dynamic imports are the simpl

In [27]:
# Dump every retrieved document as flat "key: value" lines, framed by
# 60-character rules, so individual fields are easy to look up.
for i, doc in enumerate(retrieved_docs):
    # Banner: blank line, rule, title, rule.
    print("\n" + "=" * 60, f"DOCUMENT {i+1}", "=" * 60, sep="\n")

    # Fields that live on the document object itself.
    print(f"id: {doc.id}", f"type: {type(doc).__name__}", sep="\n")

    # Every metadata entry, one per line (prints nothing when metadata is empty).
    for key, value in doc.metadata.items():
        print(f"{key}: {value}")

    # Chunk text, then a closing rule followed by a blank line.
    print(f"page_content: {doc.page_content}", "=" * 60 + "\n", sep="\n")


DOCUMENT 1
id: Brett Slatkin - Effective Python_ 90 Specific Ways to Write Better Python-Addison-Wesley Professional (2019).pdf_1002
type: Document
creationdate: 2020-05-12T09:19:17+03:00
creator: PyPDF
moddate: 2020-05-12T09:24:22+03:00
page: 440.0
page_label: 417
producer: PyPDF
source: /tmp/tmpm4y371jw.pdf
start_index: 848.0
total_pages: 469.0
page_content: Behaviors in TestCase Subclasses” for how to avoid that). However, 
these downsides are often better than the alternative of restructuring 
your entire program.
Things to Remember
✦ Circular dependencies happen when two modules must call into 
each other at import time. They can cause your program to crash at 
startup.
✦ The best way to break a circular dependency is by refactoring 
mutual dependencies into a separate module at the bottom of the 
dependency tree.
✦ Dynamic imports are the simplest solution for breaking a circular 
dependency between modules while minimizing refactoring and 
complexity.
Migrate Usage
It’s natural

In [None]:
# Utility function for pretty printing messages
def pretty_print_messages(messages, format_type="simple"):
    """
    Pretty print LangChain messages in different formats.

    Every message must expose a ``content`` attribute. The optional
    attributes ``tool_calls``, ``additional_kwargs``, ``response_metadata``
    and ``usage_metadata`` are used when present and non-empty, and skipped
    otherwise (including when they are ``None``).

    Args:
        messages: Iterable of LangChain message objects.
        format_type: One of
            "simple"   - index, class name and content of each message.
            "detailed" - as "simple", plus token usage and model name
                         when the message carries them.
            "json"     - a JSON array holding type, content, tool calls
                         and metadata for each message.

    Raises:
        ValueError: If ``format_type`` is not one of the values above.
            Raised before anything is printed.
    """
    supported_formats = ("simple", "detailed", "json")
    if format_type not in supported_formats:
        # Fail loudly: a mistyped format used to print only the separator line.
        raise ValueError(
            f"Unknown format_type {format_type!r}; "
            f"expected one of {supported_formats}"
        )

    if format_type == "simple":
        print("=== MESSAGES ===")
        for i, message in enumerate(messages):
            print(f"\n[{i+1}] {message.__class__.__name__}: {message.content}")

    elif format_type == "detailed":
        print("=== MESSAGES (Detailed) ===")
        for i, message in enumerate(messages):
            print(f"\n[{i+1}] {message.__class__.__name__}:")
            print(f"  Content: {message.content}")

            # Token usage, if the message carries any.
            usage = getattr(message, 'usage_metadata', None)
            if usage:
                print(f"  Tokens: {usage.get('input_tokens', 0)} in, {usage.get('output_tokens', 0)} out")

            # Model name, if the message carries response metadata.
            response_metadata = getattr(message, 'response_metadata', None)
            if response_metadata:
                model = response_metadata.get('model_name', 'Unknown')
                print(f"  Model: {model}")

    elif format_type == "json":
        messages_dict = []
        for message in messages:
            msg_dict = {
                'type': message.__class__.__name__,
                'content': message.content,
            }

            tool_calls = getattr(message, 'tool_calls', None)
            if tool_calls:
                msg_dict['tool_calls'] = tool_calls

            # Alternative location for tool calls. When both are present this
            # entry replaces the value read from `tool_calls` above.
            # getattr + truthiness also covers additional_kwargs being None.
            additional_kwargs = getattr(message, 'additional_kwargs', None)
            if additional_kwargs and 'tool_calls' in additional_kwargs:
                msg_dict['tool_calls'] = additional_kwargs['tool_calls']

            response_metadata = getattr(message, 'response_metadata', None)
            if response_metadata:
                msg_dict['response_metadata'] = response_metadata

            usage_metadata = getattr(message, 'usage_metadata', None)
            if usage_metadata:
                msg_dict['usage_metadata'] = usage_metadata

            messages_dict.append(msg_dict)

        # default=str stringifies any value json cannot serialise natively.
        print(json.dumps(messages_dict, indent=2, default=str))

    # Closing separator, printed for every format.
    print("=" * 50)

# Show the message history returned by the graph run.
# format_type may be "simple", "detailed" or "json".
pretty_print_messages(result["messages"], format_type="json")


[
  {
    "type": "HumanMessage",
    "content": "What are the best practices for building a recursive function?"
  },
  {
    "type": "AIMessage",
    "content": "",
    "tool_calls": [
      {
        "id": "call_zThO6NitDx5RSuLeYbZjBnVa",
        "function": {
          "arguments": "{\"query\":\"best practices for building a recursive function\"}",
          "name": "retrieve_knowledge_base"
        },
        "type": "function"
      }
    ],
    "response_metadata": {
      "token_usage": {
        "completion_tokens": 22,
        "prompt_tokens": 189,
        "total_tokens": 211,
        "completion_tokens_details": {
          "accepted_prediction_tokens": 0,
          "audio_tokens": 0,
          "reasoning_tokens": 0,
          "rejected_prediction_tokens": 0
        },
        "prompt_tokens_details": {
          "audio_tokens": 0,
          "cached_tokens": 0
        }
      },
      "model_name": "gpt-4o-mini-2024-07-18",
      "system_fingerprint": "fp_34a54ae93c",
      