# Graph RAG Document Analyzer - Exploration Notebook

This notebook is for exploring and testing the Graph RAG functionality interactively.

## 1. Setup and Imports

In [None]:
# Make the project's source directory importable. The path is relative, so it
# is resolved against the kernel's current working directory.
# NOTE(review): presumably loader, graph_builder and chains live in ../src — confirm.
import sys
import os
sys.path.append('../src')

# Load environment variables from ../.env (the Neo4j connection cell's error
# message points at this file for credentials).
from dotenv import load_dotenv
load_dotenv('../.env')

# Project modules used by the cells that follow. These imports are placed
# after the sys.path.append call above, which adds ../src to the search path.
from loader import DocumentLoader
from graph_builder import get_graph_connection, GraphBuilder
from chains import create_graph_rag_chain

print("✅ Setup complete!")

## 2. Document Loading and Chunking

In [None]:
# Load a sample document, split it into chunks, and print summary statistics
# plus a preview of the first chunk.
# Replace with your actual PDF path
document_path = "../data/sample_document.pdf"  # Update this path

# Only attempt loading when the file is present; otherwise explain how to fix it.
if os.path.exists(document_path):
    loader = DocumentLoader(chunk_size=1000, chunk_overlap=200)
    chunks = loader.load_and_chunk_document(document_path)

    # Get document statistics
    stats = loader.get_document_stats(chunks)
    print("Document loaded successfully!")
    print(f"Total chunks: {stats['total_chunks']}")
    print(f"Total characters: {stats['total_chars']}")
    print(f"Average chunk size: {stats['avg_chunk_size']}")
    print(f"Source pages: {stats['source_pages']}")

    # Preview the first chunk. If the loader returned no chunks, indexing
    # chunks[0] would raise IndexError, so report that case instead.
    if chunks:
        first_chunk = chunks[0]
        preview = first_chunk.page_content[:500]
        # Append the ellipsis only when text was actually cut off.
        if len(first_chunk.page_content) > 500:
            preview += "..."
        print("\n--- First Chunk Preview ---")
        print(preview)
        print(f"\nChunk metadata: {first_chunk.metadata}")
    else:
        print("⚠️ The document produced no chunks; nothing to preview.")

else:
    print(f"❌ Document not found at {document_path}")
    print("Please add a PDF file to the data/ directory and update the path above.")

## 3. Graph Database Connection

In [None]:
# Open the Neo4j connection and report the schema it currently holds.
try:
    graph_connection = get_graph_connection()
    print("✅ Successfully connected to Neo4j!")

    # Fetch the schema once, then print each section under its own heading.
    schema = graph_connection.get_schema()
    print("\nCurrent schema:")
    for heading, schema_key in (
        ("Node labels", 'node_labels'),
        ("Relationships", 'relationships'),
        ("Properties", 'properties'),
    ):
        print(f"{heading}: {schema[schema_key]}")

except Exception as connection_error:
    # Any failure in this cell (connecting or reading the schema) lands here.
    print(f"❌ Graph connection failed: {connection_error}")
    print("Please ensure Neo4j is running and credentials are correct in .env file")

## 4. Build Knowledge Graph

In [None]:
# Turn the loaded chunks into a knowledge graph. Needs both `chunks` and
# `graph_connection` from the earlier cells, so bail out early without them.
if 'chunks' not in locals() or 'graph_connection' not in locals():
    print("❌ Please run the previous cells first to load document and connect to graph")
else:
    # Ask before wiping whatever is already stored in the graph.
    clear_existing = input("Clear existing graph? (y/n): ").lower() == 'y'
    if clear_existing:
        graph_connection.clear_database()
        print("🗑️ Cleared existing graph")

    builder = GraphBuilder(graph_connection)

    # Keep test runs quick: only the first five chunks are processed.
    test_chunks = chunks[:5]
    print(f"Building graph from {len(test_chunks)} chunks...")

    graph_stats = builder.build_graph_from_document(test_chunks)

    # Summarise what the builder reported.
    print("\n✅ Graph building complete!")
    print(f"Total nodes created: {graph_stats['total_nodes']}")
    print(f"Total relationships created: {graph_stats['total_relationships']}")
    print(f"Chunks processed: {graph_stats['chunks_processed']}")

## 5. Test Graph RAG Queries

In [None]:
# Instantiate the Graph RAG chain and display the graph schema it reports.
try:
    rag_chain = create_graph_rag_chain()
    print("✅ Graph RAG chain created successfully!")

    # Heading first, then the schema text as returned by the chain.
    print("\nCurrent graph schema:")
    graph_schema_text = rag_chain.get_graph_schema()
    print(graph_schema_text)

except Exception as chain_error:
    print(f"❌ Failed to create RAG chain: {chain_error}")

In [None]:
# Run a fixed set of sample questions through the RAG chain.
test_questions = [
    "What entities are mentioned in the document?",
    "What are the main topics discussed?",
    "What relationships exist between the entities?"
]

# Without a chain there is nothing to query, so handle that case up front.
if 'rag_chain' not in locals():
    print("❌ Please run the previous cell to create the RAG chain first")
else:
    for question_number, question in enumerate(test_questions, start=1):
        print(f"\n--- Question {question_number}: {question} ---")
        try:
            result = rag_chain.query(question)
            print(f"Answer: {result['answer']}")

            # Show the Cypher only when the result carries a non-empty one.
            generated_cypher = result.get('generated_cypher')
            if generated_cypher:
                print(f"\nGenerated Cypher: {generated_cypher}")

        except Exception as query_error:
            # One failing question should not stop the remaining ones.
            print(f"❌ Error: {query_error}")

In [None]:
# Interactive query testing: prompt for questions until the user types 'quit'.
# The banner is printed only when a chain exists, so the "Type 'quit'" hint is
# never shown when there is no prompt loop to exit from.
if 'rag_chain' in locals():
    print("Interactive Query Testing")
    print("Type 'quit' to exit")

    while True:
        try:
            # Strip surrounding whitespace so " quit " is still recognised.
            question = input("\nEnter your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt ends the session without a traceback.
            print("\nExiting interactive session")
            break

        if question.lower() == 'quit':
            break
        if not question:
            # Nothing was typed; ask again rather than querying with an empty string.
            continue

        try:
            result = rag_chain.query(question)
            print(f"\nAnswer: {result['answer']}")

            # Show debug info if available
            if result.get('generated_cypher'):
                show_debug = input("Show debug info? (y/n): ").strip().lower() == 'y'
                if show_debug:
                    print(f"\nCypher Query: {result['generated_cypher']}")
                    print(f"Raw Context: {result.get('raw_context', 'N/A')}")

        except Exception as e:
            # Report the failure and keep the session alive for the next question.
            print(f"❌ Error: {e}")
else:
    print("❌ Please create the RAG chain first")

## 6. Cleanup

In [None]:
# Close the graph connection if an earlier cell created one.
if 'graph_connection' in locals():
    graph_connection.close()
    # Remove the name so that re-running this cell does not close the connection
    # a second time, and so the `'graph_connection' in locals()` checks no
    # longer accept a connection that has already been closed.
    del graph_connection
    print("✅ Graph connection closed")