# GraphRAG Patterns

This notebook demonstrates key GraphRAG patterns that combine:
1. Graph-based context retrieval
2. Vector embeddings for semantic search
3. LLM integration for natural language understanding
4. RAG-optimized prompting

In [None]:
from neo4j import GraphDatabase
from dotenv import load_dotenv
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Neo4jVector
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain

# Load environment variables (via python-dotenv) before reading any settings
load_dotenv()

# Initialize Neo4j connection from NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD
neo4j_uri = os.getenv('NEO4J_URI')
neo4j_auth = (os.getenv('NEO4J_USERNAME'), os.getenv('NEO4J_PASSWORD'))
driver = GraphDatabase.driver(neo4j_uri, auth=neo4j_auth)

# Initialize the shared embedding model and the chat model (temperature set to 0)
embeddings = OpenAIEmbeddings()
llm = ChatOpenAI(temperature=0)

## Pattern 1: Hybrid Graph-Vector Search

This pattern combines graph traversal with semantic search to find relevant information:

In [None]:
def hybrid_search(query, k=3):
    """Run a vector search, then expand the hits through the graph.

    The ``k`` documents most similar to ``query`` are fetched together with
    their similarity scores. Each hit is then used as the start of a graph
    traversal of one or two hops that ends on a non-``Document`` node.

    Args:
        query: Natural-language search text.
        k: Number of documents to request from the vector search.

    Returns:
        A list of dicts (``Result.data()``), one per distinct path, with the
        keys ``path`` and ``relevance``. ``relevance`` is the vector-search
        score of the document the path starts from. The list is empty when
        the vector search returns nothing.

    Raises:
        KeyError: If a returned document has no ``id`` entry in its metadata.
    """
    # NOTE(review): LangChain's documented ``from_existing_index`` signature
    # appears to require ``index_name`` and to take url/username/password
    # rather than ``driver`` -- confirm against the installed version.
    vector_store = Neo4jVector.from_existing_index(
        embeddings,
        driver=driver,
        node_label='Document',
        embedding_node_property='embedding',
        text_node_property='content'
    )

    # Step 1: Semantic search. Keep the score: it is not stored on the node,
    # so it has to be handed to the Cypher query explicitly.
    scored_docs = vector_store.similarity_search_with_score(query, k=k)

    # assumes every Document node carries an `id` property that shows up in
    # the search metadata -- TODO confirm against the graph schema
    hits = [
        {'id': doc.metadata['id'], 'score': score}
        for doc, score in scored_docs
    ]
    if not hits:
        # Nothing to expand; skip the round trip to the database.
        return []

    # Step 2: Graph traversal to find connected entities
    with driver.session() as session:
        result = session.run("""
        // Re-attach each vector-search score to its document
        UNWIND $hits AS hit
        MATCH (d:Document {id: hit.id})
        WITH d, hit.score AS relevance

        // Find connected entities within 2 hops
        MATCH path = (d)-[*1..2]-(related)
        WHERE NOT related:Document  // Exclude other documents

        // Return unique paths and relevance score
        RETURN DISTINCT path, relevance
        ORDER BY relevance DESC
        """, hits=hits)

        # Materialize inside the session; the Result is unusable once it closes.
        return result.data()

# Example usage: run the hybrid search for a sample support question
question = "How do I troubleshoot laptop connectivity issues?"
results = hybrid_search(question)
print(f"Found {len(results)} relevant paths")

## Pattern 2: Context-Aware RAG

This pattern enhances RAG with graph-based context:

In [None]:
def context_aware_rag(query, context_type='product'):
    """Answer ``query`` using graph context plus vector-retrieved documents.

    Steps:
      1. Ask the LLM for the entity names mentioned in ``query``.
      2. Look up nodes labelled ``context_type`` whose ``name`` contains one
         of those entity names, and collect the content of the documents
         linked to them.
      3. Run a vector search over ``Document`` nodes, filtered by
         ``context_type``.
      4. Ask the LLM to answer from both sources.

    Args:
        query: The user's question.
        context_type: Node label to search for context. Labels are
            case-sensitive in Neo4j, so this must match the graph's label
            exactly.

    Returns:
        The LLM's answer as returned by ``llm.predict``.

    Raises:
        ValueError: If ``context_type`` is not a plain identifier. It is
            interpolated into the Cypher text, so anything else is rejected.
    """
    import re

    # Labels cannot be passed as Cypher parameters, so the label has to be
    # spliced into the query text. Restrict it to an identifier to keep that
    # splice safe.
    if not re.fullmatch(r'[A-Za-z_][A-Za-z0-9_]*', context_type):
        raise ValueError(f"Invalid context_type label: {context_type!r}")

    # Step 1: Extract entity names from the query. A comma-separated reply is
    # requested so it can be split into individual names; a free-text reply
    # could never match `e.name CONTAINS ...`.
    extraction_prompt = (
        "Extract the key entity names from the question below. "
        "Reply with a comma-separated list of names only.\n\n"
        f"Question: {query}"
    )
    raw_entities = llm.predict(extraction_prompt)
    entity_names = [
        name.strip() for name in raw_entities.split(',') if name.strip()
    ]

    # Step 2: Graph traversal for context
    with driver.session() as session:
        context = session.run(f"""
        // Find relevant context based on entity type
        MATCH (e:`{context_type}`)
        WHERE any(name IN $entities WHERE e.name CONTAINS name)

        // Get connected documents with embeddings
        MATCH (e)-[r]-(d:Document)
        WHERE d.embedding IS NOT NULL

        // Return the entity name and document text only; returning whole
        // nodes would drag the embedding vectors into the prompt
        RETURN e.name AS entity, collect(DISTINCT d.content) AS docs
        """, entities=entity_names).data()  # read before the session closes

    # Step 3: Semantic search within context
    # NOTE(review): LangChain's documented ``from_existing_index`` signature
    # appears to require ``index_name`` and to take url/username/password
    # rather than ``driver`` -- confirm against the installed version.
    vector_store = Neo4jVector.from_existing_index(
        embeddings,
        driver=driver,
        node_label='Document',
        embedding_node_property='embedding',
        text_node_property='content'
    )

    relevant_docs = vector_store.similarity_search(
        query,
        k=3,
        filter={"context": context_type}
    )

    # Step 4: Generate response with context
    response = llm.predict(
        f"Based on these documents: {relevant_docs}\n\n"
        f"And this context: {context}\n\n"
        f"Answer: {query}"
    )

    return response

# Example usage: ask a product question against nodes labelled 'Product'
product_question = "What are common issues with the Laptop Pro model?"
answer = context_aware_rag(product_question, context_type='Product')
print(answer)

## Pattern 3: Memory-Augmented GraphRAG

This pattern maintains conversation history in a graph structure:

In [None]:
def memory_augmented_rag(query, session_id):
    """Answer ``query`` using the conversation history stored in the graph.

    The conversation is modelled as ``(:Session)-[:HAS_QUERY]->(:Query)``.
    Each ``Query`` node holds the question text, a timestamp, a unique ``id``
    and, once answered, the response.

    Steps:
      1. Read the earlier turns of the session, oldest first.
      2. Store the new query, creating the session node if needed.
      3. Run a vector search on the new query plus the earlier queries.
      4. Ask the LLM for a response that takes the history into account.
      5. Store the response on the query node created in step 2.

    Args:
        query: The user's current message.
        session_id: Identifier of the conversation session.

    Returns:
        The LLM's response as returned by ``llm.predict``.
    """
    import uuid

    # A unique id lets step 5 update exactly this query node, even when the
    # same text was asked earlier in the session.
    query_id = str(uuid.uuid4())

    with driver.session() as session:
        # Step 1: Get conversation history. It is read before the new query
        # is stored, so the current query is not duplicated in the history.
        # Each turn is collected as one map: two separate collect() calls
        # would drop null responses and misalign queries with responses.
        record = session.run("""
        MATCH (s:Session {id: $session_id})-[:HAS_QUERY]->(q)
        WITH q ORDER BY q.timestamp
        RETURN collect({query: q.text, response: q.response}) as turns
        """, {"session_id": session_id}).single()
        history = record["turns"] if record is not None else []

        # Step 2: Store query in conversation graph. MERGE creates the
        # session on first use. Parameters go in a dict because `query` is
        # also the name of Session.run's first positional parameter, so it
        # cannot be passed as a keyword argument.
        session.run("""
        MERGE (s:Session {id: $session_id})
        CREATE (q:Query {id: $query_id, text: $query, timestamp: datetime()})
        CREATE (s)-[:HAS_QUERY]->(q)
        """, {"session_id": session_id, "query_id": query_id, "query": query})

    # Step 3: Combine with vector search
    # NOTE(review): LangChain's documented ``from_existing_index`` signature
    # appears to require ``index_name`` and to take url/username/password
    # rather than ``driver`` -- confirm against the installed version.
    vector_store = Neo4jVector.from_existing_index(
        embeddings,
        driver=driver,
        node_label='Document',
        embedding_node_property='embedding',
        text_node_property='content'
    )

    previous_queries = [turn["query"] for turn in history if turn["query"]]
    relevant_docs = vector_store.similarity_search(
        " ".join([query, *previous_queries]),
        k=3
    )

    # Step 4: Generate response with history and context
    response = llm.predict(
        f"Conversation history: {history}\n\n"
        f"Relevant documents: {relevant_docs}\n\n"
        f"Current query: {query}\n\n"
        f"Generate a response that maintains conversation context."
    )

    # Step 5: Store response in graph, on the node created in step 2 only
    with driver.session() as session:
        session.run("""
        MATCH (s:Session {id: $session_id})-[:HAS_QUERY]->(q:Query {id: $query_id})
        SET q.response = $response
        """, {
            "session_id": session_id,
            "query_id": query_id,
            "response": response,
        })

    return response

# Example usage: send a follow-up message within an existing session id
follow_up = "Tell me more about that issue"
response = memory_augmented_rag(follow_up, session_id="session_123")
print(response)

## Key GraphRAG Concepts Demonstrated

1. **Hybrid Search**: Combining vector similarity with graph traversal
2. **Context-Aware RAG**: Using graph relationships to enhance context
3. **Memory Augmentation**: Storing conversation state in graph structure
4. **Entity Extraction**: Using LLMs to identify graph entities
5. **Semantic Search**: Vector embeddings for content similarity