# Graph RAG with Neo4j, LlamaIndex & Llama 3.2

Complete skeleton for building a Graph RAG system.

## Overview
This notebook demonstrates how to build a Graph RAG system that combines:
- **Neo4j**: Graph database for storing knowledge graphs
- **LlamaIndex**: Framework for RAG orchestration
- **Llama 3.2**: Language model for generation and entity extraction

## 1. Environment Setup

Install dependencies and configure the environment.

In [None]:
# Install required packages
!pip install llama-index neo4j llama-index-graph-stores-neo4j
!pip install llama-index-llms-ollama llama-index-embeddings-huggingface

In [None]:
# Import libraries
import os
import json
from typing import List, Dict, Any

from llama_index.core import KnowledgeGraphIndex, ServiceContext, Document
from llama_index.graph_stores.neo4j import Neo4jGraphStore
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from neo4j import GraphDatabase

## 2. Neo4j Configuration

Set up Neo4j connection and graph store.

In [None]:
# Neo4j connection parameters.
# Each value is read from an environment variable of the same name so real
# credentials do not have to be written into the notebook; the literals are
# the fallbacks used when the variable is not set.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "your_password")

# Initialize Neo4j graph store (reused by the graph-construction cell)
graph_store = Neo4jGraphStore(
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    url=NEO4J_URI,
    database="neo4j"
)

print("Neo4j connection established")

## 3. Llama 3.2 & LlamaIndex Setup

Configure the language model and embedding model.

In [None]:
# Initialize Llama 3.2 via Ollama
# request_timeout is presumably expressed in seconds — TODO confirm against the Ollama integration docs.
llm = Ollama(model="llama3.2", request_timeout=120.0)

# Initialize embedding model
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create service context bundling the LLM, the embedding model and the chunk size.
# It is handed to KnowledgeGraphIndex.from_documents in the graph-construction cell.
# NOTE(review): ServiceContext is reported as deprecated in newer LlamaIndex releases
# in favour of the global `Settings` object — confirm against the installed version.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    chunk_size=512
)

print("LLM and embedding models configured")

## 4. Data Loading & Preparation

Load and prepare your documents for graph construction.

In [None]:
# Load your data (example with JSON files)
def load_documents(file_paths: List[str]) -> List[Document]:
    """Read JSON files and wrap each one in a ``Document``.

    Each file is parsed as JSON and re-serialized as indented text, which
    becomes the document body. The originating path is stored under the
    ``"source"`` metadata key.

    Args:
        file_paths: Paths of UTF-8 encoded JSON files to load, in order.

    Returns:
        One ``Document`` per input path, in the same order as ``file_paths``.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    documents = []

    for file_path in file_paths:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # ensure_ascii=False keeps non-ASCII characters (e.g. accented names)
        # as real characters instead of \uXXXX escapes, so the text handed to
        # the LLM and the embedding model stays human-readable.
        text_content = json.dumps(data, indent=2, ensure_ascii=False)
        documents.append(Document(text=text_content, metadata={"source": file_path}))

    return documents

# Example: Load your sport data
# NOTE: these are absolute paths into one specific home directory; replace them
# with the location of your own JSON files before running this cell.
file_paths = [
    "/home/alvar/CascadeProjects/windsurf-project/RAG/data/raw/sport/la_liga_2023_teams.json",
    "/home/alvar/CascadeProjects/windsurf-project/RAG/data/raw/sport/la_liga_2023_players.json"
]

# One Document is produced per file; the list feeds the graph-construction cell.
documents = load_documents(file_paths)
print(f"Loaded {len(documents)} documents")

## 5. Knowledge Graph Construction

Build the knowledge graph from your documents.

In [None]:
# StorageContext is used below but is missing from the notebook's import cell,
# so it is imported here to keep this cell from failing with a NameError.
from llama_index.core import StorageContext

# Create storage context with Neo4j graph store
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# Build knowledge graph index from the loaded documents.
# The graph goes to the Neo4j-backed storage context; the LLM and embedding
# model come from service_context.
kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
    max_triplets_per_chunk=10,
    include_embeddings=True
)

print("Knowledge graph constructed and stored in Neo4j")

## 6. Graph Querying & Retrieval

Query the knowledge graph for information retrieval.

In [None]:
# Create query engine on top of the knowledge graph index.
# The keyword options are forwarded to kg_index.as_query_engine; see the
# LlamaIndex documentation for their exact retrieval/synthesis semantics.
# This engine is also the one used by evaluate_query_performance further down.
query_engine = kg_index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5
)

# Example queries
queries = [
    "What teams are in La Liga 2023?",
    "Who are the top players in Real Madrid?",
    "What is the relationship between Barcelona and their players?"
]

# Run each question in turn and print the question followed by the answer.
for query in queries:
    print(f"\nQuery: {query}")
    response = query_engine.query(query)
    print(f"Response: {response}")
    print("-" * 50)

## 7. Advanced Graph Operations

Perform advanced graph operations and analysis.

In [None]:
# Custom Cypher queries
def run_cypher_query(query: str) -> List[Dict[str, Any]]:
    """Run a Cypher statement against Neo4j and return its rows as dicts.

    A driver is opened with the notebook-level ``NEO4J_URI``,
    ``NEO4J_USERNAME`` and ``NEO4J_PASSWORD`` and is closed again before the
    function returns.

    Args:
        query: The Cypher statement to execute.

    Returns:
        One dict per result record, as produced by ``record.data()``.
    """
    # Using the driver as a context manager closes it on exit, including when
    # the query raises. Previously every call left a driver (and its
    # connections) open.
    with GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD)) as driver:
        with driver.session() as session:
            # Materialize the rows while the session is still open.
            return [record.data() for record in session.run(query)]

# Example: Find all entities and their relationships.
# coalesce(...) falls back to the `id` property when a node has no `name`,
# so such nodes are listed by their id instead of being printed as "None".
# NOTE(review): LlamaIndex's Neo4jGraphStore appears to key the nodes it
# creates by `id` rather than `name` — confirm against the stored graph.
cypher_query = """
MATCH (n)-[r]->(m)
RETURN coalesce(n.name, n.id) as source, type(r) as relationship, coalesce(m.name, m.id) as target
LIMIT 10
"""

results = run_cypher_query(cypher_query)
print("Graph relationships:")
for result in results:
    print(f"{result['source']} --{result['relationship']}--> {result['target']}")

## 8. Graph Visualization

Visualize the knowledge graph structure.

In [None]:
# Install visualization dependencies
!pip install networkx matplotlib pyvis

import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network

# Create network visualization
def visualize_graph_sample():
    """Render up to 20 relationships from Neo4j into an interactive HTML file.

    Fetches a sample of ``(source)-[relationship]->(target)`` rows through
    ``run_cypher_query``, adds them to a pyvis ``Network`` and writes the
    result to ``graph_visualization.html`` in the current working directory.
    """
    # coalesce(...) falls back to the `id` property when a node has no `name`.
    # NOTE(review): LlamaIndex's Neo4jGraphStore appears to key its nodes by
    # `id` rather than `name` — confirm against the stored graph.
    query = """
    MATCH (n)-[r]->(m)
    RETURN coalesce(n.name, n.id) as source, type(r) as relationship, coalesce(m.name, m.id) as target
    LIMIT 20
    """

    results = run_cypher_query(query)

    # Create network graph
    net = Network(height="600px", width="100%", bgcolor="#222222", font_color="white")

    for result in results:
        source, target = result['source'], result['target']
        # An endpoint with neither name nor id cannot serve as a node id,
        # so skip that row instead of handing None to pyvis.
        if source is None or target is None:
            continue
        net.add_node(source, label=str(source))
        net.add_node(target, label=str(target))
        net.add_edge(source, target, label=result['relationship'])

    # save_graph() only writes the HTML file, which is what the message below
    # reports. NOTE(review): show() is reported to fail in some pyvis releases
    # when the Network was not created with notebook=True — confirm if show()
    # is ever restored.
    net.save_graph("graph_visualization.html")
    print("Graph visualization saved as graph_visualization.html")

visualize_graph_sample()

## 9. Performance Evaluation

Evaluate the Graph RAG system performance.

In [None]:
# Evaluation metrics and testing
import time

def evaluate_query_performance(queries: List[str], engine=None):
    """Time each query against a query engine and collect simple metrics.

    Args:
        queries: Questions to run, in order.
        engine: Object exposing ``query(text)``. When omitted, the
            notebook-level ``query_engine`` is used, so existing calls keep
            working; pass another engine (for example the optimized one) to
            compare them.

    Returns:
        A list with one dict per query, in input order, holding:
        ``'query'`` (the question), ``'response_time'`` (elapsed seconds) and
        ``'response_length'`` (length of ``str(response)``).
    """
    if engine is None:
        # Looked up at call time so the default follows the notebook global.
        engine = query_engine

    results = []

    for query in queries:
        # perf_counter is monotonic and high-resolution; unlike time.time()
        # it is not affected by system clock adjustments during the run.
        start_time = time.perf_counter()
        response = engine.query(query)
        elapsed = time.perf_counter() - start_time

        results.append({
            'query': query,
            'response_time': elapsed,
            'response_length': len(str(response))
        })

    return results

# Test queries
test_queries = [
    "List all La Liga teams",
    "Who plays for Barcelona?",
    "What position does Messi play?"
]

# Each entry carries the 'query', 'response_time' and 'response_length' keys
# built by evaluate_query_performance.
performance_results = evaluate_query_performance(test_queries)

# Print a small report: the time with two decimals, the length in characters.
print("Performance Results:")
for result in performance_results:
    print(f"Query: {result['query']}")
    print(f"Response Time: {result['response_time']:.2f}s")
    print(f"Response Length: {result['response_length']} chars")
    print("-" * 40)

## 10. Optimization & Fine-tuning

Optimize the Graph RAG system for better performance.

In [None]:
# Graph optimization techniques

# 1. Index creation for faster queries
# IF NOT EXISTS lets this cell be re-run without failing on an existing index.
# NOTE(review): these statements index `name` on :Entity nodes and `type` on
# :RELATIONSHIP relationships — verify that the graph written by the graph
# store actually uses those labels/properties, otherwise the indexes are unused.
optimization_queries = [
    "CREATE INDEX entity_name IF NOT EXISTS FOR (n:Entity) ON (n.name)",
    "CREATE INDEX relationship_type IF NOT EXISTS FOR ()-[r:RELATIONSHIP]-() ON (r.type)"
]

# Best effort: a failing statement is reported and the loop moves on to the next one.
for opt_query in optimization_queries:
    try:
        run_cypher_query(opt_query)
        print(f"Executed: {opt_query}")
    except Exception as e:
        print(f"Error: {e}")

# 2. Query optimization parameters
# A second engine over the same index, with a different response mode and a
# smaller top-k than the engine created in the querying cell.
optimized_query_engine = kg_index.as_query_engine(
    include_text=True,
    response_mode="compact",
    embedding_mode="hybrid",
    similarity_top_k=3,  # Reduced for faster queries
    max_keywords_per_query=5
)

print("Graph optimization completed")

## 11. Cleanup & Resources

Clean up resources and provide next steps.

In [None]:
# Cleanup function
def cleanup_resources():
    """Close the notebook's Neo4j graph store, if one was created.

    Looks up the module-level ``graph_store``. When it exists, its ``close()``
    method is called; if the store has no ``close`` attribute, the ``close()``
    of the object exposed as ``graph_store.client`` is used instead. A missing
    or ``None`` store is ignored so the function is safe to call at any point.
    """
    store = globals().get("graph_store")

    if store is not None:
        close = getattr(store, "close", None)
        if close is None:
            # NOTE(review): Neo4jGraphStore appears to expose its driver as
            # `client` and may not define close() itself — confirm against
            # the installed version.
            close = getattr(getattr(store, "client", None), "close", None)
        if callable(close):
            close()

    print("Resources cleaned up")

# cleanup_resources()  # Uncomment when done

print("""
Graph RAG Tutorial Complete!

Next Steps:
1. Customize entity extraction for your domain
2. Implement custom relationship types
3. Add graph-based reasoning capabilities
4. Scale with distributed Neo4j setup
5. Integrate with production applications
""")