In [2]:
# https://sandeep14.medium.com/running-graphrag-locally-with-neo4j-and-ollama-text-format-371bf88b14b7

%pip install --upgrade --quiet  langchain langchain-community langchain-ollama langchain-experimental neo4j tiktoken yfiles_jupyter_graphs python-dotenv fastapi

[0mNote: you may need to restart the kernel to use updated packages.


# Step 1. Installing and Importing Required Libraries

In [46]:
import os
import time
from typing import Optional

from fastapi import FastAPI, HTTPException
from langchain_community.chat_models import ChatOllama
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from langchain_ollama import OllamaEmbeddings
from neo4j import GraphDatabase, Driver
from pydantic import BaseModel, Field

# Step 2: Setting Up the Neo4j Graph
First, we need to initialize the connection to the Neo4j graph:

In [66]:
def init_graph():
    """Open a connection to the Neo4j graph database.

    Connection settings are read from the environment so that real
    credentials never need to be written into the notebook:

    * ``NEO4J_URI``      -- defaults to ``bolt://localhost:7687``
    * ``NEO4J_USERNAME`` -- defaults to ``neo4j``
    * ``NEO4J_PASSWORD`` -- defaults to the ``your_password`` placeholder

    Returns:
        Neo4jGraph: graph wrapper connected with the resolved settings.
    """
    return Neo4jGraph(
        url=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
        username=os.environ.get("NEO4J_USERNAME", "neo4j"),
        # Placeholder default only; set NEO4J_PASSWORD for a real database.
        password=os.environ.get("NEO4J_PASSWORD", "your_password"),
    )

def clear_database(graph):
    """Wipe the Neo4j database: drop the vector index, then delete every node.

    Args:
        graph: connection object exposing ``query(cypher)``.

    A failure while dropping the index is reported on stdout and otherwise
    ignored; a failure while deleting nodes propagates to the caller.
    """
    drop_index_cypher = """
            DROP INDEX vector IF EXISTS
        """
    delete_all_cypher = """
        MATCH (n)
        DETACH DELETE n
    """

    # Step 1 (best effort): remove the index named "vector".
    try:
        graph.query(drop_index_cypher)
    except Exception as drop_error:
        print(f"Note: Vector index drop attempt resulted in: {drop_error}")

    # Step 2: DETACH DELETE removes each node together with its relationships.
    graph.query(delete_all_cypher)
    

# Step 3: Text to Graph Conversion with Ollama

We start with a block of text that we want to convert into a graph. For this example, we’ll use a biographical snippet of Marie Curie:

In [68]:
text = """
Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.
She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.
Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.
She was, in 1906, the first woman to become a professor at the University of Paris. 
"""

The `ingest_data` function handles the conversion of this text into graph documents, which are then added to the Neo4j database:

In [73]:
def ingest_data(text: str, llm_model: Optional[str] = None):
    """Ingest text into Neo4j as a knowledge graph and index it for hybrid search.

    The database is cleared first, so every call starts from an empty graph.

    Args:
        text: raw text to convert into graph documents.
        llm_model: name of the Ollama model used for graph extraction.
            When omitted, the name is resolved at call time from, in order:
            a notebook-level ``llm_model`` variable (if one exists), the
            ``OLLAMA_MODEL`` environment variable, then ``"llama3.1"``
            (an assumed fallback -- change it to a model you have pulled).

    Returns:
        tuple: ``(graph, retriever)`` -- the populated ``Neo4jGraph`` and a
        retriever built from the vector index over the ``Document`` nodes.
    """
    # Resolve the model lazily: using a module-level name as the default in
    # the signature raises NameError when this cell runs on a fresh kernel.
    model_name = (
        llm_model
        or globals().get("llm_model")
        or os.environ.get("OLLAMA_MODEL", "llama3.1")
    )

    # Start from a clean database.
    graph = init_graph()
    clear_database(graph)

    # Wrap the raw text so the transformer can consume it.
    documents = [Document(page_content=text)]

    # Let the LLM extract nodes and relationships from the text.
    llm = ChatOllama(model=model_name, temperature=0)
    llm_transformer = LLMGraphTransformer(llm=llm)
    graph_documents = llm_transformer.convert_to_graph_documents(documents)

    # Persist the extracted graph, linking entities to their source document.
    graph.add_graph_documents(
        graph_documents,
        baseEntityLabel=True,
        include_source=True,
    )

    # Embed the stored Document nodes. The connection settings are passed
    # explicitly (same defaults as init_graph) so this step no longer fails
    # when the NEO4J_* environment variables are not set.
    embed = OllamaEmbeddings(model="nomic-embed-text")
    vector_index = Neo4jVector.from_existing_graph(
        embedding=embed,
        url=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
        username=os.environ.get("NEO4J_USERNAME", "neo4j"),
        password=os.environ.get("NEO4J_PASSWORD", "your_password"),
        search_type="hybrid",
        node_label="Document",
        text_node_properties=["text"],
        embedding_node_property="embedding",
    )

    return graph, vector_index.as_retriever()

# Step 4: Querying Entities with Neo4j

Once we have the graph stored in Neo4j, we can run queries against it. The `graph_retriever` function takes in a user’s question, extracts entities using the LLM (via the chain built by `create_entity_chain`), and retrieves their relationships from the graph database:

In [74]:
# Structured-output schema for entity extraction: create_entity_chain passes
# this class to `with_structured_output`, and graph_retriever reads `.names`
# from the parsed result.
# NOTE(review): the docstring and the Field description below are presumably
# forwarded to the LLM as part of the schema, so treat their wording as
# runtime behaviour -- confirm before rephrasing them.
class Entities(BaseModel):
    """Model for extracted entities"""
    # Required field (`...` means no default): the extracted entity names.
    names: list[str] = Field(..., description="All entities from the text")

def create_entity_chain(llm_model: Optional[str] = None):
    """Build the chain that extracts entity names from a question.

    Args:
        llm_model: name of the Ollama model to use. When omitted, the name
            is resolved at call time from, in order: a notebook-level
            ``llm_model`` variable (if one exists), the ``OLLAMA_MODEL``
            environment variable, then ``"llama3.1"`` (an assumed fallback
            -- change it to a model you have pulled).

    Returns:
        A runnable that takes ``{"question": ...}`` and yields an
        ``Entities`` instance.
    """
    # Resolve the model lazily: using a module-level name as the default in
    # the signature raises NameError when this cell runs on a fresh kernel.
    model_name = (
        llm_model
        or globals().get("llm_model")
        or os.environ.get("OLLAMA_MODEL", "llama3.1")
    )

    prompt = ChatPromptTemplate.from_messages([
        ("system", "Extract organization and person entities from the text."),
        ("human", "Extract entities from: {question}")
    ])

    # JSON mode plus a structured-output schema yields a parsed Entities object.
    llm = OllamaFunctions(model=model_name, format="json", temperature=0)
    return prompt | llm.with_structured_output(Entities)

def graph_retriever(question: str, graph: Neo4jGraph, entity_chain) -> str:
    """Look up outgoing relationships in Neo4j for each entity in a question.

    Args:
        question: the user's question; entities are extracted from it.
        graph: connection object exposing ``query(cypher, params)``.
        entity_chain: runnable whose ``invoke`` result has a ``names`` list.

    Returns:
        One ``source - RELATIONSHIP -> target`` line per match joined by
        newlines, ``"No relationships found."`` when nothing matched, or
        ``"Error retrieving relationships."`` if any step raised.
    """
    outgoing_edges_cypher = """
                MATCH (p:Person {id: $entity})-[r]->(e)
                RETURN p.id AS source_id, type(r) AS relationship, e.id AS target_id
                LIMIT 50
                """
    try:
        # The structured output exposes the extracted names directly.
        extracted = entity_chain.invoke({"question": question}).names
        print("Retrieved Entities:", extracted)

        # One query per entity, flattened into a single list of lines.
        relationship_lines = [
            f"{row['source_id']} - {row['relationship']} -> {row['target_id']}"
            for name in extracted
            for row in graph.query(outgoing_edges_cypher, {"entity": name})
        ]

        if not relationship_lines:
            return "No relationships found."
        return "\n".join(relationship_lines)
    except Exception as err:
        print(f"Error in graph_retriever: {err}")
        return "Error retrieving relationships."


# Step 5: Hybrid Search with Ollama and Graph Data

Finally, we combine graph-based retrieval and embedding-based retrieval to perform hybrid searches. The `create_qa_chain` function builds a chain that answers a user’s question using both the graph relationships and the embedding-based context:

In [75]:
def create_qa_chain(graph: Neo4jGraph, vector_retriever, entity_chain, llm_model: Optional[str] = None):
    """Build the question-answering chain over graph and vector context.

    Args:
        graph: Neo4j connection used for the relationship lookup.
        vector_retriever: retriever whose ``invoke(question)`` returns
            documents with a ``page_content`` attribute.
        entity_chain: entity-extraction chain handed to ``graph_retriever``.
        llm_model: name of the Ollama model that writes the answer. When
            omitted, the name is resolved at call time from, in order: a
            notebook-level ``llm_model`` variable (if one exists), the
            ``OLLAMA_MODEL`` environment variable, then ``"llama3.1"`` (an
            assumed fallback -- change it to a model you have pulled).

    Returns:
        A runnable that takes a question string and returns the answer text.
    """
    # Resolve the model lazily: using a module-level name as the default in
    # the signature raises NameError when this cell runs on a fresh kernel.
    model_name = (
        llm_model
        or globals().get("llm_model")
        or os.environ.get("OLLAMA_MODEL", "llama3.1")
    )

    def full_retriever(question: str):
        """Combine graph relationships and vector hits into one context string."""
        graph_data = graph_retriever(question, graph, entity_chain)
        vector_data = [el.page_content for el in vector_retriever.invoke(question)]
        return f"Graph data: {graph_data}\nVector data: {'#Document '.join(vector_data)}"

    template = """
    Answer the question based only on the following context:
    {context}
    Question: {question}
    Answer:
    """

    prompt = ChatPromptTemplate.from_template(template)
    llm = ChatOllama(model=model_name, temperature=0)

    return (
        {
            # The function is passed directly; the previous lambda wrapper
            # added nothing and shadowed the builtin `input`.
            "context": full_retriever,
            "question": RunnablePassthrough()
        }
        | prompt
        | llm
        | StrOutputParser()
    )

This function first retrieves both the graph relationships and the documents matched by vector search, then uses the retrieved context to generate a concise answer to the user’s query.

# Step 6. Testing

In [76]:

def main():
    """Run the demo end to end: ingest the sample text, then ask one question.

    Prints the entities extracted from the question (via ``graph_retriever``)
    and the final answer produced by the QA chain.
    """
    text = """
    Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.
    She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.
    Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.
    She was, in 1906, the first woman to become a professor at the University of Paris.
    """

    # ingest_data opens its own connection and clears the database before
    # loading, so no separate init_graph()/clear_database() call is needed
    # here; the earlier one only created a second, unused connection.
    graph, vector_retriever = ingest_data(text)

    # Create chains
    entity_chain = create_entity_chain()
    qa_chain = create_qa_chain(graph, vector_retriever, entity_chain)

    # Test the chain
    question = "Who are Marie Curie and Pierre Curie?"
    response = qa_chain.invoke(question)
    print("Final Answer:", response)

if __name__ == "__main__":
    main()

Retrieved Entities: ['Marie Curie', 'Pierre Curie']
Final Answer: Marie Curie and Pierre Curie were a married couple who made significant contributions to science. They were both physicists and chemists who conducted pioneering research on radioactivity, which led to numerous accolades.

Marie Curie was a trailblazer in many ways:

* She was the first woman to win a Nobel Prize (in 1906) and the first person to win it twice.
* She was the only person to win a Nobel Prize in two scientific fields.
* She was the first woman to become a professor at the University of Paris, achieving this milestone in 1906.

Pierre Curie, on the other hand, was Marie's husband and a co-winner of her first Nobel Prize. He was also a physicist and chemist who made significant contributions to the field of radioactivity.

Together, they formed a remarkable scientific partnership that paved the way for future generations of scientists.
