In [1]:
%pip install --upgrade --quiet langchain langchain-community langchain-openai langchain-ollama langchain-experimental neo4j langchain-neo4j neo4j-graphrag

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [18]:
from langchain_core.runnables import (
    RunnablePassthrough
)
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
import os
from langchain_community.graphs import Neo4jGraph
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_ollama import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_neo4j import Neo4jGraph
from dotenv import load_dotenv

load_dotenv()



True

In [None]:
AUTH = ("neo4j", os.getenv("NEO4J_PASSWORD"))

# Connect to Neo4j database
driver = GraphDatabase.driver('bolt://192.168.137.121:7687', auth=AUTH)

In [9]:
loader = TextLoader(file_path="texts/books/barrie/peterpan.txt", encoding="utf-8")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=25)
documents = text_splitter.split_documents(documents=docs)


In [25]:
documents[0]

Document(metadata={'source': 'texts/books/barrie/peterpan.txt', 'id': 'cec59315c68e80292ae497dd5967100b'}, page_content='\ufeff\nPeter Pan\n\n[PETER AND WENDY]\n\nby J. M. Barrie [James Matthew Barrie]\n\nA Millennium Fulcrum Edition produced in 1991 by Duncan Research. Note\nthat while a copyright was initially claimed for the labor involved in\ndigitization, that copyright claim is not consistent with current\ncopyright requirements. This text, which matches the 1911 original\npublication, is in the public domain in the US.')

In [26]:
llm_type = "ollama"
llm = ChatOllama(model="gemma3:1b", temperature=0, base_url=os.getenv("OLLAMA_HOST"))
llm_transformer = LLMGraphTransformer(llm=llm, relationship_properties=True)

graph_documents = await llm_transformer.aconvert_to_graph_documents(documents)

In [27]:
graph_documents[0]

GraphDocument(nodes=[Node(id='Peter', type='Person', properties={}), Node(id='Wendy', type='Person', properties={}), Node(id='James Matthew Barre', type='Author', properties={}), Node(id='Millennium Fulcrum', type='Edition', properties={}), Node(id='1991', type='Date', properties={}), Node(id='Duncan Research', type='Publisher', properties={}), Node(id='1911', type='Year', properties={}), Node(id='Public Domain', type='Status', properties={})], relationships=[Relationship(source=Node(id='Peter', type='Person', properties={}), target=Node(id='Wendy', type='Person', properties={}), type='RELATIONSHIP', properties={}), Relationship(source=Node(id='James Matthew Barre', type='Author', properties={}), target=Node(id='Peter', type='Person', properties={}), type='RELATIONSHIP', properties={}), Relationship(source=Node(id='James Matthew Barre', type='Author', properties={}), target=Node(id='Millennium Fulcrum', type='Edition', properties={}), type='RELATIONSHIP', properties={}), Relationship(s

In [28]:
graph = Neo4jGraph('bolt://192.168.137.117:7687', username='neo4j', password=os.getenv("NEO4J_PASSWORD"), database='neo4j')
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [20]:
os.getenv('NEO4J_URL')

'bolt://192.168.137.117:7687'

In [8]:
from langchain_ollama import OllamaEmbeddings

vector_store = Neo4jVector(
    embedding=OllamaEmbeddings(model="nomic-embed-text"),
    url=os.getenv('NEO4J_URL'),
    username='neo4j',
    password=os.getenv("NEO4J_PASSWORD"),
    database='neo4j'
)
vector_index = vector_store.from_existing_graph(
    embedding=OllamaEmbeddings(model="nomic-embed-text"),
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding",
)
vector_retriever = vector_index.as_retriever()

  vector_store = Neo4jVector(


In [21]:
class Entities(BaseModel):
    names: list[str] = Field(
        ...,
        description="All the person, organization, or business entities that appear in the text",
    )

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following input: {question}",
        )
    ]
)

entity_chain = prompt | llm.with_structured_output(Entities)

In [22]:
entity_chain.invoke({"question": "Who are Peter and Wendy?"}).names

['Peter', 'Wendy']

In [23]:
def generate_full_text_query(input: str) -> str:
    words = [el for el in remove_lucene_chars(input).split() if el]
    if not words:
        return ""
    full_text_query = " AND ".join([f"{word}~2" for word in words])
    print(f"Generated Query: {full_text_query}")
    return full_text_query.strip()

def graph_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned in the question
    """
    result = ""
    entities = entity_chain.invoke({"question": question})
    for entity in entities.names:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entities', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +node.id AS output
              }
              RETURN output LIMIT 50
              """,
              {"query": generate_full_text_query(entity)},
        )
        result += "\n".join([el['output'] for el in response])
        return result

In [24]:
print(graph_retriever("Who is Johnathan?"))



Generated Query: Johnathan~2

