In [1]:
!python -m pip install langchain-experimental -q
!python -m pip install neo4j -q

#### Initialize OpenAI LLM

In [2]:
import os
from langchain_openai import ChatOpenAI

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 

chat_model = ChatOpenAI(model="gpt-5-nano-2025-08-07", temperature=0, openai_api_key=OPENAI_API_KEY)

#### Initialize Embedding Model

In [3]:
from langchain_openai import OpenAIEmbeddings
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small", openai_api_key=OPENAI_API_KEY)

#### Load Data

In [6]:
from langchain_core.documents import Document

text = Document(page_content="""
Three students, A, B, and C, are tackling two subjects, X (Mathematics) and Y (Literature). Each has a unique perspective, weaving their experiences into a shared academic journey. A, gifted in Mathematics, thrives on solving equations but struggles with the abstract world of poetry and storytelling. On the other hand, B shines in Literature, captivating others with a flair for creative writing, yet finds numbers daunting and formulas perplexing.
C, a generalist, performs decently in both subjects but often bridges gaps between A and B. While A helps B understand mathematical concepts, B guides A through essay writing. Meanwhile, C organizes group study sessions, offering real-world examples to connect ideas from X and Y, making both subjects more relatable. Their collaboration not only enhances their learning but fosters a sense of camaraderie, demonstrating the power of teamwork in overcoming challenges.
""")

#### Split Documents into chunks

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=30)

chunks = splitter.split_documents([text])

#### Graph Initialization and Transformation

In [17]:
from langchain_neo4j import Neo4jGraph
from dotenv import load_dotenv
load_dotenv()

url = os.getenv("NEO4J_URI")
username = os.getenv("NEO4J_USERNAME")
password = os.getenv("NEO4J_PASSWORD")

graph = Neo4jGraph(url=url, username=username, password=password)

In [18]:
from langchain_experimental.graph_transformers import LLMGraphTransformer

graph_transformer = LLMGraphTransformer(
    llm=chat_model
)

In [24]:
# Convert chunks to graph
from typing import List

# Convert documents (chunks) into graph documents using the LLM transformer
graph_documents: List = []
batch_size = 10
for i in range(0, len(chunks), batch_size):
    batch = chunks[i:i+batch_size]
    # transformer returns GraphDocument objects expected by Neo4jGraph.add_graph_documents
    gdocs = graph_transformer.convert_to_graph_documents(batch)
    graph_documents.extend(gdocs)

# Persist to Neo4j
if graph_documents:
    graph.add_graph_documents(graph_documents, include_source=True)
    print(f"Added {len(graph_documents)} graph documents to Neo4j")
else:
    print("No graph documents created from chunks")

Added 4 graph documents to Neo4j


In [25]:
graph_documents[0]

GraphDocument(nodes=[Node(id='A', type='Person', properties={}), Node(id='B', type='Person', properties={}), Node(id='C', type='Person', properties={}), Node(id='X', type='Subject', properties={}), Node(id='Y', type='Subject', properties={})], relationships=[Relationship(source=Node(id='A', type='Person', properties={}), target=Node(id='X', type='Subject', properties={}), type='HAS_SKILL', properties={})], source=Document(metadata={'id': '83ccc51c7c15689f673e724706f24ea6'}, page_content='Three students, A, B, and C, are tackling two subjects, X (Mathematics) and Y (Literature). Each has a unique perspective, weaving their experiences into a shared academic journey. A, gifted in Mathematics, thrives on solving equations but struggles'))

###### add nodes and relationships to graph

In [26]:
graph.add_graph_documents(graph_documents,baseEntityLabel=True, include_source=True)

In [27]:
# indexing enables fast searches within text-based properties
def create_fulltext_index(g):
  cypher = "CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]"
  g.query(cypher)

create_fulltext_index(graph)

#### Quering the graph and entity retieval

In [28]:

from langchain_core.prompts import ChatPromptTemplate

entity_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system","You are extracting entities from the text.",
        ),
        (
            "human","Use the following information to extract entities"
            "input: {question}",
        ),
    ]
)

In [29]:
from pydantic import BaseModel, Field
from typing import List

class Entities(BaseModel):
    names: List[str] = Field(
        ...,
        description="All the entities that appear in the text",
    )

In [32]:
entity_chain = (
    entity_prompt | chat_model.with_structured_output(Entities)
)

In [33]:
entity_chain.invoke({"question": "Who learn both X and Y subjects"}).names

['X', 'Y']

#### Graph Retriever

In [34]:
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars

def generate_full_text_query(input):
    full_text_query = ""
    words = [el for el in remove_lucene_chars(input).split() if el]
    for word in words[:-1]:
        full_text_query += f" {word}~2 AND"
    full_text_query += f" {words[-1]}~2"
    return full_text_query.strip()

def graph_retriever(question: str) -> str:
    result = ""
    entities = entity_chain.invoke({"question": question})
    for entity in entities.names:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 20
            """,
            {"query": generate_full_text_query(entity)},
        )
        result += "\n".join([el['output'] for el in response])
    return result


In [35]:
print(graph_retriever("who learn both X and Y subjects"))

  words = [el for el in remove_lucene_chars(input).split() if el]


X - CONNECTS_WITH -> Y
X - ENHANCES -> Learning
X - FOSTERS -> Camaraderie
A - HAS_SKILL -> X
B - EXCELS_IN -> Literature
B - EXCELS_IN -> Creative_Writing
B - STRUGGLES_WITH -> Numbers
B - STRUGGLES_WITH -> Formulas
B - TEACHES -> A
C - BRIDGES -> B
A - TEACHES -> BY - ENHANCES -> Learning
Y - FOSTERS -> Camaraderie
X - CONNECTS_WITH -> Y
B - EXCELS_IN -> Literature
B - EXCELS_IN -> Creative_Writing
B - STRUGGLES_WITH -> Numbers
B - STRUGGLES_WITH -> Formulas
B - TEACHES -> A
C - BRIDGES -> B
A - TEACHES -> B


#### Sementic Search Retriever

In [36]:

from langchain_community.vectorstores import Neo4jVector

vector_index = Neo4jVector.from_existing_graph(
    embedding_model,
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)

In [37]:
vector_index.similarity_search("who learn both X and Y subjects", k=2)



[Document(metadata={}, page_content='\ntext: Three students, A, B, and C, are tackling two subjects, X (Mathematics) and Y (Literature). Each has a unique perspective, weaving their experiences into a shared academic journey. A, gifted in Mathematics, thrives on solving equations but struggles'),
 Document(metadata={}, page_content='\ntext: offering real-world examples to connect ideas from X and Y, making both subjects more relatable. Their collaboration not only enhances their learning but fosters a sense of camaraderie, demonstrating the power of teamwork in overcoming challenges.')]

In [38]:
def retriever(question):
    graph_search_result = graph_retriever(question)
    semantic_search_result = [data.page_content for data in vector_index.similarity_search(question, k=2)]
    final_data = f"Graph data:{graph_search_result}\nText data:{' '. join(semantic_search_result)}"
    return final_data

#### Define Prompt Template for RAG

In [39]:
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system","Answer this question using the provided context only.",
        ),
        (
            "human","Context: {context}"
            "Question: {question}",
        ),
    ]
)

#### Create RAG CHain

In [40]:
from langchain_core.runnables import RunnablePassthrough

chain = (
    {
      "context": retriever,
      "question": RunnablePassthrough()
    }
    | prompt
    | chat_model
)

#### Invoke RAG chain with example questions

In [41]:

response = chain.invoke("who learn both X and Y subjects")

print(response.content)



A, B, and C. The text states three students are tackling two subjects, X and Y, and their collaboration connects ideas from both subjects, indicating that all three engage with both X and Y.


In [42]:

response = chain.invoke("who are the students in that school")

print(response.content)



The students are A, B, and C.
