In [None]:
!pip install --upgrade --quiet  langchain langchain-community langchain-ollama langchain-experimental neo4j tiktoken yfiles_jupyter_graphs python-dotenv json-repair langchain-openai langchain_core

In [None]:
from langchain_core.runnables import  RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_ollama import OllamaEmbeddings
import os
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from neo4j import  Driver

from dotenv import load_dotenv

load_dotenv()

True

In [None]:
graph = Neo4jGraph(username="neo4j", password="Y@$#odhan23", url="bolt://localhost:7687")


Text Loading and Splitting
This code loads a text file (dummytext.txt) and splits it into smaller chunks of 250 characters with a 24-character overlap to preserve context between chunks.

In [None]:
loader = TextLoader(file_path="dummytext.txt")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=24)
documents = text_splitter.split_documents(documents=docs)


Graph Document Conversion
This code initializes a language model (llama3.1) and converts text documents into graph-structured data using LLMGraphTransformer. The output is a list of graph_documents ready for Neo4J ingestion.

In [None]:
from langchain_ollama import ChatOllama

# Initialize the new ChatOllama LLM
llm = ChatOllama(model="llama3.1", temperature=0, format="json")

# Now you can use llm_transformer as before
llm_transformer = LLMGraphTransformer(llm=llm)

# Convert documents to graph documents
graph_documents = llm_transformer.convert_to_graph_documents(documents)


In [None]:
graph_documents[0]

GraphDocument(nodes=[Node(id="Amico'S Family", type='Family', properties={}), Node(id='Legacy', type='Concept', properties={}), Node(id='Love', type='Emotion', properties={}), Node(id='Tradition', type='Custom', properties={})], relationships=[], source=Document(metadata={'source': 'dummytext.txt'}, page_content='1. The Story of Amico’s Family: A Legacy of Love and Tradition'))

Add Graph Documents to Neo4J
This code inserts the generated graph_documents into the Neo4J graph database.

- include_source=True: Ensures the original text source is included as metadata.
- baseEntityLabel=True: Adds a base label to all entities for easier querying.

In [52]:
graph.add_graph_documents(
    graph_documents,
    include_source=True,
    baseEntityLabel=True

)

In [53]:
from langchain_community.vectorstores.neo4j_vector import Neo4jVector, SearchType
from langchain_ollama import OllamaEmbeddings


# Initialize your embeddings
embeddings = OllamaEmbeddings(
    model="mxbai-embed-large",
)

# Initialize the Neo4jVector from an existing graph using proper ordering and credentials
vector_index = Neo4jVector.from_existing_graph(
    embedding=embeddings,
    node_label="Document",
    embedding_node_property="embedding",
    index_name="vector",
    text_node_properties=["text"],
    search_type="hybrid",  # using the enum for hybrid search type
    url="bolt://localhost:7687",   # your Neo4j URL
    username="neo4j",              # your Neo4j username
    password="Y@$#odhan23"       # your Neo4j password
)

# Create a retriever from the vector index
vector_retriever = vector_index.as_retriever()


In [60]:
driver = GraphDatabase.driver(
        uri = "bolt://localhost:7687",
        auth = ("neo4j","Y@$#odhan23"))

def create_fulltext_index(tx):
    query = '''
    CREATE FULLTEXT INDEX `fulltext_entity_id` 
    FOR (n:__Entity__) 
    ON EACH [n.id];
    '''
    tx.run(query)

# Function to execute the query
def create_index():
    with driver.session() as session:
        session.execute_write(create_fulltext_index)
        print("Fulltext index created successfully.")

# Call the function to create the index
try:
    create_index()
except:
    pass

# Close the driver connection
driver.close()

Fulltext index created successfully.


In [61]:
class Entities(BaseModel):
    """Identifying information about entities."""

    names: list[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)


entity_chain = llm.with_structured_output(Entities)

In [62]:
entity_chain.invoke("Who are Nonna Lucia and Giovanni Caruso?")

Entities(names=['Nonna Lucia', 'Giovanni Caruso'])

In [70]:
def graph_retriever(question: str) -> str:
    result = ""
    entities = entity_chain.invoke(question)
    for entity in entities.names:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('fulltext_entity_id', $query, {limit:2})
            YIELD node,score
            
            WITH node
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              
              UNION ALL
              
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
            }
            RETURN output LIMIT 50""",
            {"query": entity},
        )
        result += "\n".join([el['output'] for el in response])
    return result

In [71]:
print(graph_retriever("Who is Nonna Lucia?"))






In [73]:
def full_retriever(question: str):
    graph_data = graph_retriever(question)
    vector_data = [el.page_content for el in vector_retriever.invoke(question)]
    final_data = f"""Graph data:
{graph_data}
vector data:
{"#Document ". join(vector_data)}
    """
    return final_data

In [74]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
        {
            "context": full_retriever,
            "question": RunnablePassthrough(),
        }
    | prompt
    | llm
    | StrOutputParser()
)

In [75]:
chain.invoke(input="Who is Nonna Lucia? Did she teach anyone about restaurants or cooking?")



'{ "Nonna Lucia"  :  "She was the matriarch of the Caruso family, a culinary sage who taught her grandchildren and others traditional Sicilian recipes." }'