In [2]:
### Neo4j
from neo4j import GraphDatabase
from neo4j import  Driver
### Langchain
from langchain_neo4j import Neo4jGraph
from langchain_neo4j import Neo4jVector
from langchain_core.runnables import  RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
### ChatModels (https://python.langchain.com/docs/integrations/chat/)
from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
### Embeddings
from langchain_ollama import OllamaEmbeddings
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from langchain_experimental.graph_transformers import LLMGraphTransformer

from yfiles_jupyter_graphs import GraphWidget
from pydantic import BaseModel, Field

import os
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
graph = Neo4jGraph()

In [4]:
graph

<langchain_neo4j.graphs.neo4j_graph.Neo4jGraph at 0x1e4d837f3d0>

In [5]:
loader = TextLoader(file_path="dummytext.txt")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=24)
documents = text_splitter.split_documents(documents=docs)

In [6]:
llm_type = os.getenv("LLM_TYPE", "ollama")
llm_type 

'ollama'

In [7]:
if llm_type == "ollama":
    llm = ChatOllama(
        model="llama3.2", temperature=0)
elif llm_type == "openai":
    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
else:
    llm = ChatAnthropic(model="claude-3-5-sonnet-20240620", temperature=0)

llm_transformer = LLMGraphTransformer(llm=llm)
graph_documents = llm_transformer.convert_to_graph_documents(documents)

In [8]:
graph_documents[13]

GraphDocument(nodes=[Node(id='Pietro', properties={}), Node(id='Antonio', properties={}), Node(id='Sofia', properties={}), Node(id='Kitchen', properties={}), Node(id='Sea', properties={}), Node(id='Village', properties={}), Node(id='Family Trattoria', properties={})], relationships=[], source=Document(metadata={'source': 'dummytext.txt'}, page_content="Pietro, Antonio's eldest son, was a skilled fisherman who loved the sea as much as the kitchen. His daily catches were the freshest seafood in the village, a staple in the family trattoria he ran with his wife, Sofia. Sofia was a baker par"))

In [9]:
len(graph_documents)

73

In [10]:
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [11]:
def displayGraph():
    driver = GraphDatabase.driver(
        uri = os.environ["NEO4J_URI"],
        auth = (os.environ["NEO4J_USERNAME"],
                os.environ["NEO4J_PASSWORD"]))

    session = driver.session()
    widget = GraphWidget(graph=session.run("MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t").graph())
    widget.node_label_mapping = 'id'
    return widget

# displayGraph()

In [12]:
displayGraph()

GraphWidget(layout=Layout(height='500px', width='100%'))

### Embedding Model

- https://medium.com/timescale/finding-the-best-open-source-embedding-model-for-rag-929d1656d331

In [12]:
EMBEDDING_MODELS = [
    {'name':'mxbai-embed-large', 'dimensions': 1024},
    {'name':'nomic-embed-text','dimensions': 768},
    {'name':'bge-m3','dimensions': 1024},
]
 
for model in EMBEDDING_MODELS:
    print(model['name'], model['dimensions'])

mxbai-embed-large 1024
nomic-embed-text 768
bge-m3 1024


In [13]:
embeddings = OllamaEmbeddings(
    model="mxbai-embed-large",
)

vector_index = Neo4jVector.from_existing_graph(
    embeddings,
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)
vector_retriever = vector_index.as_retriever()

In [14]:
vector_retriever

VectorStoreRetriever(tags=['Neo4jVector', 'OllamaEmbeddings'], vectorstore=<langchain_neo4j.vectorstores.neo4j_vector.Neo4jVector object at 0x000002BC91B88D30>, search_kwargs={})

In [17]:
driver = GraphDatabase.driver(
        uri = os.environ["NEO4J_URI"],
        auth = (os.environ["NEO4J_USERNAME"],
                os.environ["NEO4J_PASSWORD"]))

def create_fulltext_index(tx):
    query = '''
    CREATE FULLTEXT INDEX `fulltext_entity_id` 
    FOR (n:__Entity__) 
    ON EACH [n.id];
    '''
    tx.run(query)

# Function to execute the query
def create_index():
    with driver.session() as session:
        session.execute_write(create_fulltext_index)
        print("Fulltext is indexed successfully.")

# Call the function to create the index
try:
    create_index()
except:
    pass

# Close the driver connection
driver.close()

In [18]:

class Entities(BaseModel):
    """Identifying information about entities."""

    names: list[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)


entity_chain = llm.with_structured_output(Entities)

In [19]:
entity_chain

RunnableBinding(bound=ChatOllama(model='llama3.2', temperature=0.0), kwargs={'tools': [{'type': 'function', 'function': {'name': 'Entities', 'description': 'Identifying information about entities.', 'parameters': {'properties': {'names': {'description': 'All the person, organization, or business entities that appear in the text', 'items': {'type': 'string'}, 'type': 'array'}}, 'required': ['names'], 'type': 'object'}}}], 'structured_output_format': {'kwargs': {'method': 'function_calling'}, 'schema': {'type': 'function', 'function': {'name': 'Entities', 'description': 'Identifying information about entities.', 'parameters': {'properties': {'names': {'description': 'All the person, organization, or business entities that appear in the text', 'items': {'type': 'string'}, 'type': 'array'}}, 'required': ['names'], 'type': 'object'}}}}}, config={}, config_factories=[])
| PydanticToolsParser(first_tool_only=True, tools=[<class '__main__.Entities'>])

In [20]:
entity_chain.invoke("Who are Nonna Lucia and Giovanni Caruso?")

Entities(names=['Nonna Lucia', 'Giovanni Caruso'])

In [21]:
def generate_full_text_query(input: str) -> str:
    words = [el for el in remove_lucene_chars(input).split() if el]
    if not words:
        return ""
    full_text_query = " AND ".join([f"{word}~2" for word in words])
    print(f"Generated Query: {full_text_query}")
    return full_text_query.strip()


# Fulltext index query
def graph_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question
    """
    result = ""
    entities = entity_chain.invoke(question)
    for entity in entities.names:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('fulltext_entity_id', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": entity},
        )
        result += "\n".join([el['output'] for el in response])
    return result

In [22]:
print(graph_retriever("Who is Nonna Lucia?"))




In [23]:
graph_retriever("Who is Giovanni?")

''

In [24]:
def full_retriever(question: str):
    graph_data = graph_retriever(question)
    vector_data = [el.page_content for el in vector_retriever.invoke(question)]
    final_data = f"""Graph data:
{graph_data}
vector data:
{"#Document ". join(vector_data)}
    """
    return final_data

## Q&A

In [25]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
        {
            "context": full_retriever,
            "question": RunnablePassthrough(),
        }
    | prompt
    | llm
    | StrOutputParser()
)

In [26]:
chain.invoke(input="Who is Nonna Lucia? Did she teach anyone about restaurants or cooking?")

"Nonna Lucia was Amico's grandmother, a culinary sage who taught her grandchildren the art of Sicilian cooking. She also played a role in shaping Lucia's culinary talent and influence. There is no mention of Nonna Lucia teaching anyone about restaurants."

In [27]:
chain.invoke(input="What is La Dolce Vita?")

'La Dolce Vita was a restaurant owned by Antonio Caruso, located in Rome, known for its live classical music and blend of traditional and modern cuisine.'

In [35]:
chain.invoke(input="What is LA Tera Di Siena?")

'LA Terra di Siena appears to be the name of a restaurant, specifically owned by Sofia Caruso.'

In [None]:
chain.invoke(input="Who is Sofia Caruso?")

"Sofia Caruso is Antonio's wife and the co-owner of a trattoria with her husband Pietro. She is also a baker."

In [37]:
chain.invoke(input="Describe the relationship of the Sofia, Pietro, Antonio and Geovanni?")

"The relationships between Sofia, Pietro, Antonio, and Giovanni are as follows:\n\n- Sofia is Pietro's wife and a baker.\n- Pietro is Antonio's eldest son and a skilled fisherman.\n- Antonio is the father of both Pietro and Giovanni, and he hosts workshops in Rome to share his innovative techniques.\n\nThere is no direct information about the relationship between Sofia and Giovanni."