In [1]:
%pip install --quiet langchain langchain-community langchain-ollama langchain-experimental neo4j pandas matplotlib tiktoken networkx py2neo yfiles_jupyter_graphs python-dotenv

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
import os
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain_community.chat_models import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_community.document_loaders import PyPDFLoader

from dotenv import load_dotenv

load_dotenv()

True

In [27]:
# Neo4j connection settings are read from the process environment
# (load_dotenv() was called in the import cell).
url = os.getenv("NEO4J_URI")
user = os.getenv("NEO4J_USERNAME")
password = os.getenv("NEO4J_PASSWORD")

# Pass the username explicitly so this connection uses exactly the same
# credentials as the raw driver opened in ShowGraph(), instead of leaving
# `user` unused here.
graph = Neo4jGraph(
    url=url,
    username=user,
    password=password,
)

In [62]:
# Load the source text, then split it into chunks
# (chunk_size=1000, chunk_overlap=100) for graph extraction.
loader = TextLoader("dummytext.txt")
doc = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=1000,
)
docs = text_splitter.split_documents(doc)


In [63]:
def ShowGraph(cypher: str = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t"):
    """Return a GraphWidget visualising the result of a Cypher query.

    Args:
        cypher: Query whose result is rendered. The default returns every
            relationship whose type is not MENTIONS, together with its
            start and end nodes.

    Returns:
        A GraphWidget whose node labels are mapped to the ``id`` property.
    """
    # Use context managers so the driver and session are released on every
    # call; the result is materialised with .graph() before they close.
    with GraphDatabase.driver(url, auth=(user, password)) as driver:
        with driver.session() as session:
            result_graph = session.run(cypher).graph()
    widget = GraphWidget(graph=result_graph)
    widget.node_label_mapping = 'id'
    return widget

In [298]:
from langchain_experimental.llms.ollama_functions import OllamaFunctions

# llama3.1 served through Ollama, with JSON output and temperature 0.
llm = OllamaFunctions(format="json", temperature=0, model="llama3.1")

# Convert every text chunk in `docs` into a graph document using the LLM.
llm_graph_transformer = LLMGraphTransformer(llm=llm)
graph_documents = llm_graph_transformer.convert_to_graph_documents(docs)

In [None]:
### Fixing the types of the nodes and relationships (handling null values)
# Any node whose type is missing (empty string or None) gets the generic
# 'ENTITY' label. This is applied both to the stand-alone nodes and to the
# source/target endpoints stored on each relationship, so the two stay
# consistent. 'MENTIONS' is a relationship name used elsewhere in this notebook
# and is therefore not used as a node label.
# NOTE(review): the relationship's own `type` is not normalised here (it was
# not in the original either) - confirm whether empty relationship types occur.
for g in graph_documents:
    for reln in g.relationships:
        patched = False
        for endpoint in (reln.source, reln.target):
            if not endpoint.type:
                endpoint.type = 'ENTITY'
                patched = True
        if patched:
            print(reln)
    for node in g.nodes:
        if not node.type:
            node.type = 'ENTITY'
            print(node)

In [300]:
# Spot-check: display the graph document at index 1 (the second one).
graph_documents[1]

GraphDocument(nodes=[Node(id='Giovanni Caruso', type='Person', properties={}), Node(id='Maria', type='Person', properties={}), Node(id='Antonio Caruso', type='Person', properties={})], relationships=[Relationship(source=Node(id='Giovanni Caruso', type='Person', properties={}), target=Node(id='Maria', type='Person', properties={}), type='SPOUSE', properties={}), Relationship(source=Node(id='Giovanni Caruso', type='Person', properties={}), target=Node(id='Antonio Caruso', type='Person', properties={}), type='PARENT', properties={})], source=Document(metadata={'source': 'dummytext.txt', 'id': 'f02d79f401c2ab5918868e8e9414ff13'}, page_content="Giovanni Caruso and Maria: The Founding Generation\n\nGiovanni Caruso, Amico's great-grandfather, was a man of the earth. His calloused hands spoke of years spent cultivating the fertile soils of Santa Caterina, producing olives and grapes that were the pride of the region. Giovanni was not just a farmer but an alchemist of flavors, blending the frui

In [301]:
# Write the extracted graph documents to Neo4j through the `graph` wrapper,
# with include_source and baseEntityLabel both enabled.
graph.add_graph_documents(
    graph_documents,
    include_source=True,
    baseEntityLabel=True,
)

In [302]:
# Render the stored graph with the ShowGraph helper defined above.
ShowGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [331]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used for the vector side of retrieval.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Build a hybrid index over the `text` property of `Document` nodes, storing
# vectors in the `embedding` property, and expose it as a retriever.
vector_index = Neo4jVector.from_existing_graph(
    embeddings,
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding",
    search_type="hybrid",
)
vector_retriever = vector_index.as_retriever()

In [309]:
from typing import List
from pydantic import BaseModel, Field

class Entities(BaseModel):
    """Identifying information about entities."""

    # Schema passed to `llm.with_structured_output(...)`; callers read the
    # extracted entity names from the `.names` attribute of the result.
    # `...` marks the field as required.
    names: List[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )

# Prompt asking the model to extract person/organization entities from the
# text supplied through the `{question}` placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)

# Pipe the prompt into the structured-output model so the chain can be invoked
# with {"question": ...}. Without the prompt, that dict would be handed to the
# model directly instead of being rendered into messages.
entity_chain = prompt | llm.with_structured_output(Entities)


In [314]:
import json

def get_entities(result):
    """Pull the list of entity names out of a raw structured-output result.

    Args:
        result: Mapping whose ``'raw'`` entry has a ``content`` attribute
            holding a JSON document.

    Returns:
        The value at ``parameters.names`` when it is a list, the decoded list
        when it is a JSON-encoded string, and ``[]`` in every other case.
    """
    print(result)
    payload = json.loads(result['raw'].content)

    # Guard clause: nothing to extract unless parameters.names is present.
    if 'parameters' not in payload or 'names' not in payload['parameters']:
        return []

    extracted = payload['parameters']['names']
    if isinstance(extracted, list):
        return extracted
    if isinstance(extracted, str):
        # The names can arrive as a JSON-encoded string; decode it.
        return json.loads(extracted)
    return []

# Compose the extraction prompt with the structured-output model; this
# rebinds the notebook-level `entity_chain` used by the graph retriever.
entity_chain = prompt | llm.with_structured_output(Entities)
# Quick check: `.names` holds the entity names extracted from the question.
result = entity_chain.invoke({"question": "who is Nana Lucia?"}).names

In [315]:
# Raw Neo4j driver used only for creating the full-text index below.
# os.environ[...] raises KeyError if a connection setting is missing.
driver = GraphDatabase.driver(
    uri=os.environ["NEO4J_URI"],
    auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
)
def create_fulltext_index(tx):
    """Create the `fulltext_entity_id` full-text index if it does not exist.

    Args:
        tx: Transaction-like object exposing ``run(query)``.

    The index covers the ``id`` property of ``__Entity__`` nodes.
    ``IF NOT EXISTS`` makes the statement idempotent, so re-running the
    notebook does not fail once the index is already present.
    """
    query = '''
    CREATE FULLTEXT INDEX `fulltext_entity_id` IF NOT EXISTS
    FOR (n:__Entity__)
    ON EACH [n.id]
    '''
    tx.run(query)

def create_index():
    """Run `create_fulltext_index` in a write transaction on the notebook-level `driver`."""
    with driver.session() as db_session:
        db_session.execute_write(create_fulltext_index)
        print("Fulltext index created successfully.")

# Create the index as a best-effort step, but report any failure instead of
# silently swallowing it. `except Exception` (rather than a bare `except`)
# lets KeyboardInterrupt/SystemExit propagate.
try:
    create_index()
except Exception as exc:
    print(f"Fulltext index was not created: {exc}")
finally:
    # Close the driver connection whether or not index creation succeeded.
    driver.close()

In [326]:

def generate_full_text_query(input: str) -> str:
    """Build a fuzzy full-text query string from free text.

    The text is first passed through `remove_lucene_chars`. Each remaining
    whitespace-separated word gets a ``~2`` suffix and the words are joined
    with ``AND``.

    Args:
        input: Free text to turn into a query.

    Returns:
        The query string, or ``""`` when no words remain after cleaning.
    """
    cleaned = remove_lucene_chars(input)
    fuzzy_terms = []
    for token in cleaned.split():
        if token:
            fuzzy_terms.append(f"{token}~2")
    if not fuzzy_terms:
        return ""
    full_text_query = " AND ".join(fuzzy_terms)
    print(f"Generated Query: {full_text_query}")
    return full_text_query.strip()

def graph_retriever(question: str) -> str:
    """Collect graph relationships around the entities mentioned in a question.

    Entities are extracted from the question with `entity_chain`. Each one is
    looked up in the `fulltext_entity_id` full-text index, and the incoming
    and outgoing non-MENTIONS relationships of the matching nodes are
    rendered as text lines (at most 50 per entity).

    Args:
        question: Natural-language question.

    Returns:
        Newline-joined relationship descriptions; ``""`` when nothing matched.
    """
    entities = entity_chain.invoke({"question": question}).names
    print(entities)

    # The Cypher text does not depend on the entity, so build it once.
    query = """
    CALL db.index.fulltext.queryNodes("fulltext_entity_id", $query) YIELD node, score
    CALL (node, score) {
        WITH node
        MATCH (node)-[r:!MENTIONS]->(neighbour)
        RETURN node.id + ' - ' + type(r) + ' -> ' + neighbour.id AS output
        UNION ALL
        WITH node
        MATCH (node)<-[r:!MENTIONS]-(neighbour)
        RETURN neighbour.id + ' <- ' + type(r) + ' - ' + node.id AS output
    }
    RETURN output LIMIT 50;
    """

    final_results = []
    for entity in entities:
        # Entity names are model output. Strip Lucene operator characters so
        # the text is matched as plain terms rather than parsed as query
        # syntax, and skip names that are empty after cleaning.
        search_text = remove_lucene_chars(entity).strip()
        if not search_text:
            continue
        response = graph.query(query, {"query": search_text})
        final_results.extend(record['output'] for record in response)

    return "\n".join(final_results)

In [327]:
# Smoke-test the graph retriever. It prints the extracted entity list itself;
# the print here shows the returned relationship lines.
print(graph_retriever("Who is Nonna Lucia?"))

['Nonna Lucia']
Nonna Lucia - MOTHER_OF -> Amico
Nonna Lucia - GRANDMOTHER_OF -> Antonio
Nonna Lucia - CUSTODIAN_OF -> family's recipes
Nonna Lucia - TEACHER_OF -> Amico
Nonna Lucia - INFLUENCED_BY -> family's love and wisdom
Nonna Lucia - TEACHES_ABOUT -> life, love, community
Nonna Lucia - SISTER -> Antonio
Amico <- GRANDMOTHER - Nonna Lucia
Lucia Caruso - OWNED_BY -> Bella Vita
Lucia Caruso - INHERITED_TALENT -> her grandmother
Lucia Caruso - COMMITTED_TO_SUSTAINABILITY -> sustainability
Lucia Caruso - FOCUSED_ON -> sustainable cooking practices
Lucia Caruso - FROM -> Sicily
Bella Vita <- OWNS - Lucia Caruso
Bella Vita - Los Angeles <- OWNED_BY - Lucia Caruso
Caruso Family <- MEMBER - Lucia Caruso


In [336]:
def full_retriever(question: str):
    """Combine graph and vector retrieval results into one context string.

    Args:
        question: Natural-language question passed to both retrievers.

    Returns:
        Text with a ``Graph data:`` section (output of `graph_retriever`)
        followed by a ``vector data:`` section containing the page contents
        returned by `vector_retriever`, joined with ``"#Document "``.
    """
    graph_data = graph_retriever(question)
    print(graph_data)

    passages = []
    for hit in vector_retriever.invoke(question):
        passages.append(hit.page_content)
    joined_passages = "#Document ".join(passages)

    # Same layout as the original multi-line literal, including the trailing
    # newline and four spaces.
    return f"Graph data:\n{graph_data}\nvector data:\n{joined_passages}\n    "

In [337]:
# Answer-generation prompt text: `{context}` and `{question}` are filled in
# when the template is rendered by the chain built from it.
template = """ 
    Answer the question based on only the following context provided:
    {context}
    
    Question: {question}
    Use natural language to answer the question.
    Answer:
"""

# NOTE(review): this rebinds the notebook-level name `prompt`, which earlier
# held the entity-extraction prompt. Re-running the cell that builds
# `entity_chain` after this one would pick up this QA prompt instead.
prompt = ChatPromptTemplate.from_template(template=template)

# Answering pipeline: `full_retriever` supplies `context`, RunnablePassthrough
# supplies `question`, then the prompt, the LLM and a string output parser
# run in sequence.
chain = (
    {"context": full_retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [338]:
# Run the full pipeline end to end on a sample question.
chain.invoke(input="What are the topics covered in the paper Attention is All You Need?")

['Attention is All You Need']
Food for All - TARGETS_GROUP -> homeless and low-income families
Food for All - INCLUDED_ACTIVITY -> food drives
Carusos <- LAUNCHED_INITIATIVE - Food for All
Food For All - SET_UP -> Community Kitchens
Carusos <- LAUNCHED - Food For All
self-attention - USED_IN -> Transformer
Jakob <- PROPOSED - self-attention
Transformer <- IS_FIRST_MODEL - self-attention
models such as [17, 18] and [9] <- COMPARED_WITH - self-attention
attention mechanisms - ALLOWED -> modeling of dependencies without regard to their distance in the input or output sequences
recurrent network <- USED_WITH - attention mechanisms
Transformer <- USES_ATTENTION_MECHANISM - attention mechanism
Self-attention - USED_IN -> reading comprehension, abstractive summarization, textual entailment and learning task-independent sentence representations
Transformer <- USES - Multi-Head Attention
End-to-end memory networks <- BASED_ON - recurrent attention mechanism
multi-head attention - IMPLEMENTED_IN



'The paper Attention Is All You Need covers various topics, including sequence transduction models, attention mechanisms, and a new simple network architecture called the Transformer. It discusses how traditional recurrent or convolutional neural networks can be replaced with attention-based models, which have shown to perform well in tasks such as reading comprehension, abstractive summarization, and language modeling.'