In [73]:
%pip install --upgrade --quiet langchain langchain-community langchain-openai langchain-experimental neo4j wikipedia tiktoken yfiles_jupyter_graphs sentence-transformers transformers text-generation pypdf2 json-repair ollama chromadb huggingface-hub jsonpickle

Note: you may need to restart the kernel to use updated packages.


In [74]:
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)

from text_generation import Client
from transformers import AutoTokenizer
from langchain_community.llms import Ollama
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from PyPDF2 import PdfReader
import pypdf

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Tuple, List, Optional
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
import os
from langchain_community.graphs import Neo4jGraph
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.runnables import ConfigurableField, RunnableParallel, RunnablePassthrough
import json_repair

try:
  import google.colab
  from google.colab import output
  output.enable_custom_widget_manager()
except:
  pass

# Enhancing RAG-based applications accuracy by constructing and leveraging knowledge graphs
## A practical guide to constructing and retrieving information from knowledge graphs in RAG applications with Neo4j and LangChain

Graph retrieval augmented generation (Graph RAG) is gaining momentum and emerging as a powerful addition to traditional vector search retrieval methods. This approach leverages the structured nature of graph databases, which organize data as nodes and relationships, to enhance the depth and contextuality of retrieved information.

Graphs are great at representing and storing heterogeneous and interconnected information in a structured manner, effortlessly capturing complex relationships and attributes across diverse data types. In contrast, vector databases often struggle with such structured information, as their strength lies in handling unstructured data through high-dimensional vectors. In your RAG application, you can combine structured graph data with vector search through unstructured text to achieve the best of both worlds, which is exactly what we will do in this blog post.

Knowledge graphs are great, but how do you create one? Constructing a knowledge graph is typically the most challenging step in leveraging the power of graph-based data representation. It involves gathering and structuring the data, which requires a deep understanding of both the domain and graph modeling. To simplify this process, we have been experimenting with LLMs. LLMs, with their profound understanding of language and context, can automate significant parts of the knowledge graph creation process. By analyzing text data, these models can identify entities, understand the relationships between them, and suggest how they might be best represented in a graph structure. As a result of these experiments, we have added the first version of the graph construction module to LangChain, which we will demonstrate in this blog post.

## Neo4j Environment Setup

You need to set up a Neo4j instance follow along with the examples in this blog post. The easiest way is to start a free instance on [Neo4j Aura](https://neo4j.com/cloud/platform/aura-graph-database/), which offers cloud instances of Neo4j database. Alternatively, you can also set up a local instance of the Neo4j database by downloading the Neo4j Desktop application and creating a local database instance.

In [75]:
import os
from langchain_community.graphs import Neo4jGraph
os.environ["NEO4J_URI"] = "neo4j+s://9f7422d3.databases.neo4j.io"
os.environ["NEO4J_USERNAME"] = "neo4j"
os.environ["NEO4J_PASSWORD"] = "zzXUn6CYqwe5HANtzTf4bL4P9fj2JCL3WTPtCETYc3k"

graph = Neo4jGraph()

from neo4j import GraphDatabase

driver = GraphDatabase.driver(
    uri = os.environ["NEO4J_URI"],
    auth = (os.environ["NEO4J_USERNAME"],os.environ["NEO4J_PASSWORD"]))
session = driver.session()


Failed to write data to connection ResolvedIPv4Address(('34.69.128.95', 7687)) (ResolvedIPv4Address(('34.69.128.95', 7687)))
Failed to write data to connection IPv4Address(('9f7422d3.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.69.128.95', 7687)))


## Data ingestion

For this demonstration, we will use a DACES report. 

In [127]:
from PyPDF2 import PdfReader
from langchain_community.document_loaders import PyPDFLoader
import jsonpickle
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document


text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=300,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)


# Load from PDF
file_path = "DACES-Third-Annual-Report_Final.pdf"

# Split pages
loader = PyPDFLoader(file_path=file_path)
pages = loader.load_and_split()
print("pages")
#print(pages)

# Define chunking strategy
# text_splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=24)
print("text split")
print(text_splitter)

text = text_splitter.split_documents(pages[:3])
print("text split and document split")
print(text)

#text = str(text)
documents_json = jsonpickle.encode(text)

pages = str(pages)


docs = [Document(page_content= f"{text}", metadata={"title": f"{file_path}"})]

documents = [docs]

print("")
print("final document")
print(documents[0])

len(text)


pages
text split
<langchain_text_splitters.character.RecursiveCharacterTextSplitter object at 0x7fa042691d90>
text split and document split
[Document(page_content='1   \n \n \n \nDEPARTMENT OF THE ARMY CAREER ENGAGEMENT SURVEY \nTHIRD  ANNUAL REPORT', metadata={'source': 'DACES-Third-Annual-Report_Final.pdf', 'page': 0}), Document(page_content='June 2023  Prepared by:  \nDEPUTY CHIEF OF STAFF, G -1, \nHEADQUARTERS, DEPARTMENT OF THE ARMY', metadata={'source': 'DACES-Third-Annual-Report_Final.pdf', 'page': 0}), Document(page_content='& \n \nPEOPLE ANALYTICS,  \nOFFICE OF THE ASSISTANT SECRETARY OF THE ARMY', metadata={'source': 'DACES-Third-Annual-Report_Final.pdf', 'page': 0}), Document(page_content='(MANPOWER & RESERVE AFFAIRS)', metadata={'source': 'DACES-Third-Annual-Report_Final.pdf', 'page': 0}), Document(page_content='2   \n \n \n \n \n \n  \n \n \n \n \n \n  \n \n \n \nTHIS PAGE WAS INTENTIONALLY LEFT BLANK', metadata={'source': 'DACES-Third-Annual-Report_Final.pdf', 'page': 1})

8

Now it's time to construct a graph based on the retrieved documents. For this purpose, we have implemented an `LLMGraphTransformermodule` that significantly simplifies constructing and storing a knowledge graph in a graph database.

In [135]:
from langchain_community.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

callbacks = [StreamingStdOutCallbackHandler()]
llm = HuggingFaceEndpoint(
    endpoint_url="http://ice192:6300",
    max_new_tokens=218,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,
    repetition_penalty=1.03,
    callbacks=callbacks,
    streaming=True,
    huggingfacehub_api_token="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"
)



The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/dosagie6/.cache/huggingface/token
Login successful


In [136]:

#Call llama from Icehammer
client = Client("http://ice192:6300", timeout=180)

# Define tokenizer
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-70B-Instruct")

messages = [
        #{"role": "system", "content": },
        {"role": "system", "content": f"You are a helpful and perfect research expert who extracts information from documents. You have {llm.max_new_tokens} max tokens so use that amount to complete your task. You must complete the task, do not interupt and start over. You are extracting organization and person entities and relationships from the text. DO NOT DUPLICATE ANY RESPONSES UNLESS THEY MAKE SENSE. Use the given format to extract information from the following input: {documents[0]}."},
    ]

prompt = tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt=True)


response = client.generate(
    prompt,
    stop_sequences=["<|eot_id|>"],
    temperature=1,
    do_sample=True,
    return_full_text = False,
)

print("-----markers----")

#-------------------------------
#from langchain_community.chat_models import ChatOllama
#llm = ChatOllama(temperature=0,model="llama3:70b-instruct")  # Trying to use client with existing code further down
#llm=ChatOpenAI(temperature=0, model_name="llama3:70b-instruct")

#doc = Document(page_content="Elon Musk is suing OpenAI")
#graph_documents = transformer.convert_to_graph_documents([doc])

llm_transformer = LLMGraphTransformer(
    llm=llm,
    
)

# Extract graph data
graph_documents = llm_transformer.convert_to_graph_documents(documents[0])

graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)



Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


-----markers----
<|eot_id|><|start_header_id|>assistant<|end_header_id|>

[
    {"head": "DEPARTMENT OF THE ARMY", "head_type": "Organization", "relation": "PUBLISHED", "tail": "CAREER ENGAGEMENT SURVEY", "tail_type": "Report"},
    {"head": "DEPUTY CHIEF OF STAFF, G -1", "head_type": "Person", "relation": "WORKS_FOR", "tail": "HEADQUARTERS, DEPARTMENT OF THE ARMY", "tail_type": "Organization"},
    {"head": "PEOPLE ANALYTICS", "head_type": "Organization", "relation": "PART_OF", "tail": "OFFICE OF THE ASSISTANT SECRETARY OF THE ARMY", "tail_type": "Organization"},
    {"head": "OFFICE OF THE ASSISTANT SECRETARY OF THE ARMY", "head_type": "Organization", "relation": "PART_OF", "tail": "MANPOWER & RESERVE AFFAIRS", "tail_type": "Organization"},
    {"head": "

AttributeError: 'str' object has no attribute 'content'

You can define which LLM you want the knowledge graph generation chain to use. At the moment, we support only function calling models from OpenAI and Mistral. However, we plan to expand the LLM selection in the future. In this example, we are using the latest GPT-4. Note that the quality of generated graph significantly depends on the model you are using. In theory, you always want to use the most capable one. The LLM graph transformers returns graph documents, which can be imported to Neo4j via the `add_graph_documents` method. The `baseEntityLabel` parameter assigns an additional `__Entity__` label to each node, enhancing indexing and query performance. The `include_source` parameter links nodes to their originating documents, facilitating data traceability and context understanding.

You can inspect the generated graph with yfiles visualization.

In [None]:
# directly show the graph resulting from the given Cypher query
default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"

def showGraph(cypher: str = default_cypher):
    # create a neo4j session to run queries
    driver = GraphDatabase.driver(
        uri = os.environ["NEO4J_URI"],
        auth = (os.environ["NEO4J_USERNAME"],
                os.environ["NEO4J_PASSWORD"]))
    session = driver.session()
    widget = GraphWidget(graph = session.run(cypher).graph())
    widget.node_label_mapping = 'id'
    #display(widget)
    return widget

showGraph()

## Hybrid Retrieval for RAG
After the graph generation, we will use a hybrid retrieval approach that combines vector and keyword indexes with graph retrieval for RAG applications.

![retrieval](https://raw.githubusercontent.com/tomasonjo/blogs/master/graphhybrid.png)

The diagram illustrates a retrieval process beginning with a user posing a question, which is then directed to an RAG retriever. This retriever employs keyword and vector searches to search through unstructured text data and combines it with the information it collects from the knowledge graph. Since Neo4j features both keyword and vector indexes, you can implement all three retrieval options with a single database system. The collected data from these sources is fed into an LLM to generate and deliver the final answer.
## Unstructured data retriever
You can use the Neo4jVector.from_existing_graph method to add both keyword and vector retrieval to documents. This method configures keyword and vector search indexes for a hybrid search approach, targeting nodes labeled Document. Additionally, it calculates text embedding values if they are missing.



In [None]:
from langchain_community.vectorstores import Neo4jVector

vector_index = Neo4jVector.from_existing_graph(
    OllamaEmbeddings(),
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)

The vector index can then be called with the similarity_search method.
## Graph retriever
On the other hand, configuring a graph retrieval is more involved but offers more freedom. In this example, we will use a full-text index to identify relevant nodes and then return their direct neighborhood.

![graph](https://raw.githubusercontent.com/tomasonjo/blogs/master/neighbor.png)

The graph retriever starts by identifying relevant entities in the input. For simplicity, we instruct the LLM to identify people, organizations, and locations. To achieve this, we will use LCEL with the newly added `with_structured_output` method to achieve this.

In [None]:
# Retriever

graph.query(
    "CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]")

# Extract entities from text
class Entities(BaseModel):
    """Identifying information about entities."""

    names: List[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )
    
    
"""
prompt = ChatPromptTemplate.from_messages(
    [
        {"role": "system", "content": "You are extracting organization and person entities from the text."},
        {"role": "user", "content": "Use the given format to extract information from the following input: {question}."},
    ]
)
"""


entity_chain = prompt | llm.with_structured_output(Entities)

Let's test it out:

In [None]:
entity_chain.invoke({"question": "What branch(es) of the military is facing military unemployment?"}).names

Great, now that we can detect entities in the question, let's use a full-text index to map them to the knowledge graph. First, we need to define a full-text index and a function that will generate full-text queries that allow a bit of misspelling, which we won't go into much detail here.

In [None]:
def generate_full_text_query(input: str) -> str:
    """
    Generate a full-text search query for a given input string.

    This function constructs a query string suitable for a full-text search.
    It processes the input string by splitting it into words and appending a
    similarity threshold (~2 changed characters) to each word, then combines
    them using the AND operator. Useful for mapping entities from user questions
    to database values, and allows for some misspelings.
    """
    full_text_query = ""
    words = [el for el in remove_lucene_chars(input).split() if el]
    for word in words[:-1]:
        full_text_query += f" {word}~2 AND"
    full_text_query += f" {words[-1]}~2"
    return full_text_query.strip()

# Fulltext index query
def structured_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question
    """
    result = ""
    entities = entity_chain.invoke({"question": question})
    for entity in entities.names:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": generate_full_text_query(entity)},
        )
        result += "\n".join([el['output'] for el in response])
    return result

The `structured_retriever` function starts by detecting entities in the user question. Next, it iterates over the detected entities and uses a Cypher template to retrieve the neighborhood of relevant nodes. Let's test it out!

In [None]:
print(structured_retriever("What is DACES?"))

## Final retriever
As we mentioned at the start, we'll combine the unstructured and graph retriever to create the final context that will be passed to an LLM.

In [None]:
def retriever(question: str):
    print(f"Search query: {question}")
    structured_data = structured_retriever(question)
    unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
    final_data = f"""Structured data:
{structured_data}
Unstructured data:
{"#Document ". join(unstructured_data)}
    """
    return final_data

As we are dealing with Python, we can simply concatenate the outputs using the f-string.
## Defining the RAG chain
We have successfully implemented the retrieval component of the RAG. First, we will introduce the query rewriting part that allows conversational follow up questions.


In [None]:
# Condense a chat history and follow-up question into a standalone question
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""  # noqa: E501
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

_search_query = RunnableBranch(
    # If input includes chat_history, we condense it with the follow-up question
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),  # Condense follow-up question and chat into a standalone_question
        RunnablePassthrough.assign(
            chat_history=lambda x: _format_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        | ChatOpenAI(temperature=0)
        | StrOutputParser(),
    ),
    # Else, we have no chat history, so just pass through the question
    RunnableLambda(lambda x : x["question"]),
)

Next, we introduce a prompt that leverages the context provided by the integrated hybrid retriever to produce the response, completing the implementation of the RAG chain.

In [None]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""

prompt = tokenizer.apply_chat_template(template, tokenize=False, add_generation_prompt=True)

chain = (
    RunnableParallel(
        {
            "context": _search_query | retriever,
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

Finally, we can go ahead and test our hybrid RAG implementation.

In [None]:
response = client.generate(prompt, stop_sequences=["<|eot_id|>"], max_new_tokens=256, do_sample=True)

chain.invoke({"question": "What are the main reasons military spouses are unhappy?"})

Let's test a follow up question!

In [None]:
chain.invoke(
    {
        "question": "What are the amounts of military spouses in this test and how would you break them up into segments?",
        #"chat_history": [("What are the main reasons military spouses are unhappy?", "House Of Tudor")],
    }
)