# Module 6 - GraphRAG and Agent

In [1]:
#!pip install graphdatascience neo4j dotenv openai langchain, langgraph, pydantic, gradio

Import our usual suspects (and some more...)

In [43]:
import os
import pandas as pd
from dotenv import load_dotenv
from graphdatascience import GraphDataScience
from neo4j import Query, GraphDatabase, RoutingControl, Result
from langchain.schema import HumanMessage
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.messages import HumanMessage
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langgraph.prebuilt import create_react_agent
from openai import OpenAI
from typing import List, Optional
from pydantic import BaseModel, Field, validator
import functools
from langchain_core.tools import tool
import gradio as gr
import time
from json import loads, dumps

## Setup

Load env variables

In [5]:
env_file = 'credentials.env'

In [6]:
if os.path.exists(env_file):
    load_dotenv(env_file, override=True)

    # Neo4j
    HOST = os.getenv('NEO4J_URI')
    USERNAME = os.getenv('NEO4J_USERNAME')
    PASSWORD = os.getenv('NEO4J_PASSWORD')
    DATABASE = os.getenv('NEO4J_DATABASE')

    # AI
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
    os.environ['OPENAI_API_KEY']=OPENAI_API_KEY
    LLM = os.getenv('LLM')
    EMBEDDINGS_MODEL = os.getenv('EMBEDDINGS_MODEL')
else:
    print(f"File {env_file} not found.")

Connect to neo4j db

In [7]:
driver = GraphDatabase.driver(
    HOST,
    auth=(USERNAME, PASSWORD)
)

Test the connection

In [8]:
driver.execute_query(
    """
    MATCH (n) RETURN COUNT(n) as Count
    """,
    database_=DATABASE,
    routing_=RoutingControl.READ,
    result_transformer_= lambda r: r.to_df()
)

Unnamed: 0,Count
0,1494


Test whether we got our constraints

In [9]:
schema_result_df  = driver.execute_query(
    'show indexes',
    database_=DATABASE,
    routing_=RoutingControl.READ,
    result_transformer_= lambda r: r.to_df()
)

In [10]:
schema_result_df.head(100)

Unnamed: 0,id,name,state,populationPercent,type,entityType,labelsOrTypes,properties,indexProvider,owningConstraint,lastRead,readCount
0,6,chunk-embeddings,ONLINE,100.0,VECTOR,NODE,[Chunk],[embedding],vector-2.0,,2025-05-14T15:09:53.932000000+00:00,117
1,0,index_343aff4e,ONLINE,100.0,LOOKUP,NODE,,,token-lookup-1.0,,2025-05-14T15:13:17.298000000+00:00,3826
2,1,index_f7700477,ONLINE,100.0,LOOKUP,RELATIONSHIP,,,token-lookup-1.0,,2025-05-14T15:13:17.931000000+00:00,116
3,4,unique_chunk,ONLINE,100.0,RANGE,NODE,[Chunk],[id],range-1.0,unique_chunk,2025-05-14T15:09:53.955000000+00:00,11055
4,2,unique_document,ONLINE,100.0,RANGE,NODE,[Document],[id],range-1.0,unique_document,2025-05-13T12:31:28.842000000+00:00,2742


## Agents with GraphRAG

### Lets create a Retrieval agent

In [46]:
# class Skill(BaseModel):
#     """
#     Represents a professional skill or knowledge of a person.
#     """
#     name: str = Field(..., description="Sortened name of the skill")

In [47]:
client = OpenAI()

In [48]:
llm = ChatOpenAI(model_name=LLM, temperature=0)

In [50]:
llm.model_name

'gpt-4o'

In [36]:
embedding_model = OpenAIEmbeddings(
    model=EMBEDDINGS_MODEL,
    openai_api_key=OPENAI_API_KEY
)

In [37]:
embedding_model.model

'text-embedding-ada-002'

### Tool 1

In [20]:
def retrieve_products() -> pd.DataFrame:
    """Retrieve the products in the database. Products are specified with name. """
    return driver.execute_query(
        """
        MATCH (p:ProductType)
        RETURN p.name as name
        """,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        result_transformer_= lambda r: r.to_df(),
    )['name'].tolist()

In [21]:
retrieve_products()

['SpaarRekening',
 'DirectRekening',
 'Kortlopende Reis',
 'BeleggersRekening',
 'RaboBusiness Banking']

### Tool 2

In [22]:
map_products_prompt = """
As an intelligent assistant, your primary objective is to map a product name to product names in the database.

Examples:
#####
Product: savings account. 
Database Products: ['SpaarRekening', 'DirectRekening', 'Kortlopende Reis', 'BeleggersRekening', 'RaboBusiness Banking']
Assistant: Product: SpaarRekening
#####
#####
Product: Direct Rekening. 
Database Products: ['SpaarRekening', 'DirectRekening', 'Kortlopende Reis', 'BeleggersRekening', 'RaboBusiness Banking']Assistant: Customer: Jan Blok
Assistant: Product: DirectRekening

#####
#####
Product: Reis verzekering. 
Database Products: ['SpaarRekening', 'DirectRekening', 'Kortlopende Reis', 'BeleggersRekening', 'RaboBusiness Banking']Assistant: Customer: Jan Blok
Assistant: Product: Kortlopende Reis
#####
"""

def map_product_to_database_products(product) -> str:
    """Map products from the user question to the actual products in the database."""

    response = client.beta.chat.completions.parse(
        model=LLM,
        temperature=0,
        messages=[
            {"role": "system", "content": map_products_prompt},
            {"role": "user", "content": "Product: " + product},
            {"role": "user", "content": "Database Products: " + str(retrieve_products())},
            
        ],
#        response_format=DefinitionList,
    )
    return response.choices[0].message.content 

In [23]:
map_product_to_database_products('savings account')

'Product: SpaarRekening'

### Tool 3

In [24]:
def retrieve_document_from_product(product_name) -> pd.DataFrame:
    """Retrieve the documents of products in the database. Products are specified with their name. """
    return driver.execute_query(
        """
        MATCH (p:ProductType)<-[:RELATED_TO]-(d:Document)
        WHERE LOWER(p.name) = LOWER($product_name)
        RETURN d.file_name
        """,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        product_name = product_name,
        result_transformer_= lambda r: r.to_df(),
    ).iloc[0]['d.file_name']

In [25]:
retrieve_document_from_product('SpaarRekening')

'Rabo SpaarRekening 2020.pdf'

### Tool 4

In [38]:
def get_context_graphrag(document, search_prompt):

    query_vector = embedding_model.embed_query(search_prompt)
    
    similarity_query = """ 
        CALL db.index.vector.queryNodes("chunk-embeddings", 30, $query_vector) YIELD node, score
        WITH node as chunk, score ORDER BY score DESC
        MATCH (d:Document {file_name: $document})<-[:PART_OF]-(chunk)
        WITH score, d, chunk LIMIT 5
        RETURN score, d.file_name as file_name, chunk.id as chunk_id, chunk.page as page, chunk.chunk_eng AS chunk
       """
    results = driver.execute_query(
        similarity_query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        document = document,
        query_vector=query_vector,
        result_transformer_= lambda r: r.to_df()
    )

    chunk_ids = list(set(results['chunk_id'].to_list()))

    results = results.to_json(orient="records")
    parsed = loads(results)
    context = dumps(parsed, indent=4)

    definition_query = """    
        MATCH (c:Chunk)-[:MENTIONS]->(d:Definition)
        WHERE c.id in $chunk_ids
        RETURN DISTINCT d.term as term, d.description as description
    """
    results = driver.execute_query(
        definition_query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        chunk_ids=chunk_ids,
        result_transformer_= lambda r: r.to_df()
    )
    results = results.to_json(orient="records")
    parsed = loads(results)
    definitions = dumps(parsed, indent=4)
    return context, definitions

In [39]:
def generate_prompt_graphrag(search_prompt, context, definitions):
    prompt_template = """

    You are a chatbot on Rabobank product. Your goal is to help people with questions on product policies.  
    A user will come to you with questions on their policy. Their questions must be answered based on the relevant documents of the policy.
    Respond in English. 

    The question is the following: 
    {search_prompt}
    
    Always respond in the language in which the question was asked. So, do not respond in a different language.
    
    The context is the following: 
    {context}

    The definitions are the following: 
    {definitions}
    
    Please end your message with listing your sources with file name and page number. 
    """
    prompt = PromptTemplate.from_template(prompt_template)
    
    theprompt = prompt.format_prompt(search_prompt=search_prompt, context=context, definitions=definitions)
    return theprompt

In [40]:
def perform_search_in_document(document, search_prompt) -> pd.DataFrame:
    """Peform a search in the document to search relevant text and definitions to answer a user question. The document first needs to be determined before a search should be performed."""
    context, definitions = get_context_graphrag(document, search_prompt)
    return context, definitions

In [61]:
def answer_question(document, question):
    """This function is answering a question based on a search in a document (vector search on document). Document and question both need to be provided."""
    context, definitions = perform_search_in_document(document, question)
    theprompt = generate_prompt_graphrag(question, context, definitions)
    llm(theprompt.to_messages()).pretty_print()

In [62]:
answer_question("Rabo SpaarRekening 2020.pdf", "What are the rules for shared savings account?")


The rules for a shared savings account, also known as a joint account, at Rabobank are as follows:

1. **Joint Account Definition**: If the savings account has multiple account holders, it is considered a joint account. This is only different if it has been agreed with the bank that it is a joint-and account.

2. **Communication**: Rabobank only needs to inform one account holder, either in writing or electronically. It is the responsibility of the informed account holder to immediately share any information with the other account holders. All account holders are bound by the information provided to one of them, even if they do not live at the same address.

3. **Notifications**: If one account holder informs Rabobank or makes a notification, it is assumed that this is done on behalf of all account holders.

4. **Account Usage in Special Circumstances**: If one or more account holders experience bankruptcy, legal debt restructuring, or seizure, none of the account holders may use the 

### Tool 5

In [54]:
def retrieve_products_of_customers(customer_name) -> pd.DataFrame:
    """Retrieve the products of a customer in the database. Customers are specified with their name. """
    return driver.execute_query(
        """
        MATCH (c:Customer)-[:HAS_PRODUCT]->(p:Product)
        WHERE LOWER(c.name) = LOWER($customer_name)
        RETURN p.id as product_id, p.name as product_name
        """,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        customer_name = customer_name,
        result_transformer_= lambda r: r.to_df(),
    )

In [55]:
retrieve_products_of_customers("Emma Bakker")

Unnamed: 0,product_id,product_name
0,NL42RABO0707670485,DirectRekening Product
1,1bb02052-9bc6-4f8d-9858-68ffbcd98815,Kortlopende Reis Product


## Setting up the Agent

In [None]:
llm = ChatOpenAI(model_name=LLM, temperature=0)

In [None]:
response = llm.invoke([HumanMessage(content="hi!")])
response.content

In [63]:
tools = [
    retrieve_products,
    map_product_to_database_products,
    retrieve_document_from_product,
    answer_question,
    retrieve_products_of_customers,
]

llm_with_tools = llm.bind_tools(tools)

## Running Agents with LangGraph

In [64]:
agent_executor = create_react_agent(llm, tools)

In [65]:
response = agent_executor.invoke({"messages": [HumanMessage(content="hi!")]})

In [66]:
response["messages"]

[HumanMessage(content='hi!', additional_kwargs={}, response_metadata={}, id='985d5d3a-87cb-4fe5-a477-2d02902abbd8'),
 AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 191, 'total_tokens': 202, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_90122d973c', 'finish_reason': 'stop', 'logprobs': None}, id='run-ba85bf0e-e862-440a-b268-3910e334eda8-0', usage_metadata={'input_tokens': 191, 'output_tokens': 11, 'total_tokens': 202, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]

#### Run some examples! 

In [67]:
def ask_to_agent(question):
    for step in agent_executor.stream(
        {"messages": [HumanMessage(content=question)]},
        stream_mode="values",
    ):
        step["messages"][-1].pretty_print()

In [69]:
question = "What Products does Emma Jansen have?"

In [70]:
ask_to_agent(question)


What Products does Emma Jansen have?
Tool Calls:
  retrieve_products_of_customers (call_nZnJoB8ZnlAncIOesAPzG4NR)
 Call ID: call_nZnJoB8ZnlAncIOesAPzG4NR
  Args:
    customer_name: Emma Jansen
Name: retrieve_products_of_customers

           product_id            product_name
0  NL86RABO5202385969   SpaarRekening Product
1  NL43RABO0278547078  DirectRekening Product

Emma Jansen has the following products:

1. SpaarRekening Product
2. DirectRekening Product


In [71]:
question = "I got a question on my savings account, what are the rules for a joint account?"

In [72]:
ask_to_agent(question)


I got a question on my savings account, what are the rules for a joint account?
Tool Calls:
  retrieve_products (call_Wrq4FwvrhnANfLjceyhYZ0vB)
 Call ID: call_Wrq4FwvrhnANfLjceyhYZ0vB
  Args:
Name: retrieve_products

["SpaarRekening", "DirectRekening", "Kortlopende Reis", "BeleggersRekening", "RaboBusiness Banking"]
Tool Calls:
  map_product_to_database_products (call_ziNA1gM7ekJdlHZgRe91UZcP)
 Call ID: call_ziNA1gM7ekJdlHZgRe91UZcP
  Args:
    product: savings account
Name: map_product_to_database_products

Product: SpaarRekening
Tool Calls:
  retrieve_document_from_product (call_OnLnS8xUHvrBnWNpkiHBXUCc)
 Call ID: call_OnLnS8xUHvrBnWNpkiHBXUCc
  Args:
    product_name: SpaarRekening
Name: retrieve_document_from_product

Rabo SpaarRekening 2020.pdf
Tool Calls:
  answer_question (call_Rf9PM4gI6S9RKRKzo7LHbLen)
 Call ID: call_Rf9PM4gI6S9RKRKzo7LHbLen
  Args:
    document: Rabo SpaarRekening 2020.pdf
    question: What are the rules for a joint account?

The rules for a joint account at

In [73]:
question = "When is my travel insurance exprired? My name is "

In [74]:
ask_to_agent(question)


When is my travel insurance exprired?

To determine when your travel insurance expires, I would need to know the specific details of your insurance policy, such as the provider, policy number, or any other relevant information. If you have access to your insurance documents, you can check the expiration date there. Alternatively, you can contact your insurance provider directly for this information.


In [75]:
question = "When is my travel insurance exprired? My name is Anna Vos"

In [76]:
ask_to_agent(question)


When is my travel insurance exprired? My name is Anna Vos
Tool Calls:
  retrieve_products_of_customers (call_gzILYzHSzqATL3OkVz0djXQQ)
 Call ID: call_gzILYzHSzqATL3OkVz0djXQQ
  Args:
    customer_name: Anna Vos
Name: retrieve_products_of_customers

                             product_id                  product_name
0  45ec49ba-2f50-4dc0-8b65-e230ff1bc25a  RaboBusiness Banking Product
1  b28e2c9d-1a0d-43b9-9751-0a7a6e23d6c4     BeleggersRekening Product
2  5e8f4b3a-cb8a-4a64-9995-00bace934892      Kortlopende Reis Product
Tool Calls:
  retrieve_document_from_product (call_8QM85mJYchuk0EOWiEfICDL2)
 Call ID: call_8QM85mJYchuk0EOWiEfICDL2
  Args:
    product_name: Kortlopende Reis Product
Name: retrieve_document_from_product

Error: IndexError('single positional indexer is out-of-bounds')
 Please fix your mistakes.
Tool Calls:
  retrieve_products (call_0q3hVMFQGhhCYFwQvAV2owQZ)
 Call ID: call_0q3hVMFQGhhCYFwQvAV2owQZ
  Args:
Name: retrieve_products

["SpaarRekening", "DirectRekening", 

## Chatbot

Now create a chatbot with the agent providing the responses

In [None]:
def user(user_message, history):
    if history is None:
        history = []
    history.append({"role": "user", "content": user_message})
    return "", history

def get_answer(history):
    steps = []
    full_prompt = "\n".join([f"{msg['role'].capitalize()}: {msg['content']}" for msg in history])
    
    for step in agent_executor.stream(
            {"messages": [HumanMessage(content=full_prompt)]},
            stream_mode="values",
    ):
        step["messages"][-1].pretty_print()
        steps.append(step["messages"][-1].content)
    
    return steps[-1]

def bot(history):
    bot_message = get_answer(history)
    history.append({"role": "assistant", "content": ""})

    for character in bot_message:
        history[-1]["content"] += character
        time.sleep(0.01)
        yield history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        label="Chatbot on a Graph",
        avatar_images=[
            "https://png.pngtree.com/png-vector/20220525/ourmid/pngtree-concept-of-facial-animal-avatar-chatbot-dog-chat-machine-illustration-vector-png-image_46652864.jpg",
            "https://d-cb.jc-cdn.com/sites/crackberry.com/files/styles/larger/public/article_images/2023/08/openai-logo.jpg"
        ],
        type="messages", 
    )
    msg = gr.Textbox(label="Message")
    clear = gr.Button("Clear")

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot], chatbot
    )

    clear.click(lambda: [], None, chatbot, queue=False)

demo.queue()
demo.launch(share=True)

If you want to have the light-mode for the chatbot paste the following after the URL: /?__theme=light

### Text2Cypher

If time allows we can still experiment with the Text2Cypher functionality. 

In [None]:
text2cypher_prompt =  PromptTemplate.from_template(
    """
    Task: Generate a Cypher statement for querying a Neo4j graph database from a user input. 
    - Do not include triple backticks ``` or ```cypher or any additional text except the generated Cypher statement in your response.
    - Do not use any properties or relationships not included in the schema.
    
    Schema:
    {schema}
    
    #User Input
    {question}
    
    Cypher query:
    """
)

In [None]:
annotated_schema = """
    Nodes:
      Person:
        description: "A person in our talent pool."
        properties:
          name:
            type: "string"
            description: "The full name of the person. serves as a unique identifier."
          email:
            type: "string"
            description: "The email address of the person."
          leiden_community:
            type: "integer"
            description: "The talent community for the person.  People in the same talent segment share similar skills."
      Skill:
        description: "A professional skill."
        properties:
          name:
            type: "string"
            description: "The unique name of the skill."
    Relationships:
        KNOWS:
            description: "A person knowing a skill."
            query_pattern: "(:Person)-[:KNOWS]->(:Skill)"
    """

In [None]:
text2cypher_llm = ChatOpenAI(model=LLM, temperature=0)

In [None]:
@tool
def perform_aggregation_query(question: str) -> pd.DataFrame:
    """
    perform an aggregation query on the Neo4j graph database and obtain the results.
    """
    prompt = text2cypher_prompt.invoke({'schema': annotated_schema, 'question': question})
    query = text2cypher_llm.invoke(prompt).content
    print(f"executing Cypher query:\n{query}")
    return driver.execute_query(
        query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        result_transformer_= lambda r: r.to_df()
    )    

In [None]:
perform_aggregation_query('describe communities by skills') 

In [None]:
perform_aggregation_query('how many people share skills with Isabella Allen, and what are the skills')

In [None]:
perform_aggregation_query('Can you list me a 5 random person name from the database?')