# Module 6 - GraphRAG and Agent

In [1]:
#!pip install graphdatascience neo4j dotenv openai langchain, langgraph, pydantic, gradio

Import our usual suspects (and some more...)

In [2]:
import os
import pandas as pd
from dotenv import load_dotenv
from graphdatascience import GraphDataScience
from neo4j import Query, GraphDatabase, RoutingControl, Result
from langchain.schema import HumanMessage
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.messages import HumanMessage
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langgraph.prebuilt import create_react_agent
from openai import OpenAI
from typing import List, Optional
from pydantic import BaseModel, Field, validator
import functools
from langchain_core.tools import tool
import gradio as gr
import time
from json import loads, dumps

## Setup

Load env variables

In [3]:
env_file = 'ws.env'

In [4]:
if os.path.exists(env_file):
    load_dotenv(env_file, override=True)

    # Neo4j
    HOST = os.getenv('NEO4J_URI')
    USERNAME = os.getenv('NEO4J_USERNAME')
    PASSWORD = os.getenv('NEO4J_PASSWORD')
    DATABASE = os.getenv('NEO4J_DATABASE')

    # AI
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
    os.environ['OPENAI_API_KEY']=OPENAI_API_KEY
    LLM = os.getenv('LLM')
    EMBEDDINGS_MODEL = os.getenv('EMBEDDINGS_MODEL')
else:
    print(f"File {env_file} not found.")

Setup connection to the database with the [Python Driver](https://neo4j.com/docs/python-manual/5/).

In [5]:
driver = GraphDatabase.driver(
    HOST,
    auth=(USERNAME, PASSWORD)
)

Test the connection

In [6]:
driver.execute_query(
    """
    MATCH (n) RETURN COUNT(n) as Count
    """,
    database_=DATABASE,
    routing_=RoutingControl.READ,
    result_transformer_= lambda r: r.to_df()
)

Unnamed: 0,Count
0,1388


Test whether we got our constraints

In [7]:
schema_result_df  = driver.execute_query(
    'show indexes',
    database_=DATABASE,
    routing_=RoutingControl.READ,
    result_transformer_= lambda r: r.to_df()
)

In [8]:
schema_result_df.head(100)

Unnamed: 0,id,name,state,populationPercent,type,entityType,labelsOrTypes,properties,indexProvider,owningConstraint,lastRead,readCount
0,6,chunk-embeddings,ONLINE,100.0,VECTOR,NODE,[Chunk],[embedding],vector-2.0,,2025-05-16T10:24:41.768000000+00:00,120
1,7,definition-embeddings,ONLINE,100.0,VECTOR,NODE,[Definition],[embedding],vector-2.0,,2025-05-16T10:24:41.783000000+00:00,744
2,0,index_343aff4e,ONLINE,100.0,LOOKUP,NODE,,,token-lookup-1.0,,2025-05-16T10:24:38.954000000+00:00,7183
3,1,index_f7700477,ONLINE,100.0,LOOKUP,RELATIONSHIP,,,token-lookup-1.0,,2025-05-16T10:24:40.327000000+00:00,140
4,2,unique_chunk,ONLINE,100.0,RANGE,NODE,[Chunk],[id],range-1.0,unique_chunk,2025-05-16T10:02:22.855000000+00:00,7026
5,4,unique_document,ONLINE,100.0,RANGE,NODE,[Document],[id],range-1.0,unique_document,2025-05-15T09:24:03.456000000+00:00,625


## Agents with GraphRAG

### Lets create a Retrieval agent

In [9]:
client = OpenAI()

In [10]:
llm = ChatOpenAI(model_name=LLM, temperature=0)

In [11]:
llm.model_name

'gpt-4o'

In [12]:
embedding_model = OpenAIEmbeddings(
    model=EMBEDDINGS_MODEL,
    openai_api_key=OPENAI_API_KEY
)

In [13]:
embedding_model.model

'text-embedding-ada-002'

### Tool 1

In [14]:
def retrieve_products() -> pd.DataFrame:
    """Retrieve the products in the database. Products are specified with name. """
    return driver.execute_query(
        """
        MATCH (p:ProductType)
        RETURN p.name as name
        """,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        result_transformer_= lambda r: r.to_df(),
    )['name'].tolist()

In [15]:
retrieve_products()

['SpaarRekening',
 'DirectRekening',
 'Kortlopende Reis',
 'BeleggersRekening',
 'RaboBusiness Banking']

### Tool 2

In [16]:
map_products_prompt = """
As an intelligent assistant, your primary objective is to map a product name to product names in the database.

Examples:
#####
Product: savings account. 
Database Products: ['SpaarRekening', 'DirectRekening', 'Kortlopende Reis', 'BeleggersRekening', 'RaboBusiness Banking']
Assistant: Product: SpaarRekening
#####
#####
Product: Direct Rekening. 
Database Products: ['SpaarRekening', 'DirectRekening', 'Kortlopende Reis', 'BeleggersRekening', 'RaboBusiness Banking']Assistant: Customer: Jan Blok
Assistant: Product: DirectRekening

#####
#####
Product: Reis verzekering. 
Database Products: ['SpaarRekening', 'DirectRekening', 'Kortlopende Reis', 'BeleggersRekening', 'RaboBusiness Banking']Assistant: Customer: Jan Blok
Assistant: Product: Kortlopende Reis
#####
"""

def map_product_to_database_products(product) -> str:
    """Map products from the user question to the actual products in the database."""

    response = client.beta.chat.completions.parse(
        model=LLM,
        temperature=0,
        messages=[
            {"role": "system", "content": map_products_prompt},
            {"role": "user", "content": "Product: " + product},
            {"role": "user", "content": "Database Products: " + str(retrieve_products())},
            
        ],
#        response_format=DefinitionList,
    )
    return response.choices[0].message.content 

In [17]:
map_product_to_database_products('savings account')

'Product: SpaarRekening'

### Tool 3

In [18]:
def retrieve_document_from_product(product_name) -> pd.DataFrame:
    """Retrieve the documents of products in the database. Products are specified with their name. """
    return driver.execute_query(
        """
        MATCH (p:ProductType)<-[:RELATED_TO]-(d:Document)
        WHERE LOWER(p.name) = LOWER($product_name)
        RETURN d.file_name
        """,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        product_name = product_name,
        result_transformer_= lambda r: r.to_df(),
    ).iloc[0]['d.file_name']

In [19]:
retrieve_document_from_product('SpaarRekening')

'Rabo SpaarRekening 2020.pdf'

### Tool 4

In [20]:
def get_context_graphrag(search_prompt, document):
    query_vector = embedding_model.embed_query(search_prompt)

    similarity_query = """ 
        CALL db.index.vector.queryNodes("chunk-embeddings", 30, $query_vector) YIELD node, score
        WITH node as chunk, score ORDER BY score DESC
        CALL (chunk) {
            MATCH (chunk)-[r:OVERLAPPING_DEFINITIONS]-(overlapping_chunk:Chunk)
            WHERE r.overlap > 3
            RETURN collect(overlapping_chunk) AS overlapping_chunks
        }
        WITH [chunk] + overlapping_chunks AS chunks
        UNWIND chunks as chunk
        MATCH (d:Document{file_name: $document})<-[:PART_OF]-(chunk)
        RETURN d.file_name as file_name, chunk.id as chunk_id, chunk.page as page, chunk.chunk_eng AS chunk
       """
    results_1 = driver.execute_query(
        similarity_query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        query_vector=query_vector,
        document=document,
        result_transformer_= lambda r: r.to_df()
    )

    chunk_ids = list(set(results_1['chunk_id'].to_list()))

    definition_query = """    
       CALL db.index.vector.queryNodes("definition-embeddings", 5, $query_vector) YIELD node, score
            WITH node as definition, score ORDER BY score DESC
            WHERE definition.degree < 20
            WITH definition LIMIT 1
            MATCH (definition)<-[:MENTIONS]-(chunk:Chunk)
            WHERE NOT (chunk.id IN $chunk_ids)
            WITH chunk LIMIT 3
            MATCH (d:Document{file_name: $document})<-[:PART_OF]-(chunk)
            RETURN d.file_name as file_name, chunk.id as chunk_id, chunk.page as page, chunk.chunk_eng AS chunk
    """
    results_2 = driver.execute_query(
        definition_query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        chunk_ids=chunk_ids,
        document=document,
        query_vector=query_vector,
        result_transformer_= lambda r: r.to_df()
    )

    results = pd.concat([results_1,results_2]).drop_duplicates()
    results = results.to_json(orient="records")
    parsed = loads(results)
    context = dumps(parsed, indent=2)
    return context

In [21]:
def generate_prompt(search_prompt, context):
    prompt_template = """

    You are a chatbot on Rabobank product. Your goal is to help people with questions on product policies.  
    A user will come to you with questions on their policy. Their questions must be answered based on the relevant documents of the policy.
    Respond in English. 

    The question is the following: 
    {search_prompt}
    
    Always respond in the language in which the question was asked. So, do not respond in a different language.
    
    The context is the following: 
    {context}

    Please explain your answer as thorough as possbile based on the context above. Don't come up with anything yourself.
    
    Please end your message with listing your sources with file name and page number. 
    """
    prompt = PromptTemplate.from_template(prompt_template)
    
    theprompt = prompt.format_prompt(search_prompt=search_prompt, context=context)
    return theprompt

In [22]:
context = get_context_graphrag("What are the rules for shared savings account?", "Rabo SpaarRekening 2020.pdf")

In [23]:
def perform_search_in_document(document, search_prompt) -> [str, str]:
    """Peform a search in the document to search relevant text to answer a user question. The document first needs to be determined before a search should be performed."""
    context = get_context_graphrag(document, search_prompt)
    return context

In [24]:
def answer_question_in_document(document, question) -> str:
    """This function is answering a question based on a search in a document (vector search on document). Document and question both need to be provided."""
    context = perform_search_in_document(question, document)
    theprompt = generate_prompt(question, context)
    return llm(theprompt.to_messages()).content

In [25]:
result = answer_question_in_document("What are the rules for shared savings account?", "Rabo SpaarRekening 2020.pdf",)

  return llm(theprompt.to_messages()).content


In [26]:
print(result)

I'm sorry, but I cannot provide an answer to your question about the "Rabo SpaarRekening 2020.pdf" policy because there is no context or content provided from the document. To assist you effectively, I would need specific information or excerpts from the document that relate to your question. If you can provide more details or context, I would be happy to help you further.

Sources: None available due to lack of context.


### Tool 5

In [27]:
def retrieve_products_of_customers(customer_id) -> pd.DataFrame:
    """Retrieve the products of a customer in the database. Customers are specified with their id. """
    return driver.execute_query(
        """
        MATCH (c:Customer)-[:HAS_PRODUCT]->(p:Product)
        WHERE c.id = $customer_id
        RETURN p.id as product_id, p.name as product_name
        """,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        customer_id = customer_id,
        result_transformer_= lambda r: r.to_df(),
    )

In [28]:
retrieve_products_of_customers(16)

Unnamed: 0,product_id,product_name
0,ef31587f-5c96-4ab2-99e6-99f3b6fa88e8,DirectRekening Product
1,f64aae41-23d9-45d7-9310-48c7a1fd158b,RaboBusiness Banking Product
2,576fd5fe-5d27-424a-865f-56617a2955a7,Kortlopende Reis Product


### Tool 6

In [29]:
def retrieve_information_from_product(product_id) -> pd.DataFrame:
    """Retrieve the information of a product in the database. Product is specified with id."""
    result = driver.execute_query(
        """
       MATCH (p:Product{id: $product_id})
        RETURN properties(p) as props
        """,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        product_id = product_id,
        result_transformer_= lambda r: r.to_df(),
    )
    return result.iloc[0]['props']

In [30]:
retrieve_information_from_product("ef31587f-5c96-4ab2-99e6-99f3b6fa88e8")

{'id': 'ef31587f-5c96-4ab2-99e6-99f3b6fa88e8',
 'expirationDate': neo4j.time.Date(2020, 4, 1),
 'name': 'DirectRekening Product',
 'iban': 'NL85RABO1352165826'}

### Tool 7

In [31]:
def retrieve_customers_from_database_based_on_full_name(full_name) -> pd.DataFrame:
    """Retrieve customers from the database database based on full_name. customer_id is returned."""
    
    query = """MATCH (c:Customer) WHERE c.name = $name RETURN c.id"""

    result = driver.execute_query(
        query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        name = full_name,
        result_transformer_= lambda r: r.to_df(),
    )
    return result

In [32]:
retrieve_customers_from_database_based_on_full_name("Lucas Van den Berg")

Unnamed: 0,c.id
0,48


## Setting up the Agent

In [33]:
llm = ChatOpenAI(model_name=LLM, temperature=0)

In [34]:
response = llm.invoke([HumanMessage(content="hi!")])
response.content

'Hello! How can I assist you today?'

In [35]:
tools = [
    retrieve_products,
    map_product_to_database_products,
    retrieve_document_from_product,
    answer_question_in_document,
    retrieve_products_of_customers,
    retrieve_information_from_product,
    retrieve_customers_from_database_based_on_full_name,
]

llm_with_tools = llm.bind_tools(tools)

## Running Agents with LangGraph

In [36]:
agent_executor = create_react_agent(llm, tools)

In [37]:
response = agent_executor.invoke({"messages": [HumanMessage(content="hi!")]})

In [38]:
response["messages"]

[HumanMessage(content='hi!', additional_kwargs={}, response_metadata={}, id='9762b298-9ddc-4251-93a6-3ff225dac6b4'),
 AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 264, 'total_tokens': 275, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_f5bdcc3276', 'finish_reason': 'stop', 'logprobs': None}, id='run-461dcc70-e747-402c-a0e4-1181c320b966-0', usage_metadata={'input_tokens': 264, 'output_tokens': 11, 'total_tokens': 275, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]

#### Run some examples! 

In [39]:
def ask_to_agent(question):
    for step in agent_executor.stream(
        {"messages": [HumanMessage(content=question)]},
        stream_mode="values",
    ):
        step["messages"][-1].pretty_print()

In [40]:
question = "What Products does Jan Kok have?"

In [41]:
ask_to_agent(question)


What Products does Jan Kok have?
Tool Calls:
  retrieve_customers_from_database_based_on_full_name (call_ePBKSSCsO3aPJunMwVoXfW97)
 Call ID: call_ePBKSSCsO3aPJunMwVoXfW97
  Args:
    full_name: Jan Kok
Name: retrieve_customers_from_database_based_on_full_name

   c.id
0    16
Tool Calls:
  retrieve_products_of_customers (call_QPgZ2puXBbrVWplEff4qIN2c)
 Call ID: call_QPgZ2puXBbrVWplEff4qIN2c
  Args:
    customer_id: 16
Name: retrieve_products_of_customers

                             product_id                  product_name
0  ef31587f-5c96-4ab2-99e6-99f3b6fa88e8        DirectRekening Product
1  f64aae41-23d9-45d7-9310-48c7a1fd158b  RaboBusiness Banking Product
2  576fd5fe-5d27-424a-865f-56617a2955a7      Kortlopende Reis Product

Jan Kok has the following products:

1. DirectRekening Product
2. RaboBusiness Banking Product
3. Kortlopende Reis Product


In [42]:
question = "I got a question on my savings account, what are the rules for a joint account?"

In [43]:
ask_to_agent(question)


I got a question on my savings account, what are the rules for a joint account?
Tool Calls:
  retrieve_products (call_Y6LhzCJuN7oTi5TN6OIKaeRt)
 Call ID: call_Y6LhzCJuN7oTi5TN6OIKaeRt
  Args:
Name: retrieve_products

["SpaarRekening", "DirectRekening", "Kortlopende Reis", "BeleggersRekening", "RaboBusiness Banking"]
Tool Calls:
  map_product_to_database_products (call_tXp8jsaJlZzexU57BkRBXcXV)
 Call ID: call_tXp8jsaJlZzexU57BkRBXcXV
  Args:
    product: savings account
Name: map_product_to_database_products

Product: SpaarRekening
Tool Calls:
  retrieve_document_from_product (call_lImI4ph6dBxbI8ubwBMqR4kD)
 Call ID: call_lImI4ph6dBxbI8ubwBMqR4kD
  Args:
    product_name: SpaarRekening
Name: retrieve_document_from_product

Rabo SpaarRekening 2020.pdf
Tool Calls:
  answer_question_in_document (call_ngmEg9yXB9g2Fs9CRWl4lOuo)
 Call ID: call_ngmEg9yXB9g2Fs9CRWl4lOuo
  Args:
    document: Rabo SpaarRekening 2020.pdf
    question: What are the rules for a joint account?
Name: answer_question

In [44]:
question = "When is my travel insurance exprired? My name is Daan Visser"

In [45]:
ask_to_agent(question)


When is my travel insurance exprired? My name is Daan Visser
Tool Calls:
  retrieve_customers_from_database_based_on_full_name (call_v8HLRPzCOb6Zjf9lUQMEDyNj)
 Call ID: call_v8HLRPzCOb6Zjf9lUQMEDyNj
  Args:
    full_name: Daan Visser
Name: retrieve_customers_from_database_based_on_full_name

   c.id
0    47
Tool Calls:
  retrieve_products_of_customers (call_f5D5LAQWCl6q2WSopYwE2XMS)
 Call ID: call_f5D5LAQWCl6q2WSopYwE2XMS
  Args:
    customer_id: 47
Name: retrieve_products_of_customers

                             product_id                  product_name
0  5494327a-a164-43c0-99b6-bc9c837cca25      Kortlopende Reis Product
1  f7f36635-926e-4dda-9790-0b43d7df4290        DirectRekening Product
2  e155a521-bc1c-4725-8390-9a3da0f1790e  RaboBusiness Banking Product
Tool Calls:
  retrieve_information_from_product (call_fPlSbPi9DBrqcZW5cmIXWxnK)
 Call ID: call_fPlSbPi9DBrqcZW5cmIXWxnK
  Args:
    product_id: 5494327a-a164-43c0-99b6-bc9c837cca25
Name: retrieve_information_from_product

{'id'

In [55]:
question = "When is my IBAN of my Saving account? My name Lucas vd Berg"

In [56]:
ask_to_agent(question)


When is my IBAN of my Saving account? My name Lucas vd Berg
Tool Calls:
  retrieve_customers_from_database_based_on_full_name (call_yjbwfAnoOGg18ex0PnhqQs7O)
 Call ID: call_yjbwfAnoOGg18ex0PnhqQs7O
  Args:
    full_name: Lucas vd Berg
Name: retrieve_customers_from_database_based_on_full_name

Empty DataFrame
Columns: [c.id]
Index: []

I couldn't find any customer information for "Lucas vd Berg" in the database. Please ensure that the name is correct or provide additional details to help locate your account.


## Chatbot

Now create a chatbot with the agent providing the responses

In [57]:
def user(user_message, history):
    if history is None:
        history = []
    history.append({"role": "user", "content": user_message})
    return "", history

def get_answer(history):
    steps = []
    full_prompt = "\n".join([f"{msg['role'].capitalize()}: {msg['content']}" for msg in history])
    
    for step in agent_executor.stream(
            {"messages": [HumanMessage(content=full_prompt)]},
            stream_mode="values",
    ):
        step["messages"][-1].pretty_print()
        steps.append(step["messages"][-1].content)
    
    return steps[-1]

def bot(history):
    bot_message = get_answer(history)
    history.append({"role": "assistant", "content": ""})

    for character in bot_message:
        history[-1]["content"] += character
        time.sleep(0.01)
        yield history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        label="Chatbot on a Graph",
        avatar_images=[
            "https://png.pngtree.com/png-vector/20220525/ourmid/pngtree-concept-of-facial-animal-avatar-chatbot-dog-chat-machine-illustration-vector-png-image_46652864.jpg",
            "https://d-cb.jc-cdn.com/sites/crackberry.com/files/styles/larger/public/article_images/2023/08/openai-logo.jpg"
        ],
        type="messages", 
    )
    msg = gr.Textbox(label="Message")
    clear = gr.Button("Clear")

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot], chatbot
    )

    clear.click(lambda: [], None, chatbot, queue=False)

demo.queue()
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://da7fcd088718a5923c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)





User: When is my IBAN of my Saving account? My name Lucas vd Berg
Tool Calls:
  retrieve_customers_from_database_based_on_full_name (call_P25s0oVpTqQ2NoNjcleegq6N)
 Call ID: call_P25s0oVpTqQ2NoNjcleegq6N
  Args:
    full_name: Lucas vd Berg
Name: retrieve_customers_from_database_based_on_full_name

Empty DataFrame
Columns: [c.id]
Index: []

I couldn't find any customer information for "Lucas vd Berg" in the database. Please ensure that the name is correct or provide additional details to help locate your account.

User: When is my IBAN of my Saving account? My name Lucas vd Berg
Assistant: I couldn't find any customer information for "Lucas vd Berg" in the database. Please ensure that the name is correct or provide additional details to help locate your account.
User: Oh sorry, my name is Lucas Van den Berg
Tool Calls:
  retrieve_customers_from_database_based_on_full_name (call_noWojtLUWSiSRkYY0i60jEx2)
 Call ID: call_noWojtLUWSiSRkYY0i60jEx2
  Args:
    full_name: Lucas Van den Berg


If you want to have the light-mode for the chatbot paste the following after the URL: /?__theme=light