# Module 3 - GraphRAG and Agents

This module has the following objectives:
- Experiment with queries for an Agent
- Define Tooling
- Create an agent with the available tools
- Chatbot for an Agent
- Text2Cypher (if we have time)

In [30]:
#!pip install graphdatascience neo4j python-dotenv openai langchain langchain-openai langgraph pydantic gradio

Import our usual suspects (and some more...)

In [60]:
import os
import pandas as pd
from dotenv import load_dotenv
from graphdatascience import GraphDataScience
from neo4j import Query, GraphDatabase, RoutingControl, Result
from langchain.schema import HumanMessage
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.messages import HumanMessage
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langgraph.prebuilt import create_react_agent
from openai import OpenAI
from typing import List, Optional
from pydantic import BaseModel, Field, validator
import functools
from langchain_core.tools import tool
import gradio as gr
import time
from json import loads, dumps

## Setup

Load env variables

In [35]:
# Path of the dotenv file that the next cell loads the Neo4j and OpenAI settings from.
env_file = 'credentials.env'

In [36]:
# Load connection settings and model names from the dotenv file into module-level
# names used by the rest of the notebook. If the file is missing, only a message is
# printed and none of the names below are defined.
if os.path.exists(env_file):
    # override=True: values from the file replace variables already set in the process.
    load_dotenv(env_file, override=True)

    # Neo4j
    HOST = os.getenv('NEO4J_URI')
    USERNAME = os.getenv('NEO4J_USERNAME')
    PASSWORD = os.getenv('NEO4J_PASSWORD')
    DATABASE = os.getenv('NEO4J_DATABASE')

    # AI
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
    if OPENAI_API_KEY is not None:
        os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
    else:
        # os.environ only accepts strings; assigning None would raise a TypeError,
        # so report the missing key instead of crashing here.
        print(f"OPENAI_API_KEY is not set in {env_file} or the environment.")
    LLM = os.getenv('LLM')
    EMBEDDINGS_MODEL = os.getenv('EMBEDDINGS_MODEL')
else:
    print(f"File {env_file} not found.")

Connect to neo4j db

In [37]:
# Shared Neo4j driver for every query in this notebook, built from the URI and
# credentials loaded from the environment file.
driver = GraphDatabase.driver(HOST, auth=(USERNAME, PASSWORD))

Test the connection

In [38]:
# Count all nodes as a connectivity check; the result is converted to a DataFrame
# and displayed as the cell's output.
driver.execute_query(
    """
    MATCH (n) RETURN COUNT(n) as Count
    """,
    result_transformer_=Result.to_df,
    routing_=RoutingControl.READ,
    database_=DATABASE,
)

Unnamed: 0,Count
0,904


Check that our indexes (including the ones backing our constraints) are in place

In [39]:
# List the database's indexes as a DataFrame.
schema_result_df = driver.execute_query(
    "show indexes",
    result_transformer_=Result.to_df,
    routing_=RoutingControl.READ,
    database_=DATABASE,
)

In [40]:
# Display up to the first 100 index rows.
schema_result_df.head(100)

Unnamed: 0,id,name,state,populationPercent,type,entityType,labelsOrTypes,properties,indexProvider,owningConstraint,lastRead,readCount
0,6,chunk-embeddings,ONLINE,100.0,VECTOR,NODE,[Chunk],[embedding],vector-2.0,,2025-05-07T12:27:38.642000000+00:00,46
1,0,index_343aff4e,ONLINE,100.0,LOOKUP,NODE,,,token-lookup-1.0,,2025-05-07T12:59:14.211000000+00:00,1683
2,1,index_f7700477,ONLINE,100.0,LOOKUP,RELATIONSHIP,,,token-lookup-1.0,,2025-04-30T14:19:28.584000000+00:00,4
3,4,unique_chunk,ONLINE,100.0,RANGE,NODE,[Chunk],[id],range-1.0,unique_chunk,2025-05-07T12:27:38.658000000+00:00,3429
4,2,unique_document,ONLINE,100.0,RANGE,NODE,[Document],[id],range-1.0,unique_document,2025-05-06T13:02:01.319000000+00:00,867


In [49]:
# Embedding client used to turn search prompts into vectors; the model name and
# API key come from the environment file loaded during setup.
embedding_model = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY, model=EMBEDDINGS_MODEL)

In [50]:
# Show which embedding model name was configured.
embedding_model.model

'text-embedding-ada-002'

## Agents with GraphRAG

### Let's create a Retrieval agent

In [51]:
# class Skill(BaseModel):
#     """
#     Represents a professional skill or knowledge of a person.
#     """
#     name: str = Field(..., description="Shortened name of the skill")

### Tool 1

In [52]:
def retrieve_products() -> pd.DataFrame:
    """Retrieve the products in the database. Products are specified with name. """
    # NOTE: this function is later placed in the agent's tool list, so the docstring
    # above presumably doubles as the tool description shown to the LLM; it is kept
    # verbatim on purpose.
    product_names_query = """
        MATCH (p:Product)
        RETURN p.name as name
        """
    # Read-routed query; every Product node's name comes back as a one-column DataFrame.
    return driver.execute_query(
        product_names_query,
        result_transformer_=Result.to_df,
        routing_=RoutingControl.READ,
        database_=DATABASE,
    )

In [53]:
# Try the tool function directly and display the product names it returns.
retrieve_products() 

Unnamed: 0,name
0,SpaarRekening
1,DirectRekening


### Tool 2

In [102]:
def _frame_to_pretty_json(frame):
    """Serialise a DataFrame as a 4-space-indented JSON array with one object per row."""
    # Round-trip through pandas' encoder first so pandas/numpy scalars become plain
    # JSON values before the records are re-emitted with indentation.
    return dumps(loads(frame.to_json(orient="records")), indent=4)


def get_context_graphrag(product, search_prompt, candidate_k=30, top_k=5):
    """Collect document chunks and term definitions relevant to a search prompt.

    The prompt is embedded and used to query the "chunk-embeddings" vector index.
    Candidates are restricted to chunks of documents related to the given product,
    and the best-scoring ones are kept. Definitions mentioned by those chunks are
    then looked up.

    Args:
        product: Name of the Product node whose documents should be searched.
        search_prompt: Free-text prompt to embed and search with.
        candidate_k: Number of nearest neighbours requested from the vector index
            before the product filter is applied.
        top_k: Maximum number of chunks kept after filtering.

    Returns:
        A ``(context, definitions)`` tuple of JSON strings. ``context`` holds the
        selected chunks (score, file_name, chunk_id, page, chunk) and
        ``definitions`` the distinct term/description pairs those chunks mention.

    Raises:
        ValueError: If ``candidate_k`` is smaller than 1 or ``top_k`` is negative.
    """
    if candidate_k < 1 or top_k < 0:
        raise ValueError("candidate_k must be >= 1 and top_k must be >= 0")

    query_vector = embedding_model.embed_query(search_prompt)

    # ORDER BY sits directly next to LIMIT (and is repeated on RETURN) because row
    # order established before a later MATCH is not guaranteed to survive it;
    # ordering earlier could let LIMIT keep rows that are not the best-scoring ones.
    similarity_query = """
        CALL db.index.vector.queryNodes("chunk-embeddings", $candidate_k, $query_vector) YIELD node, score
        WITH node AS chunk, score
        MATCH (p:Product{name: $product})<-[:RELATED_TO]-(d:Document)<-[:PART_OF]-(chunk)
        WITH score, d, chunk ORDER BY score DESC LIMIT $top_k
        RETURN score, d.file_name AS file_name, chunk.id AS chunk_id, chunk.page AS page, chunk.chunk AS chunk
        ORDER BY score DESC
       """
    chunks_df = driver.execute_query(
        similarity_query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        product=product,
        query_vector=query_vector,
        candidate_k=candidate_k,
        top_k=top_k,
        result_transformer_=lambda r: r.to_df(),
    )

    # De-duplicated ids of the selected chunks, used to look up their definitions.
    chunk_ids = chunks_df['chunk_id'].drop_duplicates().to_list()
    context = _frame_to_pretty_json(chunks_df)

    definition_query = """
        MATCH (c:Chunk)-[:MENTIONS]->(d:Definition)
        WHERE c.id in $chunk_ids
        RETURN DISTINCT d.term as term, d.description as description
    """
    definitions_df = driver.execute_query(
        definition_query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        chunk_ids=chunk_ids,
        result_transformer_=lambda r: r.to_df(),
    )
    definitions = _frame_to_pretty_json(definitions_df)
    return context, definitions

In [103]:
def perform_search_in_document(product: str, search_prompt: str) -> tuple[str, str]:
    """Perform a search in the document to search relevant text and definitions to answer a user question. The product first needs to be determined before a search should be performed.

    Args:
        product: Name of the product whose documents should be searched.
        search_prompt: Text describing what to look for in the documents.

    Returns:
        A pair of JSON strings: the matching text chunks and the definitions of terms mentioned in those chunks.
    """
    # Thin wrapper kept as a separate function because this is the callable exposed
    # to the agent as a tool; the retrieval itself lives in get_context_graphrag.
    return get_context_graphrag(product, search_prompt)

In [104]:
# Try the search tool by hand for one product and one (Dutch) question, then print
# the retrieved chunks followed by the definitions.
context, definitions = perform_search_in_document(
    product='DirectRekening',
    search_prompt='Mag ik rood staan?',
)

print(context, definitions, sep="\n")

[
    {
        "score": 0.9044647217,
        "file_name": "Payment and Online Services Terms Sept 2022.pdf",
        "chunk_id": 322,
        "page": 61,
        "chunk": "ongeoorloofd roodstaan.  \nHet bedrag van de ongeoorloofde roodstand is onmiddellijk opeisbaar. Daarbij is \ngeen ingebrekestelling of andere formaliteit nodig. Dat betekent dat u het bedrag \nvan deze ongeoorloofde roodstand meteen aan ons moet terugbetalen, zonder \ndat wij daar om hoeven te vragen.\n2. Is het afboeken van de debetrente over het gebruikte krediet de reden dat u meer \nrood staat dan de kredietlimiet? Dan stellen wij u in gebreke door u te vragen \nhet bedrag van de afgeboekte debetrente die de ongeoorloofde roodstand heeft \nveroorzaakt v\u00f3\u00f3r een bepaalde datum te betalen. Als u dan nog niet betaalt, \nbrengen wij u over het bedrag dat u aan ons moet betalen vertragingsrente in \nrekening. \n175. Vertragingsrente\na. Moet u ons een bedrag betalen? En bent u daar te laat mee? Dan stellen 

## Setting up the Agent

In [81]:
# Chat model used by the agent; the model name comes from the LLM setting in the
# environment file, and temperature is fixed at 0.
llm = ChatOpenAI(model_name=LLM, temperature=0)

In [82]:
# Sanity-check the chat model with a trivial prompt and display the reply text.
response = llm.invoke([HumanMessage(content="hi!")])
response.content

'Hello! How can I assist you today?'

In [83]:
# The two retrieval functions defined above are the tools offered to the model.
tools = [retrieve_products, perform_search_in_document]

# Variant of the chat model that has been given the tool list.
llm_with_tools = llm.bind_tools(tools)

In [84]:
# Ask a question that names the product, then inspect whether the model replied
# with text or with a request to call one of the tools.
response = llm_with_tools.invoke([HumanMessage(content="I got a question on a DirectRekening. Is a negative debit allowed?")])

print(f"ContentString: {response.content}")
print(f"ToolCalls: {response.tool_calls}")

ContentString: 
ToolCalls: [{'name': 'retrieve_products', 'args': {}, 'id': 'call_W1KzX5kOH1GJJptE0eDC3RuD', 'type': 'tool_call'}]


In [73]:
# Same probe, but without naming a product in the question.
response = llm_with_tools.invoke([HumanMessage(content="Is a negative debit allowed?")])

print(f"ContentString: {response.content}")
print(f"ToolCalls: {response.tool_calls}")

ContentString: 
ToolCalls: [{'name': 'retrieve_products', 'args': {}, 'id': 'call_RGwjaRvuAL9MldV2b33armh8', 'type': 'tool_call'}]


We can see that there's now no text content, but there is a tool call! The model wants us to call the `retrieve_products` tool. This doesn't actually call the tool yet - the model is only telling us to. To actually execute it, we need to create our agent.

## Running Agents with LangGraph

In [85]:
# Build the agent from the plain chat model and the tool list.
agent_executor = create_react_agent(llm, tools)

In [86]:
# Run the agent once on a trivial greeting; the input is a dict with a "messages" list.
response = agent_executor.invoke({"messages": [HumanMessage(content="hi!")]})

In [87]:
# Display the messages held in the agent's response.
response["messages"]

[HumanMessage(content='hi!', additional_kwargs={}, response_metadata={}, id='3b9d0beb-f135-41d4-8768-b4eee73fab12'),
 AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 100, 'total_tokens': 111, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_90122d973c', 'finish_reason': 'stop', 'logprobs': None}, id='run-92494f07-544d-424d-832d-e8e673066fb0-0', usage_metadata={'input_tokens': 100, 'output_tokens': 11, 'total_tokens': 111, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]

#### Run some examples! 

In [105]:
def ask_to_agent(question):
    """Send one question to the agent and pretty-print the latest message of every streamed state."""
    request = {"messages": [HumanMessage(content=question)]}
    for state in agent_executor.stream(request, stream_mode="values"):
        latest_message = state["messages"][-1]
        latest_message.pretty_print()

In [108]:
# Dutch for roughly "Am I allowed to be overdrawn?"; note that no product is named.
question = "Mag ik rood staan?"

In [109]:
# Stream the agent's handling of the question and print each step.
ask_to_agent(question)


Mag ik rood staan?

Of je rood mag staan (oftewel een negatieve saldo hebben op je bankrekening) hangt af van de voorwaarden van je bank en het type bankrekening dat je hebt. Veel banken bieden de mogelijkheid om rood te staan, maar dit is vaak onderhevig aan bepaalde voorwaarden en kan rente kosten. Het is het beste om contact op te nemen met je bank of hun website te raadplegen voor specifieke informatie over rood staan en de bijbehorende voorwaarden en kosten.


## Chatbot

Now create a chatbot with the agent providing the responses

In [None]:
def user(user_message, history):
    """Record the user's message in the chat history.

    A missing history (``None``) is replaced by a new list; an existing list is
    extended in place. Returns an empty string together with the history list.
    """
    chat_log = [] if history is None else history
    chat_log.append(dict(role="user", content=user_message))
    return "", chat_log

def get_answer(history):
    """Run the agent on the whole conversation and return the content of its last streamed message.

    The history entries are flattened into one "Role: content" line each and sent
    to the agent as a single human message. Every streamed state's latest message
    is pretty-printed along the way.
    """
    full_prompt = "\n".join(
        f"{msg['role'].capitalize()}: {msg['content']}" for msg in history
    )
    request = {"messages": [HumanMessage(content=full_prompt)]}

    contents = []
    for state in agent_executor.stream(request, stream_mode="values"):
        latest_message = state["messages"][-1]
        latest_message.pretty_print()
        contents.append(latest_message.content)

    # The content of the last streamed message is the answer shown in the chat.
    return contents[-1]

def bot(history):
    """Yield the chat history repeatedly while the agent's answer is revealed one character at a time.

    The answer is fetched in full first; an empty assistant entry is then appended
    to the history and filled character by character, with a short pause between
    updates.
    """
    reply = get_answer(history)
    assistant_turn = {"role": "assistant", "content": ""}
    history.append(assistant_turn)

    for ch in reply:
        assistant_turn["content"] += ch
        time.sleep(0.01)
        yield history

# Assemble the chat UI: a chat window, a message box and a clear button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        label="Chatbot on a Graph",
        # Two avatar image URLs - presumably user first, assistant second; verify
        # against the Gradio Chatbot documentation.
        avatar_images=[
            "https://png.pngtree.com/png-vector/20220525/ourmid/pngtree-concept-of-facial-animal-avatar-chatbot-dog-chat-machine-illustration-vector-png-image_46652864.jpg",
            "https://d-cb.jc-cdn.com/sites/crackberry.com/files/styles/larger/public/article_images/2023/08/openai-logo.jpg"
        ],
        # History is exchanged as a list of {"role", "content"} dicts, which is the
        # shape the user() and bot() callbacks read and write.
        type="messages", 
    )
    msg = gr.Textbox(label="Message")
    clear = gr.Button("Clear")

    # On submit: user() stores the message in the history and returns "" for the
    # textbox (clearing it); afterwards bot() updates the chat with the answer.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot], chatbot
    )

    # The clear button replaces the chat history with an empty list.
    clear.click(lambda: [], None, chatbot, queue=False)

demo.queue()
# NOTE(review): share=True asks Gradio for a publicly reachable link; anyone with
# that link could query the agent - confirm this is intended outside a workshop.
demo.launch(share=True)

If you want light mode for the chatbot, append the following to the URL: `/?__theme=light`