# Module 5 - RAG-Chatbot 

In [1]:
import pandas as pd
import os
from langchain_openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from neo4j import Query, GraphDatabase, RoutingControl, Result
from dotenv import load_dotenv
import gradio as gr
import time
from IPython.display import display, HTML
import warnings
from json import loads, dumps
warnings.filterwarnings('ignore')

## Get Credentials

In [2]:
# Path of the dotenv file that holds the Neo4j and OpenAI settings read in the next cell.
env_file = 'credentials.env'

In [3]:
# Read connection settings from the dotenv file into module-level constants.
# Values in the file take precedence over already-set environment variables (override=True).
if os.path.exists(env_file):
    load_dotenv(env_file, override=True)

    # Neo4j
    HOST = os.getenv('NEO4J_URI')
    USERNAME = os.getenv('NEO4J_USERNAME')
    PASSWORD = os.getenv('NEO4J_PASSWORD')
    DATABASE = os.getenv('NEO4J_DATABASE')

    # AI
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
    if OPENAI_API_KEY is None:
        # os.environ only accepts strings, so assigning None would fail with an
        # opaque TypeError; report the actual problem instead.
        raise ValueError(f"OPENAI_API_KEY is not set in {env_file} or the environment.")
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
    LLM = os.getenv('LLM')
    EMBEDDINGS_MODEL = os.getenv('EMBEDDINGS_MODEL')
else:
    print(f"File {env_file} not found.")

## Setup Connection to Database

Set up the connection to the database with the Neo4j Python driver.

In [4]:
# Shared Neo4j driver used by every query in this notebook.
driver = GraphDatabase.driver(HOST, auth=(USERNAME, PASSWORD))

In [5]:
# Connectivity check: count all nodes in the database and show the result as a DataFrame.
driver.execute_query(
    """
    MATCH (n) RETURN COUNT(n) as Count
    """,
    routing_=RoutingControl.READ,
    database_=DATABASE,
    result_transformer_=Result.to_df,
)

Unnamed: 0,Count
0,1494


## Create RAG-application

For the chatbot we need both an embedding model and an LLM. Create both below:

In [6]:
# Embedding model used to turn a question into the vector passed to the vector-index query.
embedding_model = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY, model=EMBEDDINGS_MODEL)

In [7]:
# Show which embedding model was configured through EMBEDDINGS_MODEL.
embedding_model.model

'text-embedding-ada-002'

In [8]:
# Chat model that writes the answers; the model name comes from the LLM setting.
llm = ChatOpenAI(model=LLM, temperature=0)

In [9]:
# Show which chat model was configured through LLM.
llm.model_name

'gpt-4o'

### Retrieval Queries

To illustrate the difference between a "Regular" Vector Search and GraphRAG we create different retrieval queries.

In [10]:
def get_context_vector_search(search_prompt, top_k=5):
    """Retrieve the chunks most similar to a question through the vector index.

    The question is embedded with ``embedding_model`` and looked up in the
    "chunk-embeddings" vector index; every hit is joined to the Document it is
    PART_OF.

    Args:
        search_prompt: The user's question as plain text.
        top_k: Number of nearest chunks to request from the index. Defaults to 5,
            the value that used to be hard-coded in the query.

    Returns:
        A JSON string (indent=2) holding a list of records with the keys
        ``score``, ``file_name``, ``chunk_id``, ``page`` and ``chunk``,
        ordered by descending score.
    """
    query_vector = embedding_model.embed_query(search_prompt)

    # ORDER BY is placed on the final RETURN: an ordering applied in a WITH is only
    # guaranteed for the clause directly after it, not across the following MATCH.
    similarity_query = """ 
        CALL db.index.vector.queryNodes("chunk-embeddings", $top_k, $query_vector) YIELD node, score
        WITH node as chunk, score
        MATCH (d:Document)<-[:PART_OF]-(chunk)
        RETURN score, d.file_name as file_name, chunk.id as chunk_id, chunk.page as page, chunk.chunk_eng AS chunk
        ORDER BY score DESC
       """
    results = driver.execute_query(
        similarity_query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        query_vector=query_vector,
        top_k=int(top_k),  # the procedure expects an integer neighbour count
        result_transformer_=lambda r: r.to_df()
    )

    # Round-trip through loads/dumps to obtain an indented rendering of the records.
    parsed = loads(results.to_json(orient="records"))
    return dumps(parsed, indent=2)

In [11]:
def get_context_graphrag(search_prompt):
    """Retrieve similar chunks plus the definitions those chunks mention.

    The chunk retrieval is delegated to ``get_context_vector_search`` so both
    retrieval modes share a single similarity query. The graph is then
    traversed from the retrieved chunks to the Definition nodes they MENTION.

    Args:
        search_prompt: The user's question as plain text.

    Returns:
        A tuple ``(context, definitions)`` of JSON strings (indent=2):
        ``context`` is the list of chunk records returned by
        ``get_context_vector_search``; ``definitions`` is a list of records
        with the keys ``term`` and ``description``.
    """
    context = get_context_vector_search(search_prompt)

    # De-duplicate the chunk ids while keeping the retrieval order.
    chunk_ids = list(dict.fromkeys(record["chunk_id"] for record in loads(context)))

    definition_query = """    
        MATCH (c:Chunk)-[:MENTIONS]->(d:Definition)
        WHERE c.id in $chunk_ids
        RETURN DISTINCT d.term as term, d.description as description
    """
    definition_rows = driver.execute_query(
        definition_query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        chunk_ids=chunk_ids,
        result_transformer_=lambda r: r.to_df()
    )

    # Round-trip through loads/dumps to obtain an indented rendering of the records.
    definitions = dumps(loads(definition_rows.to_json(orient="records")), indent=2)
    return context, definitions

Function to retrieve the client name from a client id

### Prompts 

Prompt for vector search (without definitions)

In [12]:
def generate_prompt_vector_search(search_prompt, context):
    """Build the LLM prompt for plain vector search (chunks only, no definitions).

    Args:
        search_prompt: The user's question; substituted for ``{search_prompt}``.
        context: The retrieved chunks as text; substituted for ``{context}``.

    Returns:
        The prompt value produced by ``PromptTemplate.format_prompt``.
    """
    # NOTE(review): the template asks to "Respond in English." and also to always answer
    # in the language of the question; these conflict for non-English questions - confirm intent.
    template_text = """

    You are a chatbot on Rabobank product. Your goal is to help people with questions on product policies.  
    A user will come to you with questions on their policy. Their questions must be answered based on the relevant documents of the policy.
    Respond in English. 
    
    The question is the following: 
    {search_prompt}
    
    Always respond in the language in which the question was asked. So, do not respond in a different language.
    
    The context is the following: 
    {context}

    Please end your message with listing your sources with file name and page number. 
    """
    template = PromptTemplate.from_template(template_text)
    return template.format_prompt(context=context, search_prompt=search_prompt)

In [13]:
def generate_prompt_graphrag(search_prompt, context, definitions):
    """Build the LLM prompt for GraphRAG (chunks plus the definitions they mention).

    Args:
        search_prompt: The user's question; substituted for ``{search_prompt}``.
        context: The retrieved chunks as text; substituted for ``{context}``.
        definitions: The related definitions as text; substituted for ``{definitions}``.

    Returns:
        The prompt value produced by ``PromptTemplate.format_prompt``.
    """
    # NOTE(review): the template asks to "Respond in English." and also to always answer
    # in the language of the question; these conflict for non-English questions - confirm intent.
    template_text = """

    You are a chatbot on Rabobank product. Your goal is to help people with questions on product policies.  
    A user will come to you with questions on their policy. Their questions must be answered based on the relevant documents of the policy.
    Respond in English. 

    The question is the following: 
    {search_prompt}
    
    Always respond in the language in which the question was asked. So, do not respond in a different language.
    
    The context is the following: 
    {context}

    The definitions are the following: 
    {definitions}
    
    Please end your message with listing your sources with file name and page number. 
    """
    template = PromptTemplate.from_template(template_text)
    return template.format_prompt(
        definitions=definitions,
        context=context,
        search_prompt=search_prompt,
    )

Prompt for GraphRAG (with definitions): this is the `generate_prompt_graphrag` function defined above.

## Some examples to test the models

Each example can be run with either vector search or GraphRAG.

In [14]:
# Example 1, vector search: answer from the retrieved chunks only.
search_prompt = 'What is meant with the Rabofoon?'

context = get_context_vector_search(search_prompt)
theprompt = generate_prompt_vector_search(search_prompt, context)
# invoke() replaces the deprecated direct call llm(...).
llm.invoke(theprompt.to_messages()).pretty_print()


The Rabofoon is a service provided by Rabobank that allows you to perform banking activities over the phone. If you have access to Rabofoon and it is activated for you, you can give payment orders using the phone's keys. You provide permission for the payment order according to the instructions of Rabofoon, and the order is considered received once you press the confirmation key. Additionally, you can check your current balance, view transactions, and make transfers using Rabofoon. There are no subscription fees for maintaining an agreement for Rabofoon, but calling the Rabofoon number incurs a cost of €0.20 per call.

Sources:
- Payment and Online Services Terms Sept 2022.pdf, page 51
- Rabo SpaarRekening 2020.pdf, page 3


In [15]:
# Example 1, GraphRAG: same question, with the mentioned definitions added to the prompt.
search_prompt = 'What is meant with the Rabofoon?'

context, definitions = get_context_graphrag(search_prompt)
theprompt = generate_prompt_graphrag(search_prompt, context, definitions)
# invoke() replaces the deprecated direct call llm(...).
llm.invoke(theprompt.to_messages()).pretty_print()


The "Rabofoon" is a service provided by Rabobank that allows you to give payment orders using the phone's keys. You give permission for the payment order according to the instructions of Rabofoon, and the payment order is received as soon as the confirmation key is pressed. This service can also be used to check the current balance and transactions or make transfers, and it requires a personal access code. There are no subscription fees for maintaining an agreement for the Rabofoon, but calling the Rabofoon incurs a cost of €0.20 per call.

**Sources:**
- Payment and Online Services Terms Sept 2022.pdf, page 51
- Rabo SpaarRekening 2020.pdf, page 3


In [16]:
# Example 2, vector search: answer from the retrieved chunks only.
search_prompt = 'What is merging?'

context = get_context_vector_search(search_prompt)
theprompt = generate_prompt_vector_search(search_prompt, context)
# invoke() replaces the deprecated direct call llm(...).
llm.invoke(theprompt.to_messages()).pretty_print()


Merging, in the context of Rabobank's policies, refers to the process where Rabobank can combine with another legal entity. This process is known as a merger. If Rabobank undergoes a merger, the legal successors of the merged entity can independently exercise all rights and powers against you and fulfill all obligations towards you. This means that the new entity formed from the merger will continue to uphold the rights and responsibilities that Rabobank had with its clients.

Sources:
- Rabo SpaarRekening 2020.pdf, Page 12
- Terms & Conditions for Online Business Services - April 2024.pdf, Page 25


In [25]:
# Example 3, vector search: answer from the retrieved chunks only.
search_prompt = 'Can I have overdraft on my account?'

context = get_context_vector_search(search_prompt)
theprompt = generate_prompt_vector_search(search_prompt, context)
# invoke() replaces the deprecated direct call llm(...).
llm.invoke(theprompt.to_messages()).pretty_print()


Yes, you can have an overdraft on your account even if you do not have credit. This is referred to as an unauthorized overdraft. You will be charged a variable interest rate on the amount overdrawn, which can change at any time. The interest is composed of several components, including a base rate, surcharges related to capital market developments, individual risk surcharges, ongoing costs, and a profit margin. If you overdraw your account, the amount is immediately due and payable, and you must repay it without any formal notice from the bank.

Sources:
- Payment and Online Services Terms Sept 2022.pdf, page 19
- Payment and Online Services Terms Sept 2022.pdf, page 60
- Payment and Online Services Terms Sept 2022.pdf, page 61


In [26]:
# Example 3, GraphRAG: same question, with the mentioned definitions added to the prompt.
search_prompt = 'Can I have overdraft on my account?'

context, definitions = get_context_graphrag(search_prompt)
theprompt = generate_prompt_graphrag(search_prompt, context, definitions)
# invoke() replaces the deprecated direct call llm(...).
llm.invoke(theprompt.to_messages()).pretty_print()


Yes, you can have an overdraft on your account even if you do not have credit. This is referred to as an "unauthorized overdraft." For example, this can occur if the costs of the payment package are deducted from your account. You will be charged a variable interest on this unauthorized overdraft, which can be changed by the bank at any time. Additionally, the amount of the unauthorized overdraft is immediately due and payable, meaning you must repay it immediately without any notice of default or other formality.

Sources:
- Payment and Online Services Terms Sept 2022.pdf, page 19
- Payment and Online Services Terms Sept 2022.pdf, page 61


## Gradio Chatbot that uses RAG and GraphRAG

Example code is coming from Gradio documentation: [Creating a custom chatbot with blocks](https://www.gradio.app/guides/creating-a-custom-chatbot-with-blocks#add-streaming-to-your-chatbot)

In [19]:
def user(user_message, history):
    """Append the user's message to the chat history as a turn without a reply yet.

    Args:
        user_message: Text the user submitted.
        history: Existing list of ``[user_message, bot_reply]`` pairs; not modified.

    Returns:
        A tuple of an empty string and a new history list with
        ``[user_message, None]`` appended.
    """
    pending_turn = [user_message, None]
    extended_history = history + [pending_turn]
    return "", extended_history

def get_answer(search_prompt, rag_method):
    """Answer a question with the selected retrieval method.

    Args:
        search_prompt: The user's question as plain text.
        rag_method: "Vector-Search" for chunk-only retrieval; any other value
            (including "GraphRAG" and None) uses GraphRAG.

    Returns:
        The content of the LLM's response message.
    """
    if rag_method == "Vector-Search":
        context = get_context_vector_search(search_prompt)
        theprompt = generate_prompt_vector_search(search_prompt, context)
    else:
        # rag_method == "GraphRAG"; also the fallback when nothing was selected.
        context, definitions = get_context_graphrag(search_prompt)
        theprompt = generate_prompt_graphrag(search_prompt, context, definitions)
    # invoke() replaces the deprecated direct call llm(...).
    response = llm.invoke(theprompt.to_messages())
    return response.content

def bot(history, rag_method):
    """Answer the newest user message and stream the reply into the chat history.

    Args:
        history: List of ``[user_message, bot_reply]`` pairs; the last pair is
            updated in place.
        rag_method: Retrieval method forwarded to ``get_answer``.

    Yields:
        The history after each appended character of the reply.
    """
    latest_turn = history[-1]
    answer = get_answer(latest_turn[0], rag_method)
    latest_turn[1] = ""
    for char in answer:
        latest_turn[1] += char
        time.sleep(0.01)  # short pause between characters
        yield history

# Chat UI: a chatbot pane, a message box, a retrieval-method selector and a clear button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        label="Chatbot with RAG", 
        avatar_images=["https://png.pngtree.com/png-vector/20220525/ourmid/pngtree-concept-of-facial-animal-avatar-chatbot-dog-chat-machine-illustration-vector-png-image_46652864.jpg","https://d-cb.jc-cdn.com/sites/crackberry.com/files/styles/larger/public/article_images/2023/08/openai-logo.jpg"]
    )
    msg = gr.Textbox(label="Message")
    # Preselect GraphRAG: get_answer already falls back to GraphRAG when nothing is
    # selected, so the UI now shows the method that is actually used.
    rag_method = gr.Radio(["Vector-Search", "GraphRAG"], value="GraphRAG", label="RAG-method:")
    clear = gr.Button("Clear")

    # On submit: first record the user's message and empty the textbox, then stream the reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, rag_method], chatbot
    )

    # Reset the conversation.
    clear.click(lambda: None, None, chatbot, queue=False)

    
# Turn on the request queue, then start the app locally (share=False: no public link).
demo.queue()
demo.launch(share=False)

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




To use light mode for the chatbot, append the following to the URL: `/?__theme=light`