# RAG-Chatbot 

In [1]:
import pandas as pd
import os
from langchain_openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from neo4j import Query, GraphDatabase, RoutingControl, Result
from dotenv import load_dotenv
import gradio as gr
import time
from IPython.display import display, HTML
import warnings
warnings.filterwarnings('ignore')

## Get Credentials

In [2]:
# Path of the dotenv file that the next cell loads the Neo4j and OpenAI settings from.
env_file = 'credentials.env'

In [3]:
# Load the settings from the dotenv file when it exists. When it does not,
# report that and fall back to whatever is already set in the process
# environment, so the names below are always defined for the following cells
# (previously a missing file left them undefined and later cells raised NameError).
if os.path.exists(env_file):
    load_dotenv(env_file, override=True)
else:
    print(f"File {env_file} not found.")

# Neo4j
HOST = os.getenv('NEO4J_URI')
USERNAME = os.getenv('NEO4J_USERNAME')
PASSWORD = os.getenv('NEO4J_PASSWORD')
DATABASE = os.getenv('NEO4J_DATABASE')

# AI
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
LLM = os.getenv('LLM')
EMBEDDINGS_MODEL = os.getenv('EMBEDDINGS_MODEL')

# Fail right here with a readable message instead of an opaque TypeError on the
# os.environ assignment below (None is not a valid environment value) or a
# driver error several cells further down.
missing_settings = [
    name
    for name, value in (('NEO4J_URI', HOST), ('OPENAI_API_KEY', OPENAI_API_KEY))
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing required setting(s): " + ", ".join(missing_settings)
        + f". Define them in {env_file} or in the environment."
    )

# Export the key (possibly loaded from the dotenv file) to the process environment.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

## Setup Connection to Database

Setup connection to the database with the Python Driver

In [4]:
# One shared driver for the whole notebook, authenticated with the username
# and password loaded in the credentials cell.
driver = GraphDatabase.driver(HOST, auth=(USERNAME, PASSWORD))

In [5]:
# Connectivity check: count all nodes in the configured database and show the
# result as a DataFrame (the cell's last expression is its output).
driver.execute_query(
    """
    MATCH (n) RETURN COUNT(n) as Count
    """,
    routing_=RoutingControl.READ,
    database_=DATABASE,
    result_transformer_=Result.to_df,
)

Unnamed: 0,Count
0,902


## Create RAG-application

For the chatbot we need both an embedding model and an LLM. Create both below:

In [6]:
# Embedding client used to turn a user question into a query vector.
embedding_model = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY, model=EMBEDDINGS_MODEL)

In [7]:
# Show which embedding model name was picked up from the configuration.
embedding_model.model

'text-embedding-ada-002'

In [8]:
# Chat model that answers the questions; temperature 0 is passed for the least random output.
llm = ChatOpenAI(model=LLM, temperature=0)
# Show which chat model name was picked up from the configuration.
llm.model_name

'gpt-4o'

### Retrieval Queries

To illustrate the difference between a "Regular" Vector Search and GraphRAG we create different retrieval queries.

In [9]:
def get_context_vector_search(search_prompt, top_k=5, index_name="chunk-embeddings", verbose=True):
    """Retrieve the chunks most similar to ``search_prompt`` via the vector index.

    The question is embedded with ``embedding_model`` and the resulting vector is
    matched against the Neo4j vector index; each hit is joined to the document
    it is ``PART_OF``.

    Args:
        search_prompt: The user's question as plain text.
        top_k: Number of nearest neighbours requested from the index (default 5,
            the value that used to be hard-coded in the query).
        index_name: Name of the vector index to query.
        verbose: When true (the default), print the result table.

    Returns:
        A pandas DataFrame with the columns ``score``, ``file_name``, ``page``
        and ``chunk``, ordered by descending score.
    """
    query_vector = embedding_model.embed_query(search_prompt)

    # ORDER BY is attached to the final RETURN: an ordering applied before a
    # MATCH is not guaranteed to survive it.
    similarity_query = """
        CALL db.index.vector.queryNodes($index_name, $top_k, $query_vector) YIELD node, score
        WITH node AS chunk, score
        MATCH (d:Document)<-[:PART_OF]-(chunk)
        RETURN score, d.file_name AS file_name, chunk.page AS page, chunk.chunk AS chunk
        ORDER BY score DESC
    """
    # Keyword arguments without a trailing underscore are sent as query parameters.
    results = driver.execute_query(
        similarity_query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        result_transformer_=lambda r: r.to_df(),
        query_vector=query_vector,
        index_name=index_name,
        top_k=top_k,
    )
    if verbose:
        print(results)
    return results

In [10]:
# TODO: GraphRAG retrieval is still a stub. `get_answer` in the Gradio section
# calls `get_context_graphrag(search_prompt)` and unpacks `(context, definitions)`
# from its result, so that is the contract this function has to fulfil.
# def get_context_graphrag(search_prompt):
    
#     return context, definitions

Function to retrieve the client name from a client id

### Prompts 

Prompt for vector search (without definitions)

In [17]:
def _context_frame_to_text(frame):
    """Render every retrieved row in full: file name, page and complete chunk text."""
    if frame.empty:
        return "No related documents were found."

    sections = []
    for row in frame.to_dict("records"):
        page = row.get("page")
        # The stored page looks zero-based (the author's original formatting
        # added 1 as well) -- TODO confirm against the ingestion code.
        page_label = "unknown" if pd.isna(page) else str(int(page) + 1)
        sections.append(
            "file_name: " + str(row.get("file_name")) + "\n"
            + "page: " + page_label + "\n"
            + "text: " + str(row.get("chunk")) + "\n"
        )
    return "Related documents: \n\n" + "\n\n".join(sections)


def generate_prompt_vector_search(search_prompt, context):
    """Build the LLM prompt for a question answered with plain vector search.

    Args:
        search_prompt: The user's question.
        context: Either ready-made context text, or the DataFrame returned by
            ``get_context_vector_search``. A DataFrame is rendered row by row
            with the complete chunk text; formatting it directly would insert
            its display repr, which truncates long cells to "...", so the model
            would only ever see the first few words of each chunk.

    Returns:
        The formatted prompt value returned by ``PromptTemplate.format_prompt``.
    """
    prompt_template = """

    You are a chatbot on Rabobank product. Your goal is to help people with questions on product policies.  
    A user will come to you with questions on their policy. Their questions must be answered based on the relevant documents of the policy.
    
    The question is the following: 
    {search_prompt}
    Always respond in the language in which the question was asked. So, do not respond in a different language.
    
    The context is the following: 
    {context}

    Please end your message with listing your sources with file name and page number. 
    """
    if isinstance(context, pd.DataFrame):
        context = _context_frame_to_text(context)

    prompt = PromptTemplate.from_template(prompt_template)
    return prompt.format_prompt(search_prompt=search_prompt, context=context)

Prompt for GraphRAG

## Some examples to test the models

For every example you can choose between GraphRAG and vector search.

In [18]:
search_prompt = 'Wat wordt bedoelt met de Rabofoon?'

# Retrieve the best-matching chunks, wrap them in the prompt and print the model's answer.
context = get_context_vector_search(search_prompt)
theprompt = generate_prompt_vector_search(search_prompt, context)
# `invoke` replaces the deprecated direct call `llm(...)`.
llm.invoke(theprompt.to_messages()).pretty_print()

      score                                        file_name  page  \
0  0.920654  Payment and Online Services Terms Sept 2022.pdf    51   
1  0.916840  Payment and Online Services Terms Sept 2022.pdf    30   
2  0.914246                      Rabo SpaarRekening 2020.pdf     3   
3  0.911942  Payment and Online Services Terms Sept 2022.pdf    30   
4  0.910614  Payment and Online Services Terms Sept 2022.pdf    24   

                                               chunk  
0  verbonden? Bijvoorbeeld voor een postzegel? Da...  
1  Hoofdstuk 4 Online diensten31 Voorwaarden beta...  
2  Als u via de Rabofoon of de Rabo Bankieren App...  
3  3. Als u Rabofoon bij ons aanvraagt, dan krijg...  
4  2. U mag de Rabo Scanner en Random Reader alle...  

De Rabofoon is een telefonische dienst van Rabobank waarmee klanten hun bankzaken kunnen regelen via de telefoon. Het biedt de mogelijkheid om bijvoorbeeld saldo-informatie op te vragen, overboekingen te doen en andere bankzaken te beheren zonder d

In [19]:
search_prompt = 'Wat is fuseren?'

# Retrieve the best-matching chunks, wrap them in the prompt and print the model's answer.
context = get_context_vector_search(search_prompt)
theprompt = generate_prompt_vector_search(search_prompt, context)
# `invoke` replaces the deprecated direct call `llm(...)`.
llm.invoke(theprompt.to_messages()).pretty_print()

      score                                        file_name  page  \
0  0.893509                      Rabo SpaarRekening 2020.pdf    12   
1  0.892441  Payment and Online Services Terms Sept 2022.pdf    12   
2  0.889313  Payment and Online Services Terms Sept 2022.pdf    12   
3  0.885864  Payment and Online Services Terms Sept 2022.pdf    13   
4  0.884811  Payment and Online Services Terms Sept 2022.pdf    73   

                                               chunk  
0  Pagina 13/14\nAlgemene voorwaarden Rabo SpaarR...  
1  bepaling ongeldig. In plaats van die bepaling ...  
2  2. Contractsoverneming\n   Wij kunnen onze ond...  
3  3. Overdracht\n   Wij kunnen de vorderingen op...  
4                                       Woordenlijst  

Fuseren is een term die vaak wordt gebruikt in de context van bedrijfsfusies, waarbij twee of meer bedrijven samenkomen om één entiteit te vormen. In de context van bankproducten kan het verwijzen naar het samenvoegen van rekeningen of diensten. He

In [20]:
search_prompt = 'Mag ik rood staan op mijn rekening?'

# Retrieve the best-matching chunks, wrap them in the prompt and print the model's answer.
context = get_context_vector_search(search_prompt)
theprompt = generate_prompt_vector_search(search_prompt, context)
# `invoke` replaces the deprecated direct call `llm(...)`.
llm.invoke(theprompt.to_messages()).pretty_print()

      score                                        file_name  page  \
0  0.928192  Payment and Online Services Terms Sept 2022.pdf    19   
1  0.925293  Payment and Online Services Terms Sept 2022.pdf    61   
2  0.915268  Payment and Online Services Terms Sept 2022.pdf    21   
3  0.914307  Payment and Online Services Terms Sept 2022.pdf    19   
4  0.911911  Payment and Online Services Terms Sept 2022.pdf    34   

                                               chunk  
0  dit niet tot korting op het tarief. \n \n4. Wi...  
1  ongeoorloofd roodstaan.  \nHet bedrag van de o...  
2  3. Als u rood staat op de rekening, is iedere ...  
3  3. Als wij dat vragen, geeft u ons een incasso...  
4  Hoofdstuk 5 Betaalopdrachten35 Voorwaarden bet...  

Ja, je mag rood staan op je rekening, maar er zijn voorwaarden aan verbonden. Als je rood staat, is iedere dag dat je rood staat een ongeoorloofd roodstaan. Het bedrag van de ongeoorloofde roodstand moet zo snel mogelijk worden aangezuiverd. 

Bron

In [15]:
# Display whatever is currently bound to `context` (the retrieval result of the
# example that was run last).
context

Unnamed: 0,score,file_name,page,chunk
0,0.928192,Payment and Online Services Terms Sept 2022.pdf,19,dit niet tot korting op het tarief. \n \n4. Wi...
1,0.925293,Payment and Online Services Terms Sept 2022.pdf,61,ongeoorloofd roodstaan. \nHet bedrag van de o...
2,0.915268,Payment and Online Services Terms Sept 2022.pdf,21,"3. Als u rood staat op de rekening, is iedere ..."
3,0.914307,Payment and Online Services Terms Sept 2022.pdf,19,"3. Als wij dat vragen, geeft u ons een incasso..."
4,0.911911,Payment and Online Services Terms Sept 2022.pdf,34,Hoofdstuk 5 Betaalopdrachten35 Voorwaarden bet...


## Gradio Chatbot that uses RAG and GraphRAG

The example code is adapted from the Gradio documentation: [Creating a custom chatbot with blocks](https://www.gradio.app/guides/creating-a-custom-chatbot-with-blocks#add-streaming-to-your-chatbot)

In [16]:
def user(user_message, history):
    """Record a submitted message as a new, still unanswered chat turn.

    Returns an empty string (the new content of the message box) and a new
    history list with ``[user_message, None]`` appended; the list passed in is
    not modified.
    """
    pending_turn = [user_message, None]
    extended_history = history + [pending_turn]
    return "", extended_history

def get_answer(search_prompt, rag_method):
    """Answer ``search_prompt`` with the selected retrieval method.

    Args:
        search_prompt: The user's question.
        rag_method: ``"Vector-Search"`` or ``"GraphRAG"``. ``None`` (no radio
            option selected yet) is treated as ``"Vector-Search"``.

    Returns:
        The answer text. If the GraphRAG helpers are not defined in the
        notebook, a short notice is returned instead of failing with NameError.
    """
    if rag_method is None or rag_method == "Vector-Search":
        context = get_context_vector_search(search_prompt)
        theprompt = generate_prompt_vector_search(search_prompt, context)
    else:
        # rag_method == "GraphRAG"
        # Look the helpers up by name: they may not have been defined yet.
        graphrag_context = globals().get("get_context_graphrag")
        graphrag_prompt = globals().get("generate_prompt_graphrag")
        if graphrag_context is None or graphrag_prompt is None:
            return (
                "GraphRAG retrieval is not available in this notebook yet. "
                "Please select 'Vector-Search'."
            )
        context, definitions = graphrag_context(search_prompt)
        theprompt = graphrag_prompt(search_prompt, context, definitions)
    # `invoke` replaces the deprecated direct call `llm(...)`.
    messages = llm.invoke(theprompt.to_messages())
    return messages.content

def bot(history, rag_method):
    """Stream the answer to the latest user message, one character at a time.

    The question is the first item of the last history entry. Its second item
    is reset to an empty string and then grown by one character per step, with
    a short pause in between; the (mutated) history is yielded after every
    character.
    """
    question = history[-1][0]
    answer = get_answer(question, rag_method)
    history[-1][1] = ""
    for next_char in answer:
        history[-1][1] = history[-1][1] + next_char
        time.sleep(0.01)
        yield history

# Chat UI: a chat window, a message box, a retrieval-method selector and a clear button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        label="Chatbot with RAG", 
        avatar_images=["https://png.pngtree.com/png-vector/20220525/ourmid/pngtree-concept-of-facial-animal-avatar-chatbot-dog-chat-machine-illustration-vector-png-image_46652864.jpg","https://d-cb.jc-cdn.com/sites/crackberry.com/files/styles/larger/public/article_images/2023/08/openai-logo.jpg"]
    )
    msg = gr.Textbox(label="Message")
    # Preselect a method: without a default the first message is sent with
    # rag_method=None, which matches neither option.
    rag_method = gr.Radio(
        ["Vector-Search", "GraphRAG"],
        value="Vector-Search",
        label="RAG-method:",
    )
    clear = gr.Button("Clear")

    # On submit: first move the message into the history (and empty the box),
    # then stream the bot's answer into the last history entry.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, rag_method], chatbot
    )

    # Reset the chat window.
    clear.click(lambda: None, None, chatbot, queue=False)


# Queuing is enabled before launch; `bot` is a generator that streams its output.
demo.queue()
demo.launch(share=False)

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




To use light mode for the chatbot, append the following to the URL: `/?__theme=light`