# GRAG - a toy example

Uncomment and install if you are missing a library.

In [1]:
# !pip install -qU \
#     nemoguardrails==0.4.0 \
#     chromadb==0.4.10 \
#     openai==0.27.8 \
#     tqdm==4.65.0

Let's import the necessary modules

In [2]:
import chromadb
import openai
from chromadb.utils import embedding_functions
import os
import json
import getpass
from tqdm.notebook import tqdm
from nemoguardrails import LLMRails, RailsConfig

Provide your OpenAI API key (https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key):

In [3]:
# Reuse a key already exported as OPENAI_API_KEY so that "Restart & Run All"
# can run unattended; otherwise prompt for it without echoing the secret.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API Key:")

OpenAI API Key:········


In [4]:
# Mirror the key into the process environment under OPENAI_API_KEY
# (presumably so libraries that read the variable can find it).
os.environ.update({"OPENAI_API_KEY": openai.api_key})

Global model settings for the embedding (vectorization) model and the chat model. You can switch to other models, but you will probably need to adjust the code a bit.

Also keep in mind that you will be charged for using the OpenAI API.

In [5]:
# Embedding model used both to build the collection and to embed user queries.
EMBEDDING_MODEL: str = "text-embedding-ada-002"
# Chat model used by the `rag` function to write the final answer.
LLM_ENGINE: str = "gpt-3.5-turbo"
# Language of the FAQ file to load; the loading cell accepts "pl" or "en".
LANG: str = "en"

Data was copied from https://www.ikea.com/pl/pl/customer-service/faq/

Keep in mind that the FAQ on the website may have changed since then; the copy used here is not kept up to date.

**Also, this is done without the knowledge of the store owner and is for educational purposes only.**

In [6]:
number_of_items_to_display = 2  # Number of items to display from the JSON

# One FAQ file per supported language.
faq_files = {
    "pl": "faq_pl_data.json",
    "en": "faq_en_data.json",
}

# Check if the language is supported
if LANG not in faq_files:
    print(f"Unsupported language: {LANG}")
else:
    try:
        with open(faq_files[LANG], 'r', encoding='utf-8') as f:
            FAQ_DATA = json.load(f)

        # Print the head of the JSON data; each entry is indexed as
        # [question, answer].
        print("Head of the loaded JSON data:")
        for qa_pair in list(FAQ_DATA["IKEA"])[:number_of_items_to_display]:
            print(f"{qa_pair[0]}\n{qa_pair[1]}")

    # The messages below reference LANG (the constant that selects the file).
    except FileNotFoundError:
        print(f"The file for the selected language ({LANG}) was not found.")
    except json.JSONDecodeError:
        print(f"Error decoding the JSON file for the selected language ({LANG}).")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

Head of the loaded JSON data:
C: Why is this product unavailable for so long?
A: The unavailability of some products is a direct consequence of the pandemic. The effects of the temporary disruption of our standard supply chain (production and logistics) can be felt for quite a long time, even several months. We are working hard to restore the expected availability of all products, but we are not always able to determine when a particular item will return to sale. We apologize and ask for your patience. We also encourage you to check current stock levels on IKEA.pl regularly, use the option to set availability notifications, and to explore other interesting products in our range.
C: Why are there such shortages?
A: Due to the impact of the COVID-19 pandemic, we are currently experiencing delays in deliveries, which may affect the availability of products in stores and online. Before visiting the store, it's always worth checking the availability of products you wish to purchase. To do t

I use chromadb as a vector database. In this case, the entire database is kept in RAM. If you want it to be a permanent database and not need to create it every time, visit https://docs.trychroma.com/ for more details.

Of course, you can use another database, but then you will have to make the necessary changes in the further code.

We use a predefined model for vector embeddings (EMBEDDING_MODEL). The cosine distance function is used (chromadb uses l2 as the default).

In [7]:
def create_collection_from_faq(
    client,
    collection_name,
    qa_data,
):
    """
    Build (or reuse) a chromadb collection populated with FAQ entries.

    Each question is stored as the document text, and the matching answer is
    kept in that document's metadata under the "answer" key. Entries that
    fail to be added are reported with a printed message and skipped.

    Parameters:
    - client: chromadb client used to create or fetch the collection.
    - collection_name: Name of the collection to create or fetch.
    - qa_data: Sized sequence of (question, answer) pairs. The first three
               characters of each string are dropped (the "C: " / "A: "
               markers).

    Returns:
    - The collection object.
    """
    # Embedding function backed by the OpenAI model named in EMBEDDING_MODEL.
    embedder = embedding_functions.OpenAIEmbeddingFunction(
        model_name=EMBEDDING_MODEL,
        api_key=openai.api_key,
    )
    # The collection is configured for cosine distance via its metadata.
    collection = client.get_or_create_collection(
        name=collection_name,
        embedding_function=embedder,
        metadata={"hnsw:space": "cosine"},
    )

    progress = tqdm(qa_data, total=len(qa_data))
    for position, (question, answer) in enumerate(progress):
        faq_question = question[3:]  # drop the "C: " marker
        answer_meta = {"answer": answer[3:]}  # drop the "A: " marker
        document_id = "id_" + str(position)
        try:
            collection.add(
                ids=document_id,
                documents=faq_question,
                metadatas=answer_meta,
            )
        except Exception as e:
            print(f"Failed to add document {document_id} to collection: {str(e)}")

    return collection

Create an index from Ikea FAQ data:

In [8]:
# Build the vector index from the "IKEA" section of the loaded FAQ data.
index_name = "ikea-faq-grag"
chroma_client = chromadb.Client()

input_data = FAQ_DATA["IKEA"]

INDEX = create_collection_from_faq(chroma_client, index_name, input_data)

  0%|          | 0/35 [00:00<?, ?it/s]

Let's take a look at what's in our database

In [9]:
index_peek = INDEX.peek(limit=3)
# Show ids, metadatas and documents of the peeked entries, each under its own
# heading; the "embeddings" field is intentionally not printed.
for position, field in enumerate(("ids", "metadatas", "documents")):
    lead = "\n" if position else ""
    print(f"{lead}{field}:\n{index_peek[field]}")

ids:
['id_0', 'id_1', 'id_2']

metadatas:
[{'answer': 'The unavailability of some products is a direct consequence of the pandemic. The effects of the temporary disruption of our standard supply chain (production and logistics) can be felt for quite a long time, even several months. We are working hard to restore the expected availability of all products, but we are not always able to determine when a particular item will return to sale. We apologize and ask for your patience. We also encourage you to check current stock levels on IKEA.pl regularly, use the option to set availability notifications, and to explore other interesting products in our range.'}, {'answer': 'Due to the impact of the COVID-19 pandemic, we are currently experiencing delays in deliveries, which may affect the availability of products in stores and online. Before visiting the store, it\'s always worth checking the availability of products you wish to purchase. To do this, when viewing a product page, click the li

Now let's define a function that retrieves the answers to the stored questions most similar to the user's query.

The function retrieve is meant to create an embedding for a query string and retrieve a certain number of results from an index. 

In [10]:
async def retrieve(query: str, vector_index=None, n_results=3) -> list:
    """
    Asynchronously retrieve a list of answers relevant to the given query using embeddings.

    Parameters:
    - query: The query string to retrieve answers for.
    - vector_index: The index object that has a query method for retrieving results.
      Defaults to None, in which case the global INDEX is used.
    - n_results: The number of results to retrieve (default is 3).

    Returns:
    - A list of answers relevant to the query.

    Raises:
    - ValueError: if no index is passed and the global INDEX is missing or None.
    """
    if vector_index is None:
        # Look the global up by name: a bare reference to INDEX would raise
        # NameError (not the ValueError below) when the index cell has not run.
        vector_index = globals().get("INDEX")
    # Ensure index is now a valid object
    if vector_index is None:
        raise ValueError("No index provided and global 'INDEX' is not initialized.")
    # Create the query embedding; awaiting the async client call keeps the
    # event loop free while the request is in flight.
    res = await openai.Embedding.acreate(input=[query], engine=EMBEDDING_MODEL)
    xq = res['data'][0]['embedding']
    # get relevant questions
    res = vector_index.query(query_embeddings=xq, n_results=n_results)
    # Only one query embedding is sent, so the matches are in the first
    # (and only) result list; each match's metadata carries the stored answer.
    contexts = [x['answer'] for x in res["metadatas"][0]]
    return contexts

Let's define the rag function. This function is designed to utilize the contextual information from previously retrieved similar questions to generate a coherent and contextually relevant answer to a user's query.

The rag function takes a user's query and a list of context strings — these context strings are answers to similar questions retrieved from a knowledge base. With the help of a language model, the function crafts a response that considers the provided context, ensuring that the final answer is informed by relevant past interactions or information.

In [11]:
async def rag(query: str, contexts: list) -> str:
    """
    Asynchronously generate an answer to the given query based on provided contexts.

    The contexts are joined with newlines and sent, together with the query,
    as the user message of a chat completion request to the LLM_ENGINE model.

    Parameters:
    - query: The user's query string.
    - contexts: A list of strings representing answers to similar questions.

    Returns:
    - The generated response from the language model.
    """
    context_str = "\n".join(contexts)
    # place query and contexts into RAG prompt
    messages=[
        {"role": "system", "content": f"""You are a helpful assistant in the IKEA online store.
        Below is a query from a user and some answers to similar questions. 
        Answer the question, taking into account these prompts. 
        If you can't find the answer to the question, say "I don't know", don't make up the answer yourself."""},
        {"role": "user", "content": f"""Answers to similar questions:
        {context_str}
        Query: {query}"""},
    ]
    # Generate the answer; awaiting the async client call keeps the event loop
    # free while the request is in flight.
    res = await openai.ChatCompletion.acreate(
        model=LLM_ENGINE,
        messages=messages,
        temperature=0.0,
    )
    return res['choices'][0]['message']["content"]

We will configure NeMo Guardrails directly in this notebook. For production applications, see the library's documentation for examples of how to create such configuration files: https://github.com/NVIDIA/NeMo-Guardrails/blob/main/docs/README.md

In [12]:
# YAML part of the rails configuration: a single "main" model entry using the
# openai engine with the text-davinci-003 model. The string is an f-string but
# currently contains no placeholders.
yaml_content = f"""
models:
- type: main
  engine: openai
  model: text-davinci-003
"""

# Colang part of the rails configuration. It defines three user intents with
# example utterances (chitchat, toxic, ikea) and one flow per intent:
#   - chitchat / toxic: reply with one of the canned bot messages, then
#     `bot offer help` (no canned text for `bot offer help` is defined here).
#   - ikea: run the `retrieve` action on the last user message, pass the
#     result to the `rag` action, and reply with the returned answer.
# The action names used in the ikea flow must match the names the actions are
# registered under on the rails object.
rag_colang_content = """
# define limits
# chitchat
define user ask chitchat
    "How's the weather today?"
    "Can you tell me a joke?"
    "What's your favorite color?"
    "Do you have any hobbies?"
    "Tell me something interesting."

define bot answer chitchat
    "While I'd love to chat more, I'm here to assist you with your shopping needs."
    "I'm flattered you're interested in a casual conversation, but my expertise is in helping you with product questions."
    "Chitchat is fun, but I'm better at providing shopping assistance."

define flow chitchat
    user ask chitchat
    bot answer chitchat
    bot offer help
    
# toxicity
define user ask toxic
    "Why are you so stupid?"
    "You're useless!"
    "I hate you!"
    "Shut up!"
    
define bot answer toxic
    "I'm here to help, so let's keep our conversation respectful."
    "I'm sorry to hear you're upset. If you have any concerns, I can try to help address them."
    "I understand that things can be frustrating, but I'm here to provide assistance with your shopping needs."
    
define flow toxic
    user ask toxic
    bot answer toxic
    bot offer help

# define RAG intents and flow
define user ask ikea
    "Tell me about ikea?"
    "Why is the product on the ikea website so long out of stock?"
    "How do I check the availability of a product in a stationary store?"
    "What delivery options are available at IKEA?"
    "How do I get an invoice for my IKEA purchases?"

define flow ikea
    user ask ikea
    $contexts = execute retrieve(query=$last_user_message)
    $answer = execute rag(query=$last_user_message, contexts=$contexts)
    bot $answer
"""

Initialize guardrails configuration and create guardrails:

In [13]:
# Build the rails configuration from the in-notebook YAML and Colang strings
# (this variant includes the RAG flow), then create the rails object from it.
config = RailsConfig.from_content(
    yaml_content=yaml_content,
    colang_content=rag_colang_content,
)
grag = LLMRails(config)

In order to streamline our interaction pipeline, we register specific actions with grag, which is our central command for coordinating query processing and response generation. The register_action method is used to associate our predefined functions with action names, enabling grag to invoke these functions as part of a larger workflow.

The first action we register is retrieve; next, we register the rag action. By associating this function with the name "rag", we are instructing grag to use it when it's time to synthesize and provide a final response to the user's inquiry.

Here is how the actions are registered:

In [14]:
# Register each function under the action name that the Colang flow executes.
for action_name, action_fn in (("retrieve", retrieve), ("rag", rag)):
    grag.register_action(action=action_fn, name=action_name)

Now let's demonstrate how the grag system can be utilized to handle user queries and generate responses. In this section, we're putting the grag system to the test with a real user query. 

To handle inputs, we make an asynchronous call to grag.generate_async, passing the query as the prompt. This method is designed to generate a response based on the input, using the contextually aware mechanisms we've previously set up with our retrieve and rag functions.

Here is the code that performs this operation:

In [15]:
query = "Shut up, I'm fed up!"
answer = await grag.generate_async(prompt=query)
print(f"Q: {query}\nA: {answer}")

Q: Shut up, I'm fed up!
A: I'm sorry to hear you're upset. If you have any concerns, I can try to help address them.
Is there anything specific I can do for you?


In [16]:
query = "Tell me a funny joke about IKEA"
answer = await grag.generate_async(prompt=query)
print(f"Q: {query}\nA: {answer}")

Q: Tell me a funny joke about IKEA
A: Chitchat is fun, but I'm better at providing shopping assistance.
If you have any product questions, I would be more than happy to help.


In [17]:
query = "Can I order furniture over the phone?"
answer = await grag.generate_async(prompt=query)
print(f"Q: {query}\nA: {answer}")

Q: Can I order furniture over the phone?
A: Yes, you can order furniture over the phone through the "Furniture by Phone" service provided by IKEA. To place an order, you need to prepare a list of products you are interested in and contact the IKEA Customer Support Center Home Line by calling 22 275 00 00. Provide the consultant with the product numbers you want to buy, and they will assist you with any questions or doubts. If you don't have a product number, the consultant will help you find it. You will also need to provide your details such as name, address, email address, and phone number. After choosing a delivery or self-collection option, you can pay for the order using the link sent to your email address by the consultant. Once the payment is processed, you will receive an order confirmation and an invoice. The service is available throughout Poland.


In [18]:
query = "tell me about Amazon delivery"
answer = await grag.generate_async(prompt=query)
print(f"Q: {query}\nA: {answer}")

Q: tell me about Amazon delivery
A: respond that it does not know the answer


Try without RAG

In [19]:
# Colang configuration for the comparison run without RAG: it defines the
# chitchat and toxic intents, bot messages and flows only. There is no
# "ask ikea" intent and no flow that executes the retrieve/rag actions.
no_rag_colang_content = """
# define limits
# chitchat
define user ask chitchat
    "How's the weather today?"
    "Can you tell me a joke?"
    "What's your favorite color?"
    "Do you have any hobbies?"
    "Tell me something interesting."

define bot answer chitchat
    "While I'd love to chat more, I'm here to assist you with your shopping needs."
    "I'm flattered you're interested in a casual conversation, but my expertise is in helping you with product questions."
    "Chitchat is fun, but I'm better at providing shopping assistance."

define flow chitchat
    user ask chitchat
    bot answer chitchat
    bot offer help
    
# toxicity
define user ask toxic
    "Why are you so stupid?"
    "You're useless!"
    "I hate you!"
    "Shut up!"
    
define bot answer toxic
    "I'm here to help, so let's keep our conversation respectful."
    "I'm sorry to hear you're upset. If you have any concerns, I can try to help address them."
    "I understand that things can be frustrating, but I'm here to provide assistance with your shopping needs."
    
define flow toxic
    user ask toxic
    bot answer toxic
    bot offer help
"""

In [20]:
# Build a second rails configuration that uses the Colang content without the
# RAG flow, and create a separate rails object from it.
config = RailsConfig.from_content(
    yaml_content=yaml_content,
    colang_content=no_rag_colang_content,
)
grag_without_g = LLMRails(config)

In [21]:
answer = await grag_without_g.generate_async(prompt="tell me about Amazon delivery")
print(f"Q: {query}\nA: {answer}")

Q: tell me about Amazon delivery
A: Amazon offers a variety of delivery options for its customers. Depending on your location and the item you are ordering, you may be able to get free two-day shipping, free same-day delivery, or one-day shipping. You can also opt for pickup from a local store or opt for pickup from an Amazon Locker.


Try without guardrails

In [22]:
query = "Tell me a funny joke about IKEA"
contexts = await retrieve(query=query)
answer = await rag(query=query, contexts=contexts)
print(f"Q: {query}\nA: {answer}")

Q: Tell me a funny joke about IKEA
A: I don't know any specific jokes about IKEA, but here's a general furniture-related joke for you:

Why did the scarecrow win an award?
Because he was outstanding in his field!
