<a href="https://colab.research.google.com/github/ardionoroma/rag-workshop/blob/main/notebook_template.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/ai-rag-lab-notebooks/blob/main/notebook_template.ipynb)


[![Lab Documentation and Solutions](https://img.shields.io/badge/Lab%20Documentation%20and%20Solutions-purple)](https://mongodb-developer.github.io/ai-rag-lab/)


# Step 1: Install libraries


In [None]:
! pip install -qU pymongo datasets langchain fireworks-ai tiktoken sentence_transformers tqdm

# Step 2: Setup prerequisites

Replace:

- `<CODE_BLOCK_1>` with your **MongoDB connection string**
- `<CODE_BLOCK_2>` with your **Fireworks API key**


In [None]:
import os

In [None]:
# Retain the quotes ("") when pasting the URI
# The placeholder token is `<CODE_BLOCK_1>`; replace the whole token, angle brackets included,
# with your MongoDB connection string.
MONGODB_URI = "<CODE_BLOCK_1>"

In [None]:
# Retain the quotes ("") when pasting the API key
os.environ["FIREWORKS_API_KEY"] = "<CODE_BLOCK_2>"

# Step 3: Load the dataset


In [None]:
import pandas as pd
from datasets import load_dataset

In [None]:
# Open the "train" split of the dataset in streaming mode
data = load_dataset("mongodb/devcenter-articles", split="train", streaming=True)
# Keep only the first 20 records of the stream
data_head = data.take(20)
# Convert the records to a list of Python dictionaries (one dictionary per record)
docs = pd.DataFrame(data_head).to_dict("records")

In [None]:
# Check the number of documents in the dataset
len(docs)

In [None]:
# Preview a document
docs[0]

# Step 4: Chunk up the data


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from typing import Dict, List

In [None]:
# Separators to split on, tried in order from first to last.
# The empty string "" is the final fallback (split between any two characters), so it
# must be the last entry: anything listed after it would never be tried.
# Longer heading markers come before shorter ones so that "#" does not pre-empt "##" / "###".
separators = ["\n\n", "\n", " ", "###", "##", "#", ""]

📚 https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/split_by_token/#tiktoken


In [None]:
# Use the `RecursiveCharacterTextSplitter` text splitter with the `cl100k_base` encoding
# For text data, you typically want to keep 1-2 paragraphs (~200 tokens) in a single chunk
# Chunk overlap of 15-20% of the chunk size is recommended
# Pass the `separators` list above as an argument called `separators`
text_splitter = <CODE_BLOCK_3>

📚 https://api.python.langchain.com/en/latest/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html

📚 https://docs.python.org/3/library/copy.html

📚 https://docs.python.org/3/tutorial/datastructures.html


In [None]:
def get_chunks(doc: Dict, text_field: str) -> List[Dict]:
    """
    Chunk up a document.

    Following the steps below, each chunk is meant to become its own document:
    a shallow copy of `doc` whose `text_field` holds the chunk text.

    Args:
        doc (Dict): Parent document to generate chunks from.
        text_field (str): Name of the field in `doc` whose text should be chunked.

    Returns:
        List[Dict]: List of chunked documents.
    """
    # Extract the field to chunk from `doc`
    text = <CODE_BLOCK_4>
    # Split `text` using the appropriate method of the `RecursiveCharacterTextSplitter` class
    # NOTE: `text` is a string
    chunks = <CODE_BLOCK_5>

    # Write code to iterate through `chunks` and for each chunk:
    # 1. Create a shallow copy of `doc`, call it `temp`
    #    (copying means the parent `doc` itself is not overwritten)
    # 2. Set the `text_field` field in `temp` to the content of the chunk
    # 3. Append `temp` to `chunked_data`

    # Accumulates one document per chunk
    chunked_data = []
    <CODE_BLOCK_6>

    return chunked_data

In [None]:
split_docs = []

📚 https://docs.python.org/3/tutorial/datastructures.html


In [None]:
# Write code to iterate through `docs`, use the `get_chunks` function to chunk up the documents based on the "body" field, and add the list of chunked documents to `split_docs` initialized above.
# NOTE: `get_chunks` returns a list of documents so use the appropriate method from the reference docs above to add ALL the chunked documents to `split_docs`.
<CODE_BLOCK_7>

In [None]:
# Check that the length of the list of chunked documents is greater than the length of `docs`
len(split_docs)

In [None]:
# Preview one of the items in `split_docs` -- ensure that it is a Python dictionary
split_docs[0]

# Step 5: Generate embeddings


In [None]:
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

📚 https://huggingface.co/thenlper/gte-small


In [None]:
# Load the `gte-small` model using the Sentence Transformers library
embedding_model = <CODE_BLOCK_8>

📚 https://huggingface.co/thenlper/gte-small

In [None]:
# Write a function that takes a piece of text (`text`) as input, embeds it using the `embedding_model` instantiated above and returns the embedding as a list
# NOTE: An array can be converted to a list using the `tolist()` method
def get_embedding(text: str) -> List[float]:
    """
    Generate the embedding for a piece of text.

    Args:
        text (str): Text to embed.

    Returns:
        List[float]: Embedding of the text as a plain Python list.
    """
    <CODE_BLOCK_9>

In [None]:
embedded_docs = []

📚 https://docs.python.org/3/tutorial/datastructures.html

📚 https://github.com/tqdm/tqdm

In [None]:
# Write code to add an `embedding` field to each dictionary in `split_docs`
# The `embedding` field should correspond to the embedding of the value of the `body` field
# Use the `get_embedding` function defined above to generate the embedding
# Use `tqdm` to view progress
# NOTE: Append the updated dictionaries to `embedded_docs` initialized above.
<CODE_BLOCK_10>

In [None]:
# Check that the length of `embedded_docs` is the same as that of `split_docs`
len(embedded_docs)

# Step 6: Ingest data into MongoDB


In [None]:
from pymongo import MongoClient

In [None]:
# Initialize a MongoDB Python client
mongodb_client = MongoClient(MONGODB_URI, appname="devrel.workshop.rag")

In [None]:
# Name of the database -- Change if needed or leave as is
DB_NAME = "mongodb_rag_lab"
# Name of the collection -- Change if needed or leave as is
COLLECTION_NAME = "knowledge_base"
# Name of the vector search index -- Change if needed or leave as is
ATLAS_VECTOR_SEARCH_INDEX_NAME = "vector_index"

📚 https://pymongo.readthedocs.io/en/stable/tutorial.html#getting-a-collection


In [None]:
# Connect to the collection defined above using the `mongodb_client`
collection = <CODE_BLOCK_11>

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.delete_many


In [None]:
# Bulk delete all existing records from the collection defined above -- should be a one-liner
<CODE_BLOCK_12>

📚 https://pymongo.readthedocs.io/en/stable/examples/bulk.html#bulk-insert


In [None]:
# Bulk insert `embedded_docs` into the collection defined above -- should be a one-liner
<CODE_BLOCK_13>

print("Data ingestion into MongoDB completed")

# Step 7: Create a vector search index

Follow the instructions in the documentation to create a Vector Search index in the Atlas UI.


# Step 8: Perform semantic search on your data


### Define a vector search function

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#fields

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#ann-examples (Refer to the "Basic Example")


In [None]:
# Define a function to retrieve relevant documents for a user query using vector search
def vector_search(user_query: str) -> List[Dict]:
    """
    Retrieve relevant documents for a user query using vector search.

    Args:
        user_query (str): The user's query string.

    Returns:
        List[Dict]: A list of matching documents.
    """

    # Generate embedding for the `user_query` using the `get_embedding` function defined in Step 5
    query_embedding = <CODE_BLOCK_14>

    # Define an aggregation pipeline consisting of a $vectorSearch stage, followed by a $project stage
    # Set the number of candidates to 150 and only return the top 5 documents from the vector search
    # In the $project stage, exclude the `_id` field and include only the `body` field and `vectorSearchScore`
    # NOTE: Use variables defined previously for the `index`, `queryVector` and `path` fields in the $vectorSearch stage
    pipeline = <CODE_BLOCK_15>

    # Execute the aggregation `pipeline` and store the results in `results`
    results = <CODE_BLOCK_16>
    # Materialize the results into a list before returning them
    return list(results)

### Run vector search queries


In [None]:
vector_search("What is MongoDB Atlas Search?")

In [None]:
vector_search("What are triggers in MongoDB Atlas?")

# 🦹‍♀️ Combine pre-filtering with vector search

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/#about-the-filter-type

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#ann-examples (Refer to the "Filter Example")


#### Filter for documents where the content type is `Video`


In [None]:
# Modify the vector search index definition to include the `metadata.contentType` field as a `filter` field
# NOTE: DO NOT modify the vector index definition in the Atlas UI
<CODE_BLOCK_17>

In [None]:
# Modify the $vectorSearch stage of the aggregation pipeline defined previously to include a filter for documents where the `metadata.contentType` field has the value "Video"
# NOTE: DO NOT assign this modification to the `pipeline` variable. Only write out the pipeline definition
<CODE_BLOCK_18>

#### Filter on documents which have been updated on or after `2024-05-20` and where the content type is `Video`


In [None]:
# Modify the vector search index definition to include the `metadata.contentType` and `updated` fields as `filter` fields
# NOTE: DO NOT modify the vector index definition in the Atlas UI
<CODE_BLOCK_19>

In [None]:
# Modify the $vectorSearch stage of the aggregation pipeline defined previously to include a filter for documents where
# the `metadata.contentType` field has the value "Video"
# AND
# the `updated` field is greater than or equal to "2024-05-20"
# NOTE: DO NOT assign this modification to the `pipeline` variable. Only write out the pipeline definition
<CODE_BLOCK_20>

# Step 9: Build the RAG application


### Instantiate a chat model


In [None]:
from fireworks.client import Fireworks

In [None]:
# Initializing the Fireworks AI client and the model string
fw_client = Fireworks()
model = "accounts/fireworks/models/llama-v3-8b-instruct"

### Define a function to create the chat prompt

📚 https://docs.python.org/3/library/stdtypes.html#str.join

📚 https://www.w3schools.com/python/python_lists_comprehension.asp


In [None]:
# Define a function to create the user prompt for our RAG application
def create_prompt(user_query: str) -> str:
    """
    Create a chat prompt that includes the user query and retrieved context.

    Args:
        user_query (str): The user's query string.

    Returns:
        str: The chat prompt string, made up of the answering instruction,
            the retrieved context and the question.
    """
    # Retrieve the most relevant documents for the `user_query` using the `vector_search` function
    context = <CODE_BLOCK_21>
    # Join the retrieved documents into a single string, where each document is separated by two new lines ("\n\n")
    # Use a list comprehension to extract only the `body` field from the documents in `context`
    # NOTE: `context` is reassigned here, from the retrieved documents to the joined string
    context = <CODE_BLOCK_22>
    # Prompt consisting of the question and relevant context to answer it
    prompt = f"Answer the question based only on the following context. If the context is empty, say I DON'T KNOW\n\nContext:\n{context}\n\nQuestion:{user_query}"
    return prompt

### Define a function to answer user queries

📚 https://docs.fireworks.ai/guides/querying-text-models#chat-completions-api

📚 https://docs.fireworks.ai/api-reference/post-chatcompletions

In [None]:
# Define a function to answer user queries using Fireworks' Chat Completion API
def generate_answer(user_query: str) -> None:
    """
    Generate an answer to the user query and print it.

    Args:
        user_query (str): The user's query string.

    Returns:
        None: The answer is printed rather than returned.
    """
    # Use the `create_prompt` function above to fill in the `content` field in the chat message
    # Set the `temperature` parameter to 0 to get more deterministic responses
    # Print the final answer
    <CODE_BLOCK_23>

### Query the RAG application


In [None]:
generate_answer("What is MongoDB Atlas Search?")

In [None]:
generate_answer("What did I just ask you?")

# 🦹‍♀️ Re-rank retrieved results


In [None]:
from sentence_transformers import CrossEncoder

In [None]:
rerank_model = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")

📚 https://huggingface.co/mixedbread-ai/mxbai-rerank-xsmall-v1

📚 https://www.w3schools.com/python/python_lists_comprehension.asp


In [None]:
# Add a re-ranking step to the following function
# NOTE: This redefines the `create_prompt` function from Step 9
def create_prompt(user_query: str) -> str:
    """
    Create a chat prompt that includes the user query and re-ranked retrieved context.

    The documents returned by `vector_search` are re-ranked with `rerank_model`
    before being joined into the context.

    Args:
        user_query (str): The user's query string.

    Returns:
        str: The chat prompt string.
    """
    # Retrieve the most relevant documents for the `user_query` using the `vector_search` function
    context = vector_search(user_query)
    # Extract the "body" field from each document in `context`
    documents = [d.get("body") for d in context]
    # Use the `rerank_model` instantiated above to re-rank `documents`
    # Set the `top_k` argument to 5
    reranked_documents = <CODE_BLOCK_24>
    # Join the re-ranked documents into a single string, where each document is separated by two new lines ("\n\n")
    # Example format of results from the re-ranker: [{"corpus_id": 0, "score": 0.99, "text": "some text"}]
    # Use a list comprehension to extract only the `text` field from the documents in `reranked_documents`
    context = <CODE_BLOCK_25>
    # Prompt consisting of the question and relevant context to answer it
    prompt = f"Answer the question based only on the following context. If the context is empty, say I DON'T KNOW\n\nContext:\n{context}\n\nQuestion:{user_query}"
    return prompt

In [None]:
# Note the impact of re-ranking on the generated answer
generate_answer("What are triggers in MongoDB Atlas?")

# 🦹‍♀️ Return streaming responses

📚 https://docs.fireworks.ai/guides/querying-text-models#streaming


In [None]:
# Define a function to answer user queries in streaming mode using Fireworks' Chat Completion API
# NOTE: This redefines the earlier non-streaming `generate_answer` function
def generate_answer(user_query: str) -> None:
    """
    Generate an answer to the user query and print it as it is streamed back.

    Args:
        user_query (str): The user's query string.

    Returns:
        None: The answer is printed rather than returned.
    """
    # Use the `create_prompt` function above to fill in the `content` field in the chat message
    # Set the `temperature` parameter to 0 to get more deterministic responses
    # Set the `stream` parameter to True
    response = <CODE_BLOCK_26>

    # Iterate through the `response` generator and print the results as they are generated
    <CODE_BLOCK_27>

In [None]:
generate_answer("What is MongoDB Atlas Search?")

# Step 10: Add memory to the RAG application


In [None]:
from datetime import datetime

In [None]:
history_collection = mongodb_client[DB_NAME]["chat_history"]

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.create_index


In [None]:
# Create an index on the key `session_id` for the `history_collection` collection
<CODE_BLOCK_28>

### Define a function to store chat messages in MongoDB

📚 https://docs.python.org/3/library/datetime.html#datetime.datetime.now

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.insert_one


In [None]:
def store_chat_message(session_id: str, role: str, content: str) -> None:
    """
    Store a chat message in a MongoDB collection.

    Args:
        session_id (str): Session ID of the message.
        role (str): Role for the message. One of `system`, `user` or `assistant`.
        content (str): Content of the message.
    """
    # Create a message object with `session_id`, `role`, `content` and `timestamp` fields
    # `timestamp` should be set to the current timestamp
    message = <CODE_BLOCK_29>
    # Insert the `message` into the `history_collection` collection
    <CODE_BLOCK_30>

### Define a function to retrieve chat history from MongoDB

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/cursor.html#pymongo.cursor.Cursor.sort

📚 https://www.w3schools.com/python/python_lists_comprehension.asp


In [None]:
def retrieve_session_history(session_id: str) -> List:
    """
    Retrieve chat message history for a particular session.

    Args:
        session_id (str): Session ID to retrieve chat message history for.

    Returns:
        List: List of chat messages, each formatted as
            {"role": <role_value>, "content": <content_value>}.
    """
    # Query the `history_collection` collection for documents where the "session_id" field has the value of the input `session_id`
    # Sort the results in increasing order of the values in `timestamp` field
    cursor =  <CODE_BLOCK_31>

    # NOTE(review): a PyMongo cursor object is presumably truthy even when it yields no
    # documents, in which case the `else` branch below would never run -- confirm.
    # The list comprehension already produces an empty list for an empty cursor.
    if cursor:
        # Write a list comprehension to iterate through the cursor and extract the `role` and `content` field from each entry
        # Then format each entry as: {"role": <role_value>, "content": <content_value>}
        messages = <CODE_BLOCK_32>
    else:
        # If cursor is empty, return an empty list
        messages = []

    return messages

### Handle chat history in the `generate_answer` function

📚 https://docs.python.org/3/tutorial/datastructures.html

📚 https://docs.fireworks.ai/guides/querying-text-models#chat-completions-api


In [None]:
def generate_answer(session_id: str, user_query: str) -> None:
    """
    Generate an answer to the user's query taking chat history into account.

    The messages sent to the chat model are, in order: a system prompt containing
    the retrieved context, the stored history for `session_id`, and the new user
    message. The answer is printed rather than returned.

    Args:
        session_id (str): Session ID to retrieve chat history for.
        user_query (str): The user's query string.
    """
    # Initialize list of messages to pass to the chat completion model
    messages = []

    # Retrieve documents relevant to the user query and convert them to a single string
    # NOTE: This calls `vector_search` directly, so no re-ranking step is applied here
    context = vector_search(user_query)
    context = "\n\n".join([d.get("body", "") for d in context])
    # Create a system prompt containing the retrieved context
    system_message = {
        "role": "system",
        "content": f"Answer the question based only on the following context. If the context is empty, say I DON'T KNOW\n\nContext:\n{context}",
    }
    # Append the system prompt to the `messages` list
    messages.append(system_message)

    # Use the `retrieve_session_history` function to retrieve message history from MongoDB for the session ID `session_id`
    # And add all messages in the message history to the `messages` list
    <CODE_BLOCK_33>

    # Format the user message in the format {"role": <role_value>, "content": <content_value>}
    # The role value for user messages must be "user"
    # And append the user message to the `messages` list
    <CODE_BLOCK_34>

    # Call the chat completions API
    # NOTE(review): no `temperature` is passed here, so the service default presumably applies -- confirm this is intended
    response = fw_client.chat.completions.create(model=model, messages=messages)

    # Extract the answer from the API response
    answer = response.choices[0].message.content

    # Use the `store_chat_message` function to store the user message and also the generated answer in the message history collection
    # The role value for user messages is "user", and "assistant" for the generated answer
    <CODE_BLOCK_35>

    print(answer)

In [None]:
generate_answer(
    session_id="1",
    user_query="What are triggers in MongoDB Atlas?",
)

In [None]:
generate_answer(
    session_id="1",
    user_query="What did I just ask you?",
)