[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/ai-agents-lab-notebooks/blob/main/notebook_template.ipynb)


[![Lab Documentation and Solutions](https://img.shields.io/badge/Lab%20Documentation%20and%20Solutions-purple)](https://mongodb-developer.github.io/rag-lab/)


# Step 1: Install libraries


In [None]:
! pip install -qU pymongo langchain langchain-community fireworks-ai bs4 tiktoken sentence_transformers

# Step 2: Setup prerequisites

Replace:

- `<CODE_BLOCK_1>` with your **MongoDB connection string**
- `<CODE_BLOCK_2>` with your **Fireworks API key**


In [None]:
import os

In [None]:
# Replace the `<CODE_BLOCK_1>` placeholder with your MongoDB connection string.
# Retain the quotes ("") when pasting the URI
MONGODB_URI = "<CODE_BLOCK_1>"

In [None]:
# Paste your Fireworks API key between the quotes (keep the quotes themselves);
# the key is exposed to the rest of the notebook as an environment variable.
os.environ.update({"FIREWORKS_API_KEY": "<CODE_BLOCK_2>"})

# Step 3: Load the dataset


In [None]:
from langchain_community.document_loaders import WebBaseLoader

In [None]:
# MongoDB Developer Center articles that make up the lab's knowledge base
article_urls = [
    "https://www.mongodb.com/developer/products/atlas/choose-embedding-model-rag/",
    "https://www.mongodb.com/developer/products/atlas/evaluate-llm-applications-rag/",
    "https://www.mongodb.com/developer/products/atlas/choosing-chunking-strategy-rag/",
    "https://www.mongodb.com/developer/products/atlas/gemma-mongodb-huggingface-rag/",
]

# Build a loader over all article URLs, then load them into `docs`
loader = WebBaseLoader(article_urls)
docs = loader.load()

In [None]:
# Check the number of documents created
len(docs)

In [None]:
# Preview a document
docs[0]

In [None]:
# Preview the `page_content` attribute of the Document object
docs[0].page_content

In [None]:
# Preview the metadata attribute of the Document object
docs[0].metadata

# Step 4: Chunk up the data


In [None]:
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter

📚 https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/split_by_token/#tiktoken


In [None]:
# Use the `RecursiveCharacterTextSplitter` text splitter with the `cl100k_base` encoding
# For text data, you typically want to keep 1-2 paragraphs (~200 tokens) in a single chunk
# Chunk overlap of 15-20% of the chunk size is recommended
text_splitter = <CODE_BLOCK_3>

📚 https://api.python.langchain.com/en/latest/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html


In [None]:
# Split `docs` using the appropriate method of the `RecursiveCharacterTextSplitter` class
# NOTE: `docs` is a list of LangChain documents
split_docs = <CODE_BLOCK_4>

In [None]:
# Check that the length of the list of chunked documents is greater than the length of `docs`
len(split_docs)

In [None]:
# Write a Python list comprehension to convert each LangChain Document object in `split_docs` to a Python dictionary.
# Use the `.dict()` method on each Document object.
split_docs = [<CODE_BLOCK_5>]

In [None]:
# Preview one of the items in `split_docs` -- ensure that it is a Python dictionary
split_docs[0]

# Step 5: Generate embeddings


In [None]:
from sentence_transformers import SentenceTransformer
from typing import List

📚 https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1


In [None]:
# Load the Mixedbread AI's `mxbai-embed-large-v1` model using the Sentence Transformers library
embedding_model = <CODE_BLOCK_6>

📚 https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1


In [None]:
# Write a function that takes a piece of text (`text`) as input, embeds it using the `embedding_model` instantiated above and returns the embedding as a list
# NOTE: An array can be converted to a list using the `tolist()` method
def get_embedding(text: str) -> List[float]:
    """
    Generate the embedding for a piece of text.

    The body is a lab exercise: it is meant to embed `text` with the
    `embedding_model` instantiated earlier in the notebook and convert the
    result to a plain list.

    Args:
        text (str): Text to embed.

    Returns:
        List[float]: Embedding of the text as a list.
    """
    <CODE_BLOCK_7>

In [None]:
embedded_docs = []

In [None]:
# Write code to add an `embedding` field to each dictionary in `split_docs`
# The `embedding` field should correspond to the embedding of the value of the `page_content` field
# Use the `get_embedding` function defined above to generate the embedding
# NOTE: Append the updated dictionaries to `embedded_docs` initialized above.
<CODE_BLOCK_8>

# Step 6: Ingest data into MongoDB


In [None]:
from pymongo import MongoClient
from typing import Dict

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html


In [None]:
# Initialize a MongoDB Python client
mongo_client = <CODE_BLOCK_9>

In [None]:
# The three names below can be kept as they are or changed to suit your setup.

# Database that holds the lab's data
DB_NAME = "mongodb_rag_lab"
# Collection within that database
COLLECTION_NAME = "knowledge_base"
# Vector search index name
ATLAS_VECTOR_SEARCH_INDEX_NAME = "vector_index"

📚 https://pymongo.readthedocs.io/en/stable/tutorial.html#getting-a-collection


In [None]:
# Connect to the collection defined above using the MongoDB client
collection = <CODE_BLOCK_10>

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html


In [None]:
# Bulk delete all existing records from the collection defined above -- should be a one-liner
<CODE_BLOCK_11>

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html


In [None]:
# Bulk insert `embedded_docs` into the collection defined above -- should be a one-liner
<CODE_BLOCK_12>

print("Data ingestion into MongoDB completed")

# Step 7: Create a vector search index

Follow the instructions in the documentation to create a Vector Search index in the Atlas UI.


# Step 8: Perform semantic search on your data


### Define a vector search function

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#fields

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#ann-examples (Refer to the "Basic Example")


In [None]:
# Define a function to retrieve relevant documents for a user query using vector search
def vector_search(user_query: str) -> List[Dict]:
    """
    Retrieve relevant documents for a user query using vector search.

    Args:
        user_query (str): The user's query string.

    Returns:
        List[Dict]: A list of matching documents.
    """

    # Generate embedding for the `user_query` using the `get_embedding` function defined in Step 5
    <CODE_BLOCK_13>

    # Define an aggregation pipeline consisting of a $vectorSearch stage, followed by a $project stage
    # Set the number of candidates to 150 and only return the top 5 documents from the vector search
    # In the $project stage, exclude the `_id` field and include only the `page_content` field and `vectorSearchScore`
    # NOTE: Use variables defined previously for the `index`, `queryVector` and `path` fields in the $vectorSearch stage
    pipeline = <CODE_BLOCK_14>

    # Execute the aggregation `pipeline` and store the results in `results`
    results = <CODE_BLOCK_15>
    # Convert the results to a list before returning them
    return list(results)

### Run vector search queries


In [None]:
vector_search(
    "What are the important considerations while choosing an embedding model?"
)

In [None]:
vector_search("How to choose a chunking strategy for RAG?")

### 🦹‍♀️ Combine pre-filtering with vector search

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/#about-the-filter-type

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#ann-examples (Refer to the "Filter Example")


#### Filter for documents where the language is `en`


In [None]:
# Modify the vector search index definition to include the `metadata.language` field as a `filter` field
<CODE_BLOCK_16>

In [None]:
# Modify the $vectorSearch stage of the aggregation pipeline defined previously to include a filter for documents where the `metadata.language` field has the value `en`
<CODE_BLOCK_17>

#### Filter for documents where the language is `en` and the type is `Document`


In [None]:
# Modify the vector search index definition to include the `metadata.language` and `type` fields as `filter` fields
<CODE_BLOCK_18>

In [None]:
# Modify the $vectorSearch stage of the aggregation pipeline defined previously to include a filter for documents where
# the `metadata.language` field has the value `en`
# AND
# the `type` field has the value `Document`
<CODE_BLOCK_19>

# Step 9: Build a RAG application


### Instantiate a chat model


In [None]:
from fireworks.client import Fireworks

In [None]:
# Initialize the Fireworks AI client and the model string
# NOTE(review): the client is created without arguments, so it presumably picks up
# the API key from the `FIREWORKS_API_KEY` environment variable set in Step 2 -- confirm
fw_client = Fireworks()
# Identifier of the chat model to use for the chat completion calls
model = "accounts/fireworks/models/llama-v3-8b-instruct"

### Define a function to create the chat prompt

📚 https://docs.python.org/3/library/stdtypes.html#str.join


In [None]:
# Define a function to create the user prompt for our RAG application
def create_prompt(user_query: str) -> str:
    """
    Create a chat prompt that includes the user query and retrieved context.

    The prompt instructs the model to answer only from the retrieved context
    and to say "I DON'T KNOW" when the context is empty.

    Args:
        user_query (str): The user's query string.

    Returns:
        str: The chat prompt string.
    """
    # Retrieve the most relevant documents for the `user_query` using the `vector_search` function
    context = <CODE_BLOCK_20>
    # Join the retrieved documents into a single string, where each document is separated by two new lines ("\n\n")
    # NOTE: Extract only the `page_content` field from the documents
    context = <CODE_BLOCK_21>
    # Prompt consisting of the instructions, the relevant context and the question to answer
    prompt = f"Answer the question based only on the following context. If the context is empty, say I DON'T KNOW\n\nContext:\n{context}\n\nQuestion:{user_query}"
    return prompt

### Define a function to answer user queries

📚 https://docs.fireworks.ai/guides/querying-text-models#chat-completions-api

📚 https://docs.fireworks.ai/api-reference/post-completions


In [None]:
# Define a function to answer user queries using Fireworks' Chat Completion API
def generate_answer(user_query: str) -> None:
    """
    Generate an answer to the user query.

    The answer is printed rather than returned.

    Args:
        user_query (str): The user's query string.
    """
    # Use the `create_prompt` function above to fill in the `content` field in the chat message
    # Set the `temperature` parameter to 0 to get more deterministic responses
    # Print the final answer
    <CODE_BLOCK_22>

### Query the RAG application


In [None]:
generate_answer(
    "What are the important considerations while choosing an embedding model?"
)

In [None]:
generate_answer("What did I just ask you?")

### 🦹‍♀️ Return streaming responses

📚 https://docs.fireworks.ai/guides/querying-text-models#streaming


In [None]:
# Define a function to answer user queries in streaming mode using Fireworks' Chat Completion API 
def generate_answer(user_query: str) -> None:
    """
    Generate an answer to the user query, printing it as it is streamed.

    NOTE: This redefines the `generate_answer` function from the earlier cell,
    so later calls in the notebook use this streaming version.

    Args:
        user_query (str): The user's query string.
    """
    # Use the `create_prompt` function above to fill in the `content` field in the chat message
    # Set the `temperature` parameter to 0 to get more deterministic responses
    # Set the `stream` parameter to True
    response = <CODE_BLOCK_23>

    # Iterate through the `response` generator and print the results as they are generated
    <CODE_BLOCK_24>

In [None]:
generate_answer(
    "What are the important considerations while choosing an embedding model?"
)

# Step 10: Add memory to the RAG application


In [None]:
from datetime import datetime

In [None]:
history_collection = mongo_client[DB_NAME]["chat_history"]

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.create_index


In [None]:
# Create an index on the key `session_id` for the `history_collection` collection
<CODE_BLOCK_25>

### Define a function to store chat messages in MongoDB

📚 https://docs.python.org/3/library/datetime.html#datetime.datetime.now

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.insert_one


In [None]:
def store_chat_message(session_id: str, role: str, content: str) -> None:
    """
    Store a chat message in a MongoDB collection.

    Args:
        session_id (str): Session ID of the message.
        role (str): Role for the message. One of `system`, `user` or `assistant`.
        content (str): Content of the message.
    """
    # Create a message object with `session_id`, `role`, `content` and `timestamp` fields
    # `timestamp` should be set to the current timestamp
    message = <CODE_BLOCK_26>
    # Insert the `message` into the `history_collection` collection
    <CODE_BLOCK_27>

### Define a function to retrieve chat history from MongoDB

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/cursor.html#pymongo.cursor.Cursor.sort


In [None]:
def retrieve_session_history(session_id: str) -> List:
    """
    Retrieve chat message history for a particular session.

    Args:
        session_id (str): Session ID to retrieve chat message history for.

    Returns:
        List: List of chat messages, each formatted as
            {"role": <role_value>, "content": <content_value>}.
    """
    # Query the `history_collection` collection for documents where the "session_id" field has the value of the input `session_id`
    # Sort the results in increasing order of the values in the `timestamp` field
    cursor =  <CODE_BLOCK_28>

    # NOTE(review): a PyMongo cursor object is presumably truthy even when it matches
    # no documents, which would make the `else` branch below unreachable -- confirm.
    if cursor:
        # Write a list comprehension to iterate through the cursor and extract the `role` and `content` field from each entry
        # Then format each entry as: {"role": <role_value>, "content": <content_value>}
        messages = [<CODE_BLOCK_29>]
    else:
        # If cursor is empty, return an empty list
        messages = []

    return messages

### Handle chat history in the `generate_answer` function

📚 https://docs.python.org/3/tutorial/datastructures.html

📚 https://docs.fireworks.ai/guides/querying-text-models#chat-completions-api


In [None]:
def generate_answer(session_id: str, user_query: str) -> None:
    """
    Generate an answer to the user's query taking chat history into account.

    The messages sent to the chat model are assembled in this order: a system
    message containing the retrieved context, the stored history for
    `session_id`, and finally the new user message. The answer is printed
    rather than returned.

    NOTE: This redefines the earlier single-argument `generate_answer`.

    Args:
        session_id (str): Session ID to retrieve chat history for.
        user_query (str): The user's query string.
    """
    # Initialize list of messages to pass to the chat completion model
    messages = []

    # Retrieve documents relevant to the user query and convert them to a single string
    context = vector_search(user_query)
    # Documents without a `page_content` field contribute an empty string
    context = "\n\n".join([d.get("page_content", "") for d in context])
    # Create a system prompt containing the retrieved context
    system_message = {
        "role": "system",
        "content": f"Answer the question based only on the following context. If the context is empty, say I DON'T KNOW\n\nContext:\n{context}",
    }
    # Append the system prompt to the `messages` list
    messages.append(system_message)

    # Use the `retrieve_session_history` function to retrieve message history from MongoDB for the session ID `session_id`
    # And add all messages in the message history to the `messages` list
    <CODE_BLOCK_30>

    # Format the user message in the format {"role": <role_value>, "content": <content_value>}
    # The role value for user messages must be "user"
    # And append the user message to the `messages` list
    <CODE_BLOCK_31>

    # Call the chat completions API
    response = fw_client.chat.completions.create(model=model, messages=messages)

    # Extract the answer from the API response
    answer = response.choices[0].message.content

    # Use the `store_chat_message` function to store the user message and also the generated answer in the message history collection
    # The role value for user messages is "user", and "assistant" for the generated answer
    <CODE_BLOCK_32>

    print(answer)

In [None]:
generate_answer(
    session_id="1",
    user_query="What are the important considerations while choosing an embedding model?",
)

In [None]:
generate_answer(
    session_id="1",
    user_query="What did I just ask you?",
)