# Retrieval-Augmented Generation (RAG)

In [2]:
%pip install chromadb

Collecting chromadb
  Downloading chromadb-1.4.1-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.4.0-py3-none-any.whl.metadata (5.8 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.3-cp312-cp312-macosx_11_0_arm64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.23.2-cp312-cp312-macosx_13_0_arm64.whl.metadata (5.1 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.39.1-py3-none-any.whl.metadata (2.5 kB)
Collecting tokenizers>=0.13.2 (from chromadb)
  Downloading tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.3 kB)
Collecting pypika>=0.48.9 (from chromadb)
  Downloading pypika-0.50.0-py2.py3-none-any.whl.metadata (51 kB)
Collecting importlib-resourc

In [3]:
import chromadb
import dotenv
from pathlib import Path
from agents import Agent, Runner, function_tool, trace

# Load variables from a local .env file into the process environment
# (presumably the API credentials the agent needs -- confirm against the project's .env).
dotenv.load_dotenv()

True

In [4]:
# Open the on-disk Chroma store located one directory above this notebook,
# then fetch the existing collection that holds the calorie data.
chroma_client = chromadb.PersistentClient(path="../chroma")
nutrition_db = chroma_client.get_collection(name="nutrition_db")

In [16]:
# Collection from the same store that is queried below for nutrition Q&A pairs.
qa_db = chroma_client.get_collection(name="nutrition_qna")

Create a static calorie table that we can use as a tool:

In [None]:
# We populated the RAG with the data from the data/calories.csv file in
# the rag_setup.ipynb notebook


In [5]:
# Sanity check of the calorie collection: fetch the two nearest matches for "banana".
results = nutrition_db.query(query_texts=["banana"], n_results=2)

# A single query text was sent, so its hits are in the first entry of each result list.
documents, metadatas = results["documents"][0], results["metadatas"][0]
for document, metadata in zip(documents, metadatas):
    print(sorted(metadata.items()))
    print(document)
    print("\n")

/Users/alexander.konkin/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:10<00:00, 7.98MiB/s]


[('calories_per_100g', 89.0), ('food_category', 'fruits'), ('food_item', 'banana'), ('keywords', 'banana_fruits'), ('kj_per_100g', 374.0), ('serving_info', '100g')]
Food: Banana
        Category: Fruits
        Nutritional Information:
        - Calories: 89 per 100g
        - Energy: 374 kJ per 100g
        - Serving size reference: 100g

        This is a fruits food item that provides 89 calories per 100 grams.


[('calories_per_100g', 50.0), ('food_category', '(fruit)juices'), ('food_item', 'banana juice'), ('keywords', 'banana_juice_(fruit)juices'), ('kj_per_100g', 210.0), ('serving_info', '100ml')]
Food: Banana Juice
        Category: (Fruit)Juices
        Nutritional Information:
        - Calories: 50 per 100g
        - Energy: 210 kJ per 100g
        - Serving size reference: 100ml

        This is a (fruit)juices food item that provides 50 calories per 100 grams.




In [18]:
# Sanity check of the Q&A collection: fetch the two nearest matches for "banana".
results = qa_db.query(query_texts=["banana"], n_results=2)

# A single query text was sent, so its hits are in the first entry of each result list.
hit_documents = results["documents"][0]
hit_metadatas = results["metadatas"][0]
for position in range(len(hit_documents)):
    print(sorted(hit_metadatas[position].items()))
    print(hit_documents[position])
    print("\n")

[('is_pregnancy', False)]
Question: What is the recommended amount of bananas I should consume to count it as a single serving?
        Answer: One small-sized banana can be counted as a single serving.

        This Q&A pair provides information about nutrition and health topics.


[('is_pregnancy', False)]
Question: Which food is recommended for infants after they've been introduced to ripe bananas and sweet potatoes?
        Answer: Introduce porridge made from wheat flour or ground rice, starting with only one cereal. Once a week has passed, you may increase the frequency of this new food to two feedings per day.

        This Q&A pair provides information about nutrition and health topics.




In [21]:
# Peek at the metadata dict of the top hit from the most recent query
# (i.e. whichever cell last assigned `results`).
results['metadatas'][0][0]

{'is_pregnancy': False}

In [None]:
@function_tool
def calorie_lookup_tool(query: str, max_results: int = 3) -> str:
    """
    Tool function for a RAG database to look up calorie information for specific food items, but not for meals.

    Args:
        query: The food item to look up.
        max_results: The maximum number of results to return (values below 1 are treated as 1).

    Returns:
        A string containing the nutrition information, one line per matching
        food item, or a "not found" message when nothing matches.
    """

    # The model chooses `max_results`; Chroma rejects a non-positive
    # `n_results`, so clamp it instead of failing the tool call.
    results = nutrition_db.query(query_texts=[query], n_results=max(1, max_results))

    # One query text was sent, so its hits are in the first (only) result list.
    matches = results["metadatas"][0]
    if not matches:
        return f"No nutrition information found for: {query}"

    # Format results for the agent from the structured metadata only; the
    # free-text documents are not needed for this one-line summary.
    formatted_results = [
        f"{metadata['food_item'].title()} ({metadata['food_category'].title()}): "
        f"{metadata['calories_per_100g']} calories per 100g"
        for metadata in matches
    ]

    return "Nutrition Information:\n" + "\n".join(formatted_results)

In [25]:
@function_tool
def qa_lookup_tool(query: str, max_results: int = 3) -> str:
    """
    Tool function for a RAG database to look up question/answer pairs about nutrition, including pregnancy-related ones.

    Args:
        query: The nutrition question or topic to search for.
        max_results: The maximum number of results to return (values below 1 are treated as 1).

    Returns:
        A string containing the matching question/answer pairs, each prefixed
        with "Pregnancy" or "General", or a "not found" message when nothing matches.
    """

    # The model chooses `max_results`; Chroma rejects a non-positive
    # `n_results`, so clamp it instead of failing the tool call.
    results = qa_db.query(query_texts=[query], n_results=max(1, max_results))

    # One query text was sent, so its hits are in the first (only) result list.
    documents = results["documents"][0]
    if not documents:
        return f"No QA session found for: {query}"

    # Format results for the agent, labelling each pair by its pregnancy flag.
    formatted_results = []
    for document, metadata in zip(documents, results["metadatas"][0]):
        prefix = "Pregnancy" if metadata["is_pregnancy"] else "General"
        formatted_results.append(f"{prefix}: {document}")

    return "Results from QA session:\n" + "\n".join(formatted_results)

Let's test this out: 

_The following cell only works before you add the `@function_tool` decorator to the `qa_lookup_tool` function; once decorated, the name refers to a tool object that cannot be called directly._

In [23]:
# Direct call to inspect the raw tool output for a sample query. This relies on
# `qa_lookup_tool` still being a plain function (i.e. not yet wrapped by @function_tool).
print(qa_lookup_tool('bananas'))

Results from QA session:
General: Question: Which food is recommended for infants after they've been introduced to ripe bananas and sweet potatoes?
        Answer: Introduce porridge made from wheat flour or ground rice, starting with only one cereal. Once a week has passed, you may increase the frequency of this new food to two feedings per day.

        This Q&A pair provides information about nutrition and health topics.
General: Question: What is the recommended amount of bananas I should consume to count it as a single serving?
        Answer: One small-sized banana can be counted as a single serving.

        This Q&A pair provides information about nutrition and health topics.
General: Question: Which fruits are high in potassium that I should consider eating?
        Answer: Avocados, bananas, cantaloups, mangoes and papayas are all excellent options if you're looking for high-potassium fruits to eat.

        This Q&A pair provides information about nutrition and health topics

In [None]:
# Guard against stale kernel state: a plain (undecorated) function has no
# `name` attribute, which the objects produced by @function_tool are expected to have.
registered_tools = {
    "calorie_lookup_tool": calorie_lookup_tool,
    "qa_lookup_tool": qa_lookup_tool,
}
for tool_label, tool in registered_tools.items():
    assert hasattr(tool, "name"), f"{tool_label} is not properly decorated with @function_tool"

# (Re)build the agent only after the check, so it never holds undecorated functions.
calorie_agent = Agent(
    name="Nutrition Assistant",
    instructions="""
    You are a helpful nutrition assistant giving out calorie information.
    You give concise answers.
    If you need to look up calorie information, use the calorie_lookup_tool.
    Use the qa_lookup_tool to look up QAs about nutrition and pregnancy.
    """,
    tools=list(registered_tools.values()),
)

In [None]:
with trace("Nutrition Assistant with RAG including QA"):
    result = await Runner.run(
        calorie_agent,
        "I want to lose weight, what should I eat?",
    )
    print(result.final_output)

AttributeError: 'function' object has no attribute 'name'