### Install dependencies

In [1]:
!pip install boto3 sagemaker langchain langchain-community langchain-core faiss-cpu requests opensearch-py sentence-transformers langchain-text-splitters requests-aws4auth qdrant-client -U



### Load CSV data from S3

In [2]:
!pwd

/home/reson/Documents/GitHub/Enterprise-RAG/notebooks


In [1]:
import boto3
import pandas as pd

s3 = boto3.client('s3')
bucket_name = 'recipes-rag'

In [2]:
# file_key = 'food_recipes.csv'

# # Download the file from s3 locally
# s3.download_file(bucket_name, file_key, '../data/food_recipes.csv')

# # Load the CSV into a DataFrame
# df = pd.read_csv('../data/food_recipes.csv')

# df.head()

In [3]:
from pathlib import Path

file_key = 'recipes_w_cleaning_time_combined_features.parquet'
local_path = Path('../data') / file_key

# The local target directory has to exist before the download can write into it
local_path.parent.mkdir(parents=True, exist_ok=True)

# Reuse an existing local copy instead of downloading on every run;
# delete the local file to force a fresh download from S3
if not local_path.exists():
    s3.download_file(bucket_name, file_key, str(local_path))

df = pd.read_parquet(local_path)

df.head()

Unnamed: 0,RecipeId,Name,AuthorId,AuthorName,CookTime,PrepTime,TotalTime,DatePublished,Description,Images,...,FiberContent,SugarContent,ProteinContent,RecipeServings,RecipeYield,RecipeInstructions,CookTime_Minutes,PrepTime_Minutes,TotalTime_Minutes,Combined_Features_Clean
0,38.0,Low-Fat Berry Blue Frozen Dessert,1533,Dancer,PT24H,PT45M,PT24H45M,1999-08-09 21:46:00+00:00,Make and share this Low-Fat Berry Blue Frozen ...,[https://img.sndimg.com/food/image/upload/w_55...,...,3.6,30.2,3.2,4.0,,"[Toss 2 cups berries with sugar., Let stand fo...",1440,45,1485,Low-Fat Berry Blue Frozen Dessert Frozen Desse...
1,39.0,Biryani,1567,elly9812,PT25M,PT4H,PT4H25M,1999-08-29 13:12:00+00:00,Make and share this Biryani recipe from Food.com.,[https://img.sndimg.com/food/image/upload/w_55...,...,9.0,20.4,63.4,6.0,,[Soak saffron in warm milk for 5 minutes and p...,25,240,265,Biryani Chicken Breast Make share Biryani reci...
2,40.0,Best Lemonade,1566,Stephen Little,PT5M,PT30M,PT35M,1999-09-05 19:52:00+00:00,This is from one of my first Good House Keepi...,[https://img.sndimg.com/food/image/upload/w_55...,...,0.4,77.2,0.3,4.0,,"[Into a 1 quart Jar with tight fitting lid, pu...",5,30,35,Best Lemonade Beverages one first Good House K...
3,41.0,Carina's Tofu-Vegetable Kebabs,1586,Cyclopz,PT20M,PT24H,PT24H20M,1999-09-03 14:54:00+00:00,This dish is best prepared a day in advance to...,[https://img.sndimg.com/food/image/upload/w_55...,...,17.3,32.1,29.3,2.0,4 kebabs,"[Drain the tofu, carefully squeezing out exces...",20,1440,1460,Carina's Tofu-Vegetable Kebabs Soy/Tofu dish b...
4,42.0,Cabbage Soup,1538,Duckie067,PT30M,PT20M,PT50M,1999-09-19 06:19:00+00:00,Make and share this Cabbage Soup recipe from F...,[https://img.sndimg.com/food/image/upload/w_55...,...,4.8,17.7,4.3,4.0,,"[Mix everything together and bring to a boil.,...",30,20,50,Cabbage Soup Vegetable Make share Cabbage Soup...


In [4]:
df.columns

Index(['RecipeId', 'Name', 'AuthorId', 'AuthorName', 'CookTime', 'PrepTime',
       'TotalTime', 'DatePublished', 'Description', 'Images', 'RecipeCategory',
       'Keywords', 'RecipeIngredientQuantities', 'RecipeIngredientParts',
       'AggregatedRating', 'ReviewCount', 'Calories', 'FatContent',
       'SaturatedFatContent', 'CholesterolContent', 'SodiumContent',
       'CarbohydrateContent', 'FiberContent', 'SugarContent', 'ProteinContent',
       'RecipeServings', 'RecipeYield', 'RecipeInstructions',
       'CookTime_Minutes', 'PrepTime_Minutes', 'TotalTime_Minutes',
       'Combined_Features_Clean'],
      dtype='object')

In [5]:
str(df.iloc[0]['Combined_Features_Clean'])

'Low-Fat Berry Blue Frozen Dessert Frozen Desserts Make share Low-Fat Berry Blue Frozen Dessert recipe Food.com. Dessert Low Protein Low Cholesterol Healthy Free Of... Summer Weeknight Freezer Easy'

### Load data into chunked documents

In [6]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

In [7]:
embedding_model = HuggingFaceEmbeddings(model_name="multi-qa-mpnet-base-dot-v1")

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


In [8]:
text_splitter_recursive = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=256)

In [9]:
# Using old raw dataset

# def create_documents(df):
#     # AWS Kendra requires non empty string values for each field
#     # Make explicit to the LLM that the field is not available
#     df.fillna('Not Available', inplace=True)
    
#     documents = []
#     for index, row in df.iterrows():
#         metadata = {
#             'recipe_title': str(row['recipe_title']) if row['recipe_title'] else 'No Title Available',
#             'url': str(row['url']) if row['url'] else 'https://example.com',
#             # 'record_health': str(row['record_health']) if row['record_health'] else 'Unknown',
#             'vote_count': str(row['vote_count']) if row['vote_count'] else 'No Votes Available',
#             'rating': str(row['rating']) if row['rating'] else 'No Rating Available',
#             'cuisine': str(row['cuisine']) if row['cuisine'] else 'No Cuisine Available',
#             'course': str(row['course']) if row['course'] else 'No Course Available',
#             'diet': str(row['diet']) if row['diet'] else 'No Diet Information Available',
#             'prep_time': str(row['prep_time']) if row['prep_time'] else 'No Prep Time Available',
#             'cook_time': str(row['cook_time']) if row['cook_time'] else 'No Cook Time Available',
#             'author': str(row['author']) if row['author'] else 'No Author Available',
#             'category': str(row['category']) if row['category'] else 'No Category Available'
#         }

#         # Combine all text fields for the document content
#         text = f"{row['description']} {row['ingredients']} {row['instructions']} {row['tags']}"
#         doc = Document(page_content=text, metadata=metadata)
#         documents.append(doc)
    
#     return documents

In [10]:
# Using data after EDA with more complex fields

# def create_documents(df):
#     # Ensure non-empty string values for each field
#     df.fillna('Not Available', inplace=True)
    
#     documents = []
#     for index, row in df.iterrows():
#         metadata = {
#             'recipe_id': str(row['RecipeId']) if row['RecipeId'] else 'No ID Available',
#             'name': str(row['Name']) if row['Name'] else 'No Name Available',
#             'author_id': str(row['AuthorId']) if row['AuthorId'] else 'No Author ID Available',
#             'author_name': str(row['AuthorName']) if row['AuthorName'] else 'No Author Name Available',
#             'cook_time': str(row['CookTime']) if row['CookTime'] else 'No Cook Time Available',
#             'prep_time': str(row['PrepTime']) if row['PrepTime'] else 'No Prep Time Available',
#             'total_time': str(row['TotalTime']) if row['TotalTime'] else 'No Total Time Available',
#             'date_published': str(row['DatePublished']) if row['DatePublished'] else 'No Date Available',
#             'recipe_category': str(row['RecipeCategory']) if row['RecipeCategory'] else 'No Category Available',
#             'keywords': str(row['Keywords']) if row['Keywords'] else 'No Keywords Available',
#             'aggregated_rating': str(row['AggregatedRating']) if row['AggregatedRating'] else 'No Rating Available',
#             'review_count': str(row['ReviewCount']) if row['ReviewCount'] else 'No Reviews Available',
#             'calories': str(row['Calories']) if row['Calories'] else 'No Calories Information Available',
#             'fat_content': str(row['FatContent']) if row['FatContent'] else 'No Fat Content Available',
#             'saturated_fat_content': str(row['SaturatedFatContent']) if row['SaturatedFatContent'] else 'No Saturated Fat Content Available',
#             'cholesterol_content': str(row['CholesterolContent']) if row['CholesterolContent'] else 'No Cholesterol Content Available',
#             'sodium_content': str(row['SodiumContent']) if row['SodiumContent'] else 'No Sodium Content Available',
#             'carbohydrate_content': str(row['CarbohydrateContent']) if row['CarbohydrateContent'] else 'No Carbohydrate Content Available',
#             'fiber_content': str(row['FiberContent']) if row['FiberContent'] else 'No Fiber Content Available',
#             'sugar_content': str(row['SugarContent']) if row['SugarContent'] else 'No Sugar Content Available',
#             'protein_content': str(row['ProteinContent']) if row['ProteinContent'] else 'No Protein Content Available',
#             'recipe_servings': str(row['RecipeServings']) if row['RecipeServings'] else 'No Servings Information Available',
#             'recipe_yield': str(row['RecipeYield']) if row['RecipeYield'] else 'No Yield Information Available'
#         }

#         # Combine relevant text fields for the document content
#         text = f"""
#         Name: {row['Name']}
#         Category: {row['RecipeCategory']}
#         Description: {row['Description']}
#         Keywords: {row['Keywords']}
#         Ingredients: {row['RecipeIngredientParts']}
#         Instructions: {row['RecipeInstructions']}
#         """

#         doc = Document(page_content=text.strip(), metadata=metadata)
#         documents.append(doc)
    
#     return documents


In [11]:
# (metadata key, DataFrame column, placeholder used when the value is missing)
_RECIPE_METADATA_FIELDS = (
    ('recipe_id', 'RecipeId', 'No ID Available'),
    ('name', 'Name', 'No Name Available'),
    ('cook_time', 'CookTime', 'No Cook Time Available'),
    ('prep_time', 'PrepTime', 'No Prep Time Available'),
    ('total_time', 'TotalTime', 'No Total Time Available'),
    ('recipe_category', 'RecipeCategory', 'No Category Available'),
    ('keywords', 'Keywords', 'No Keywords Available'),
    ('aggregated_rating', 'AggregatedRating', 'No Rating Available'),
    ('review_count', 'ReviewCount', 'No Reviews Available'),
    ('calories', 'Calories', 'No Calories Information Available'),
    ('fat_content', 'FatContent', 'No Fat Content Available'),
    ('saturated_fat_content', 'SaturatedFatContent', 'No Saturated Fat Content Available'),
    ('cholesterol_content', 'CholesterolContent', 'No Cholesterol Content Available'),
    ('sodium_content', 'SodiumContent', 'No Sodium Content Available'),
    ('carbohydrate_content', 'CarbohydrateContent', 'No Carbohydrate Content Available'),
    ('sugar_content', 'SugarContent', 'No Sugar Content Available'),
    ('protein_content', 'ProteinContent', 'No Protein Content Available'),
    ('recipe_servings', 'RecipeServings', 'No Servings Information Available'),
    ('recipe_yield', 'RecipeYield', 'No Yield Information Available'),
)


def _metadata_value(value, fallback):
    """Return ``str(value)``, or ``fallback`` when the value is missing.

    ``pd.isna`` yields a plain bool for scalars and an element-wise result for
    array-likes (such as the ``Keywords`` arrays), so both shapes are handled.
    An array-like counts as missing only when every element is missing, which
    also covers empty arrays.
    """
    is_missing = pd.isna(value)
    if hasattr(is_missing, 'all'):
        is_missing = is_missing.all()
    return fallback if is_missing else str(value)


def create_documents(df):
    """Build one ``Document`` per DataFrame row.

    Args:
        df: DataFrame holding the recipe columns listed in
            ``_RECIPE_METADATA_FIELDS`` plus ``Combined_Features_Clean``.

    Returns:
        A list of ``Document`` objects in row order. ``page_content`` is the
        stringified ``Combined_Features_Clean`` value; ``metadata`` maps each
        metadata key to the stringified column value, or to its placeholder
        text when the value is missing.
    """
    documents = []
    for _, row in df.iterrows():
        metadata = {
            key: _metadata_value(row[column], fallback)
            for key, column, fallback in _RECIPE_METADATA_FIELDS
        }

        # Use Combined_Features_Clean for the document content
        text = str(row['Combined_Features_Clean'])
        documents.append(Document(page_content=text, metadata=metadata))

    return documents


In [12]:
documents = create_documents(df)

In [13]:
documents[0]

Document(metadata={'recipe_id': '38.0', 'name': 'Low-Fat Berry Blue Frozen Dessert', 'cook_time': 'PT24H', 'prep_time': 'PT45M', 'total_time': 'PT24H45M', 'recipe_category': 'Frozen Desserts', 'keywords': "['Dessert' 'Low Protein' 'Low Cholesterol' 'Healthy' 'Free Of...' 'Summer'\n 'Weeknight' 'Freezer' 'Easy']", 'aggregated_rating': '4.5', 'review_count': '4.0', 'calories': '170.9', 'fat_content': '2.5', 'saturated_fat_content': '1.3', 'cholesterol_content': '8.0', 'sodium_content': '29.8', 'carbohydrate_content': '37.1', 'sugar_content': '30.2', 'protein_content': '3.2', 'recipe_servings': '4.0', 'recipe_yield': 'No Yield Information Available'}, page_content='Low-Fat Berry Blue Frozen Dessert Frozen Desserts Make share Low-Fat Berry Blue Frozen Dessert recipe Food.com. Dessert Low Protein Low Cholesterol Healthy Free Of... Summer Weeknight Freezer Easy')

In [14]:
len(documents)

522517

In [15]:
def split_documents_with_metadata(documents, text_splitter):
    """Split every document into chunks, copying its metadata onto each chunk.

    Args:
        documents: Iterable of objects exposing ``page_content`` and ``metadata``.
        text_splitter: Object exposing ``split_text(text)`` that returns the chunks.

    Returns:
        A flat list of ``Document`` objects, one per chunk. Each chunk carries a
        copy of its source metadata plus a ``chunk_id`` giving the chunk's
        position within its source document.
    """
    return [
        Document(page_content=piece, metadata={**source.metadata, "chunk_id": position})
        for source in documents
        for position, piece in enumerate(text_splitter.split_text(source.page_content))
    ]

In [16]:
split_documents = split_documents_with_metadata(documents, text_splitter_recursive)

In [17]:
split_documents[0]

Document(metadata={'recipe_id': '38.0', 'name': 'Low-Fat Berry Blue Frozen Dessert', 'cook_time': 'PT24H', 'prep_time': 'PT45M', 'total_time': 'PT24H45M', 'recipe_category': 'Frozen Desserts', 'keywords': "['Dessert' 'Low Protein' 'Low Cholesterol' 'Healthy' 'Free Of...' 'Summer'\n 'Weeknight' 'Freezer' 'Easy']", 'aggregated_rating': '4.5', 'review_count': '4.0', 'calories': '170.9', 'fat_content': '2.5', 'saturated_fat_content': '1.3', 'cholesterol_content': '8.0', 'sodium_content': '29.8', 'carbohydrate_content': '37.1', 'sugar_content': '30.2', 'protein_content': '3.2', 'recipe_servings': '4.0', 'recipe_yield': 'No Yield Information Available', 'chunk_id': 0}, page_content='Low-Fat Berry Blue Frozen Dessert Frozen Desserts Make share Low-Fat Berry Blue Frozen Dessert recipe Food.com. Dessert Low Protein Low Cholesterol Healthy Free Of... Summer Weeknight Freezer Easy')

In [21]:
# from transformers import AutoTokenizer

# def count_tokens(text, model_name="distilbert-base-uncased"):
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
    
#     tokens = tokenizer.encode(text, add_special_tokens=False)
#     num_tokens = len(tokens)
    
#     return num_tokens

In [20]:
# We have > 500,000 recipes, so embedding and indexing them takes a long time to run.
from langchain_community.vectorstores import Qdrant

# Embed the documents with `embedding_model` and load them into a Qdrant store
# created with location=":memory:".
# NOTE(review): this indexes `documents`, not the chunked `split_documents` built
# earlier in the notebook — confirm that indexing the unsplit recipes is intended.
qdrant_store = Qdrant.from_documents(documents,
    embedding_model,
    location=":memory:",
)

In [21]:
qdrant_retriever = qdrant_store.as_retriever()

In [22]:
def format_docs(docs):
    """Render the metadata of the given documents as a single context string.

    Only ``doc.metadata`` is included; ``page_content`` is not part of the output.
    Each entry is ``"Metadata: <metadata>"`` followed by a newline, and entries
    are joined with two further newlines. An empty input gives an empty string.
    """
    return "\n\n".join(f"Metadata: {doc.metadata}\n" for doc in docs)

### Agent function definitions

In [24]:
# Tool definition for the recipe vector-database lookup.
# "name" is the identifier that tool calls are matched against, "description" is the
# natural-language explanation of the tool, and "input_schema" is a JSON schema that
# declares one required argument, "queries": an array of query strings.
recipe_db_query_tool = {
  "name": "query_food_recipe_vector_db",
  "description": """
      Queries the vector database containing food recipes to retrieve the most relevant documents. 
      This function allows the model to generate and execute multiple queries as necessary to gather comprehensive context, 
      such as ingredients, preparation steps, and metadata like cuisine and diet type, ensuring accurate and thorough responses to user queries.
      """,
  "input_schema": {
    "type": "object",
    "properties": {
      "queries": {
        "type": "array",
        "items": {
          "type": "string",
          "description": "A query generated by the model to run against the vector database to fetch recipe documents."
        },
        "description": "A list of queries generated by the model to run against the vector database to fetch recipe documents."
      }
    },
    "required": ["queries"]
  }
}


### Initialize the Bedrock model and define a utility for stateless messaging (no function calling)

In [25]:
import json

In [26]:
bedrock_client = boto3.client('bedrock-runtime', region_name="us-east-1")

In [27]:
# We will need to tune these prompts.
# query_bedrock_llm() reads baseline_sys_prompt when it is called, so after editing
# this prompt only this cell needs to be re-run (the function definition does not).

baseline_sys_prompt = """
You are a helpful assistant and expert in cooking recipes.

Before answering, always make at least one call to query_food_recipe_vector_db
to retrieve the relevant context of recipes and ingredients to generate an informed
and high-quality response to the user prompt but NEVER exceed a MAXIMUM of 
3 calls to the query_food_recipe_vector_db function.

Provide a response to the user prompt about food with recommended recipes and instructions.
"""

In [48]:
MODEL_ID = "anthropic.claude-3-sonnet-20240229-v1:0"

def query_bedrock_llm(messages):
    """Send a chat-style request to the Bedrock model and return the decoded response.

    Args:
        messages: Message list placed under the "messages" key of the request body.

    Returns:
        The JSON-decoded response body.
    """
    request_payload = {
        # This is required to use chat style messages object
        'anthropic_version': 'bedrock-2023-05-31',
        'system': baseline_sys_prompt,
        'messages': messages,
        'max_tokens': 3000,
        "tools": [recipe_db_query_tool],

        # Enabling this config forces the model to always call the recipe db query tool at least once
        # https://docs.anthropic.com/en/docs/build-with-claude/tool-use#controlling-claudes-output
        # "tool_choice": {
        #     "type": "tool",
        #     "name": recipe_db_query_tool['name']
        # },

        # TODO: TUNE THESE VALUES
        'temperature': 0.1,
        'top_p': 0.9,
    }

    raw_response = bedrock_client.invoke_model(
        modelId=MODEL_ID,
        body=json.dumps(request_payload),
    )

    # The response body is a stream: read it fully, then decode the JSON payload
    body_stream = raw_response.get('body')
    return json.loads(body_stream.read())

### Pipe langchain together

In [262]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableMap

In [399]:
baseline_user_prompt = """
### Here is a user prompt:
{query}
"""

In [203]:
def process_prompt(query_args):
    """Build the chat message list for a single user query.

    Args:
        query_args: Mapping with a 'query' entry holding the user's text.

    Returns:
        A one-element list containing a user message whose single text block is
        ``baseline_user_prompt`` with the ``{query}`` placeholder substituted.
    """
    user_text = baseline_user_prompt.replace("{query}", query_args['query'])
    text_block = {"type": "text", "text": user_text}

    # A list of messages is more than this single turn needs, but the same shape
    # is used later to persist chat history for continuous dialogue
    return [{"role": "user", "content": [text_block]}]

In [206]:
# Pipeline: wrap the raw input as {"query": <input>}, turn it into chat messages with
# process_prompt, then send those messages to the model with query_bedrock_llm.
qdrant_rag_chain = (
    RunnableMap(
        # Static retrieval of context at this stage is currently disabled (commented out)
        # {"context": qdrant_retriever | format_docs,
         {"query": RunnablePassthrough()}
    )
    | process_prompt
    | query_bedrock_llm
    # Output parsing step is currently disabled (commented out)
    # | parse_event_stream
)

### Model generates dynamic context queries to vector db

In [36]:
test_query_1 = "I enjoy asian fusion food and I am a vegetarian. Give me one recipe with ingredients and instructions"

In [208]:
qdrant_rag_chain.invoke(test_query_1)

{'ResponseMetadata': {'RequestId': 'aa3d93a7-ead5-457b-87d8-1b8140b86927', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 06 Jul 2024 08:34:56 GMT', 'content-type': 'application/json', 'content-length': '375', 'connection': 'keep-alive', 'x-amzn-requestid': 'aa3d93a7-ead5-457b-87d8-1b8140b86927', 'x-amzn-bedrock-invocation-latency': '3703', 'x-amzn-bedrock-output-token-count': '39', 'x-amzn-bedrock-input-token-count': '549'}, 'RetryAttempts': 0}, 'contentType': 'application/json', 'body': <botocore.response.StreamingBody object at 0x7fa439ab6560>}
{'id': 'msg_bdrk_017xvdoKgaXAYQ9YHoMXiTy7', 'type': 'message', 'role': 'assistant', 'model': 'claude-3-sonnet-20240229', 'content': [{'type': 'tool_use', 'id': 'toolu_bdrk_01ShrYTuneuhxYy6CeAAwT5X', 'name': 'query_food_recipe_vector_db', 'input': {'queries': ['asian fusion vegetarian recipes']}}], 'stop_reason': 'tool_use', 'stop_sequence': None, 'usage': {'input_tokens': 549, 'output_tokens': 39}}


{'id': 'msg_bdrk_017xvdoKgaXAYQ9YHoMXiTy7',
 'type': 'message',
 'role': 'assistant',
 'model': 'claude-3-sonnet-20240229',
 'content': [{'type': 'tool_use',
   'id': 'toolu_bdrk_01ShrYTuneuhxYy6CeAAwT5X',
   'name': 'query_food_recipe_vector_db',
   'input': {'queries': ['asian fusion vegetarian recipes']}}],
 'stop_reason': 'tool_use',
 'stop_sequence': None,
 'usage': {'input_tokens': 549, 'output_tokens': 39}}

In [37]:
test_query_2 = """
I have a peanut allergy but I like thai food. 
I also don't enjoy spicy food much, and want a meal with low carbs. 
Give a recipe with ingredients and instructions
"""

In [210]:
qdrant_rag_chain.invoke(test_query_2)

{'ResponseMetadata': {'RequestId': '5f0f7185-0324-411a-8950-a3e0077e050d', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 06 Jul 2024 08:34:59 GMT', 'content-type': 'application/json', 'content-length': '390', 'connection': 'keep-alive', 'x-amzn-requestid': '5f0f7185-0324-411a-8950-a3e0077e050d', 'x-amzn-bedrock-invocation-latency': '2490', 'x-amzn-bedrock-output-token-count': '52', 'x-amzn-bedrock-input-token-count': '573'}, 'RetryAttempts': 0}, 'contentType': 'application/json', 'body': <botocore.response.StreamingBody object at 0x7fa439ab7100>}
{'id': 'msg_bdrk_01XHfJsQF2ivYKKhsvqBLiuh', 'type': 'message', 'role': 'assistant', 'model': 'claude-3-sonnet-20240229', 'content': [{'type': 'tool_use', 'id': 'toolu_bdrk_01KaXnaTHSYcgn7bvyNWdTN9', 'name': 'query_food_recipe_vector_db', 'input': {'queries': ['thai food', 'peanut free', 'low carb', 'not spicy']}}], 'stop_reason': 'tool_use', 'stop_sequence': None, 'usage': {'input_tokens': 573, 'output_tokens': 52}}


{'id': 'msg_bdrk_01XHfJsQF2ivYKKhsvqBLiuh',
 'type': 'message',
 'role': 'assistant',
 'model': 'claude-3-sonnet-20240229',
 'content': [{'type': 'tool_use',
   'id': 'toolu_bdrk_01KaXnaTHSYcgn7bvyNWdTN9',
   'name': 'query_food_recipe_vector_db',
   'input': {'queries': ['thai food',
     'peanut free',
     'low carb',
     'not spicy']}}],
 'stop_reason': 'tool_use',
 'stop_sequence': None,
 'usage': {'input_tokens': 573, 'output_tokens': 52}}

In [38]:
test_query_3 = """
Suggest a low-carb breakfast recipe that includes eggs and spinach, 
can be prepared in under 20 minutes, 
and is suitable for a keto diet.
"""

In [212]:
qdrant_rag_chain.invoke(test_query_3)

{'ResponseMetadata': {'RequestId': '017cd959-0727-4b43-b0d2-a356f924f166', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 06 Jul 2024 08:35:00 GMT', 'content-type': 'application/json', 'content-length': '417', 'connection': 'keep-alive', 'x-amzn-requestid': '017cd959-0727-4b43-b0d2-a356f924f166', 'x-amzn-bedrock-invocation-latency': '1399', 'x-amzn-bedrock-output-token-count': '59', 'x-amzn-bedrock-input-token-count': '568'}, 'RetryAttempts': 0}, 'contentType': 'application/json', 'body': <botocore.response.StreamingBody object at 0x7fa439ab7d60>}
{'id': 'msg_bdrk_01J3kpLzprvAfwKB1EQyWmvD', 'type': 'message', 'role': 'assistant', 'model': 'claude-3-sonnet-20240229', 'content': [{'type': 'tool_use', 'id': 'toolu_bdrk_01HKkw3US7DbmUiQbF8ib9uD', 'name': 'query_food_recipe_vector_db', 'input': {'queries': ['low-carb breakfast recipe', 'eggs', 'spinach', 'keto diet', 'under 20 minutes']}}], 'stop_reason': 'tool_use', 'stop_sequence': None, 'usage': {'input_tokens': 568, 'output_tokens

{'id': 'msg_bdrk_01J3kpLzprvAfwKB1EQyWmvD',
 'type': 'message',
 'role': 'assistant',
 'model': 'claude-3-sonnet-20240229',
 'content': [{'type': 'tool_use',
   'id': 'toolu_bdrk_01HKkw3US7DbmUiQbF8ib9uD',
   'name': 'query_food_recipe_vector_db',
   'input': {'queries': ['low-carb breakfast recipe',
     'eggs',
     'spinach',
     'keto diet',
     'under 20 minutes']}}],
 'stop_reason': 'tool_use',
 'stop_sequence': None,
 'usage': {'input_tokens': 568, 'output_tokens': 59}}

In [39]:
test_query_4 = """
Suggest a healthy dinner recipe for two people that includes fish, 
is under 500 calories per serving, 
and can be made in less than 40 minutes.
"""

In [214]:
qdrant_rag_chain.invoke(test_query_4)

{'ResponseMetadata': {'RequestId': '1346cce7-5457-4d65-be81-f24b90bbb0c8', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 06 Jul 2024 08:35:02 GMT', 'content-type': 'application/json', 'content-length': '439', 'connection': 'keep-alive', 'x-amzn-requestid': '1346cce7-5457-4d65-be81-f24b90bbb0c8', 'x-amzn-bedrock-invocation-latency': '1413', 'x-amzn-bedrock-output-token-count': '58', 'x-amzn-bedrock-input-token-count': '567'}, 'RetryAttempts': 0}, 'contentType': 'application/json', 'body': <botocore.response.StreamingBody object at 0x7fa4402a8310>}
{'id': 'msg_bdrk_01NDSdtdVPyTv1JKecy9wynf', 'type': 'message', 'role': 'assistant', 'model': 'claude-3-sonnet-20240229', 'content': [{'type': 'tool_use', 'id': 'toolu_bdrk_01QbmRJxKc7FoKavEFdF5QcW', 'name': 'query_food_recipe_vector_db', 'input': {'queries': ['healthy dinner recipe with fish', 'under 500 calories per serving', 'less than 40 minutes to make']}}], 'stop_reason': 'tool_use', 'stop_sequence': None, 'usage': {'input_tokens':

{'id': 'msg_bdrk_01NDSdtdVPyTv1JKecy9wynf',
 'type': 'message',
 'role': 'assistant',
 'model': 'claude-3-sonnet-20240229',
 'content': [{'type': 'tool_use',
   'id': 'toolu_bdrk_01QbmRJxKc7FoKavEFdF5QcW',
   'name': 'query_food_recipe_vector_db',
   'input': {'queries': ['healthy dinner recipe with fish',
     'under 500 calories per serving',
     'less than 40 minutes to make']}}],
 'stop_reason': 'tool_use',
 'stop_sequence': None,
 'usage': {'input_tokens': 567, 'output_tokens': 58}}

### Implement continuous dialogue and function calling

In [49]:
def generate_message(prompt):
    """Wrap a text prompt in a chat-style user message.

    Args:
        prompt: The text of the user message.

    Returns:
        A dict with role "user" and a single text content block holding ``prompt``.

    Raises:
        ValueError: If ``prompt`` is not a string.
    """
    # isinstance (rather than an exact type comparison) also accepts str subclasses
    if not isinstance(prompt, str):
        raise ValueError(f'Tried to call generate_message with non-string input: {prompt}')

    return {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": prompt
            }
        ]
    }

In [50]:
def generate_tool_message(fn_results):
    """Wrap tool results in a user-role message.

    Args:
        fn_results: The tool result objects; used as the message content as-is.

    Returns:
        A dict with role "user" whose content is ``fn_results``.
    """
    tool_message = dict(role="user", content=fn_results)
    return tool_message

In [51]:
import json

# Adds the current prompt as a new message to the chat history
# and calls bedrock with the entire chat history
# Returns the response body, llm's message, and new chat history

'''
Example response body structure:
{
   "id":"msg_bdrk_01C5GGkafK7aL3P5i3rsMr1p",
   "type":"message",
   "role":"assistant",
   "model":"claude-3-sonnet-20240229",
   "content":[
      {
         "type":"tool_use",
         "id":"toolu_bdrk_01CQiYa8BMJfpJC68DuRdwQn",
         "name":"query_food_recipe_vector_db",
         "input":{
            "queries":[
               "healthy fish dinner recipe under 500 calories",
               "fish dinner recipe for two under 40 minutes"
            ]
         }
      }
   ],
   "stop_reason":"tool_use",
   "stop_sequence":"None",
   "usage":{
      "input_tokens":559,
      "output_tokens":55
   }
}
'''
def message_handler(existing_chat_history, prompt, is_tool_message=False):
    """Append a new user turn to the chat history, query the model, and record its reply.

    Args:
        existing_chat_history: List of prior messages. It is extended in place with
            the new user message and the model's reply.
        prompt: The prompt string, or the list of tool result objects when
            ``is_tool_message`` is true.
        is_tool_message: Whether ``prompt`` holds tool results rather than text.

    Returns:
        A three-item list: ``[response_body, llm_message, existing_chat_history]``.
    """
    # Tool results are an array of tool response objects, so they need a
    # different message structure than a plain text prompt
    build_message = generate_tool_message if is_tool_message else generate_message
    existing_chat_history.append(build_message(prompt))

    # The model is called with the entire history, including the turn just added
    response_body = query_bedrock_llm(existing_chat_history)

    # Keep only the fields needed to replay the reply as part of the history
    llm_message = {field: response_body[field] for field in ('role', 'content')}
    existing_chat_history.append(llm_message)

    return [response_body, llm_message, existing_chat_history]

In [52]:
def handle_vector_db_queries(queries, retriever=None):
    """Run each query against the retriever and return all results in one flat list.

    Args:
        queries: Iterable of query strings. A single string is treated as one query.
        retriever: Object exposing ``invoke(query)``. When omitted, the notebook-level
            ``qdrant_retriever`` is used, looked up at call time.

    Returns:
        A list holding the results of every query, in query order.
    """
    if retriever is None:
        # Resolve at call time so a re-created qdrant_retriever is picked up
        # without having to re-run this cell
        retriever = qdrant_retriever

    if isinstance(queries, str):
        # Iterating a bare string would issue one query per character
        queries = [queries]

    context_docs = []
    for query in queries:
        context_docs.extend(retriever.invoke(query))

    return context_docs

In [55]:
def handle_function_calls(tool_call_message_content):
    """Execute the tool calls in an LLM message and build the matching tool results.

    Args:
        tool_call_message_content: The ``content`` list of an LLM message. Blocks
            whose ``type`` is not ``'tool_use'`` are ignored.

    Returns:
        A list of ``tool_result`` dicts, one per tool call, each carrying the call's
        ``tool_use_id``. Failed calls have ``is_error`` set to True and a ``content``
        string describing the failure, so the model can see what went wrong.
    """
    tool_results = []

    for tool_call in tool_call_message_content:
        # Only process messages from the LLM that are function calls
        if tool_call['type'] != 'tool_use':
            continue

        fn_name = tool_call['name']
        fn_args = tool_call['input']
        fn_result = {
            "type": "tool_result",
            "tool_use_id": tool_call['id'],
        }

        # TODO: handle web search invocation here
        if fn_name != 'query_food_recipe_vector_db':
            print(f"ERROR: Attempted call to unknown function {fn_name}")
            fn_result['content'] = f"Error: unknown tool '{fn_name}'"
            fn_result['is_error'] = True
        elif 'queries' not in fn_args:
            print(f"ERROR: Tried to call {fn_name} with invalid args {fn_args}, skipping..")
            fn_result['content'] = "Error: missing required 'queries' parameter"
            fn_result['is_error'] = True
        else:
            print(f"Model called {fn_name} with args {fn_args}")
            context_docs = handle_vector_db_queries(fn_args['queries'])
            fn_result['content'] = format_docs(context_docs)

        tool_results.append(fn_result)

    return tool_results

### Run dynamic query function calling with test queries

In [65]:
'''
Example payload structure of response_body:

{'id': 'msg_bdrk_01REesjegNiLteurBoxW7pSt',
 'type': 'message',
 'role': 'assistant',
 'model': 'claude-3-sonnet-20240229',
 'content': [{'type': 'tool_use',
   'id': 'toolu_bdrk_01191W2FuAFTRoDqKKeJSmmn',
   'name': 'query_food_recipe_vector_db',
   'input': {'queries': ['thai food',
     'peanut free',
     'low carb',
     'not spicy']}}],
 'stop_reason': 'tool_use',
 'stop_sequence': None,
 'usage': {'input_tokens': 573, 'output_tokens': 52}}


Example payload structure of llm_message['content']:

[{'type': 'tool_use',
   'id': 'toolu_bdrk_01191W2FuAFTRoDqKKeJSmmn',
   'name': 'query_food_recipe_vector_db',
   'input': {'queries': ['thai food',
     'peanut free',
     'low carb',
     'not spicy']}}]
'''

def run_chat_loop(prompt, max_tool_rounds=10):
    """Entry point for invoking the LLM with support for function calling.

    Sends the prompt, executes any tools the model asks for, feeds the results
    back, and repeats until the model stops requesting tools. The prompt and the
    model's final text are printed.

    Args:
        prompt: The user's prompt text.
        max_tool_rounds: Upper bound on tool-call round trips, so a model that
            keeps requesting tools cannot loop forever.
    """
    print(f"[User]: {prompt}")
    response_body, llm_message, chat_history = message_handler(existing_chat_history=[], prompt=prompt)

    # The model wants to call tools: call them, provide the results, repeat until content is generated
    tool_rounds = 0
    while response_body['stop_reason'] == 'tool_use':
        if tool_rounds >= max_tool_rounds:
            print(f"\nWARNING: Stopped after {max_tool_rounds} tool-call rounds without a final answer")
            break
        tool_rounds += 1

        fn_results = handle_function_calls(tool_call_message_content=llm_message['content'])

        # Send function results back to LLM as a new message with the existing chat history
        response_body, llm_message, chat_history = message_handler(
            existing_chat_history=chat_history,
            prompt=fn_results,
            is_tool_message=True
        )

    # A reply can hold several content blocks and the first one is not guaranteed
    # to be text, so gather every text block instead of indexing content[0]
    reply_text = "\n".join(
        block['text'] for block in llm_message['content'] if block.get('type') == 'text'
    )
    print(f"\n[Model]: {reply_text}")


In [66]:
run_chat_loop(test_query_4)

[User]: 
Suggest a healthy dinner recipe for two people that includes fish, 
is under 500 calories per serving, 
and can be made in less than 40 minutes.

Model called query_food_recipe_vector_db with args {'queries': ['healthy fish dinner recipe under 500 calories quick']}

[Model]: <search_quality_reflection>
The search results provide several healthy fish dinner recipes that meet the criteria of being under 500 calories per serving and taking less than 40 minutes to prepare. The recipes include a variety of fish types like red snapper, tilapia, and other white fish. The recipes also have different cooking methods like baking and pan-frying. With this information, I should be able to provide a suitable recipe recommendation.
</search_quality_reflection>

<search_quality_score>5</search_quality_score>

<result>
Here is a healthy and delicious fish dinner recipe for two that meets your criteria:

Easy Baked Fish

Ingredients:
- 2 fish fillets (tilapia, cod, or other white fish), about 

In [67]:
run_chat_loop(test_query_3)

[User]: 
Suggest a low-carb breakfast recipe that includes eggs and spinach, 
can be prepared in under 20 minutes, 
and is suitable for a keto diet.

Model called query_food_recipe_vector_db with args {'queries': ['low-carb breakfast recipe with eggs and spinach', 'keto breakfast recipe with eggs and spinach']}

[Model]: <search_quality_reflection>
The search results provide several relevant low-carb breakfast recipes that include eggs and spinach, suitable for a keto diet. The recipes cover different preparation methods like omelets, baked eggs, and egg muffins. Most of the recipes can be prepared in under 20 minutes. I have enough context to provide a good recommendation to the user.
</search_quality_reflection>

<search_quality_score>5</search_quality_score>

<result>
Here is a delicious and keto-friendly breakfast recipe that includes eggs and spinach, and can be prepared in under 20 minutes:

Spinach and Feta Egg Muffins

Ingredients:
- 8 eggs
- 1 cup spinach, chopped 
- 1/2 cup f

In [68]:
run_chat_loop(test_query_2)

[User]: 
I have a peanut allergy but I like thai food. 
I also don't enjoy spicy food much, and want a meal with low carbs. 
Give a recipe with ingredients and instructions

Model called query_food_recipe_vector_db with args {'queries': ['thai food', 'peanut free', 'low carb', 'not spicy']}

[Model]: Based on the query results, here is a recommended Thai recipe that is peanut-free, low in carbs, and not spicy:

Thai Chicken with Spinach

Ingredients:
- 4 boneless, skinless chicken breasts
- 2 tablespoons olive oil
- 1 cup chicken broth
- 1/4 cup coconut milk
- 2 tablespoons fish sauce
- 1 tablespoon lime juice
- 1 teaspoon brown sugar
- 1/2 teaspoon ground ginger
- 1/4 teaspoon ground coriander
- 1 (10 oz) package fresh spinach
- Salt and pepper to taste

Instructions:

1. Season the chicken breasts with salt and pepper.
2. Heat the olive oil in a large skillet over medium-high heat. Add the chicken and cook for 4-5 minutes per side until browned and cooked through. Remove chicken from

### Try with higher complexity / more restrictive prompts

In [70]:
# More restrictive test prompts: each one stacks several dietary, nutritional
# and time constraints into a single request.

# Ketogenic + dairy-free + low sodium dinner, under an hour to cook.
test_query_5 = """
I am on a ketogenic diet and need a dinner recipe that is dairy-free, 
low in sodium, and takes less than an hour to cook.
"""

# Pescatarian main course, low saturated fat, Asian flavors, under 45 minutes.
test_query_6 = """
I'm looking for a pescatarian main course that is low in saturated fat, 
uses Asian flavors, and can be prepared in under 45 minutes.
"""

# Diabetic-friendly vegan breakfast: gluten-free, nut-free, low cholesterol,
# rich in omega-3, and preparable the night before.
test_query_7 = """
I need a diabetic-friendly, vegan breakfast recipe that is gluten-free, 
nut-free, and low in cholesterol, but also rich in omega-3 fatty acids 
and can be prepared the night before.
"""

# Strict paleo lunch: dairy-free, gluten-free, low carb, low sodium,
# antioxidant-rich, under 30 minutes with minimal equipment.
test_query_8 = """
I am following a strict paleo diet and need a lunch recipe that is dairy-free, 
gluten-free, low in carbs, and low in sodium. Additionally, it should be rich in antioxidants, 
and can be made in under 30 minutes with minimal cooking equipment.
"""

In [71]:
run_chat_loop(test_query_5)

[User]: 
I am on a ketogenic diet and need a dinner recipe that is dairy-free, 
low in sodium, and takes less than an hour to cook.

Model called query_food_recipe_vector_db with args {'queries': ['keto dinner recipes dairy free low sodium under 1 hour']}
Model called query_food_recipe_vector_db with args {'queries': ['keto dinner recipes dairy free low sodium under 500mg sodium under 1 hour']}

[Model]: Okay, this refined query provides some better low-sodium, keto, dairy-free dinner options that can be made in under an hour. Based on the results, here is a recipe I would recommend:

Keto Chicken Vegetable Soup

Ingredients:
- 2 boneless, skinless chicken breasts, diced into 1-inch pieces
- 2 tbsp olive oil or avocado oil
- 1 onion, diced 
- 2 carrots, sliced
- 2 celery stalks, sliced
- 4 cups chicken broth
- 1 tsp dried thyme
- Salt and pepper to taste

Instructions:
1. In a large pot or dutch oven, heat the oil over medium-high heat. Add the diced chicken and cook for 5-7 minutes un

In [73]:
run_chat_loop(test_query_6)

[User]: 
I'm looking for a pescatarian main course that is low in saturated fat, 
uses Asian flavors, and can be prepared in under 45 minutes.

Model called query_food_recipe_vector_db with args {'queries': '["pescatarian main course" "asian flavors" "low saturated fat" "under 45 minutes"]'}

[Model]: Here is a recipe for a pescatarian Asian-inspired main dish that is low in saturated fat and can be prepared in under 45 minutes:

Soy-Ginger Seared Ahi Tuna Steaks

Ingredients:
- 4 (6 oz) ahi tuna steaks 
- 2 tbsp sesame oil
- 1/4 cup low-sodium soy sauce
- 2 tbsp rice vinegar
- 1 tbsp freshly grated ginger
- 2 cloves garlic, minced
- 1 tsp sesame seeds
- 2 green onions, sliced 
- Cooked brown rice or quinoa, for serving

Instructions:

1. Pat the tuna steaks dry and let come to room temperature, about 15 minutes.

2. In a shallow dish, whisk together the soy sauce, rice vinegar, ginger, and garlic. Add the tuna steaks and turn to coat both sides with the marinade. Let marinate for 10 m

In [74]:
run_chat_loop(test_query_7)

[User]: 
I need a diabetic-friendly, vegan breakfast recipe that is gluten-free, 
nut-free, and low in cholesterol, but also rich in omega-3 fatty acids 
and can be prepared the night before.

Model called query_food_recipe_vector_db with args {'queries': ['diabetic-friendly vegan breakfast recipe gluten-free nut-free low cholesterol omega-3 fatty acids overnight']}
Model called query_food_recipe_vector_db with args {'queries': ['overnight oats omega-3 fatty acids']}

[Model]: <search_quality_reflection>
The additional search results provide some useful information on overnight oat recipes that contain omega-3 fatty acids. The "Simple Overnight Oats" recipe in particular looks promising as it contains ingredients like chia seeds and flaxseeds which are good sources of omega-3s. I now have enough context to provide a suitable recipe recommendation.
</search_quality_reflection>

<search_quality_score>5</search_quality_score>

<result>
Here is a diabetic-friendly, vegan, gluten-free, nut-

In [75]:
run_chat_loop(test_query_8)

[User]: 
I am following a strict paleo diet and need a lunch recipe that is dairy-free, 
gluten-free, low in carbs, and low in sodium. Additionally, it should be rich in antioxidants, 
and can be made in under 30 minutes with minimal cooking equipment.

Model called query_food_recipe_vector_db with args {'queries': ['paleo lunch recipes dairy-free gluten-free low carb low sodium antioxidants quick easy']}
Model called query_food_recipe_vector_db with args {'queries': ['paleo lunch salad antioxidant rich low carb low sodium 30 minutes']}

[Model]: This second query provides some better options for a paleo-friendly, antioxidant-rich salad that is low in carbs, sodium, and can be prepared quickly.

Based on the results, here is a recipe I would recommend:

Antioxidant Spinach Salad with Chicken (Paleo, Dairy-Free, Gluten-Free)

Ingredients:
- 4 cups fresh baby spinach 
- 1 cup cherry tomatoes, halved
- 1/2 cup sliced cucumber
- 1/4 cup sliced red onion
- 1/4 cup sliced almonds
- 2 cooked 