In [1]:
from openai import OpenAI
import json

In [2]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/ECE1786_Project/Project_code

Mounted at /content/drive
/content/drive/MyDrive/ECE1786_Project/Project_code


## RAG

In [None]:
!pip install -qU langchain-openai
!pip install jq
!pip install langchain-community
!pip install langchain-chroma

In [4]:
import getpass
import os

os.environ["OPENAI_API_KEY"]=getpass.getpass()

from langchain_openai import ChatOpenAI

llm=ChatOpenAI(model="gpt-4o")


··········


In [6]:
def get_API_response(client,sys_prompt,user_prompt,temp,topp):
  completion=client.chat.completions.create(
      model="gpt-4o",
      temperature=temp,
      top_p=topp,
      messages=[
          {"role":"system","content":sys_prompt},
          {"role":"user","content":user_prompt}
      ],
  )
  response=completion.choices[0].message.content
  return response

In [7]:
from langchain_community.document_loaders import JSONLoader
from pathlib import Path
from pprint import pprint
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document


In [97]:
file_path_final_recipes = "./datasets/processed_recipes_init_200.json"
map_file_path='./ingre_nutrition_map/ingredient_nutrient_map.json'

## Retrive the nutrients from nutrient map

In [85]:
def metadata_fuc(record:dict, metadata:dict)->dict:
  metadata["ingredient_name"]=record.get("ingredient_name")
  metadata["nutrients"]=''.join(map(str,record.get("nutrients")))
  return metadata


In [86]:
# can be modified according to the structure of the nutrient map
loader=JSONLoader(
    file_path=map_file_path,
    jq_schema=".[]",
    content_key="ingredient_name",
    metadata_func=metadata_fuc
)
data=loader.load()

In [87]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(data)

vectorstore=Chroma.from_documents(documents=all_splits,embedding=OpenAIEmbeddings())
retriever=vectorstore.as_retriever(search_type="similarity",search_kwargs={"k":1})

In [88]:
# retrieve the most similar food description and its nutrients
def retrieve_food_and_nutrients(retriever,query):
  results=retriever.get_relevant_documents(query)
  if not results:
    return None,None
  best_match=results[0]
  food_description=best_match.metadata.get("food_description")
  nutrients=best_match.metadata.get("nutrients")

  return food_description, nutrients

## Retrive similar recipe


### Prepare the RAG + Vector

In [56]:
def load_and_process_json(file_path):
    with open(file_path, "r") as file:
        data = json.load(file)

    # Process each recipe
    processed_data = []
    for record in data:
        # Extract page_content and metadata
        pure_ingredients = record.get("pure_ingredients", [])
        page_content = ", ".join(pure_ingredients) if isinstance(pure_ingredients, list) else ""

        metadata = {
            "recipe_title": record.get("recipe_title", ""),
            "recipe_id": record.get("recipe_id", ""),
            "pure_ingredients": page_content,  # Include processed ingredients in metadata
        }

        # Append processed record
        processed_data.append({"page_content": page_content, "metadata": metadata})

    return processed_data

In [66]:
#prepare the meta data
recipe_data_RAG = load_and_process_json(file_path_final_recipes)
recipe_data_documents = [
    Document(page_content=item["page_content"], metadata=item["metadata"])
    for item in recipe_data_RAG
]

#text splitter
text_splitter_2 = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits_recipe = text_splitter_2.split_documents(recipe_data_documents)

#save to vector
vectorstore_recipe=Chroma.from_documents(documents=all_splits_recipe,embedding=OpenAIEmbeddings())
retriever_recipe=vectorstore_recipe.as_retriever(search_type="similarity",search_kwargs={"k":5})

In [70]:
def retrieve_similar_recipe_id(retriever_recipe,input_ingredients):

  query = ", ".join(sorted(input_ingredients))
  results=retriever_recipe.get_relevant_documents(query)
  if not results:
    return None

  recipe_ingredient_set = set()
  for result in results:
    if result:
      recipe_id = result.metadata.get("recipe_id")
      if recipe_id:
        recipe_ingredient_set.add(recipe_id)

  return recipe_ingredient_set

In [71]:
def get_recipe_by_id(recipes, recipe_id):
    for recipe in recipes:
        if recipe.get("recipe_id") == recipe_id:
            return recipe
    return None

## Calculate Health Score

In [72]:
def get_health_score_with_rag(client,retriever,temp,topp,recipe):
  recipe_data = json.loads(recipe)
  # Extract title, ingredients, and instructions safely
  recipe_title = recipe_data["title"]
  recipe_ingredient_list = recipe_data["ingredients"]
  pure_ingredients = recipe_data["pure_ingredients"]
  instructions = recipe_data["instructions"]

  nutrient_map=[]
  for ingredient in pure_ingredients:
    matched_food, nutrients=retrieve_food_and_nutrients(retriever,ingredient)
    nutrient_map.append(nutrients)

  if not nutrient_map:
    return{
        "error": "No relevant nutrient map found for the given ingredient name."
    }
  sys_prompt=""
  user_prompt=f"""
  You are a helpful assistant that can evaluate the recipes' healthiness.
  You only need to consider 7 key macronutrients and their ranges to assess a recipe’s healthiness:
  Proteins: 10%-15% of total energy
  Carbohydrates: 55%-75% of total energy
  Sugars: less than 10% of total energy
  Sodium: less than 5 grams
  Fats: 15%-30% of total energy
  Saturated Fats: less than 10% of total energy
  Fibers: more than 25 grams

  Here's the generated recipe:
  Recipe Title: {recipe_title}
  Ingredients and Measurements: {recipe_ingredient_list}
  Nutrient Map: {nutrient_map}

  Evaluation Instructions:
  - Find the 7 key macronutrients of each ingredient (ingredient_name in the nutrient map). Add up each types of macronutrient of each ingreident to get the total content of each macronutrient.
  - Evaluate if each macronutrient is in range of evaluation criteria (1 point if yes, 0 if no).
  - Use one sentence to explain why each macronutrients is 1 or 0.
  - Sum the points to get a health score (0-7).

  The sample evaluation result and health score for the first recipe:
  Summary of Points:
  Proteins: 0 points
  Carbohydrates: 1 point (potatoes provide substantial carbohydrates)
  Sugars: 1 point (natural sugars in potatoes and cream are likely to be less than 10% of total energy)
  Sodium: 1 point (assuming moderate salt use, likely to stay under 5 grams)
  Fats: 1 point (butter, olive oil, cream are high in fats, possibly within the 15%-30% range)
  Saturated Fats: 0 points (butter, cream, and sour cream likely push this over 10%)
  Fibers: 0 points (likely less than 25 grams, as potatoes are not high in fibers)
  Total Health Score: 4

  Calculate the health score. The output should only contain the following attributes:
  - title: recipe title
  - ingredients: each ingredient must have measurement in the recipe
  - instructions: step by step instructions with numbering
  - summary of points: name of the key macronutrients and their corresponding points. (use one sentence to explain why each macronutrients is 1 or 0 like the sample above, right after the point, don't make another paragraph)
  - total health score: the number of the total health score.
  The output must be a string in JSON format that contains the above attributes. Do not need to specify the format type(i.e. json) at the beginning of the output string.
  """
  response=get_API_response(client,user_prompt,user_prompt,temp,topp)
  return response

## Recipe Generation

In [91]:
client = OpenAI(api_key="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")
file_path_recipe = file_path_final_recipes
recipe_generated_path = "./generated_result/recipe_generated.json"

In [92]:
def get_generate_sys_prompt(recipes_file_path, ingredients,retriever_ingre,retriever_recipe,provide_example=True):

  ingredients = sorted([ingredient.lower() for ingredient in ingredients])

  sample_recipes_text=''

  if provide_example:
    #Retrive sample recipe
    all_sample_reciples = load_file_content(recipes_file_path)
    recipe_examples = []
    similar_recipes_ids = retrieve_similar_recipe_id(retriever_recipe,ingredients)

    for eachID in similar_recipes_ids:
      eachRecipe = get_recipe_by_id(all_sample_reciples, eachID)
      title = eachRecipe.get("recipe_title", "Untitled Recipe")
      ingredients = eachRecipe.get("ingredients", [])
      pure_ingredients = eachRecipe.get("processed_output", {}).get("pure_ingredients", [])
      instructions = eachRecipe.get("processed_output", {}).get("step_by_step_instructions", [])
      formatted_recipe = (
          f"Title: {title}\n"
          f"Ingredients: {', '.join(ingredients)}\n"
          f"Pure Ingredients: {' '.join(pure_ingredients)}\n"
          f"Instructions: {' '.join(instructions)}\n"
      )
      recipe_examples.append(formatted_recipe)
      sample_recipes_text = "\n\n".join(recipe_examples)



  #Retrive nutrient content
  nutrient_map=[]
  for ingredient in ingredients:
    matched_food, nutrients=retrieve_food_and_nutrients(retriever_ingre,ingredient)
    nutrient_map.append(nutrients)


  print(sample_recipes_text)
  print(nutrient_map)

  sys_prompt = f'''
    You are a helpful assistant that can generate a healthy recipe based on some sample recipes and their evaluations of healthiness.

    Here's some sample recipes and nutrient map for reference:
    {f'sample recipes: {sample_recipes_text}' if provide_example else ''}
    nutrient map: {nutrient_map}

    Here's the evaluation criteria:
    You only need to consider 7 key macronutrients and their ranges to assess a recipe's healthiness:
    Proteins: 10%-15% of total energy
    Carbohydrates: 55%-75% of total energy
    Sugars: less than 10% of total energy
    Sodium: less than 5 grams
    Fats: 15%-30% of total energy
    Saturated Fats: less than 10% of total energy
    Fibers: more than 25 grams

    Evaluation Instructions:
    - Find the 7 key macronutrients of each ingredient (ingredient_name in the nutrient map). Add up each types of macronutrient of each ingreident to get the total content of each macronutrient.
    - Evaluate if each macronutrient is in range of evaluation criteria (1 point if yes, 0 if no).
    - Sum the points to get a health score (0-7).

    Task:
    - Create a recipe using only user-provided ingredients and tools that has the highest health score (health score must be greater or equal to 5).
    - Choose ingredients with high nutrients based on the nutrient map and evaluation criteria (not all ingredients have to be chosen for the recipe, only choose the ones with good nutrients).
    - Adjust ingredient amounts to maximize health score.

    The output should have the following attributes:
    - title: recipe title
    - ingredients: each ingredient must have measurement in the recipe
    - pure_ingredients: only ingredient names (without measurements)
    - instructions: step by step instructions
    The output must be a string in JSON format that contains the above attributes. Do not need to specify the format type(i.e. json) at the beginning of the output string.
  '''
  return sys_prompt

In [77]:
def load_file_content(file_path):
    """Loads and returns the content of the file as a string."""
    try:
        with open(file_path, "r") as file:
            return json.load(file)
    except FileNotFoundError:
        return "File not found. Please check the file path."

In [78]:
def get_recipe(client, ingredients, tools, temp, topp, file_path,provide_example=True):

    user_prompt = (
        f"I have the following ingredients: {', '.join(ingredients)}.\n"
        f"I also have these cooking tools: {', '.join(tools)}.\n"
    )
    sys_prompt = get_generate_sys_prompt(file_path, ingredients,retriever,retriever_recipe,provide_example)
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": sys_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=temp,
            top_p=topp
        )

        recipe_generated = completion.choices[0].message.content.strip()

        return recipe_generated
    except Exception as e:
        return f"An error occurred: {e}"

#### Testing: Generated Recipe with provided example

In [93]:
# main
print("Welcome to RecipePrep!")

ingredients_input = input("Enter your ingredients, separated by commas: ").strip().split(",")
avail_ingredients = [ingredient.strip() for ingredient in ingredients_input]

tools_input = input("Enter your cooking tools, separated by commas: ").strip().split(",")
avail_tools = [tool.strip() for tool in tools_input]

# Generate recipe using the inputs
print("\nGenerating your recipe...\n")
recipe = get_recipe(client, avail_ingredients, avail_tools, 1, 1, file_path_recipe)
eval = get_health_score_with_rag(client, retriever, 1, 1, recipe)

with open("final_recipe_with_score", "w") as file:
    json.dump(eval, file, indent=4)
print("Recipe generated and saved")

print("Here's your recipe:\n")
print(eval)

Welcome to RecipePrep!
Enter your ingredients, separated by commas: rice,egg, tomato,carrot
Enter your cooking tools, separated by commas: fying pan, bowl

Generating your recipe...

Title: Salad on a Stick
Ingredients: 2 carrots, thinly sliced diagonally, 1 cucumber, thinly sliced diagonally, 1/2 head iceberg lettuce, cut into 2 1/2-inch chunks, 1 cup grape or cherry tomatoes, Blue cheese dressing, for serving
Pure Ingredients: 
Instructions: 

["{'food_code': 2380, 'nutrient_value': 0, 'nutrient_web_name': 'Fatty acids, trans, total', 'unit': 'g'}{'food_code': 2380, 'nutrient_value': 0.014, 'nutrient_web_name': 'Fatty acids, monounsaturated, total', 'unit': 'g'}{'food_code': 2380, 'nutrient_value': 0.037, 'nutrient_web_name': 'Fatty acids, saturated, total', 'unit': 'g'}{'food_code': 2380, 'nutrient_value': 0.117, 'nutrient_web_name': 'Fatty acids, polyunsaturated, total', 'unit': 'g'}{'food_code': 2380, 'nutrient_value': 0.24, 'nutrient_web_name': 'Total Fat', 'unit': 'g'}{'food_cod

#### Testing: Generated example Without providing example


In [94]:
client_2 = OpenAI(api_key="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")

In [96]:
# main
print("Welcome to RecipePrep!")

ingredients_input = input("Enter your ingredients, separated by commas: ").strip().split(",")
avail_ingredients = [ingredient.strip() for ingredient in ingredients_input]

tools_input = input("Enter your cooking tools, separated by commas: ").strip().split(",")
avail_tools = [tool.strip() for tool in tools_input]

# Generate recipe using the inputs
print("\nGenerating your recipe...\n")
recipe = get_recipe(client_2, avail_ingredients, avail_tools, 1, 1, file_path_recipe,provide_example=False)
eval = get_health_score_with_rag(client, retriever, 1, 1, recipe)

with open("final_recipe_with_score", "w") as file:
    json.dump(eval, file, indent=4)
print("Recipe generated and saved")

print("Here's your recipe:\n")
print(eval)

Welcome to RecipePrep!
Enter your ingredients, separated by commas:  rice,egg, tomato,carrot
Enter your cooking tools, separated by commas: fying pan, bowl

Generating your recipe...


["{'food_code': 2380, 'nutrient_value': 0, 'nutrient_web_name': 'Fatty acids, trans, total', 'unit': 'g'}{'food_code': 2380, 'nutrient_value': 0.014, 'nutrient_web_name': 'Fatty acids, monounsaturated, total', 'unit': 'g'}{'food_code': 2380, 'nutrient_value': 0.037, 'nutrient_web_name': 'Fatty acids, saturated, total', 'unit': 'g'}{'food_code': 2380, 'nutrient_value': 0.117, 'nutrient_web_name': 'Fatty acids, polyunsaturated, total', 'unit': 'g'}{'food_code': 2380, 'nutrient_value': 0.24, 'nutrient_web_name': 'Total Fat', 'unit': 'g'}{'food_code': 2380, 'nutrient_value': 0.93, 'nutrient_web_name': 'Protein', 'unit': 'g'}{'food_code': 2380, 'nutrient_value': 2.44, 'nutrient_web_name': 'Fibre, total dietary', 'unit': 'g'}{'food_code': 2380, 'nutrient_value': 4.74, 'nutrient_web_name': 'Sugars, total', 'uni