# Calculate Health Score Using RAG

In [None]:
pip install -qU langchain-openai


In [None]:
pip install langchain-community


In [None]:
pip install jq


In [None]:
pip install langchain-chroma

In [5]:
import getpass
import os

# Prompt for the key only when it is not configured yet, so re-running this cell
# (or running where the variable is pre-set) neither blocks on input nor
# overwrites a valid key. getpass hides the typed value from the cell output.
if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"]=getpass.getpass("OpenAI API key: ")

from langchain_openai import ChatOpenAI

# Chat model handle (presumably picks the key up from OPENAI_API_KEY — confirm).
llm=ChatOpenAI(model="gpt-4o")

··········


In [6]:
# Mount Google Drive (Colab only) so the /content/drive/MyDrive paths used below are readable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import json
from langchain_community.document_loaders import JSONLoader
from pathlib import Path
from pprint import pprint

def metadata_fuc(record:dict, metadata:dict)->dict:
  """Attach the ingredient name and a flattened nutrient string to a document's metadata.

  Args:
    record: One entry of the nutrient map; read keys are "ingredient_name" and
      "nutrients" (an iterable of nutrient entries).
    metadata: Metadata dict supplied by the loader; updated in place.

  Returns:
    The same `metadata` dict with "ingredient_name" and "nutrients" set.
    "nutrients" is the concatenation of `str()` of every nutrient entry, which
    is the text format the scoring code later parses with a regex.
  """
  metadata["ingredient_name"]=record.get("ingredient_name")
  # `or []` keeps records with a missing/null "nutrients" field from raising
  # TypeError in map(); they simply get an empty nutrient string.
  metadata["nutrients"]=''.join(map(str,record.get("nutrients") or []))
  return metadata

file_path='/content/drive/MyDrive/ECE1786/ingredient_nutrient_map_5.json'
# The loader arguments below can be adapted to the structure of the nutrient map.
# jq_schema ".[]" iterates over the elements of the top-level JSON value;
# content_key selects "ingredient_name" as the document content, and
# metadata_fuc adds the ingredient name and nutrient string to each document's metadata.
loader=JSONLoader(
    file_path=file_path,
    jq_schema=".[]",
    content_key="ingredient_name",
    metadata_func=metadata_fuc
)
# Read the file and build the documents.
data=loader.load()

In [8]:
# Sanity check: show the flattened nutrient string stored on the first loaded document.
print(data[0].metadata.get("nutrients"))

{'value': 0, 'nutrient_name': 'Protein', 'unit': 'g'}{'value': 0, 'nutrient_name': 'Carbohydrate', 'unit': 'g'}{'value': 0, 'nutrient_name': 'Sugars, total', 'unit': 'g'}{'value': 0, 'nutrient_name': 'Fibre, total dietary', 'unit': 'g'}{'value': 0, 'nutrient_name': 'Sodium, Na', 'unit': 'mg'}{'value': 9.1, 'nutrient_name': 'Fatty acids, saturated, total', 'unit': 'g'}{'value': 100, 'nutrient_name': 'Total Fat', 'unit': 'g'}{'value': 885, 'nutrient_name': 'Energy (kcal)', 'unit': 'kCal'}{'value': 3699, 'nutrient_name': 'Energy (kJ)', 'unit': 'kJ'}


## Split documents into chunks for embedding and vector storage

In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=1000 and chunk_overlap=200.
# add_start_index=True presumably records each chunk's start offset in its metadata — confirm.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
# Split every loaded document; the resulting chunks are what gets embedded.
all_splits = text_splitter.split_documents(data)

In [10]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings


# Embed every chunk with OpenAIEmbeddings and index it in a Chroma vector store.
# NOTE(review): no collection name or persist directory is passed; presumably
# re-running this cell in the same kernel adds the same documents again — confirm.
vectorstore=Chroma.from_documents(documents=all_splits,embedding=OpenAIEmbeddings())

## Retrieve

In [11]:
# Similarity-search retriever configured to return a single document per query (k=1).
retriever=vectorstore.as_retriever(search_type="similarity",search_kwargs={"k":1})

# retrieve the most similar food description and its nutrients
def retrieve_food_and_nutrients(retriever,query):
  """Look up the stored ingredient that best matches `query`.

  Args:
    retriever: Object exposing `invoke(query)` (the current LangChain retriever
      entry point) or, for older versions, `get_relevant_documents(query)`.
      Each returned item is expected to carry a `.metadata` dict.
    query: Ingredient text to search for.

  Returns:
    A tuple `(ingredient_name, nutrients)` read from the first result's
    metadata, or `(None, None)` when the retriever returns no results.
  """
  # `get_relevant_documents` is deprecated in LangChain in favour of `invoke`;
  # prefer the new call and fall back only for retrievers that lack it.
  search=getattr(retriever,"invoke",None) or retriever.get_relevant_documents
  results=search(query)
  if not results:
    return None,None
  # Tolerate a hit without metadata instead of failing on `.get`.
  hit_metadata=results[0].metadata or {}
  return hit_metadata.get("ingredient_name"),hit_metadata.get("nutrients")

## Generate

In [12]:
# Convert an ingredient quantity given in a kitchen unit to grams.
# Format of ingredient_dict, e.g. {'value': '3', 'unit': 'tablespoon', 'name': 'rice vinegar'}
# (parsed from the ingredient line "3 tablespoons rice vinegar").
def convert_to_grams(ingredient_dict):
  """Convert an ingredient quantity to grams, updating the dict in place.

  Args:
    ingredient_dict: Dict with keys 'value' (quantity as text such as '3',
      '1/2' or '1 1/2'; plain numbers are accepted too), 'unit' and 'name'.

  Returns:
    The same dict object with 'value' replaced by the weight in grams (float)
    and 'unit' set to 'gram'. When the unit is not in the conversion table or
    the quantity cannot be parsed, the weight falls back to 100.
  """
  # Local imports keep this cell self-contained.
  import re
  from fractions import Fraction

  # Grams per unit. Volume units use one fixed factor regardless of ingredient.
  convert_table={
      'tablespoon':17.07,
      'teaspoon': 5.69,
      'ounce':28.35,
      'cup':150.00,
      'lb':453.59,
      'pound':453.59,
      'tbsp':17.07,
      'tsp':5.69,
      'oz':28.35,
      'kg':1000.00,
      'kilogram':1000.00,
      'gram': 1.00,
      'g':1.00,
      'mg': 0.001
      }
  default_grams=100

  def _parse_quantity(raw):
    """Return the quantity as a float, or None when it is not a recognised number."""
    if isinstance(raw,bool):
      return None
    if isinstance(raw,(int,float)):
      return float(raw)
    if not isinstance(raw,str):
      return None
    text=raw.strip()
    # Plain integer or decimal: "3", "0.5", ".5", "1."
    if re.fullmatch(r"\d+(?:\.\d*)?|\.\d+",text):
      return float(text)
    # Fraction with an optional whole part: "1/2", "1 1/2"
    mixed=re.fullmatch(r"(?:(\d+)\s+)?(\d+)\s*/\s*(\d+)",text)
    if mixed and int(mixed.group(3))!=0:
      whole=int(mixed.group(1) or 0)
      return float(whole+Fraction(int(mixed.group(2)),int(mixed.group(3))))
    return None

  unit=ingredient_dict['unit']
  # Case-insensitive lookup so "Cup" / "Tbsp" resolve like their lowercase forms.
  unit_key=unit.strip().lower() if isinstance(unit,str) else unit
  convert_factor=convert_table.get(unit_key)
  # The quantity comes from recipe text, so it is parsed explicitly rather than
  # passed to eval(): no arbitrary code runs, and mixed numbers are understood.
  quantity=_parse_quantity(ingredient_dict['value'])
  if quantity is None or not convert_factor:
    convert_value=default_grams
  else:
    convert_value=quantity*convert_factor
  ingredient_dict['value']=convert_value
  ingredient_dict['unit']='gram'
  return ingredient_dict


In [24]:
import re

def get_health_score_with_rag(retriever,recipe,verbose=True):
  """Score a recipe against seven nutrient criteria using retrieved nutrient data.

  Each ingredient line is parsed into quantity / unit / name, converted to grams
  with `convert_to_grams`, matched to a stored ingredient with
  `retrieve_food_and_nutrients`, and the matched nutrient values are added to
  running totals scaled by grams/100. The totals are then checked against the
  thresholds below; every passed check adds one point.

  Args:
    retriever: Retriever handed to `retrieve_food_and_nutrients`.
    recipe: Dict with "processed_ingredients" (list of ingredient lines) and
      "pure_ingredients" (list of ingredient names). A missing or null list is
      treated as empty.
    verbose: When True (default, the original behavior) print the matched
      ingredient and its parsed nutrient tuples for every ingredient.

  Returns:
    `(health_score, score_summary)`: the number of passed checks (0-7) and a
    dict mapping each criterion label to 1 (passed) or 0.
  """
  ingredients=recipe.get("processed_ingredients") or []
  pure_ingredients=recipe.get("pure_ingredients") or []
  # Pre-cleaned names are only trusted when they line up one-to-one with the lines.
  use_pure_names=len(pure_ingredients)==len(ingredients)
  nutrient_map={
      "Protein":0,
      "Carbohydrate":0,
      "Sugars, total":0,
      "Sodium, Na":0,
      "Total Fat":0,
      "Fatty acids, saturated, total":0,
      "Fibre, total dietary": 0,
      "Energy (kJ)": 0,
  }
  # Leading quantity (digits, spaces, "/" and "." so that "1.5 cups" keeps its
  # decimal part), an optional alphabetic unit word, then the rest of the line.
  ingredient_pattern=re.compile(r"([\d\s/.]+)\s*([a-zA-Z]+)?\s*(.*)")
  # Matches the str(dict) form of each nutrient entry stored in the metadata.
  nutrient_pattern=re.compile(r"'value': ([\d.]+), 'nutrient_name': '([^']+)'")

  for i,ingredient in enumerate(ingredients):
    match=ingredient_pattern.match(ingredient)
    if not match:
      # Lines without a leading quantity contribute nothing to the totals.
      continue
    quantity=match.group(1).strip()
    unit=match.group(2) or ""
    name=pure_ingredients[i] if use_pure_names else match.group(3).strip()
    if unit.endswith("s"):  # Handle plural forms
      unit=unit[:-1]

    ingredient_dict=convert_to_grams({"value": quantity, "unit": unit, "name": name})
    matched_ingredient,nutrients=retrieve_food_and_nutrients(retriever,ingredient_dict["name"])
    if verbose:
      print(matched_ingredient)
    # Retrieval may return no nutrients (None); treat that as "nothing to add"
    # instead of letting the regex search raise TypeError.
    matches=nutrient_pattern.findall(nutrients or "")
    if verbose:
      print(matches)
    grams=ingredient_dict["value"]
    for amount,nutrient_name in matches:
      if nutrient_name in nutrient_map:
        nutrient_map[nutrient_name]+=float(amount)*grams/100

  # Macronutrient gram totals are multiplied by fixed factors (17 or 37) and
  # compared with fractions of the accumulated 'Energy (kJ)' total.
  total_energy=nutrient_map['Energy (kJ)']
  protein_energy=nutrient_map['Protein']*17
  carbo_energy=nutrient_map['Carbohydrate']*17
  fat_energy=nutrient_map['Total Fat']*37
  sugar_energy=nutrient_map['Sugars, total']*17
  sat_fat_energy=nutrient_map['Fatty acids, saturated, total']*37

  # NOTE(review): when no nutrient data accumulates (all totals 0) every check
  # except fibre passes trivially, so such a recipe scores 6 — confirm intent.
  checks={
      "Proteins": total_energy*0.1<=protein_energy<=total_energy*0.35,
      "Carbohydrates": total_energy*0.45<=carbo_energy<=total_energy*0.75,
      "Sugars": sugar_energy<=total_energy*0.1,
      "Sodium": nutrient_map['Sodium, Na']<=500,
      "Fats": total_energy*0.15<=fat_energy<=total_energy*0.3,
      "Saturated Fats": sat_fat_energy<=total_energy*0.10,
      "Fibers": nutrient_map['Fibre, total dietary']>=6,
  }
  score_summary={label:int(passed) for label,passed in checks.items()}
  health_score=sum(score_summary.values())
  return health_score, score_summary

### Test

In [14]:
# Smoke test: load the first recipe batch and score one recipe (index 1) end to end.
# Note that this rebinds `file_path`, which earlier pointed at the nutrient map.
file_path="/content/drive/MyDrive/ECE1786/processed_recipes_init_200_batch_1.json"
with open(file_path,"r") as file:
  recipes=json.load(file)
recipe=recipes[1]
# Debug helpers for inspecting the chosen recipe:
# print(recipe["processed_ingredients"])
# print(len(recipe["pure_ingredients"]))
# Prints the (health_score, score_summary) tuple returned for this recipe.
print(get_health_score_with_rag(retriever,recipe))

  results=retriever.get_relevant_documents(query)


potato
[('0', 'Fatty acids, saturated, total'), ('3.13', 'Total Fat'), ('6.3', 'Fibre, total dietary'), ('9.38', 'Sugars, total'), ('12.5', 'Protein'), ('47.07', 'Carbohydrate'), ('266', 'Energy (kcal)'), ('360', 'Energy (kJ)'), ('375', 'Sodium, Na')]
garlic
[('0.089', 'Fatty acids, saturated, total'), ('0.5', 'Total Fat'), ('1', 'Sugars, total'), ('2.1', 'Fibre, total dietary'), ('6.36', 'Protein'), ('17', 'Sodium, Na'), ('33.06', 'Carbohydrate'), ('149', 'Energy (kcal)'), ('623', 'Energy (kJ)')]
salt
[('0', 'Carbohydrate'), ('0', 'Energy (kcal)'), ('0', 'Energy (kJ)'), ('0', 'Sugars, total'), ('0', 'Fibre, total dietary'), ('0', 'Protein'), ('0', 'Total Fat'), ('0', 'Fatty acids, saturated, total'), ('38758', 'Sodium, Na')]
pepper
[('1.04', 'Protein'), ('1.557', 'Fatty acids, saturated, total'), ('1.8', 'Fibre, total dietary'), ('4.28', 'Sugars, total'), ('6.57', 'Carbohydrate'), ('12.75', 'Total Fat'), ('21', 'Sodium, Na'), ('133', 'Energy (kcal)'), ('555', 'Energy (kJ)')]
mustard
[

### Add `total_health_score` to the JSON file

In [25]:
# Score every recipe of batch 3 and write the annotated recipes to a new JSON file.
file_path="/content/drive/MyDrive/ECE1786/processed_recipes_init_200_batch_3.json"
# Explicit UTF-8 so reading does not depend on the platform's default encoding.
with open(file_path,"r",encoding="utf-8") as file:
  recipes=json.load(file)

for i,recipe in enumerate(recipes):
  print(f"Recipe {i}")
  health_score,score_summary=get_health_score_with_rag(retriever,recipe)
  # Each recipe dict is annotated in place with its score and per-criterion summary.
  recipe["total_health_score"]=health_score
  recipe["summary_of_points"] = score_summary

output_file_path="/content/drive/MyDrive/ECE1786/scored_recipes_init_200_batch_3.json"
with open(output_file_path, "w",encoding="utf-8") as file:
  json.dump(recipes, file, indent=4)
# Report success only after the `with` block has closed (and flushed) the file.
print("health score in recipes has been saved")

Recipe 0
eggs
[('1.39', 'Sugars, total'), ('3.047', 'Carbohydrate'), ('3.312', 'Fatty acids, saturated, total'), ('10.388', 'Protein'), ('11.394', 'Total Fat'), ('153', 'Energy (kcal)'), ('369.155', 'Sodium, Na'), ('640', 'Energy (kJ)'), ('0', 'Fibre, total dietary')]
brown sugar
[('0', 'Total Fat'), ('0', 'Fibre, total dietary'), ('0', 'Fatty acids, saturated, total'), ('0.12', 'Protein'), ('28', 'Sodium, Na'), ('97.02', 'Sugars, total'), ('98.09', 'Carbohydrate'), ('380', 'Energy (kcal)'), ('1590', 'Energy (kJ)')]
vanilla
[('0.9', 'Fibre, total dietary'), ('3.37', 'Protein'), ('3.962', 'Fatty acids, saturated, total'), ('6.52', 'Total Fat'), ('13.63', 'Sugars, total'), ('19.59', 'Carbohydrate'), ('81', 'Sodium, Na'), ('148', 'Energy (kcal)'), ('620', 'Energy (kJ)')]
zucchini
[('1.178', 'Fibre, total dietary'), ('1.257', 'Fatty acids, saturated, total'), ('2.056', 'Sugars, total'), ('2.585', 'Protein'), ('9.284', 'Carbohydrate'), ('13.748', 'Total Fat'), ('129.843', 'Sodium, Na'), ('1