# Calories RAG - OpenAIVersion

In [31]:
import os
import glob
from dotenv import load_dotenv

load_dotenv(override=True)
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')

In [77]:
import json
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain_openai import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

embedding_function = OpenAIEmbeddings()

# loader = CSVLoader("./projected_food_dataset.csv", encoding="windows-1252")
loader = CSVLoader("./openfood_sanitized_dataset.csv", encoding="utf-8")
documents = loader.load()

In [66]:
db = Chroma.from_documents(documents, embedding_function)
retriever = db.as_retriever()

In [71]:
template = """
Get the nutritional data of the following food ingredient: {question}
The food ingredient data is intended to be for 100 grams of it. If you can't find the ingredient, return the values for it as raw/not cooked.

Answer the question based only on the following context: {context}

Reply only with a JSON that contains the following data: protein, carbohydrates and fats. 
Please be strict to this JSON format, if you don't know the answer use the context as much as possible.
"""
prompt = ChatPromptTemplate.from_template(template)

model = ChatOpenAI()

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [83]:
result = chain.invoke("proteic pudding")

In [84]:
res

'{\n    "protein": 12.0,\n    "carbohydrates": 4.0,\n    "fats": 2.5,\n    "calories": 118.5\n}'

In [81]:
json_res = json.loads(result)
json_res['calories'] = json_res['protein'] * 4 + json_res['carbohydrates'] * 4 + json_res['fats'] * 9

json_res

{'protein': 3.4, 'carbohydrates': 4.6, 'fats': 0.1, 'calories': 32.9}