# Calories RAG - OpenAI Version

In [1]:
import os
import glob
from dotenv import load_dotenv

# Load variables from a local .env file; override=True lets values in the
# file replace variables already present in the process environment.
load_dotenv(override=True)

# Ensure OPENAI_API_KEY is always defined: keep the existing value when there
# is one, otherwise fall back to the placeholder below.
os.environ.setdefault('OPENAI_API_KEY', 'your-key-if-not-using-env')

## Set up the MongoDB connection

In [2]:
import json
from pymongo import MongoClient
from openai import OpenAI
from sentence_transformers import SentenceTransformer

In [3]:
from urllib.parse import quote_plus

# Connection settings are read from the environment so that no credentials
# are hard-coded here. Store the raw (unescaped) values; escaping is applied
# when the URI is built below.
MONGO_DB_USER = os.getenv('MONGO_DB_USER')
MONGO_DB_PASSWORD = os.getenv('MONGO_DB_PASSWORD')
MONGO_DB_CLUSTER_NAME = os.getenv('MONGO_DB_CLUSTER_NAME')

# Fail early with a clear message instead of building a URI that contains the
# literal text "None" and failing later with an unrelated connection error.
_missing_settings = [
    name
    for name, value in (
        ('MONGO_DB_USER', MONGO_DB_USER),
        ('MONGO_DB_PASSWORD', MONGO_DB_PASSWORD),
        ('MONGO_DB_CLUSTER_NAME', MONGO_DB_CLUSTER_NAME),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
    )

DB_NAME = 'nutritional_rag'
COLLECTION_NAME = 'food'

# User name and password must be percent-escaped: characters such as '@',
# ':', '/' or '%' would otherwise be parsed as URI delimiters.
uri = (
    f"mongodb+srv://{quote_plus(MONGO_DB_USER)}:{quote_plus(MONGO_DB_PASSWORD)}"
    f"@{MONGO_DB_CLUSTER_NAME}.i1ndjzi.mongodb.net/"
    f"?retryWrites=true&w=majority&appName={MONGO_DB_CLUSTER_NAME}"
)

client = MongoClient(uri)
collection = client[DB_NAME][COLLECTION_NAME]

## Set up the vector search function

In [4]:
# Embedding model used to turn query text into a vector for the search below.
model = SentenceTransformer(
    'sentence-transformers/all-MiniLM-L6-v2',
)

In [5]:
# Define a function to run vector search queries
def get_query_results(query: str, limit: int = 5) -> list:
    """Return the stored documents most similar to ``query``.

    The query text is embedded with the module-level ``model`` and matched
    against the ``embedding`` field of ``collection`` through the
    ``food_vector_index`` index, using an exact (non-approximate) search.

    Args:
        query: Text to embed and search for.
        limit: Maximum number of documents to return. Defaults to 5, the
            value that was previously hard-coded.

    Returns:
        A list of result documents (dicts). The projection keeps only the
        ``text`` field and drops ``_id``.
    """
    query_embedding = model.encode(query).tolist()
    pipeline = [
        {
            "$vectorSearch": {
                "index": "food_vector_index",
                "queryVector": query_embedding,
                "path": "embedding",
                "exact": True,
                "limit": limit,
            }
        },
        {
            "$project": {
                "_id": 0,
                "text": 1,
            }
        },
    ]

    # Materialise the cursor so callers get a plain list they can reuse.
    return list(collection.aggregate(pipeline))

## Create the GPT query function

In [6]:
def get_nutritional_data(food):
    """Ask the chat model for the nutritional data of ``food``.

    Documents returned by ``get_query_results(food)`` are joined into a
    single context string and embedded in the prompt; the model is told to
    answer from that context and to reply with JSON only.

    Args:
        food: Name of the food ingredient to look up.

    Returns:
        The model's reply as a string, with Markdown code fences and
        newlines removed so it can be passed straight to ``json.loads``.

    Raises:
        ValueError: If the model returns no text content.
    """
    context = get_query_results(food)
    context_string = " - ".join(doc["text"] for doc in context)
    prompt = f"""
    Get the nutritional data of the following food ingredient: {food}
    The food ingredient data is intended to be for 100 grams of it. If you can't find the ingredient, return the values for it as raw/not cooked.
    
    Answer the question based only on the following context: {context_string}
    
    Reply only with a JSON that contains the following data: protein, carbohydrates, fats, calories, sugars, fibers. 
    Please be strict to this JSON format, if you don't know the answer use the context as much as possible.
    """

    openai_client = OpenAI()

    completion = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        # JSON mode: ask the API to return a syntactically valid JSON object
        # (the prompt already mentions JSON, which this mode requires).
        response_format={"type": "json_object"},
    )

    response = completion.choices[0].message.content
    if response is None:
        # content is optional in the API response (e.g. on a refusal);
        # report that explicitly instead of failing on ``None.replace``.
        raise ValueError(f"The model returned no content for food: {food!r}")

    # Kept as a safeguard in case the reply is still wrapped in a fenced block.
    return response.replace('```json', '').replace('```', '').replace('\n', '')
    

In [7]:
# Example run: fetch the model's reply for one ingredient, then parse the JSON.
raw_reply = get_nutritional_data("salmon fish")
json.loads(raw_reply)

{'protein': 22.56,
 'carbohydrates': 0.0,
 'fats': 5.57,
 'calories': 140,
 'sugars': 0.0,
 'fibers': 0.0}