In [1]:
from openai import OpenAI
from pinecone.grpc import PineconeGRPC as Pinecone

In [2]:
# Name of the Pinecone index that every query in this notebook targets.
index_name = "branded-food-data"

# The client is built without explicit arguments.
# NOTE(review): presumably it picks up credentials from the environment -- confirm.
pc = Pinecone()

# Handle used for vector queries against the index named above.
index = pc.Index(index_name)

In [7]:
def get_food_nutrients(data):
    """Turn retrieved documents into a list of name/nutrient records.

    Args:
        data: Iterable of items that support ``item['metadata']``, where the
            metadata object offers a dict-style ``.get``.

    Returns:
        A list with one dict per input item. ``"Food Name"`` holds the
        metadata's ``FOOD_NAME`` value (or ``"Unknown Food Name"`` when that
        key is absent) and ``"Nutrients"`` holds the metadata object itself,
        not a copy.
    """
    return [
        {
            "Food Name": match['metadata'].get("FOOD_NAME", "Unknown Food Name"),
            "Nutrients": match['metadata'],
        }
        for match in data
    ]

def generate_prompt(query, food_data):
    """Build the full LLM prompt for a user question and retrieved food data.

    Args:
        query: The user's question; substituted for ``{query}``.
        food_data: The retrieved food records; substituted for
            ``{food_data}`` using their ``str()`` form.

    Returns:
        The instruction template with both placeholders filled in.
    """
    # The literal is sent to the model verbatim, so its leading indentation
    # and the whitespace-only line are part of the prompt.
    template = """
    Answer the user's query: {query}

    Using the food data information provided. Not all data will be required to properly answer the user's query. Only use the data for the food which they are asking about. Don't interpolate information.
    For example, if the user asks about potential allergens, answer analytically, using the information available to you to cite the source. You have data from the FDA loaded into your context, such as food data directly from food labels, ensuring accuracy. Therefore, there is no need to advise the user to "always check the back of the packaging for the most up-to-date information," as the data provided comes from reliable sources, including public and private FDA data gathering methods.

    Nutrient values are provided as concentrations per 100 grams of the edible portion of the food. If a nutrient value is listed as 0.0, it indicates that the nutrient is present in such a small quantity that it falls below the detectable limit (Limit of Quantification (LOQ)).

    Serving size information is available, but nutrient data is consistently expressed per 100 grams/milliliters. If the user asks for serving size data, provide the available information, but clarify that the nutrient values themselves are not based on the serving size.

    The user may input their question about a very specific branded food item, or a more general food item, such as an ingredient. In the case where they ask about a specific branded food item by name, use only the data associated with that food item. In the case where the user asks about a general food, you'll have multiple brands of that food available in your context—choose the most general version of that product in that case.

    Don't use markdown formatting.
    
    If the information provided isn't enough to accurately answer the question, reply with "I'm sorry, I don't have enough information to accurately answer that question.".

    Make sure to start the response by mentioning the item you are referring to:

    Food Data: {food_data}
    """
    substitutions = {"query": query, "food_data": food_data}
    return template.format_map(substitutions)

def get_completion(query, food_data, *, model="gpt-4o", client=None):
    """Ask the chat model to answer ``query`` using ``food_data``.

    Args:
        query: The user's question.
        food_data: Retrieved food records to embed in the prompt.
        model: Chat model name passed to the completions endpoint. Defaults
            to ``"gpt-4o"``, the value that was previously hard-coded.
        client: Optional pre-built ``OpenAI`` client to reuse across calls.
            When omitted, a new client is created for this call, as before.

    Returns:
        The ``content`` of the first choice's message, exactly as the API
        response provides it.
    """
    # Allow callers to share one client rather than building one per request.
    if client is None:
        client = OpenAI()
    prompt = generate_prompt(query, food_data)

    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt},
        ],
    )
    return completion.choices[0].message.content

def similarity_search(query, index, pc, top_k=10):
    """Embed ``query`` and return the nearest matches from ``index``.

    Args:
        query: Text to embed and search for.
        index: Object exposing ``query(vector=..., top_k=..., include_metadata=...)``.
        pc: Object exposing ``inference.embed(model=..., inputs=..., parameters=...)``.
        top_k: Maximum number of matches to request.

    Returns:
        The ``'matches'`` entry of the query result, or an empty list when
        the embedding response is empty, its first item has no ``'values'``,
        or the result's matches are falsy.
    """
    embeddings = pc.inference.embed(
        model="multilingual-e5-large",
        inputs=[query],
        parameters={"input_type": "query"},
    )

    # Nothing usable came back from the embedding call: skip the index query.
    if not embeddings or 'values' not in embeddings[0]:
        return []

    response = index.query(
        vector=embeddings[0]['values'],
        top_k=top_k,
        include_metadata=True,
    )
    return response['matches'] or []

def retrieve_food_data_from_pinecone(query):
    """Search for ``query`` and return the matches as food records.

    Runs ``similarity_search`` with the module-level ``index`` and ``pc``
    objects, then passes the matches through ``get_food_nutrients``.

    Args:
        query: The user's question, used as the search text.

    Returns:
        The list produced by ``get_food_nutrients`` for the retrieved matches.
    """
    matches = similarity_search(query, index, pc)
    return get_food_nutrients(matches)

In [8]:
# Example question about one specific branded product.
user_query = (
    "Give me a detailed nutrient breakdown of and potential allergens within "
    "Oreo Cookies Milk Chocolate Covered"
)

# Retrieve matching food records, then have the chat model answer from them.
retrieved_food_data = retrieve_food_data_from_pinecone(user_query)
response = get_completion(user_query, retrieved_food_data)
print(response)

Food Item: Oreo Cookies Milk Chocolate Covered

Nutrient Breakdown per 100 grams:
- Carbohydrates: 67.0 g
- Sugars: 47.0 g
- Dietary Fiber: 2.3 g
- Protein: 4.8 g
- Total Fat: 26.0 g
  - Saturated Fat: 14.5 g
- Sodium: 230.0 mg

Potential Allergens:
The ingredients list indicates the presence of several potential allergens:
- Wheat (from wheat flour and wheat starch)
- Milk (from skimmed milk powder, lactose, and anhydrous milk fat)
- Soy (from soya lecithin and sunflower lecithin)

These common allergens could pose a risk to individuals with allergies to wheat, milk, or soy.
