In [1]:
from openai import OpenAI
from langchain_pinecone import PineconeEmbeddings, PineconeVectorStore

In [4]:
def get_food_nutrients(data):
    """Convert retrieved documents into plain food/nutrient records.

    Args:
        data: Iterable of document-like objects exposing ``page_content``
            and ``metadata`` attributes.

    Returns:
        list[dict]: One dict per input document, in input order, with the
        keys ``"Food Name"`` (the document's ``page_content``) and
        ``"Nutrients"`` (the document's ``metadata``).
    """
    return [
        {"Food Name": document.page_content, "Nutrients": document.metadata}
        for document in data
    ]

def generate_prompt(query, food_data):
    """Render the instruction prompt sent to the chat model.

    The user's query is substituted near the top of the prompt and the
    string form of ``food_data`` is appended after the ``Food Data:`` label
    at the end.

    Args:
        query: The user's question; inserted verbatim.
        food_data: Food records to expose to the model; inserted using
            their default string formatting.

    Returns:
        str: The fully rendered prompt text.
    """
    # The literal text below (including its leading indentation) is part of
    # the prompt the model receives, so it is kept exactly as written.
    return f"""
    Answer the user's query: {query}

    Using the food data information provided. Not all data will be required to properly answer the user's query. Only use the data for the food which they are asking about. Don't interpolate information.
    For example, if the user asks about potential allergens, answer analytically, using the information available to you to cite the source. You have data from the FDA loaded into your context, such as food data directly from food labels, ensuring accuracy. Therefore, there is no need to advise the user to "always check the back of the packaging for the most up-to-date information," as the data provided comes from reliable sources, including public and private FDA data gathering methods.

    Nutrient values are provided as concentrations per 100 grams of the edible portion of the food. If a nutrient value is listed as 0.0, it indicates that the nutrient is present in such a small quantity that it falls below the detectable limit (Limit of Quantification (LOQ)).

    Serving size information is available, but nutrient data is consistently expressed per 100 grams. If the user asks for serving size data, provide the available information, but clarify that the nutrient values themselves are not based on the serving size.

    The user may input their question about a very specific branded food item, or a more general food item, such as an ingredient. In the case where they ask about a specific branded food item by name, use only the data associated with that food item. In the case where the user asks about a general food, you'll have multiple brands of that food available in your context—choose the most general version of that product in that case.

    If the information provided isn't enough to accurately answer the question, reply with "I'm sorry, I don't have enough information to accurately answer that question.".

    Make sure to start the response by mentioning the item you are referring to:

    Food Data: {food_data}
    """

def get_completion(query, food_data):
    """Ask the chat model to answer ``query`` using the supplied food data.

    The prompt is built with ``generate_prompt`` and sent as a single
    ``user`` message to the ``gpt-4o`` chat-completions endpoint.

    Args:
        query: The user's question.
        food_data: Food records forwarded to ``generate_prompt``.

    Returns:
        The ``content`` of the first choice's message in the completion
        response.
    """
    prompt = generate_prompt(query, food_data)

    # A fresh client is created on every call; using it as a context manager
    # closes its underlying HTTP resources as soon as the request finishes
    # instead of leaving them open until garbage collection.
    with OpenAI() as client:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "user", "content": prompt}
            ]
        )

    return completion.choices[0].message.content

def retrieve_food_data_from_pinecone(query, k=10, index_name="food-data"):
    """Look up the food records most similar to ``query`` in a Pinecone index.

    Args:
        query: Text to search for.
        k: Number of results requested from the similarity search.
            Defaults to 10, the value previously hard-coded here.
        index_name: Name of the existing Pinecone index to query.
            Defaults to ``"food-data"``, the value previously hard-coded here.

    Returns:
        list[dict]: The search results converted by ``get_food_nutrients``
        into ``{"Food Name": ..., "Nutrients": ...}`` records.
    """
    # NOTE(review): the embedding model presumably has to match the one used
    # when the index was populated, so it is intentionally left fixed — confirm.
    embedding_model = PineconeEmbeddings(model="multilingual-e5-large")
    food_data_index = PineconeVectorStore.from_existing_index(index_name, embedding_model)
    retrieved_food_data = food_data_index.similarity_search(query, k=k)

    return get_food_nutrients(retrieved_food_data)


SyntaxError: invalid syntax (1696668197.py, line 41)

In [None]:
# Example end-to-end run: retrieve matching food records, then ask the model.
# The query text is passed unchanged to both the vector search and the prompt.
user_query = "TORTELLINI ALFREDO THREE CHEESE TORTELLINI IN ALFREDO SAUCE WITH BROCCOLI, TORTELLINI ALFREDO"
# Similarity search against the Pinecone index; returns a list of food/nutrient dicts.
retrieved_food_data = retrieve_food_data_from_pinecone(user_query)

# Send the query together with the retrieved records to the chat model and show its reply.
response = get_completion(user_query, retrieved_food_data)
print(response)

Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x00000227DCDCCD90>
