# Sample prompt: take the first 10 rows of the processed results and ask GPT-3.5 to narrow each SIMILARFOODS set down to the entries that best match FoodName

In [1]:
from openai import OpenAI
import os
import pandas as pd
import ast

# Build the API client. The key is read from the OPENAI_API_KEY environment
# variable and handed to the constructor directly, instead of being assigned
# onto the client after it has already been constructed.
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

In [2]:
# Load the processed matching results produced by an earlier step.
df_new = pd.read_excel('test_results/results_testAll_frida_to_nevo.xlsx')

# Keep only the two columns the model needs, for the first 10 rows.
# .copy() makes sample_df an independent frame, so adding a column below
# does not write into a slice of df_new (avoids SettingWithCopyWarning).
sample_df = df_new[['FoodName', 'SIMILARFOODS']].head(10).copy()

# Placeholder value; replaced later with the model's filtered choices.
sample_df["FILTEREDSIMILARFOODS"] = '{}'
sample_df

Unnamed: 0,FoodName,SIMILARFOODS,FILTEREDSIMILARFOODS
0,"Strawberry, raw",{'Strawberries'},{}
1,"Apple, raw, all varieties","{'Bilberries', 'Blackcurrants', 'Apple with sk...",{}
2,"Banana, raw","{'Blackberries', 'Banana', 'Strawberries', 'Gr...",{}
3,"Potato, raw","{'Potatoes raw', 'Potatoes old raw', 'Potatoes...",{}
4,"Milk, whole, konventional (not organic), 3.5 %...",{'Milk semi-skimmed'},{}
5,"Parsley, raw",{'Parsley fresh'},{}
6,"Pear, raw","{'Pear without skin', 'Bilberries', 'Blackcurr...",{}
7,"Rhubarb, raw","{'Fennel raw', 'Kohlrabi raw', 'Celery raw'}",{}
8,"Currant, black, raw",{'Blackcurrants'},{}
9,"Yogurt plain, whole milk","{'Yogurt whole milk', 'Custard several flavour...",{}


In [3]:
# Serialize the DataFrame to plain text so it can be sent to the GPT model
def df_to_text(df):
    """Render every row of ``df`` as a three-line text record.

    Each record lists the row's ``FoodName`` and ``SIMILARFOODS`` values,
    followed by an empty ``FILTEREDSIMILARFOODS: []`` line. Records are
    separated from one another by a blank line.

    Args:
        df: DataFrame with ``FoodName`` and ``SIMILARFOODS`` columns.

    Returns:
        A single string containing all records; empty if ``df`` has no rows.
    """
    def render(record):
        food_name = record['FoodName']
        similar_foods = record['SIMILARFOODS']
        return f"FoodName: {food_name}\nSIMILARFOODS: {similar_foods}\nFILTEREDSIMILARFOODS: []\n"

    # Each record already ends in a newline, so joining with another newline
    # yields the blank separator line between consecutive records.
    return '\n'.join(render(record) for _, record in df.iterrows())

# Serialize the sample rows and print the text to inspect what will be sent
# to the model as the user message.
df_text = df_to_text(sample_df)
print(df_text)

FoodName: Strawberry, raw
SIMILARFOODS: {'Strawberries'}
FILTEREDSIMILARFOODS: []

FoodName: Apple, raw, all varieties
SIMILARFOODS: {'Bilberries', 'Blackcurrants', 'Apple with skin', 'Apple without skin', 'Figs fresh', 'Raspberries', 'Blueberries', 'Gooseberries', 'Redcurrant', 'Cowberries', 'Cranberry'}
FILTEREDSIMILARFOODS: []

FoodName: Banana, raw
SIMILARFOODS: {'Blackberries', 'Banana', 'Strawberries', 'Grapes white/black with skin'}
FILTEREDSIMILARFOODS: []

FoodName: Potato, raw
SIMILARFOODS: {'Potatoes raw', 'Potatoes old raw', 'Potatoes new raw'}
FILTEREDSIMILARFOODS: []

FoodName: Milk, whole, konventional (not organic), 3.5 % fat
SIMILARFOODS: {'Milk semi-skimmed'}
FILTEREDSIMILARFOODS: []

FoodName: Parsley, raw
SIMILARFOODS: {'Parsley fresh'}
FILTEREDSIMILARFOODS: []

FoodName: Pear, raw
SIMILARFOODS: {'Pear without skin', 'Bilberries', 'Blackcurrants', 'Figs fresh', 'Raspberries', 'Blueberries', 'Gooseberries', 'Redcurrant', 'Cowberries', 'Cranberry'}
FILTEREDSIMILARFOOD

In [4]:
# Upper bound on the number of tokens the model may generate for the reply.
MAX_TOKENS = 4096
# System instruction: asks the model to fill in FILTEREDSIMILARFOODS for each
# record and to answer in the same layout as the input, which the parsing
# step further down relies on.
PROMPT = "Given the following set, could you fill in FILTEREDSIMILARFOODS by choosing the top food names inside SIMILARFOODS that correspond best to FoodName? Make sure that it matches the exact fruit and not just related fruit. Also, you do not have to choose any from the filtered similar food if you believe that none of the filtered foods matches the food name best. Make the output the same format as my input please."

response = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",
    messages=[
        {"role": "system", "content": PROMPT},
        {"role": "user", "content": df_text},
    ],
    max_tokens=MAX_TOKENS,
)

choice = response.choices[0]
# A finish_reason of "length" means generation stopped because the token
# limit was hit, so the trailing records may be cut off or missing entirely.
# Surface that here rather than letting the parser silently drop rows.
if choice.finish_reason == "length":
    print("WARNING: the model reply was truncated at the token limit; "
          "some records may be incomplete or missing.")

result = choice.message.content
print(result)

FoodName: Strawberry, raw
SIMILARFOODS: {'Strawberries'}
FILTEREDSIMILARFOODS: ['Strawberries']

FoodName: Apple, raw, all varieties
SIMILARFOODS: {'Bilberries', 'Blackcurrants', 'Apple with skin', 'Apple without skin', 'Figs fresh', 'Raspberries', 'Blueberries', 'Gooseberries', 'Redcurrant', 'Cowberries', 'Cranberry'}
FILTEREDSIMILARFOODS: ['Apple with skin', 'Apple without skin']

FoodName: Banana, raw
SIMILARFOODS: {'Blackberries', 'Banana', 'Strawberries', 'Grapes white/black with skin'}
FILTEREDSIMILARFOODS: ['Banana']

FoodName: Potato, raw
SIMILARFOODS: {'Potatoes raw', 'Potatoes old raw', 'Potatoes new raw'}
FILTEREDSIMILARFOODS: ['Potatoes raw']

FoodName: Milk, whole, konventional (not organic), 3.5 % fat
SIMILARFOODS: {'Milk semi-skimmed'}
FILTEREDSIMILARFOODS: []

FoodName: Parsley, raw
SIMILARFOODS: {'Parsley fresh'}
FILTEREDSIMILARFOODS: ['Parsley fresh']

FoodName: Pear, raw
SIMILARFOODS: {'Pear without skin', 'Bilberries', 'Blackcurrants', 'Figs fresh', 'Raspberries', '

In [5]:
def parse_input_text(input_text):
    """Parse the model's text reply into a DataFrame.

    The reply is expected to consist of records made of three ``Key: value``
    lines (``FoodName``, ``SIMILARFOODS``, ``FILTEREDSIMILARFOODS``). Records
    are recognised by their keys rather than by line position, so missing or
    extra blank lines between records do not shift the parsing. A record that
    lacks any of the three keys (for example a reply cut off mid-record) is
    skipped.

    Args:
        input_text: The text returned by the model.

    Returns:
        A DataFrame with columns ``FoodName`` (str), ``SIMILARFOODS`` and
        ``FILTEREDSIMILARFOODS`` (the evaluated Python literals, e.g. a set
        and a list), one row per complete record.

    Raises:
        ValueError, SyntaxError: If a ``SIMILARFOODS`` or
            ``FILTEREDSIMILARFOODS`` value is not a valid Python literal.
    """
    fields = ('FoodName', 'SIMILARFOODS', 'FILTEREDSIMILARFOODS')
    data = {field: [] for field in fields}

    record = {}
    for line in input_text.strip().split('\n'):
        key, separator, value = line.partition(': ')
        key = key.strip()
        if not separator or key not in fields:
            # Blank lines and anything that is not one of our keys.
            continue

        if key == 'FoodName':
            # A new record begins; discard any unfinished previous one.
            record = {}
        record[key] = value

        if len(record) < len(fields):
            continue

        # SECURITY: the text comes from the model, i.e. it is untrusted.
        # ast.literal_eval only accepts Python literals (sets, lists,
        # strings, ...) and, unlike eval, can never execute code.
        similar_foods = ast.literal_eval(record['SIMILARFOODS'].strip())
        filtered_similar_foods = ast.literal_eval(
            record['FILTEREDSIMILARFOODS'].strip()
        )

        data['FoodName'].append(record['FoodName'])
        data['SIMILARFOODS'].append(similar_foods)
        data['FILTEREDSIMILARFOODS'].append(filtered_similar_foods)
        record = {}

    return pd.DataFrame(data)

In [6]:
df_response = parse_input_text(result)

# Attach the model's choices by FoodName rather than by row position: if the
# parser skipped a record, positional assignment would shift every later
# result onto the wrong food. Foods without a parsed record end up as NaN.
filtered_by_name = dict(
    zip(df_response['FoodName'], df_response['FILTEREDSIMILARFOODS'])
)
sample_df['FILTEREDSIMILARFOODS'] = sample_df['FoodName'].map(filtered_by_name)

sample_df

Unnamed: 0,FoodName,SIMILARFOODS,FILTEREDSIMILARFOODS
0,"Strawberry, raw",{'Strawberries'},[Strawberries]
1,"Apple, raw, all varieties","{'Bilberries', 'Blackcurrants', 'Apple with sk...","[Apple with skin, Apple without skin]"
2,"Banana, raw","{'Blackberries', 'Banana', 'Strawberries', 'Gr...",[Banana]
3,"Potato, raw","{'Potatoes raw', 'Potatoes old raw', 'Potatoes...",[Potatoes raw]
4,"Milk, whole, konventional (not organic), 3.5 %...",{'Milk semi-skimmed'},[]
5,"Parsley, raw",{'Parsley fresh'},[Parsley fresh]
6,"Pear, raw","{'Pear without skin', 'Bilberries', 'Blackcurr...",[Pear without skin]
7,"Rhubarb, raw","{'Fennel raw', 'Kohlrabi raw', 'Celery raw'}",[]
8,"Currant, black, raw",{'Blackcurrants'},[Blackcurrants]
9,"Yogurt plain, whole milk","{'Yogurt whole milk', 'Custard several flavour...",[Yogurt whole milk]
