In [2]:
import pandas as pd
import os
import json
from openai import OpenAI
from tqdm.auto import tqdm


In [4]:
# Never paste a real key into this cell. setdefault keeps a key that is already
# exported in the environment instead of overwriting it with an empty string;
# when no key is exported the variable is still created (empty), as before.
os.environ.setdefault("OPENAI_API_KEY", '')

In [5]:
# Shared API client used by the LLM helper cells below. No key is passed
# explicitly — presumably the client picks up OPENAI_API_KEY from the
# environment configured in the previous cell.
client = OpenAI()

In [6]:
# Read the recipes table, then keep a list-of-dicts view (one dict per row,
# all original columns) for prompt formatting further down.
recipes_csv_path = '../data/recipes.csv'
df = pd.read_csv(recipes_csv_path)
documents = df.to_dict('records')

In [7]:
# Inspect column names, non-null counts and dtypes of the loaded recipes frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 170 entries, 0 to 169
Data columns (total 28 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   RecipeId                    170 non-null    int64  
 1   Name                        170 non-null    object 
 2   AuthorId                    170 non-null    int64  
 3   AuthorName                  170 non-null    object 
 4   CookTime                    132 non-null    object 
 5   PrepTime                    170 non-null    object 
 6   TotalTime                   170 non-null    object 
 7   DatePublished               170 non-null    object 
 8   Description                 170 non-null    object 
 9   Images                      170 non-null    object 
 10  RecipeCategory              170 non-null    object 
 11  Keywords                    169 non-null    object 
 12  RecipeIngredientQuantities  170 non-null    object 
 13  RecipeIngredientParts       170 non

In [8]:
# Narrow the frame to the text and nutrition fields, in display order.
# These are the same field names the prompt template refers to.
selected_columns = [
    'Name', 'Description', 'RecipeInstructions',
    'Calories', 'FatContent', 'SaturatedFatContent',
    'CholesterolContent', 'SodiumContent', 'CarbohydrateContent',
    'FiberContent', 'SugarContent', 'ProteinContent',
]

# Rebind df to the reduced frame (documents was built earlier and is unaffected).
df = df.loc[:, selected_columns]

# Preview the first rows of the reduced frame.
df.head()

Unnamed: 0,Name,Description,RecipeInstructions,Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent
0,Low-Fat Berry Blue Frozen Dessert,Make and share this Low-Fat Berry Blue Frozen ...,"c(""Toss 2 cups berries with sugar."", ""Let stan...",170.9,2.5,1.3,8.0,29.8,37.1,3.6,30.2,3.2
1,Biryani,Make and share this Biryani recipe from Food.com.,"c(""Soak saffron in warm milk for 5 minutes and...",1110.7,58.8,16.6,372.8,368.4,84.4,9.0,20.4,63.4
2,Best Lemonade,This is from one of my first Good House Keepi...,"c(""Into a 1 quart Jar with tight fitting lid, ...",311.1,0.2,0.0,0.0,1.8,81.5,0.4,77.2,0.3
3,Carina's Tofu-Vegetable Kebabs,This dish is best prepared a day in advance to...,"c(""Drain the tofu, carefully squeezing out exc...",536.1,24.0,3.8,0.0,1558.6,64.2,17.3,32.1,29.3
4,Cabbage Soup,Make and share this Cabbage Soup recipe from F...,"c(""Mix everything together and bring to a boil...",103.6,0.4,0.1,0.0,959.3,25.1,4.8,17.7,4.3


In [9]:
# Prompt used to generate ground-truth questions for a single recipe record.
# Each {Field} placeholder is filled by str.format from a record dict; the
# doubled braces around the JSON example are escapes that render as literal
# braces in the final prompt.
prompt_template = """
You emulate a user of our diet plan assistant application.
Formulate 5 questions this user might ask based on a provided recipes.
Make the questions specific to this recipe.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as fewer words as possible from the record. 

The record:

Name: {Name}
Description: {Description}
RecipeInstructions: {RecipeInstructions}
Calories: {Calories}
FatContent: {FatContent}
SaturatedFatContent: {SaturatedFatContent}
CholesterolContent: {CholesterolContent}
SodiumContent: {SodiumContent}
CarbohydrateContent: {CarbohydrateContent}
FiberContent: {FiberContent}
SugarContent: {SugarContent}
ProteinContent: {ProteinContent}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [10]:
# Render the prompt for the first recipe as a manual check of the template.
# str.format ignores keys in the record that the template does not reference.
prompt = prompt_template.format(**documents[0])

In [11]:
def llm(prompt, model='gpt-4o-mini'):
    """Send one user message to the chat completions API and return the reply text.

    Args:
        prompt: Text sent as the content of the single user message.
        model: Name of the chat model to call. Defaults to 'gpt-4o-mini',
            the model this helper previously hard-coded.

    Returns:
        The message content of the first choice in the API response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

In [12]:
# Trial call for the sample prompt; the result is the raw reply text.
questions = llm(prompt)

In [13]:
# Check that the trial reply parses as JSON before running the full batch.
json.loads(questions)

{'questions': ['How long should I let the berries and sugar mixture stand before processing?',
  'What should I do after processing the berry and yogurt mixture?',
  'How many cups of blueberries do I need to fold in at the end?',
  'What is the total calorie count for the Low-Fat Berry Blue Frozen Dessert?',
  'What are the saturated fat and cholesterol contents in this recipe?']}

In [14]:
def generate_questions(doc):
    """Ask the model for questions about one recipe record.

    Args:
        doc: Mapping for a single recipe; it must contain every field that
            prompt_template references (extra keys are ignored by str.format).

    Returns:
        The raw reply text from the model. It is expected to be a JSON string,
        but it is not parsed or validated here — the caller does that.

    Raises:
        KeyError: If doc lacks a field referenced by prompt_template.
    """
    prompt = prompt_template.format(**doc)
    # Reuse the shared helper instead of repeating the same API call here.
    return llm(prompt)

In [15]:
# Maps RecipeId -> list of generated questions. Kept in its own cell so that
# re-running the generation loop does not discard entries already collected.
results = {}

In [16]:
# Number of recipe records the generation loop will iterate over.
len(documents)

170

In [17]:
# Model replies are not guaranteed to be valid JSON with a "questions" key.
# Retry a bad reply a few times instead of letting one recipe abort the batch;
# the last failure is re-raised so persistent problems still surface.
MAX_ATTEMPTS = 3

for doc in tqdm(documents):
    doc_id = doc['RecipeId']
    # Resume support: recipes handled in an earlier (interrupted) run are skipped.
    if doc_id in results:
        continue

    for attempt in range(1, MAX_ATTEMPTS + 1):
        questions_raw = generate_questions(doc)
        try:
            questions = json.loads(questions_raw)
            results[doc_id] = questions['questions']
            break
        except (json.JSONDecodeError, KeyError, TypeError):
            # TypeError covers a reply that parses to a non-dict (e.g. a bare list).
            if attempt == MAX_ATTEMPTS:
                raise

  0%|          | 0/170 [00:00<?, ?it/s]

In [22]:
# Flatten the RecipeId -> questions mapping into (RecipeId, question) pairs,
# preserving the mapping's iteration order and each list's question order.
final_results = [
    (recipe_id, question)
    for recipe_id, recipe_questions in results.items()
    for question in recipe_questions
]

In [23]:
# Peek at the first (RecipeId, question) pair.
final_results[0]

(38,
 'What type of berries are needed for the Low-Fat Berry Blue Frozen Dessert?')

In [24]:
# Two-column table with one row per (RecipeId, question) pair.
df_results = pd.DataFrame(final_results, columns=['RecipeId', 'question'])

In [25]:
# Write the ground-truth set to disk; index=False keeps the row index out of the file.
df_results.to_csv('../data/ground-truth-retrieval.csv', index=False)

In [26]:
# Preview the first lines of the written CSV using the shell's head command.
!head ../data/ground-truth-retrieval.csv

RecipeId,question
38,What type of berries are needed for the Low-Fat Berry Blue Frozen Dessert?
38,How long should I let the berry and sugar mixture stand before processing?
38,Is it necessary to strain the mixture after processing it in the food processor?
38,Can I use an ice cream maker instead of a baking pan for this recipe?
38,How long do I need to freeze the dessert before serving it?
39,What ingredients are blended to create the smooth paste for the Biryani recipe?
39,How long should I marinate the chicken in the yogurt mixture before cooking it?
39,What should I do with the chicken after cooking it in the skillet with the marinade?
39,How long do I need to cook the rice after adding it to the sauce?
