### Ingestion

In [1]:
# Import the datafile
import requests
data_url = 'https://raw.githubusercontent.com/eadka/fridgechef/main/Data/RecipeData.json'
# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
data_response = requests.get(data_url, timeout=30)
# Fail loudly on a 4xx/5xx response instead of trying to JSON-decode an error page.
data_response.raise_for_status()
recipes_data = data_response.json()

In [2]:
# Ensure every indexed field is a string: under the hood minsearch uses TfidfVectorizer, which expects each text field to be a string
for recipe in recipes_data:
    for field in ["dish_name",  "cuisine",  "diet", "tags",  "main_ingredients",
                 "cooking_time_minutes", "difficulty",  "ingredients_full",
                 "instructions", "substitutions", "flavor_notes"]:
        value = recipe.get(field)
        if value is None:
            # Missing or null field: store an empty string so the key always
            # exists (entry_template.format(**doc) needs every key) and the
            # literal text "None" is not indexed.
            recipe[field] = ""
        elif isinstance(value, list):
            recipe[field] = " ".join(map(str, value))  # join the list into one string
        elif not isinstance(value, str):
            recipe[field] = str(value)  # convert numbers (and other scalars) to string

In [3]:
# Search engine and indexing
import minsearch

# Indexing the document
# All eleven recipe attributes are registered as text fields; no keyword
# fields are declared.
indexed_text_fields = [
    "dish_name",
    "cuisine",
    "diet",
    "tags",
    "main_ingredients",
    "cooking_time_minutes",
    "difficulty",
    "ingredients_full",
    "instructions",
    "substitutions",
    "flavor_notes",
]
index = minsearch.Index(text_fields=indexed_text_fields, keyword_fields=[])

In [4]:
index.fit(recipes_data)

<minsearch.minsearch.Index at 0x776546c64b60>

In [5]:
query = 'Give me recipes for carrots and beans'

In [6]:
index.search(query,num_results=2)

[{'dish_name': 'Rajma Masala',
  'cuisine': 'Indian',
  'diet': 'Vegan',
  'tags': 'protein-rich curry comfort food',
  'main_ingredients': 'kidney beans onion tomato ginger garam masala',
  'cooking_time_minutes': '45',
  'difficulty': 'Medium',
  'ingredients_full': "{'item': 'kidney beans', 'quantity': '2 cups cooked'} {'item': 'onion', 'quantity': '1 large'} {'item': 'tomato', 'quantity': '2'} {'item': 'ginger', 'quantity': '1 inch'} {'item': 'garam masala', 'quantity': '1 tsp'}",
  'instructions': 'Sauté onion and ginger until golden. Add tomato and spices, cook until soft. Add kidney beans, simmer for 20 minutes.',
  'substitutions': "{'kidney beans': ['black beans']}",
  'flavor_notes': 'Rich, spiced, and hearty.'},
 {'dish_name': 'Minestrone Soup',
  'cuisine': 'Italian',
  'diet': 'Vegan',
  'tags': 'soup hearty vegetable-rich',
  'main_ingredients': 'carrot celery zucchini beans pasta tomato',
  'cooking_time_minutes': '40',
  'difficulty': 'Easy',
  'ingredients_full': "{'it

### RAG Flow

In [7]:
# OpenAI client for LLM integration
from openai import OpenAI

client = OpenAI()

In [8]:
# response = client.chat.completions.create(
#     model='gpt-4o-mini',
#     messages=[{"role": "user", "content": query}]
# )

# response.choices[0].message.content

In [9]:
# Defining the RAG flow
def search(query):
    """Query the module-level ``index`` and return its results.

    The call asks for at most five results and passes an empty filter and an
    empty boost mapping.

    Args:
        query: Free-text search string.

    Returns:
        Whatever ``index.search`` returns for this query.
    """
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={},
        num_results=5,
    )

In [10]:
recipes_data[0]

{'dish_name': 'Vegetable Pad Thai',
 'cuisine': 'Thai',
 'diet': 'Vegan',
 'tags': 'quick noodles stir-fry',
 'main_ingredients': 'rice noodles tofu carrot bean sprouts spring onions peanuts soy sauce lime garlic',
 'cooking_time_minutes': '25',
 'difficulty': 'Easy',
 'ingredients_full': "{'item': 'rice noodles', 'quantity': '200g'} {'item': 'tofu', 'quantity': '150g'} {'item': 'carrot', 'quantity': '1 medium'} {'item': 'bean sprouts', 'quantity': '1 cup'} {'item': 'spring onions', 'quantity': '2'} {'item': 'peanuts', 'quantity': '2 tbsp, crushed'} {'item': 'soy sauce', 'quantity': '3 tbsp'} {'item': 'lime', 'quantity': '1'} {'item': 'garlic', 'quantity': '2 cloves'}",
 'instructions': 'Soak rice noodles in warm water for 20 minutes. Stir-fry garlic and tofu until golden. Add vegetables and stir-fry for 2-3 minutes. Add noodles and sauce, toss until combined. Garnish with peanuts and lime.',
 'substitutions': "{'tofu': ['tempeh', 'chickpeas'], 'soy sauce': ['tamari', 'coconut aminos']

In [11]:
# prompt_template = """
# You're a "Fridge Chef", a helpful cooking assistant. 
# The user will give you a list of vegetables or ingredients they have available.
# Base your answer only on the recipes in the CONTEXT.
# If you cannot find an exact match, suggest the closest dishes using the available ingredients.

# When answering:
# - Include the dish name, cuisine, diet type, main ingredients, and cooking time.
# - Provide short cooking instructions based on the CONTEXT.
# - Suggest possible ingredient substitutions if given in the CONTEXT.
# - If multiple dishes fit, return the top 3–5 most relevant recipes.

# QUESTION: {question}

# CONTEXT: 
# {context}
# """.strip()

# entry_template = """
# dish_name: {dish_name}
# cuisine: {cuisine}
# diet: {diet}
# tags: {tags}
# main_ingredients: {main_ingredients}
# cooking_time_minutes: {cooking_time_minutes}
# difficulty: {difficulty}
# ingredients_full: {ingredients_full}
# instructions: {instructions}
# substitutions: {substitutions}
# flavor_notes: {flavor_notes}
# """.strip()

# def build_prompt(query, search_results):
#     context = ""
    
#     for doc in search_results:
#         context = context + entry_template.format(**doc) + "\n\n"
    
#     prompt = prompt_template.format(question=query, context=context).strip()
#     return prompt

In [12]:
prompt_template = """
You're a "Fridge Chef", a helpful cooking assistant. 
The user will give you a list of vegetables or ingredients they have available.
Base your answer only on the recipes in the CONTEXT.
If you cannot find an exact match, suggest the closest dishes using the available ingredients.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

entry_template = """
dish_name: {dish_name}
cuisine: {cuisine}
diet: {diet}
tags: {tags}
main_ingredients: {main_ingredients}
cooking_time_minutes: {cooking_time_minutes}
difficulty: {difficulty}
ingredients_full: {ingredients_full}
instructions: {instructions}
substitutions: {substitutions}
flavor_notes: {flavor_notes}
""".strip()

def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved documents.

    Each document in ``search_results`` is formatted with ``entry_template``
    (so it must provide every placeholder used there) and followed by a blank
    line. The concatenated entries fill the ``{context}`` slot of
    ``prompt_template`` and ``query`` fills ``{question}``.

    Returns:
        The formatted prompt with surrounding whitespace stripped.
    """
    rendered_entries = [
        entry_template.format(**doc) + "\n\n"
        for doc in search_results
    ]
    filled = prompt_template.format(
        question=query,
        context="".join(rendered_entries),
    )
    return filled.strip()

In [13]:
search_results = search(query)
prompt = build_prompt(query, search_results)

In [14]:
print(prompt)

You're a "Fridge Chef", a helpful cooking assistant. 
The user will give you a list of vegetables or ingredients they have available.
Base your answer only on the recipes in the CONTEXT.
If you cannot find an exact match, suggest the closest dishes using the available ingredients.

QUESTION: Give me recipes for carrots and beans

CONTEXT: 
dish_name: Rajma Masala
cuisine: Indian
diet: Vegan
tags: protein-rich curry comfort food
main_ingredients: kidney beans onion tomato ginger garam masala
cooking_time_minutes: 45
difficulty: Medium
ingredients_full: {'item': 'kidney beans', 'quantity': '2 cups cooked'} {'item': 'onion', 'quantity': '1 large'} {'item': 'tomato', 'quantity': '2'} {'item': 'ginger', 'quantity': '1 inch'} {'item': 'garam masala', 'quantity': '1 tsp'}
instructions: Sauté onion and ginger until golden. Add tomato and spices, cook until soft. Add kidney beans, simmer for 20 minutes.
substitutions: {'kidney beans': ['black beans']}
flavor_notes: Rich, spiced, and hearty.

di

In [76]:
def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text, sent as the only message of the chat.
        model: Model name forwarded to the chat-completions call.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=model,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [78]:
def rag(query,model='gpt-4o-mini'):
    """Answer ``query`` with retrieval-augmented generation.

    Pipeline: ``search`` retrieves documents, ``build_prompt`` renders them
    into a prompt together with the query, and ``llm`` generates the answer
    with the requested ``model``.

    Returns:
        The answer returned by ``llm``.
    """
    retrieved_docs = search(query)
    return llm(build_prompt(query, retrieved_docs), model=model)

In [17]:
answer = rag('What is the main cooking technique used in the Vegetable Pad Thai?')
print(answer)

The main cooking technique used in the Vegetable Pad Thai is **stir-frying**.


### Retrieval Evaluation

In [18]:
import pandas as pd

In [19]:
df_question = pd.read_csv('../Data/ground-truth-retrieval.csv')

In [20]:
df_question.head()

Unnamed: 0,id,question
0,Vegetable Pad Thai,What are the main ingredients used in the Vege...
1,Vegetable Pad Thai,How long does it take to cook the Vegetable Pa...
2,Vegetable Pad Thai,What can I use instead of tofu in the Vegetabl...
3,Vegetable Pad Thai,What type of cuisine does the Vegetable Pad Th...
4,Vegetable Pad Thai,Can you describe the flavor profile of the Veg...


In [21]:
df_question.describe()

Unnamed: 0,id,question
count,490,490
unique,98,486
top,Vegetable Pad Thai,Can I use tofu instead of paneer in this recipe?
freq,5,2


In [22]:
ground_truth = df_question.to_dict(orient='records')

In [23]:
ground_truth[0]

{'id': 'Vegetable Pad Thai',
 'question': 'What are the main ingredients used in the Vegetable Pad Thai recipe?'}

In [24]:
def hit_rate(relevance_total):
    """Fraction of queries with at least one relevant result.

    Args:
        relevance_total: One sequence per query; each holds a boolean per
            returned result, ``True`` when that result is the expected one.

    Returns:
        Number of queries whose sequence contains ``True`` divided by the
        number of queries, or ``0.0`` when there are no queries (instead of
        raising ``ZeroDivisionError``).
    """
    if len(relevance_total) == 0:
        return 0.0

    hits = sum(1 for line in relevance_total if True in line)
    return hits / len(relevance_total)

def mrr(relevance_total):
    """Mean reciprocal rank over all queries.

    Args:
        relevance_total: One sequence per query; each holds a boolean per
            returned result (in rank order), ``True`` for a relevant result.

    Returns:
        The average over queries of ``1 / rank`` of the first relevant
        result (ranks start at 1); a query without a relevant result
        contributes 0. Returns ``0.0`` when there are no queries.
    """
    if len(relevance_total) == 0:
        return 0.0

    total_score = 0.0

    for line in relevance_total:
        for rank, is_relevant in enumerate(line, start=1):
            if is_relevant:
                total_score += 1 / rank
                # Only the first relevant result counts; without this break a
                # query with several hits would contribute more than 1.
                break

    return total_score / len(relevance_total)

In [25]:
def minsearch_search(query, boost=None):
    """Search the module-level ``index`` and request up to ten results.

    The optional ``boost`` argument gives this definition the same signature
    as the later redefinition of ``minsearch_search`` in this notebook, so the
    function behaves the same whichever cell was run last.

    Args:
        query: Free-text search string.
        boost: Optional mapping passed to ``index.search`` as ``boost_dict``;
            ``None`` (the default) means an empty mapping.

    Returns:
        Whatever ``index.search`` returns for this query.
    """
    if boost is None:
        boost = {}

    return index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10,
    )

In [26]:
def evaluate(ground_truth, search_function):
    """Compute hit rate and MRR of ``search_function`` over ``ground_truth``.

    Args:
        ground_truth: Iterable of records; each must have an ``'id'`` key
            holding the expected dish name.
        search_function: Callable that receives the whole record and returns
            an iterable of result dicts with a ``'dish_name'`` key.

    Returns:
        ``{'hit_rate': ..., 'mrr': ...}`` computed by ``hit_rate`` and ``mrr``
        from the per-query relevance lists.
    """
    def relevance_for(record):
        # A result is relevant when its dish name equals the record's id.
        expected = record['id']
        return [hit['dish_name'] == expected for hit in search_function(record)]

    relevance_total = [relevance_for(record) for record in tqdm(ground_truth)]

    metrics = {}
    metrics['hit_rate'] = hit_rate(relevance_total)
    metrics['mrr'] = mrr(relevance_total)
    return metrics

In [27]:
from tqdm.auto import tqdm

In [28]:
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/490 [00:00<?, ?it/s]

{'hit_rate': 0.9755102040816327, 'mrr': 0.836089245221898}

### Finding the best parameters

In [29]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

  import pkg_resources


In [30]:
# Breaking the data into validation and test data sets
# The first 100 rows form the validation set and the remainder the test set.
# NOTE(review): the split is positional, not shuffled - confirm that the row
# order of the ground-truth file does not bias the validation set.
n_validation = 100
df_validation = df_question.iloc[:n_validation]
df_test = df_question.iloc[n_validation:]

In [31]:
import random

def simple_optimize(param_ranges, objective_function, n_iterations=10, seed=None):
    """Random search that maximises ``objective_function``.

    Args:
        param_ranges: Mapping of parameter name to a ``(min_val, max_val)``
            pair. When both bounds are ``int`` the value is drawn with
            ``randint`` (both ends inclusive); otherwise with ``uniform``.
        objective_function: Callable taking the sampled parameter dict and
            returning a score; higher is better.
        n_iterations: Number of random parameter sets to try.
        seed: Optional seed. When given, sampling uses a private
            ``random.Random(seed)`` so the search is reproducible and the
            global random state is left untouched. ``None`` (the default)
            keeps the previous behaviour of using the module-level generator.

    Returns:
        ``(best_params, best_score)``. With ``n_iterations == 0`` nothing is
        evaluated and the result is ``(None, float('-inf'))``.
    """
    rng = random if seed is None else random.Random(seed)

    best_params = None
    best_score = float('-inf')  # start below any real score: we are maximising

    for _ in range(n_iterations):
        # Draw one random value per parameter.
        current_params = {}
        for param, (min_val, max_val) in param_ranges.items():
            if isinstance(min_val, int) and isinstance(max_val, int):
                current_params[param] = rng.randint(min_val, max_val)
            else:
                current_params[param] = rng.uniform(min_val, max_val)

        current_score = objective_function(current_params)

        # Strict '>' keeps the earliest parameter set on ties.
        if current_score > best_score:
            best_score = current_score
            best_params = current_params

    return best_params, best_score

In [32]:
gt_val = df_validation.to_dict(orient='records')

In [33]:
def minsearch_search(query,boost=None):
    """Search the module-level ``index`` with optional per-field boosts.

    Args:
        query: Free-text search string.
        boost: Mapping passed to ``index.search`` as ``boost_dict``; ``None``
            is replaced by an empty mapping.

    Returns:
        Whatever ``index.search`` returns; up to ten results are requested.
    """
    field_boosts = {} if boost is None else boost
    return index.search(
        query=query,
        filter_dict={},
        boost_dict=field_boosts,
        num_results=10,
    )

In [34]:
# Boost search space and objective function for tuning (metric: MRR on the validation questions)
param_ranges = {
    'dish_name': (0.0,3.0),
    'cuisine': (0.0,3.0),
    'diet': (0.0,3.0),
    'tags': (0.0,3.0),
    'main_ingredients': (0.0,3.0),
    'cooking_time_minutes': (0.0,3.0),
    'difficulty': (0.0,3.0),
    'ingredients_full': (0.0,3.0)
}

def objective(boost_params):
    """Score one boost configuration by its MRR on the validation questions.

    Args:
        boost_params: Mapping passed to ``minsearch_search`` as ``boost``.

    Returns:
        The ``'mrr'`` entry of ``evaluate(gt_val, ...)``.
    """
    boosted_search = lambda record: minsearch_search(record['question'], boost_params)
    return evaluate(gt_val, boosted_search)['mrr']

In [35]:
simple_optimize(param_ranges, objective, n_iterations=20)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

({'dish_name': 2.0204481947865123,
  'cuisine': 1.2792705807152922,
  'diet': 1.9303747602823613,
  'tags': 0.17171978515156383,
  'main_ingredients': 0.6681086140894844,
  'cooking_time_minutes': 2.2878789594254574,
  'difficulty': 0.947580998942233,
  'ingredients_full': 2.094028493962976},
 0.9400396825396825)

In [36]:
# Search with fixed per-field boosts, then measure hit rate and MRR on the full ground truth
def minsearch_improved(query):
    """Search the module-level ``index`` with a fixed set of field boosts.

    NOTE(review): these hard-coded boosts differ from the optimizer output
    recorded earlier in the notebook - confirm which run produced them.

    Args:
        query: Free-text search string.

    Returns:
        Whatever ``index.search`` returns; up to ten results are requested.
    """
    field_boosts = dict(
        dish_name=2.49,
        cuisine=2.16,
        diet=2.745,
        tags=0.23,
        main_ingredients=1.631,
        cooking_time_minutes=0.39,
        difficulty=2.64,
        ingredients_full=1.73,
    )
    return index.search(
        query=query,
        filter_dict={},
        boost_dict=field_boosts,
        num_results=10,
    )

evaluate(ground_truth, lambda q: minsearch_improved(q['question']))

  0%|          | 0/490 [00:00<?, ?it/s]

{'hit_rate': 0.9734693877551021, 'mrr': 0.9050413022351798}

### RAG evaluation

#### gpt-4o-mini

In [37]:
prompt2_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [40]:
record = ground_truth[0]
question = record['question']
answer_llm = rag(question)

In [42]:
print(question, answer_llm)

What are the main ingredients used in the Vegetable Pad Thai recipe? The main ingredients used in the Vegetable Pad Thai recipe are:

- Rice noodles (200g)
- Tofu (150g)
- Carrot (1 medium)
- Bean sprouts (1 cup)
- Spring onions (2)
- Peanuts (2 tbsp, crushed)
- Soy sauce (3 tbsp)
- Lime (1)
- Garlic (2 cloves)


In [45]:
prompt = prompt2_template.format(question=question, answer_llm = answer_llm)
print(prompt)

You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: What are the main ingredients used in the Vegetable Pad Thai recipe?
Generated Answer: The main ingredients used in the Vegetable Pad Thai recipe are:

- Rice noodles (200g)
- Tofu (150g)
- Carrot (1 medium)
- Bean sprouts (1 cup)
- Spring onions (2)
- Peanuts (2 tbsp, crushed)
- Soy sauce (3 tbsp)
- Lime (1)
- Garlic (2 cloves)

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}


In [46]:
llm(prompt)

'{\n  "Relevance": "RELEVANT",\n  "Explanation": "The generated answer lists the main ingredients used in the Vegetable Pad Thai recipe clearly and accurately, directly addressing the question asked."\n}'

In [57]:
import json

# LLM-as-judge evaluation of the gpt-4o-mini RAG answers.
# This loop has to run: the following cells read `evaluations`, so leaving it
# commented out makes them fail with NameError on a fresh kernel.
# Each ground-truth question costs two chat-completion calls (answer + judge).
evaluations = []

for record in tqdm(ground_truth):
    question = record['question']
    answer_llm = rag(question, model='gpt-4o-mini')

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    # The judge runs on llm()'s default model and is asked to reply in JSON.
    evaluation = llm(prompt)
    evaluation = json.loads(evaluation)

    evaluations.append((record, answer_llm, evaluation))

  0%|          | 0/490 [00:00<?, ?it/s]

In [58]:
evaluations

[({'id': 'Vegetable Pad Thai',
   'question': 'What are the main ingredients used in the Vegetable Pad Thai recipe?'},
  'The main ingredients used in the Vegetable Pad Thai recipe are:\n\n- Rice noodles\n- Tofu\n- Carrot\n- Bean sprouts\n- Spring onions\n- Peanuts\n- Soy sauce\n- Lime\n- Garlic',
  {'Relevance': 'RELEVANT',
   'Explanation': 'The generated answer directly lists the main ingredients used in a Vegetable Pad Thai recipe, addressing the question accurately and completely.'}),
 ({'id': 'Vegetable Pad Thai',
   'question': 'How long does it take to cook the Vegetable Pad Thai from start to finish?'},
  'The Vegetable Pad Thai takes 25 minutes to cook from start to finish.',
  {'Relevance': 'RELEVANT',
   'Explanation': 'The generated answer directly addresses the question by providing a specific time (25 minutes) for cooking the Vegetable Pad Thai from start to finish, which is exactly what the question asks for.'}),
 ({'id': 'Vegetable Pad Thai',
   'question': 'What can I

In [61]:
# One row per judged question: keep the raw (record, answer, evaluation)
# columns and append the fields extracted from the record and verdict dicts.
df_eval = pd.DataFrame(
    evaluations,
    columns=['record', 'answer', 'evaluation'],
).assign(
    id=lambda frame: frame['record'].apply(lambda rec: rec['id']),
    question=lambda frame: frame['record'].apply(lambda rec: rec['question']),
    relevance=lambda frame: frame['evaluation'].apply(lambda verdict: verdict['Relevance']),
    explanation=lambda frame: frame['evaluation'].apply(lambda verdict: verdict['Explanation']),
)

In [63]:
# Drop the raw dict columns now that their fields have been extracted.
# errors='ignore' makes the cell safe to re-run (a second `del` would raise KeyError).
df_eval = df_eval.drop(columns=['record', 'evaluation'], errors='ignore')

In [65]:
df_eval

Unnamed: 0,answer,id,question,relevance,explanation
0,The main ingredients used in the Vegetable Pad...,Vegetable Pad Thai,What are the main ingredients used in the Vege...,RELEVANT,The generated answer directly lists the main i...
1,The Vegetable Pad Thai takes 25 minutes to coo...,Vegetable Pad Thai,How long does it take to cook the Vegetable Pa...,RELEVANT,The generated answer directly addresses the qu...
2,"In the Vegetable Pad Thai recipe, you can subs...",Vegetable Pad Thai,What can I use instead of tofu in the Vegetabl...,RELEVANT,The generated answer directly addresses the qu...
3,The Vegetable Pad Thai belongs to Thai cuisine.,Vegetable Pad Thai,What type of cuisine does the Vegetable Pad Th...,RELEVANT,The generated answer directly addresses the qu...
4,The flavor profile of Vegetable Pad Thai is sa...,Vegetable Pad Thai,Can you describe the flavor profile of the Veg...,RELEVANT,The generated answer provides a detailed descr...
...,...,...,...,...,...
485,"To make Vegetable Manchurian Gravy, you will n...",Vegetable Manchurian Gravy,What are the main ingredients needed to make V...,RELEVANT,The generated answer lists the main ingredient...
486,The Vegetable Manchurian Gravy takes 35 minute...,Vegetable Manchurian Gravy,How long does it take to cook the Vegetable Ma...,RELEVANT,The generated answer directly addresses the qu...
487,"Yes, you can use tofu cubes instead of vegetab...",Vegetable Manchurian Gravy,Can I use tofu cubes instead of vegetable ball...,RELEVANT,The generated answer directly addresses the qu...
488,The cooking difficulty level for Vegetable Man...,Vegetable Manchurian Gravy,What is the cooking difficulty level for Veget...,RELEVANT,The generated answer directly addresses the qu...


In [67]:
df_eval.relevance.value_counts()

relevance
RELEVANT           474
PARTLY_RELEVANT     13
NON_RELEVANT         3
Name: count, dtype: int64

In [74]:
df_eval[df_eval.relevance=='NON_RELEVANT']

Unnamed: 0,answer,id,question,relevance,explanation
185,The Mushroom Stroganoff belongs to Italian cui...,Mushroom Stroganoff,What type of cuisine does the Mushroom Strogan...,NON_RELEVANT,The generated answer incorrectly classifies Mu...
224,The context provided does not include a vegan ...,Pad Thai with Tofu,What are the main ingredients in this vegan Pa...,NON_RELEVANT,The generated answer does not address the ques...
435,The main cooking method used for the idlis is ...,Idli with Coconut Chutney,What is the main cooking method used for the i...,NON_RELEVANT,The generated answer does not address the ques...


In [79]:
df_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.967347
PARTLY_RELEVANT    0.026531
NON_RELEVANT       0.006122
Name: proportion, dtype: float64

In [81]:
df_eval.to_csv('../Data/rag-eval-gpt-4o-mini.csv', index=False)

#### gpt-3.5-turbo

In [82]:
import json

# LLM-as-judge evaluation of the gpt-3.5-turbo RAG answers.
# Every ground-truth question is judged. The earlier `if id in evaluations`
# guard is gone: it compared a dish-name string with a list of tuples (and with
# the wrong list), so it could never skip anything, and `id` shadowed the builtin.
evaluations_gpt35turbo = []

for record in tqdm(ground_truth):
    question = record['question']
    answer_llm = rag(question, model='gpt-3.5-turbo')

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    # The judge runs on llm()'s default model and is asked to reply in JSON.
    evaluation = llm(prompt)
    evaluation = json.loads(evaluation)

    evaluations_gpt35turbo.append((record, answer_llm, evaluation))

  0%|          | 0/490 [00:00<?, ?it/s]

In [83]:
# One row per judged question, with fields extracted from the record and verdict dicts.
df_eval_35turbo = pd.DataFrame(evaluations_gpt35turbo, columns=['record', 'answer', 'evaluation'])

df_eval_35turbo['id'] = df_eval_35turbo.record.apply(lambda d: d['id'])
df_eval_35turbo['question'] = df_eval_35turbo.record.apply(lambda d: d['question'])
df_eval_35turbo['relevance'] = df_eval_35turbo.evaluation.apply(lambda d: d['Relevance'])
df_eval_35turbo['explanation'] = df_eval_35turbo.evaluation.apply(lambda d: d['Explanation'])

# Drop the raw dict columns, as is done for df_eval, so the CSV written from
# this frame has the same columns as the gpt-4o-mini one.
df_eval_35turbo = df_eval_35turbo.drop(columns=['record', 'evaluation'])

In [84]:
df_eval_35turbo.relevance.value_counts()

relevance
RELEVANT           459
PARTLY_RELEVANT     29
NON_RELEVANT         2
Name: count, dtype: int64

In [85]:
df_eval_35turbo.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.936735
PARTLY_RELEVANT    0.059184
NON_RELEVANT       0.004082
Name: proportion, dtype: float64

In [None]:
df_eval_35turbo.to_csv('../Data/rag-eval-gpt35turbo.csv', index=False)