In [8]:
## Data Set Ingestion and Cleaning

In [9]:
import pandas as pd

In [10]:
df=pd.read_csv('../data/data.csv')

In [None]:
## Data Set Indexing

In [4]:
! wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/refs/heads/main/minsearch.py

--2025-09-12 11:19:24--  https://raw.githubusercontent.com/alexeygrigorev/minsearch/refs/heads/main/minsearch.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4273 (4.2K) [text/plain]
Saving to: ‘minsearch.py’


2025-09-12 11:19:24 (58.3 MB/s) - ‘minsearch.py’ saved [4273/4273]



In [11]:
documents=df.to_dict(orient='records')

In [12]:
import math  # imported once here instead of once per float value inside the loop


def _clean_value(value):
    """Replace missing-data markers in a single record value.

    Returns:
        ``0`` for ``None``, ``""`` for a float NaN, and ``value`` itself
        for everything else.
    """
    if value is None:
        # The previous form `"" if isinstance(v, str) else 0` was evaluated
        # on a value already known to be None, so it could only yield 0.
        return 0
    if isinstance(value, float) and math.isnan(value):
        return ""
    return value


# Rebuild every record with missing values normalised, then rebind
# `documents` to the cleaned list.
cleaned_docs = [
    {key: _clean_value(value) for key, value in doc.items()}
    for doc in documents
]

documents = cleaned_docs

In [13]:
import minsearch



In [14]:
# Columns registered with minsearch as text fields: the food name, every
# nutrient column and the allergen list. `id` is registered as a keyword field.
_TEXT_FIELDS = [
    'food',
    'serving_size_g',
    'calories_kcal',
    'protein_g',
    'fat_g',
    'carbohydrates_g',
    'vitamin_a_mg',
    'vitamin_b6_mg',
    'vitamin_b12_mg',
    'vitamin_c_mg',
    'vitamin_d_mg',
    'vitamin_e_mg',
    'calcium_mg',
    'iron_mg',
    'potassium_mg',
    'magnesium_mg',
    'selenium_mg',
    'zinc_mg',
    'iodine_mg',
    'allergens',
]

index = minsearch.Index(text_fields=_TEXT_FIELDS, keyword_fields=['id'])

In [15]:
index.fit(documents)

<minsearch.Index at 0x735e45fe20f0>

In [16]:
## RAG Flow

In [19]:
import os
!pip install openai

In [23]:
from openai import OpenAI
client = OpenAI()

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [501]:
def search(query):
    """Query the global ``index`` with no filters and no field boosts.

    Args:
        query: free-text search string.

    Returns:
        Whatever ``index.search`` returns for the query.
    """
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={},
    )

In [502]:
# Top-level prompt sent to the LLM: the user's question followed by the
# retrieved food records as context. Filled in by build_prompt.
prompt_template = """
You are a nutrition assistant. Answer the QUESTION based on the CONTEXT from the food database. 
QUESTION: {question}
CONTEXT: {context}
""".strip()

# Rendering of one food record inside the context. build_prompt fills the
# placeholders with str.format(**doc), so every key named here must be
# present in the record.
entry_template = """
food: {food}
serving_size_g: {serving_size_g}
calories_kcal: {calories_kcal}
protein_g: {protein_g}
fat_g: {fat_g}
carbohydrates_g: {carbohydrates_g}
vitamin_a_mg: {vitamin_a_mg}
vitamin_b6_mg: {vitamin_b6_mg}
vitamin_b12_mg: {vitamin_b12_mg}
vitamin_c_mg: {vitamin_c_mg}
vitamin_d_mg: {vitamin_d_mg}
vitamin_e_mg: {vitamin_e_mg}
calcium_mg: {calcium_mg}
iron_mg: {iron_mg}
potassium_mg: {potassium_mg}
magnesium_mg: {magnesium_mg}
selenium_mg: {selenium_mg}
zinc_mg: {zinc_mg}
iodine_mg: {iodine_mg}
allergens: {allergens}
""".strip()


def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved records.

    Each record is formatted with ``entry_template`` and followed by a blank
    line; the concatenation is inserted, together with the question, into
    ``prompt_template``. Leading and trailing whitespace is stripped.

    Args:
        query: the user's question.
        search_results: iterable of record dicts providing every key that
            ``entry_template`` references.

    Returns:
        The finished prompt string.
    """
    context = "".join(
        entry_template.format(**record) + "\n\n" for record in search_results
    )
    return prompt_template.format(question=query, context=context).strip()


In [503]:
def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: full prompt text.
        model: model name passed to the chat-completions call.

    Returns:
        The ``content`` of the first choice's message.
    """
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(model=model, messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [504]:
def rag(query, model='gpt-4o-mini'):
    """Answer ``query`` with retrieval-augmented generation.

    Retrieves records with ``search``, renders them with ``build_prompt``
    and forwards the prompt to ``llm`` using ``model``.

    Returns:
        The answer text returned by ``llm``.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved), model=model)

In [505]:
question = 'Which meat has no allergen and have less calories'
answer = rag(question)
print(answer)

Based on the provided context, the meat with no allergens and the least calories is **Goat meat** with 109 calories per 100 grams.


In [472]:
## Retrieval evaluation

In [506]:
df_question = pd.read_csv('../data/ground-truth-retrieval.csv')

In [507]:
df_question.head()

Unnamed: 0,id,question
0,1,What is the protein content in 100 grams of ra...
1,1,How many calories are there in a 100-gram serv...
2,1,Can you tell me the fat amount present in 100 ...
3,1,"Is there any vitamin C in raw chicken breast, ..."
4,1,What are the main allergens associated with ra...


In [508]:
ground_truth = df_question.to_dict(orient='records')

In [509]:
ground_truth[0]

{'id': 1,
 'question': 'What is the protein content in 100 grams of raw, skinless chicken breast?'}

In [510]:
def hit_rate(relevance_total):
    """Fraction of queries whose result list contains the expected document.

    Args:
        relevance_total: one list of booleans per query; an entry is ``True``
            where the result at that rank is the expected document.

    Returns:
        Share of queries with at least one ``True`` entry, or ``0.0`` when no
        queries were evaluated.
    """
    if len(relevance_total) == 0:
        # Nothing evaluated: report 0.0 rather than dividing by zero.
        return 0.0

    hits = sum(1 for line in relevance_total if True in line)
    return hits / len(relevance_total)

def mrr(relevance_total):
    """Mean reciprocal rank of the first relevant result per query.

    Args:
        relevance_total: one list of booleans per query; an entry is ``True``
            where the result at that rank is the expected document.

    Returns:
        Average over queries of ``1 / rank`` of the first ``True`` entry
        (ranks start at 1; a query with no ``True`` contributes 0), or
        ``0.0`` when no queries were evaluated.
    """
    if len(relevance_total) == 0:
        # Nothing evaluated: report 0.0 rather than dividing by zero.
        return 0.0

    total_score = 0.0
    for line in relevance_total:
        for rank, is_relevant in enumerate(line, start=1):
            if is_relevant:
                total_score += 1 / rank
                # Only the first relevant hit counts; without this break a
                # list with several True entries would score above 1.
                break

    return total_score / len(relevance_total)

In [511]:
def minsearch_search(query, boost=None):
    """Return the top 10 results for ``query`` from the global ``index``.

    Args:
        query: free-text search string.
        boost: optional mapping of field name to boost weight; when omitted
            an empty mapping is passed, as before.

    Returns:
        Whatever ``index.search`` returns for the query.
    """
    if boost is None:
        boost = {}

    return index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

In [512]:
def evaluate(ground_truth, search_function):
    """Compute retrieval metrics for ``search_function`` over ``ground_truth``.

    Args:
        ground_truth: iterable of records, each with an ``'id'`` key holding
            the expected document id.
        search_function: callable that receives a whole ground-truth record
            and returns result dicts carrying an ``'id'`` key.

    Returns:
        Dict with ``'hit_rate'`` and ``'mrr'`` computed from the per-query
        relevance lists.
    """
    def _relevance(record):
        # Read the expected id before searching, then flag matching results.
        expected_id = record['id']
        return [hit['id'] == expected_id for hit in search_function(record)]

    relevance_total = [_relevance(record) for record in tqdm(ground_truth)]

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [513]:
from tqdm.auto import tqdm

In [514]:
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/2480 [00:00<?, ?it/s]

{'hit_rate': 0.6471774193548387, 'mrr': 0.5518553187403998}

In [515]:
## Finding the best parameters

In [517]:
df_question

Unnamed: 0,id,question
0,1,What is the protein content in 100 grams of ra...
1,1,How many calories are there in a 100-gram serv...
2,1,Can you tell me the fat amount present in 100 ...
3,1,"Is there any vitamin C in raw chicken breast, ..."
4,1,What are the main allergens associated with ra...
...,...,...
2475,496,What is the total calorie content in a 100-gra...
2476,496,How much protein can I get from 100 grams of h...
2477,496,What is the fat content in hard sheep cheese p...
2478,496,Does hard sheep cheese contain any vitamin C p...


In [525]:
# The first 100 ground-truth rows form the validation split; all remaining
# rows go to df_test.
df_validation = df_question.iloc[:100]
df_test = df_question.iloc[100:]

In [526]:
import random

def simple_optimize(param_ranges, objective_function, n_iterations=10, seed=None):
    """Random search that maximises ``objective_function``.

    Args:
        param_ranges: mapping of parameter name to ``(min_val, max_val)``.
            When both bounds are ints an int is drawn with ``randint``
            (inclusive); otherwise a float is drawn with ``uniform``.
        objective_function: callable taking a ``{name: value}`` dict and
            returning a score; higher is better.
        n_iterations: number of random parameter sets to try.
        seed: optional seed for a private random generator, making the search
            reproducible. When ``None`` the module-level ``random`` state is
            used, as before.

    Returns:
        ``(best_params, best_score)``. With ``n_iterations == 0`` this is
        ``(None, float('-inf'))``.
    """
    rng = random if seed is None else random.Random(seed)

    best_params = None
    best_score = float('-inf')  # maximising: start below any real score

    for _ in range(n_iterations):
        # Draw one random value per parameter.
        current_params = {}
        for param, (min_val, max_val) in param_ranges.items():
            if isinstance(min_val, int) and isinstance(max_val, int):
                current_params[param] = rng.randint(min_val, max_val)
            else:
                current_params[param] = rng.uniform(min_val, max_val)

        current_score = objective_function(current_params)

        # Keep the highest-scoring parameter set seen so far.
        if current_score > best_score:
            best_score = current_score
            best_params = current_params

    return best_params, best_score

In [527]:
gt_val = df_validation.to_dict(orient='records')

In [528]:
evaluate(gt_val, lambda q: minsearch_search(q['question']))

  0%|          | 0/100 [00:00<?, ?it/s]

{'hit_rate': 0.49, 'mrr': 0.3630357142857143}

In [529]:
def minsearch_search(query, boost=None):
    """Return the top 10 results for ``query`` from the global ``index``.

    Args:
        query: free-text search string.
        boost: optional mapping of field name to boost weight; an empty
            mapping is used when it is ``None``.

    Returns:
        Whatever ``index.search`` returns for the query.
    """
    boost_dict = {} if boost is None else boost
    return index.search(
        query=query,
        filter_dict={},
        boost_dict=boost_dict,
        num_results=10,
    )


In [530]:
documents[0]

{'food': 'Chicken breast, raw, skinless',
 'serving_size_g': '100',
 'calories_kcal': '120',
 'protein_g': '22.5',
 'fat_g': '2.6',
 'carbohydrates_g': '0',
 'vitamin_a_mg': '0.01',
 'vitamin_b6_mg': '0.60',
 'vitamin_b12_mg': '0.0003',
 'vitamin_c_mg': '0',
 'vitamin_d_mg': '0',
 'vitamin_e_mg': '0',
 'calcium_mg': '12',
 'iron_mg': '0.4',
 'potassium_mg': '256',
 'magnesium_mg': '27',
 'selenium_mg': '0.027',
 'zinc_mg': '1.0',
 'iodine_mg': '0',
 'allergens': 'none',
 'id': 1}

In [531]:
# Search interval for the boost weight of every text field; all fields share
# the same (min, max) bounds.
_BOOST_BOUNDS = (0.0, 3.0)

param_ranges = {
    field: _BOOST_BOUNDS
    for field in (
        'food',
        'serving_size_g',
        'calories_kcal',
        'protein_g',
        'fat_g',
        'carbohydrates_g',
        'vitamin_a_mg',
        'vitamin_b6_mg',
        'vitamin_b12_mg',
        'vitamin_c_mg',
        'vitamin_d_mg',
        'vitamin_e_mg',
        'calcium_mg',
        'iron_mg',
        'potassium_mg',
        'magnesium_mg',
        'selenium_mg',
        'zinc_mg',
        'iodine_mg',
        'allergens',
    )
}

def objective(boost_params):
    """Score one boost configuration on the validation questions.

    Args:
        boost_params: mapping of field name to boost weight, forwarded to
            ``minsearch_search``.

    Returns:
        The ``'mrr'`` value reported by ``evaluate`` over ``gt_val``.
    """
    metrics = evaluate(
        gt_val,
        lambda record: minsearch_search(record['question'], boost_params),
    )
    return metrics['mrr']

In [532]:
simple_optimize(param_ranges, objective, n_iterations=20)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

({'food': 2.8408975576409867,
  'serving_size_g': 1.966708438529639,
  'calories_kcal': 2.0254924509606607,
  'protein_g': 0.910326687901147,
  'fat_g': 1.8873359522077655,
  'carbohydrates_g': 2.187337604558727,
  'vitamin_a_mg': 0.7143971918707978,
  'vitamin_b6_mg': 1.6280177756983494,
  'vitamin_b12_mg': 0.7650785909265626,
  'vitamin_c_mg': 0.08901307449600471,
  'vitamin_d_mg': 2.4507684816315445,
  'vitamin_e_mg': 2.4211606137489596,
  'calcium_mg': 0.47441875285394974,
  'iron_mg': 0.21001480682169194,
  'potassium_mg': 2.0032575358512372,
  'magnesium_mg': 1.4509042101782008,
  'selenium_mg': 1.4068406394683315,
  'zinc_mg': 2.639624309646852,
  'iodine_mg': 0.6247916087548824,
  'allergens': 2.6357239358095104},
 0.5369999999999999)

In [533]:
def minsearch_improved(query):
    """Return the top 10 results for ``query`` using the tuned field boosts.

    The boost weights are the best parameters reported by ``simple_optimize``
    in this notebook, rounded to two decimals. ``vitamin_a_mg`` and
    ``vitamin_b6_mg`` are 0.71 and 1.63 so that they match the rounded
    optimiser output (0.7144 and 1.6280).

    Args:
        query: free-text search string.

    Returns:
        Whatever ``index.search`` returns for the query.
    """
    boost = {
        'food': 2.84,
        'serving_size_g': 1.97,
        'calories_kcal': 2.03,
        'protein_g': 0.91,
        'fat_g': 1.89,
        'carbohydrates_g': 2.19,
        'vitamin_a_mg': 0.71,
        'vitamin_b6_mg': 1.63,
        'vitamin_b12_mg': 0.77,
        'vitamin_c_mg': 0.09,
        'vitamin_d_mg': 2.45,
        'vitamin_e_mg': 2.42,
        'calcium_mg': 0.47,
        'iron_mg': 0.21,
        'potassium_mg': 2.00,
        'magnesium_mg': 1.45,
        'selenium_mg': 1.41,
        'zinc_mg': 2.64,
        'iodine_mg': 0.62,
        'allergens': 2.64,
    }

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

# NOTE(review): `ground_truth` is the full question set, so it includes the
# first 100 rows (`gt_val`) that the boosts were tuned on; `df_test` holds the
# remaining rows if a score on unseen questions is wanted.
evaluate(ground_truth, lambda q: minsearch_improved(q['question']))

  0%|          | 0/2480 [00:00<?, ?it/s]

{'hit_rate': 0.8891129032258065, 'mrr': 0.7286029505888372}

In [None]:
## RAG evaluation

In [534]:
# Prompt for the LLM judge: it receives a question and the generated answer
# and is asked to reply with a JSON object holding "Relevance" and
# "Explanation". The doubled braces are str.format escapes for literal { }.
prompt2_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [535]:
len(ground_truth)

2480

In [540]:
record = ground_truth[0]
question = record['question']
answer_llm=rag(question)

In [541]:
print(answer_llm)

The protein content in 100 grams of raw, skinless chicken breast is not provided in the context you've given. However, generally, raw, skinless chicken breast contains approximately 31 grams of protein per 100 grams. If you need further assistance or more specific data, please let me know!


In [542]:
prompt = prompt2_template.format(question=question, answer_llm=answer_llm)
print(prompt)

You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: What is the protein content in 100 grams of raw, skinless chicken breast?
Generated Answer: The protein content in 100 grams of raw, skinless chicken breast is not provided in the context you've given. However, generally, raw, skinless chicken breast contains approximately 31 grams of protein per 100 grams. If you need further assistance or more specific data, please let me know!

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}


In [543]:
import json

In [544]:
df_sample = df_question.sample(n=200, random_state=1)

In [545]:
sample = df_sample.to_dict(orient='records')

In [556]:
# Generate an answer for every sampled question and have the judge model rate
# it. Each entry is a (record, answer, evaluation) tuple.
evaluations = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question)

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    raw_evaluation = llm(prompt)
    try:
        evaluation = json.loads(raw_evaluation)
    except json.JSONDecodeError:
        # If the judge reply is not valid JSON, keep the raw text under a
        # distinct label instead of aborting the run and losing the answers
        # collected so far.
        evaluation = {'Relevance': 'PARSE_ERROR', 'Explanation': raw_evaluation}

    evaluations.append((record, answer_llm, evaluation))

  0%|          | 0/200 [00:00<?, ?it/s]

In [557]:
# Flatten the (record, answer, evaluation) tuples into one frame with the
# columns answer, id, question, relevance, explanation.
df_eval = (
    pd.DataFrame(evaluations, columns=['record', 'answer', 'evaluation'])
    .assign(
        id=lambda frame: frame['record'].apply(lambda d: d['id']),
        question=lambda frame: frame['record'].apply(lambda d: d['question']),
        relevance=lambda frame: frame['evaluation'].apply(lambda d: d['Relevance']),
        explanation=lambda frame: frame['evaluation'].apply(lambda d: d['Explanation']),
    )
    .drop(columns=['record', 'evaluation'])
)

In [558]:
df_eval.relevance.value_counts()

relevance
RELEVANT           129
NON_RELEVANT        43
PARTLY_RELEVANT     28
Name: count, dtype: int64

In [548]:
df_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.660
NON_RELEVANT       0.175
PARTLY_RELEVANT    0.165
Name: proportion, dtype: float64

In [549]:
df_eval.to_csv('../data/rag-eval-gpt-4o-mini.csv', index=False)

In [550]:
df_eval[df_eval.relevance == 'NON_RELEVANT']

Unnamed: 0,answer,id,question,relevance,explanation
2,The amount of potassium found in 100 grams of ...,262,What is the amount of potassium found in 100 g...,NON_RELEVANT,The generated answer does not provide any spec...
3,The provided context does not include specific...,272,What is the calorie content per 100 grams of s...,NON_RELEVANT,The generated answer does not provide any info...
4,The provided context does not include informat...,303,How much protein is found in a 100 gram servin...,NON_RELEVANT,The generated answer explicitly states a lack ...
15,The calorie content per 100 grams of fontina c...,488,What is the calorie content per 100 grams of f...,NON_RELEVANT,The generated answer does not provide any info...
24,The potassium level in a 100-gram serving of w...,415,What is the potassium level in a 100-gram serv...,NON_RELEVANT,The generated answer states that the potassium...
38,The provided context does not contain specific...,280,How much fat is present in 100 grams of blue c...,NON_RELEVANT,The generated answer does not address the ques...
39,The provided context does not contain informat...,265,What minerals are present in 100 grams of toma...,NON_RELEVANT,The generated answer explicitly states that it...
43,The context provided does not include informat...,218,What is the amount of vitamin C found in 100 g...,NON_RELEVANT,The generated answer does not address the ques...
46,The context provided does not include informat...,129,What are the main vitamins present in 100 gram...,NON_RELEVANT,The generated answer does not provide any rele...
52,The provided context does not include informat...,358,What is the total calorie content in 100 grams...,NON_RELEVANT,The generated answer states that it cannot pro...


In [551]:
# Same evaluation as for the default model, but answers are generated with
# gpt-4o; the judge call still uses llm()'s default model.
evaluations_gpt4o = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question, model='gpt-4o')

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    raw_evaluation = llm(prompt)
    try:
        evaluation = json.loads(raw_evaluation)
    except json.JSONDecodeError:
        # If the judge reply is not valid JSON, keep the raw text under a
        # distinct label instead of aborting the run and losing the answers
        # collected so far.
        evaluation = {'Relevance': 'PARSE_ERROR', 'Explanation': raw_evaluation}

    evaluations_gpt4o.append((record, answer_llm, evaluation))

  0%|          | 0/200 [00:00<?, ?it/s]

In [552]:
# Flatten the gpt-4o (record, answer, evaluation) tuples into one frame with
# the columns answer, id, question, relevance, explanation.
df_eval = (
    pd.DataFrame(evaluations_gpt4o, columns=['record', 'answer', 'evaluation'])
    .assign(
        id=lambda frame: frame['record'].apply(lambda d: d['id']),
        question=lambda frame: frame['record'].apply(lambda d: d['question']),
        relevance=lambda frame: frame['evaluation'].apply(lambda d: d['Relevance']),
        explanation=lambda frame: frame['evaluation'].apply(lambda d: d['Explanation']),
    )
    .drop(columns=['record', 'evaluation'])
)


In [553]:
df_eval.relevance.value_counts()

relevance
RELEVANT           148
PARTLY_RELEVANT     37
NON_RELEVANT        15
Name: count, dtype: int64

In [554]:
df_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.740
PARTLY_RELEVANT    0.185
NON_RELEVANT       0.075
Name: proportion, dtype: float64

In [559]:
df_eval.to_csv('../data/rag-eval-gpt-4o.csv', index=False)