# Retrieval Evaluation

This notebook helps you experiment with the restaurant retrieval engine and the RAG query step.

- Ensure Qdrant is running. We will create a new demo collection for this notebook's purposes, because our original collection does not have an id/index column.
- Set your `OPENAI_API_KEY` in the environment if you want to use the LLM step.
- This notebook evaluates retrieval using hit rate and MRR (mean reciprocal rank).

In [1]:
# Setup: make the project's `src` directory importable and set optional env vars
import os, sys
from pathlib import Path

# A Jupyter kernel has no __file__, so anchor on the working directory instead.
# The notebook sits in PROJECT_ROOT / "notebooks", hence the root is cwd's parent.
PROJECT_ROOT = Path.cwd().parent
SRC_PATH = PROJECT_ROOT.joinpath("src")
if sys.path.count(str(SRC_PATH)) == 0:
    sys.path.insert(0, str(SRC_PATH))

# Reduce tokenizer threads warning noise for fastembed; a value the user
# already exported is left untouched.
if "TOKENIZERS_PARALLELISM" not in os.environ:
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Optionally set your API key for LLM usage here (prefer using a .env or shell env)
# os.environ["OPENAI_API_KEY"] = "<your_key_here>"

print("Project root:", PROJECT_ROOT)
print("Using src path:", SRC_PATH)


Project root: /Users/anupamgupta/Desktop/Github projects/LLM-based-Agentic-RAG
Using src path: /Users/anupamgupta/Desktop/Github projects/LLM-based-Agentic-RAG/src


In [2]:
# Prompt used to generate ground-truth questions for one merged restaurant/menu record.
# The single-brace fields are filled with `str.format(**record)`, so every field name must
# be a key of the record dict; the doubled braces around the JSON example are format
# escapes that render as literal `{` and `}`.
prompt_template = """
You emulate a user of our restaurant assistant application.
Formulate 3 questions this user might ask based on a provided restaurant and menu item details.
Make the questions specific to this restaurant and its relevant details including menu item details and location.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as fewer words as possible from the record. 

The record:

index : {index_column}
restaurant_name: {name_x}
score: {score}
ratings_count: {ratings}
restaurant_category: {category_x}
price_range: {price_range}
full_address: {full_address}
zip_code: {zip_code}
lat: {lat}
lng: {lng}
restaurant_id: {restaurant_id}
menu_category: {category_y}
menu_item_name: {name_y}
description: {description}
item_price: {price}
city: {city}
state: {state}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question3"]}}
""".strip()

In [3]:
# Imports and engine setup

import pandas as pd
from llm_utility import RAGQueryEngine

# Source CSVs live under the project's data directory
restaurants_csv = str(PROJECT_ROOT / "data" / "restaurants.csv")
menus_csv = str(PROJECT_ROOT / "data" / "restaurant-menus.csv")

# Small demo sample: the first 10 rows of each file, de-duplicated
df_rest = pd.read_csv(restaurants_csv, nrows=10).drop_duplicates(subset=["name"])
df_menu = pd.read_csv(menus_csv, nrows=10).drop_duplicates()

# Attach each menu row to its restaurant, then drop the columns not needed downstream.
# Column names shared by both frames receive pandas' default _x / _y suffixes.
df = (
    df_rest
    .merge(df_menu, left_on="id", right_on="restaurant_id", how="inner")
    .drop(columns=["id", "position"])
)

# Capture the "<city>, <two-letter state>" portion of the address
df[["city", "state"]] = df["full_address"].str.extract(
    r",\s*([^,]+?)\s*,\s*([A-Z]{2})\b"
)

# The row index doubles as the document id used for retrieval evaluation
df["index_column"] = df.index
documents = df.to_dict(orient="records")


In [20]:
# Preview the merged restaurant/menu rows, including the derived city, state and index_column
df.head()

Unnamed: 0,name_x,score,ratings,category_x,price_range,full_address,zip_code,lat,lng,restaurant_id,category_y,name_y,description,price,city,state,index_column
0,PJ Fresh (224 Daniel Payne Drive),,,"Burgers, American, Sandwiches",$,"224 Daniel Payne Drive, Birmingham, AL, 35207",35207,33.562365,-86.830703,1,Extra Large Pizza,Extra Large Meat Lovers,Whole pie.,15.99 USD,Birmingham,AL,0
1,PJ Fresh (224 Daniel Payne Drive),,,"Burgers, American, Sandwiches",$,"224 Daniel Payne Drive, Birmingham, AL, 35207",35207,33.562365,-86.830703,1,Extra Large Pizza,Extra Large Supreme,Whole pie.,15.99 USD,Birmingham,AL,1
2,PJ Fresh (224 Daniel Payne Drive),,,"Burgers, American, Sandwiches",$,"224 Daniel Payne Drive, Birmingham, AL, 35207",35207,33.562365,-86.830703,1,Extra Large Pizza,Extra Large Pepperoni,Whole pie.,14.99 USD,Birmingham,AL,2
3,PJ Fresh (224 Daniel Payne Drive),,,"Burgers, American, Sandwiches",$,"224 Daniel Payne Drive, Birmingham, AL, 35207",35207,33.562365,-86.830703,1,Extra Large Pizza,Extra Large BBQ Chicken &amp; Bacon,Whole Pie,15.99 USD,Birmingham,AL,3
4,PJ Fresh (224 Daniel Payne Drive),,,"Burgers, American, Sandwiches",$,"224 Daniel Payne Drive, Birmingham, AL, 35207",35207,33.562365,-86.830703,1,Extra Large Pizza,Extra Large 5 Cheese,Whole pie.,14.99 USD,Birmingham,AL,4


In [4]:
# Fill the prompt template with the first record's fields
prompt = prompt_template.format(**documents[0])

In [14]:
# NOTE(review): `llm` is not defined in any visible cell of this notebook — presumably a
# helper left over from an earlier kernel session. Confirm and define it (or swap in the
# notebook's own question generator) so this cell survives Restart & Run All.
questions = llm(prompt)

In [15]:
import json

# Parse the raw LLM reply; the prompt asks for a JSON object with a "questions" list
json.loads(questions)

{'questions': ['What is the price of the Extra Large Meat Lovers pie at PJ Fresh?',
  'Is the Extra Large Meat Lovers offered as a whole pie at PJ Fresh?',
  "What is PJ Fresh's address in Birmingham, AL 35207?"]}

In [33]:
def generate_questions(doc, model='gpt-5-nano', llm_client=None):
    """Ask a chat model to write evaluation questions for one record.

    Args:
        doc: Mapping whose keys cover every placeholder in ``prompt_template``.
        model: Model name passed to the chat-completions call. Defaults to the
            value that used to be hard-coded here.
        llm_client: Object exposing ``chat.completions.create``. When omitted,
            the notebook-level ``client`` is used.
            NOTE(review): no visible cell creates ``client`` — confirm it is
            set up before calling this without ``llm_client``.

    Returns:
        The message content of the first choice, unparsed. The prompt requests
        JSON, but nothing here validates the reply.
    """
    prompt = prompt_template.format(**doc)

    # Resolve the global lazily so passing an explicit client never touches it.
    active_client = client if llm_client is None else llm_client

    response = active_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

In [6]:
from tqdm.auto import tqdm

In [7]:
# Generated questions keyed by document id. The generation loop skips ids already present,
# so presumably this lives in its own cell to let that loop be re-run without losing progress.
results = {}

In [34]:
# Generate questions for every record that does not have any yet; ids already
# in `results` are skipped, so an interrupted run can be resumed.
for doc in tqdm(documents):
    doc_id = doc['index_column']
    if doc_id not in results:
        questions_raw = generate_questions(doc)
        questions = json.loads(questions_raw)
        results[doc_id] = questions['questions']

  0%|          | 0/10 [00:00<?, ?it/s]

In [35]:
# Flatten {doc_id: [question, ...]} into one (doc_id, question) pair per question
final_results = []

for doc_id, questions in results.items():
    final_results.extend((doc_id, q) for q in questions)

In [36]:
# Spot-check the first (doc_id, question) pair
final_results[0]

(0, 'What is the price of the Extra Large Meat Lovers pie at PJ Fresh?')

In [37]:
# Tabulate the pairs; `id` is the source record's `index_column`, `question` the generated text
df_results = pd.DataFrame(final_results, columns=['id', 'question'])

In [38]:
# Save the ground truth under the project's data directory; index=False leaves the DataFrame index out of the file
df_results.to_csv(PROJECT_ROOT / "data" / "ground-truth-retrieval.csv", index=False)

In [8]:
# Peek at the first lines of the saved ground-truth CSV through the shell
# (`head` is a Unix utility; the path is quoted because it may contain spaces).
retrieval_file_path = PROJECT_ROOT / "data" / "ground-truth-retrieval.csv"
!head "{retrieval_file_path}"

id,question
0,What is the price of the Extra Large Meat Lovers pie at PJ Fresh?
0,"What is PJ Fresh's full address in Birmingham, AL 35207?"
0,Under which menu category is the Extra Large Meat Lovers pizza listed?
1,What is the price of the Extra Large Supreme pizza?
1,Is the Extra Large Supreme a whole pie?
1,"What is the street address and ZIP code for PJ Fresh in Birmingham, AL 35207?"
2,"What is the price of the Extra Large Pepperoni pizza at PJ Fresh in Birmingham, AL?"
2,What is the street address of PJ Fresh?
2,What is the description for the Extra Large Pepperoni item on the menu?


In [9]:
# Reload the saved ground truth from disk and preview it
df_ground_truth = pd.read_csv(retrieval_file_path)
df_ground_truth.head()

Unnamed: 0,id,question
0,0,What is the price of the Extra Large Meat Love...
1,0,"What is PJ Fresh's full address in Birmingham,..."
2,0,Under which menu category is the Extra Large M...
3,1,What is the price of the Extra Large Supreme p...
4,1,Is the Extra Large Supreme a whole pie?


In [10]:
# One dict per row, with 'id' and 'question' keys, for the evaluation loop
ground_truth = df_ground_truth.to_dict(orient='records')

In [11]:
def hit_rate(relevance_total):
    """Fraction of queries whose result list contains the expected document.

    Args:
        relevance_total: One sequence of booleans per query, in rank order;
            ``True`` marks the position of the expected document.

    Returns:
        Share of queries with at least one ``True`` entry, as a float.
        Returns ``0.0`` when no queries were evaluated instead of raising
        ``ZeroDivisionError``.
    """
    if len(relevance_total) == 0:
        return 0.0

    hits = sum(1 for line in relevance_total if True in line)
    return hits / len(relevance_total)

def mrr(relevance_total):
    """Mean reciprocal rank of the first relevant result per query.

    Args:
        relevance_total: One sequence of booleans per query, in rank order;
            ``True`` marks a relevant result.

    Returns:
        Average over queries of ``1 / rank`` for the first relevant result
        (rank starts at 1); a query with no relevant result contributes 0.
        Returns ``0.0`` when no queries were evaluated.
    """
    if len(relevance_total) == 0:
        return 0.0

    total_score = 0.0

    for line in relevance_total:
        for rank, flag in enumerate(line, start=1):
            if flag == True:
                total_score += 1 / rank
                # Only the first hit counts; without this, a list with several
                # hits would add several reciprocal ranks and could exceed 1.
                break

    return total_score / len(relevance_total)

In [12]:
from restaurant_retreival_engine import RestaurantVectorStore, EmbeddingService, DataLoader, RestaurantSearchEngine
from typing import Any
import os

# NOTE(review): `Any` and `os` are not used in this cell as far as is visible here.

# Build the search engine's collaborators; the store and embedding service use their defaults.
# NOTE(review): presumably the vector store connects to the running Qdrant instance — confirm in the engine module.
vector_store = RestaurantVectorStore()
embedding = EmbeddingService()
data_loader = DataLoader(
    restaurants_csv,
    menus_csv
)

# Separate demo collection for this evaluation, not the original one
collection_name = "rag-eval-temp"
engine = RestaurantSearchEngine(vector_store, embedding, data_loader)
# Per the recorded cell output, an existing collection with this name is deleted and recreated
engine.initialize_collection(collection_name)
# Index the in-memory `documents`, whose records carry the `index_column` id
engine.index_data(collection_name, documents)

Collection 'rag-eval-temp' already exists. Deleting...
Creating collection 'rag-eval-temp'...


<Thread(Thread-5 (_batch_upsert), started daemon 6306836480)>

Indexing → rag-eval-temp:   0%|          | 0/10 [00:00<?, ?pts/s]

✅ Finished upserting 10 points into 'rag-eval-temp'


In [26]:
def evaluate(ground_truth, search_function):
    """Score a search function against the ground-truth questions.

    Args:
        ground_truth: Iterable of records, each with an 'id' key naming the
            document that should be retrieved.
        search_function: Callable that takes one ground-truth record and
            returns an object whose ``points`` each expose
            ``payload['index_column']``.

    Returns:
        Dict with 'hit_rate' and 'mrr' computed over all records.
    """
    def _relevance(record):
        # One boolean per returned point, in rank order
        expected_id = record['id']
        response = search_function(record)
        return [
            point.payload['index_column'] == expected_id
            for point in response.points
        ]

    relevance_total = [_relevance(record) for record in tqdm(ground_truth)]

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [27]:
# Run the evaluation: each ground-truth record's question text is the query against the demo collection
evaluate(ground_truth, lambda q: engine.search(q['question'], collection_name = collection_name))

  0%|          | 0/30 [00:00<?, ?it/s]

{'hit_rate': 0.8, 'mrr': 0.6094444444444443}