# Review Rec Bot

# Set up

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from loguru import logger
import json

import sys
sys.path.insert(0, '..')

In [3]:
import os

from tqdm.notebook import tqdm
import pandas as pd

In [4]:
import nest_asyncio

nest_asyncio.apply()

In [5]:
from dotenv import load_dotenv

load_dotenv()

True

# Arguments

In [6]:
from src.run.args import RunInputArgs

# Run-level switches for this experiment. The object is handed to `cfg.init(ARGS)` in the
# "Load config" section and is read directly by a few cells in this notebook.
ARGS = RunInputArgs(
    EXPERIMENT_NAME="Review Rec Bot - Yelp Review Rec Bot",
    RUN_NAME="007_bm25_standalone",
    RUN_DESCRIPTION="""
# Retriever - Test BM25

## Changelog
Compared to 006:
- Use BM25 in replacement of Vector Retriever
""",
    # When True, only the first 20 input rows are used (see the `input_data` cell).
    TESTING=False,
    # NOTE(review): LOG_TO_MLFLOW / OBSERVABILITY / DEBUG are consumed by project code
    # (`src.run.cfg`) that is not visible in this notebook — confirm their exact effect there.
    LOG_TO_MLFLOW=True,
    OBSERVABILITY=True,
    # When False and the Qdrant collection already holds points, the indexing cell loads
    # the cached `nodes` pickle instead of re-running the ingestion pipeline.
    RECREATE_INDEX=False,
    # NOTE(review): presumably read by the evaluators in `src.run.eval` to decide whether
    # to regenerate the synthetic eval datasets — confirm.
    RECREATE_RETRIEVAL_EVAL_DATASET=False,
    RECREATE_RESPONSE_EVAL_DATASET=False,
    DEBUG=False,
)

ARGS

{
  "EXPERIMENT_NAME": "Review Rec Bot - Yelp Review Rec Bot",
  "RUN_NAME": "007_bm25_standalone",
  "RUN_DESCRIPTION": "\n# Retriever - Test BM25\n\n## Changelog\nCompared to 006:\n- Use BM25 in replacement of Vector Retriever\n",
  "TESTING": false,
  "DEBUG": false,
  "OBSERVABILITY": true,
  "LOG_TO_MLFLOW": true,
  "RECREATE_INDEX": false,
  "RECREATE_RETRIEVAL_EVAL_DATASET": false,
  "RECREATE_RESPONSE_EVAL_DATASET": false
}

# Load config

In [7]:
from src.run.cfg import RunConfig

In [8]:
cfg = RunConfig()
cfg.init(ARGS)

[32m2024-08-09 08:40:54.279[0m | [1mINFO    [0m | [36msrc.run.cfg[0m:[36minit[0m:[36m125[0m - [1mStarting Observability server with Phoenix...[0m
INFO:phoenix.config:📋 Ensuring phoenix working directory: /home/dvquys/.phoenix


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


[32m2024-08-09 08:41:00.001[0m | [1mINFO    [0m | [36msrc.run.cfg[0m:[36minit[0m:[36m143[0m - [1mSetting up MLflow experiment Review Rec Bot - Yelp Review Rec Bot - run 007_bm25_standalone...[0m
[32m2024-08-09 08:41:00.216[0m | [1mINFO    [0m | [36msrc.run.cfg[0m:[36minit[0m:[36m152[0m - [1mNotebook-generated artifacts are persisted at data/007_bm25_standalone[0m


In [9]:
cfg

  Expected `int` but got `str` - serialized value may not be as expected
  Expected `int` but got `str` - serialized value may not be as expected
  Expected `int` but got `str` - serialized value may not be as expected
  Expected `int` but got `str` - serialized value may not be as expected
  Expected `int` but got `str` - serialized value may not be as expected
  Expected `int` but got `str` - serialized value may not be as expected
  return self.__pydantic_serializer__.to_python(


{
  "args": {
    "EXPERIMENT_NAME": "Review Rec Bot - Yelp Review Rec Bot",
    "RUN_NAME": "007_bm25_standalone",
    "RUN_DESCRIPTION": "\n# Retriever - Test BM25\n\n## Changelog\nCompared to 006:\n- Use BM25 in replacement of Vector Retriever\n",
    "TESTING": false,
    "DEBUG": false,
    "OBSERVABILITY": true,
    "LOG_TO_MLFLOW": true,
    "RECREATE_INDEX": false,
    "RECREATE_RETRIEVAL_EVAL_DATASET": false,
    "RECREATE_RESPONSE_EVAL_DATASET": false
  },
  "app_name": "review_rec_bot",
  "db_collection": "review_rec_bot__huggingface__Snowflake_snowflake_arctic_embed_m_v1_5__005_use_smaller_embedding_model",
  "nodes_persist_fp": "data/005_use_smaller_embedding_model/nodes.pkl",
  "notebook_cache_dp": "data/007_bm25_standalone",
  "data_fp": "../data/yelp_dataset/sample/sample_100_biz/denom_review.parquet",
  "llm_cfg": {
    "llm_provider": "togetherai",
    "llm_model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "embedding_provider": "huggingface",
    "embedd

## Set up logger to collect additional info

In [10]:
def _is_collect_record(record):
    """Return True for log records whose message carries the "[COLLECT]" tag."""
    return "[COLLECT]" in record['message']


# Route every "[COLLECT]"-tagged message to a per-run file under the notebook cache dir.
# The file is opened with mode='w'.
collect_fp = f"{cfg.notebook_cache_dp}/collect.log"
logger.add(collect_fp, filter=_is_collect_record, mode='w')

1

# Load input data

In [11]:
# Read the review table and normalise two columns in one chain:
#   - biz_categories: split the ', '-separated string into a list
#   - date: format the datetime column as a '%Y-%m-%dT%H:%M:%S' string
data = (
    pd.read_parquet(cfg.data_fp)
    .assign(
        biz_categories=lambda frame: frame['biz_categories'].str.split(', '),
        date=lambda frame: frame['date'].dt.strftime('%Y-%m-%dT%H:%M:%S'),
    )
)
logger.info(f"[COLLECT] {len(data)=}")
data.iloc[0]

[32m2024-08-09 08:41:16.934[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1m[COLLECT] len(data)=5240[0m


review_id                                      L0jv8c2FbpWSlfNC6bbUEA
user_id                                        bFPdtzu11Oi0f92EAcjqmg
business_id                                    IDtLPgUrqorrpqSLdfMhZQ
review_stars                                                        5
useful                                                              0
funny                                                               0
cool                                                                0
text                What a great addition to the Funk Zone!  Grab ...
date                                              2016-10-13T22:50:47
biz_name                                         Helena Avenue Bakery
biz_address                                     131 Anacapa St, Ste C
biz_city                                                Santa Barbara
biz_state                                                          CA
biz_postal_code                                                 93101
biz_latitude        

In [12]:
# Quick test runs work on the first 20 rows only; otherwise the full table is used.
input_data = data[:20] if ARGS.TESTING else data
logger.info(f"[COLLECT] {len(input_data)=}")

[32m2024-08-09 08:41:17.052[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1m[COLLECT] len(input_data)=5240[0m


In [13]:
input_data.columns

Index(['review_id', 'user_id', 'business_id', 'review_stars', 'useful',
       'funny', 'cool', 'text', 'date', 'biz_name', 'biz_address', 'biz_city',
       'biz_state', 'biz_postal_code', 'biz_latitude', 'biz_longitude',
       'biz_stars', 'biz_review_count', 'biz_is_open', 'biz_attributes',
       'biz_categories', 'biz_hours'],
      dtype='object')

# Prepare documents

In [14]:
from llama_index.core import Document

# Build one llama-index `Document` per review row.
documents = []
# Metadata fields that stay visible in the embedding / LLM view of a document.
# Every other column is still stored as metadata but listed as excluded below.
embedding_visible_metadata = ["review_stars", "biz_name", "biz_address", "biz_city", "biz_state", "biz_categories"]
excluded_embed_metadata_keys = [k for k in input_data.columns if k not in embedding_visible_metadata]

for _row_idx, row in tqdm(input_data.iterrows(), total=len(input_data)):
    record = row.to_dict()
    text = record['text']
    # The review body is the document text; all remaining columns become metadata.
    # Compare with `!=`: the previous `k not in ('text')` was a substring test against
    # the *string* 'text' (parentheses alone do not make a tuple), which would also have
    # dropped any column whose name happens to be a substring of "text".
    metadata = {k: v for k, v in record.items() if k != 'text'}

    doc = Document(
        text=text,
        metadata=metadata,
        excluded_embed_metadata_keys=excluded_embed_metadata_keys,
        # The same exclusion list is used for the LLM view.
        excluded_llm_metadata_keys=excluded_embed_metadata_keys
    )
    documents.append(doc)

logger.info(f"[COLLECT] {len(documents)=}")

  0%|          | 0/5240 [00:00<?, ?it/s]

[32m2024-08-09 08:41:18.332[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m20[0m - [1m[COLLECT] len(documents)=5240[0m


#### Check document embedding text

In [15]:
from llama_index.core.schema import MetadataMode

In [16]:
document = documents[0]
print(document.get_content(metadata_mode=MetadataMode.EMBED))

review_stars: 5
biz_name: Helena Avenue Bakery
biz_address: 131 Anacapa St, Ste C
biz_city: Santa Barbara
biz_state: CA
biz_categories: ['Food', 'Restaurants', 'Salad', 'Coffee & Tea', 'Breakfast & Brunch', 'Sandwiches', 'Bakeries']

What a great addition to the Funk Zone!  Grab a bite, grab some tastings, life is good. Right next door to the Santa Barbara Wine Collective, in fact it actually shares the same tables.  We had a fabulous savory croissant.


# Set up LLM

In [17]:
llm, embed_model = cfg.setup_llm()

Some weights of BertModel were not initialized from the model checkpoint at Snowflake/snowflake-arctic-embed-m-v1.5 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [18]:
print(cfg.llm_cfg.model_dump_json(indent=2))

{
  "llm_provider": "togetherai",
  "llm_model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
  "embedding_provider": "huggingface",
  "embedding_model_name": "Snowflake/snowflake-arctic-embed-m-v1.5",
  "embedding_model_dim": 768,
  "ollama__host": "192.168.100.14",
  "ollama__port": 11434
}


In [19]:
from llama_index.core import Settings
Settings.embed_model = embed_model
Settings.llm = llm

# Vector Store

In [20]:
import qdrant_client
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore

from src.run.orchestrator import RunOrchestrator

In [21]:
# The sync and async clients both talk to the same local Qdrant endpoint.
_qdrant_endpoint = {"host": "localhost", "port": 6333}
qdrantdb = qdrant_client.QdrantClient(**_qdrant_endpoint)
aqdrantdb = qdrant_client.AsyncQdrantClient(**_qdrant_endpoint)

# Project helper; in the recorded run it reported reusing the existing collection.
RunOrchestrator.setup_db(cfg, qdrantdb)

# Collection info is kept so the indexing cell can check how many points already exist.
db_collection = qdrantdb.get_collection(cfg.db_collection)
vector_store = QdrantVectorStore(
    collection_name=cfg.db_collection,
    client=qdrantdb,
    aclient=aqdrantdb,
    prefer_grpc=True,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

[32m2024-08-09 08:41:26.436[0m | [1mINFO    [0m | [36msrc.run.orchestrator[0m:[36msetup_db[0m:[36m34[0m - [1mUse existing Qdrant collection: review_rec_bot__huggingface__Snowflake_snowflake_arctic_embed_m_v1_5__005_use_smaller_embedding_model[0m
WARNI [llama_index.vector_stores.qdrant.base] Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.


# Index Embeddings

In [22]:
import time
import pickle
from multiprocessing import set_start_method
from llama_index.core.node_parser import SemanticSplitterNodeParser
# from llama_index.core.node_parser import SentenceSplitter

In [23]:
# Chunking strategy: the node-parser class plus the keyword arguments it is built with
# (instantiated as `chunker(**chunker_cfg)` in the ingestion pipeline).
chunker = SemanticSplitterNodeParser
chunker_cfg = dict(
    buffer_size=1,
    breakpoint_percentile_threshold=95,
    embed_model=embed_model,
)

# chunker = SentenceSplitter
# chunker_cfg = {
#     "chunk_size": 512,
#     "chunk_overlap": 10
# }

In [24]:
t0 = time.perf_counter()
# TODO: TO understand the differences between points_count and indexed_vector_counts.
# Here indexed_vector_counts = 0
db_collection_count = db_collection.points_count

if db_collection_count > 0 and ARGS.RECREATE_INDEX == False:
    logger.info(f"Loading index from existing DB...")
    with open(cfg.nodes_persist_fp, 'rb') as f:
        logger.info(f"Loading cached `nodes` at {cfg.nodes_persist_fp}...")
        nodes = pickle.load(f)
else:
    logger.info(f"Creating new DB index...")
    from llama_index.core.extractors import TitleExtractor
    from llama_index.core.ingestion import IngestionPipeline, IngestionCache

    # create the pipeline with transformations
    pipeline = IngestionPipeline(
        transformations=[
            chunker(**chunker_cfg),
            embed_model,
        ],
        vector_store = vector_store
    )

    num_workers = None
    # TODO: I can get the preprocessing completed much quicker if I push the initial steps through a multiprocessing loop and then separately create the embeddings using the built-in batching already provided in the SentenceTransformer encode method.
    # Ref: https://github.com/run-llama/llama_index/issues/10104#issuecomment-1899401584
    # Currently setting num_workers leads to code simple hang
    # Ref: https://github.com/run-llama/llama_index/issues/10104
    # num_workers = os.cpu_count() - 1
    # os.environ['TOKENIZERS_PARALLELISM'] = 'true'
    # set_start_method("spawn", force=True)  # it hangs without this line
    logger.info(f"Running Ingestion Pipeline with {num_workers=}...")
    nodes = await pipeline.arun(documents=documents, num_workers=num_workers, show_progress=True)
    logger.info(f"Persisting nodes to {cfg.nodes_persist_fp}...")
    with open(cfg.nodes_persist_fp, 'wb') as f:
        pickle.dump(nodes, f)
index = VectorStoreIndex.from_vector_store(vector_store, storage_context=storage_context)
t1 = time.perf_counter()

[32m2024-08-09 08:41:26.558[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mLoading index from existing DB...[0m
[32m2024-08-09 08:41:26.559[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1mLoading cached `nodes` at data/005_use_smaller_embedding_model/nodes.pkl...[0m


In [25]:
# Report how long the previous cell took (t0/t1 bracket the load-or-build step) and how many nodes resulted.
elapsed_s = t1 - t0
logger.info(f"Indexing {len(documents)} into VectorStoreIndex took {elapsed_s:,.0f}s")
logger.info(f"[COLLECT] {len(nodes)=}")

[32m2024-08-09 08:41:32.180[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mIndexing 5240 into VectorStoreIndex took 6s[0m
[32m2024-08-09 08:41:32.181[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1m[COLLECT] len(nodes)=10219[0m


# Analyze Chunks

In [26]:
# Eyeball the first five chunks: their metadata and raw text.
for position, chunk in enumerate(nodes[:5], start=1):
    print(f"\n\n==========Node {position}============")
    print(chunk.metadata)
    print(chunk.get_text())



{'review_id': 'L0jv8c2FbpWSlfNC6bbUEA', 'user_id': 'bFPdtzu11Oi0f92EAcjqmg', 'business_id': 'IDtLPgUrqorrpqSLdfMhZQ', 'review_stars': 5, 'useful': 0, 'funny': 0, 'cool': 0, 'date': '2016-10-13T22:50:47', 'biz_name': 'Helena Avenue Bakery', 'biz_address': '131 Anacapa St, Ste C', 'biz_city': 'Santa Barbara', 'biz_state': 'CA', 'biz_postal_code': '93101', 'biz_latitude': 34.4144445, 'biz_longitude': -119.6906718, 'biz_stars': 4.0, 'biz_review_count': 389, 'biz_is_open': 1, 'biz_attributes': {'AcceptsInsurance': None, 'Alcohol': "u'none'", 'Ambience': "{'touristy': False, 'hipster': True, 'romantic': False, 'divey': False, 'intimate': False, 'trendy': True, 'upscale': False, 'classy': False, 'casual': True}", 'BYOB': None, 'BestNights': None, 'BikeParking': 'True', 'BusinessAcceptsBitcoin': None, 'BusinessAcceptsCreditCards': 'True', 'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}", 'ByAppointmentOnly': None, 'Caters': 'True', 'Coa

# Construct Retriever

In [27]:
from llama_index.retrievers.bm25 import BM25Retriever
import Stemmer

# BM25 retriever built over the chunked nodes, returning the top 10 matches per query.
english_stemmer = Stemmer.Stemmer("english")
retriever = BM25Retriever.from_defaults(
    nodes=nodes,
    similarity_top_k=10,
    stemmer=english_stemmer,
    language="english",
)

DEBUG [bm25s] Building index from IDs objects


## Test retrieval

In [28]:
from llama_index.core.response.notebook_utils import display_source_node

In [29]:
query = "where to find some cold brew coffee?"
retrieval_results = await retriever.aretrieve(query)
for node in retrieval_results:
    display_source_node(node, source_length=1000)
    print(node.get_content(metadata_mode=MetadataMode.EMBED))
    print("\n")

**Node ID:** c219b275-7042-42e0-9741-0e4e0e537912<br>**Similarity:** 6.007098197937012<br>**Text:** Pricey ( a ham and cheese croissant was $5.50)  as with anywhere in Santa Barbra but very delicious. Cute space ( a bit hard to find ). The green eggs and ham  breakfast biscuit is to die for.  ($10 I think) The Santa Barbra tri tip sandwich was excellent! Cold brew coffee was good.<br>

review_stars: 4
biz_name: Helena Avenue Bakery
biz_address: 131 Anacapa St, Ste C
biz_city: Santa Barbara
biz_state: CA
biz_categories: ['Food', 'Restaurants', 'Salad', 'Coffee & Tea', 'Breakfast & Brunch', 'Sandwiches', 'Bakeries']

Pricey ( a ham and cheese croissant was $5.50)  as with anywhere in Santa Barbra but very delicious. Cute space ( a bit hard to find ). The green eggs and ham  breakfast biscuit is to die for.  ($10 I think) The Santa Barbra tri tip sandwich was excellent! Cold brew coffee was good.




**Node ID:** 9594faec-4e66-4b78-9c35-42c8b37a33cb<br>**Similarity:** 5.8539934158325195<br>**Text:** This review is for the delicious breakfast and coffee. The Green Breakfast Bowl and the Biscuit sandwich were both perfectly cooked, fresh, flavorful. We were really impressed with the quality of the cappuccino and the cold brew coffee.<br>

review_stars: 5
biz_name: Helena Avenue Bakery
biz_address: 131 Anacapa St, Ste C
biz_city: Santa Barbara
biz_state: CA
biz_categories: ['Food', 'Restaurants', 'Salad', 'Coffee & Tea', 'Breakfast & Brunch', 'Sandwiches', 'Bakeries']

This review is for the delicious breakfast and coffee. The Green Breakfast Bowl and the Biscuit sandwich were both perfectly cooked, fresh, flavorful. We were really impressed with the quality of the cappuccino and the cold brew coffee.




**Node ID:** 5c81703a-7c5a-416b-8b4e-a85adfc7e7d4<br>**Similarity:** 5.81143045425415<br>**Text:** If it's cold, come grab a coffee instead. They offer all the great styles and flavors of coffee as any other coffee house you'll find.<br>

review_stars: 5
biz_name: Mike's Ice Cream
biz_address: 129 2nd Ave N
biz_city: Nashville
biz_state: TN
biz_categories: ['Ice Cream & Frozen Yogurt', 'Coffee & Tea', 'Restaurants', 'Sandwiches', 'Food']

If it's cold, come grab a coffee instead. They offer all the great styles and flavors of coffee as any other coffee house you'll find.




**Node ID:** f4c371ab-f4d1-4365-8c83-db3dfdd6c74c<br>**Similarity:** 4.635417938232422<br>**Text:** Barista was fast at least. Good latte and cold brew. I should have just ordered a pastry though.<br>

review_stars: 3
biz_name: Helena Avenue Bakery
biz_address: 131 Anacapa St, Ste C
biz_city: Santa Barbara
biz_state: CA
biz_categories: ['Food', 'Restaurants', 'Salad', 'Coffee & Tea', 'Breakfast & Brunch', 'Sandwiches', 'Bakeries']

Barista was fast at least. Good latte and cold brew. I should have just ordered a pastry though.




**Node ID:** 3eeaa309-8f13-406c-b1d7-ac1d15413b62<br>**Similarity:** 4.553103446960449<br>**Text:** Service at the register was friendly. Cold brew was sooo good as were the sandwiches we got. Will most definitely be back.<br>

review_stars: 5
biz_name: Helena Avenue Bakery
biz_address: 131 Anacapa St, Ste C
biz_city: Santa Barbara
biz_state: CA
biz_categories: ['Food', 'Restaurants', 'Salad', 'Coffee & Tea', 'Breakfast & Brunch', 'Sandwiches', 'Bakeries']

Service at the register was friendly. Cold brew was sooo good as were the sandwiches we got. Will most definitely be back.




**Node ID:** 412cb959-804e-415c-b175-ba86cd11f5c6<br>**Similarity:** 4.432224273681641<br>**Text:** I'm a fan of dark roast but it's usually not available. My advice brew different flavors of coffee and keep it hot.<br>

review_stars: 4
biz_name: Wawa
biz_address: 3604 Chestnut St
biz_city: Philadelphia
biz_state: PA
biz_categories: ['Restaurants', 'Automotive', 'Delis', 'Gas Stations', 'Food', 'Coffee & Tea', 'Sandwiches', 'Convenience Stores']

I'm a fan of dark roast but it's usually not available. My advice brew different flavors of coffee and keep it hot.




**Node ID:** 0e0911f0-02dd-46e1-8f3c-012af00cb649<br>**Similarity:** 4.331442832946777<br>**Text:** Eggs over medium where cold and NOT cooked right.<br>

review_stars: 1
biz_name: Caviar & Bananas
biz_address: 2031 Broadway
biz_city: Nashville
biz_state: TN
biz_categories: ['Coffee & Tea', 'Restaurants', 'Wine Bars', 'Bars', 'Nightlife', 'American (Traditional)', 'Event Planning & Services', 'Food', 'Caterers', 'Breakfast & Brunch', 'Cafes', 'Diners']

Eggs over medium where cold and NOT cooked right.




**Node ID:** b927a5c0-6676-4eb7-9642-58481c3052c1<br>**Similarity:** 4.330219268798828<br>**Text:** Great place to grab a bite. Food was great, service was awesome and beer was super cold! Miranda from hogwarts took great care of us. We where super excited to find out they had dart boards and pool tables at this location.<br>

review_stars: 5
biz_name: Twin Peaks
biz_address: 6880 E 82nd St
biz_city: Indianapolis
biz_state: IN
biz_categories: ['Sports Bars', 'American (New)', 'American (Traditional)', 'Nightlife', 'Bars', 'Restaurants']

Great place to grab a bite. Food was great, service was awesome and beer was super cold! Miranda from hogwarts took great care of us. We where super excited to find out they had dart boards and pool tables at this location.




**Node ID:** 1feb8c9e-01f4-4edb-80df-25865bdeac94<br>**Similarity:** 4.188203811645508<br>**Text:** I don't always like their beers, mainly because I'm not necessarily a fan of all the styles of beer that they brew here, but I do recognize that this place has the best execution of the things they brew. In other words, while I may not like everything they serve, they do it all well. 

Some of my favorites here include their coffee porter and their pumpkin ale (the best pumpkin offering in the city). I recently also had their pumpkin porter which was amazing.

I really like their space as well, but I find the tasting bar to be too small and getting served (or even bellying up to the bar) can sometimes be a challenge. It's also disappointing that you can't always get a pint of beer here.<br>

review_stars: 4
biz_name: Bier Brewery and Tap Room
biz_address: 5133 E 65th St
biz_city: Indianapolis
biz_state: IN
biz_categories: ['Food', 'Beer', 'Wine & Spirits', 'Breweries']

I don't always like their beers, mainly because I'm not necessarily a fan of all the styles of beer that they brew here, but I do recognize that this place has the best execution of the things they brew. In other words, while I may not like everything they serve, they do it all well. 

Some of my favorites here include their coffee porter and their pumpkin ale (the best pumpkin offering in the city). I recently also had their pumpkin porter which was amazing.

I really like their space as well, but I find the tasting bar to be too small and getting served (or even bellying up to the bar) can sometimes be a challenge. It's also disappointing that you can't always get a pint of beer here.




**Node ID:** 69b3293e-61f2-45d2-bc7a-bf86d0ebb573<br>**Similarity:** 4.049711227416992<br>**Text:** Don't be discouraged by the line since it moves quickly. We got the green eggs and ham, savory scone, cold brew, and mimosas! Their patio is delicious to sit at and enjoy the food!<br>

review_stars: 5
biz_name: Helena Avenue Bakery
biz_address: 131 Anacapa St, Ste C
biz_city: Santa Barbara
biz_state: CA
biz_categories: ['Food', 'Restaurants', 'Salad', 'Coffee & Tea', 'Breakfast & Brunch', 'Sandwiches', 'Bakeries']

Don't be discouraged by the line since it moves quickly. We got the green eggs and ham, savory scone, cold brew, and mimosas! Their patio is delicious to sit at and enjoy the food!




## Retrieval Evaluation

### Synthetic

In [34]:
# Reload the chunked nodes from the pickle cache at `cfg.nodes_persist_fp`.
# NOTE(review): the indexing cell earlier in the notebook already loads (or writes) this
# same file into `nodes`, so this reload is presumably here to let the evaluation section
# run on its own — confirm, or drop it.
# pickle.load can execute arbitrary code; only load cache files produced by this project.
with open(cfg.nodes_persist_fp, 'rb') as f:
    nodes = pickle.load(f)

In [35]:
from src.run.eval import RetrievalEvaluator

In [36]:
retrieval_evaluator = RetrievalEvaluator()
retrieval_evaluator.generate_synthetic_dataset(cfg, nodes)

[32m2024-08-09 08:41:58.517[0m | [1mINFO    [0m | [36msrc.run.eval.retrieval[0m:[36mgenerate_synthetic_dataset[0m:[36m79[0m - [1mLoading retrieval_eval_nodes from data/006_rerun with question respon/retrieval_synthetic_eval_dataset.json...[0m
[32m2024-08-09 08:41:58.518[0m | [1mINFO    [0m | [36msrc.run.eval.retrieval[0m:[36mgenerate_synthetic_dataset[0m:[36m85[0m - [1mLoading existing synthetic retrieval eval dataset at data/006_rerun with question respon/retrieval_synthetic_eval_dataset.json...[0m


In [37]:
retrieval_evaluator.retrieval_eval_dataset.queries

{'8153c3b6-b306-4f71-9e23-a87bc180b5b6': "What are the parking options available near LaFitte's in New Orleans?",
 '1f107c4a-786e-4ef1-8d2b-a8eb4728691e': 'Was the delivery of the items successful and undamaged?',
 '0caa7202-6545-4b74-8e31-096b6c2387bf': 'What grocery items are popular in Nashville?',
 '937c391c-e5d4-4107-aa13-16c37ce7564f': 'What are some negative reviews about a restaurant?',
 '7048705d-867f-444e-84c3-ffe4b13d3f5c': 'What ice cream flavors are popular during winter?',
 '64a80a8c-ec06-458b-abc7-d63d9946640c': 'Where can I find ice cream in waffle cones in Nashville?',
 '2bddb04c-6550-487a-8530-8a68107a1cea': 'What are the sushi options available at Tuna Bar?',
 '6eb26925-f9a2-4cd2-9da2-6d1a83538a5a': 'What are the best breakfast sandwiches in town?',
 'ab2ffb47-265f-4054-9a75-9695e492c207': 'What are the reasons customers have stopped using Chase credit cards?',
 'dbef72e3-a00b-48f7-a8b8-0ee79c954bcd': "What are the reviews for Tony's restaurant in Alton?",
 '2d073a91

In [38]:
retrieval_evaluator.retrieval_eval_dataset.relevant_docs

{'8153c3b6-b306-4f71-9e23-a87bc180b5b6': ['a0725bda-b426-4e7e-8757-ad47bb224c3d'],
 '1f107c4a-786e-4ef1-8d2b-a8eb4728691e': ['e2cbc17a-773c-49bd-9430-d66e5554e3e8'],
 '0caa7202-6545-4b74-8e31-096b6c2387bf': ['f8ed749d-5b97-4a1a-a61a-4188242ad928'],
 '937c391c-e5d4-4107-aa13-16c37ce7564f': ['730ff7c8-741e-4983-a359-5e7f93548501'],
 '7048705d-867f-444e-84c3-ffe4b13d3f5c': ['996c0ebb-3af2-444a-8eba-d4d2b17c2caa'],
 '64a80a8c-ec06-458b-abc7-d63d9946640c': ['a152030d-54ed-48e2-b6ef-52d2bb6ef8c2'],
 '2bddb04c-6550-487a-8530-8a68107a1cea': ['5fb29114-756d-445c-9ab5-8ddffedd2018'],
 '6eb26925-f9a2-4cd2-9da2-6d1a83538a5a': ['afc3a1b4-2892-43f8-b6ae-b857636f1486'],
 'ab2ffb47-265f-4054-9a75-9695e492c207': ['567ae8af-966b-482b-9189-73935a365e4d'],
 'dbef72e3-a00b-48f7-a8b8-0ee79c954bcd': ['44c12937-f4c9-4bcb-a223-9436e2740669'],
 '2d073a91-1173-421d-9bc3-605b1e825e7f': ['34608595-a7fa-4c23-9588-991d39f5cd68'],
 '0c4185e8-2e78-4ae1-9d24-106d52163e0a': ['87e53bdd-71aa-426c-87a6-8c31f3f9134a'],
 '40

In [39]:
retrieval_eval_results_df, retrieval_eval_results_full_df = await retrieval_evaluator.aevaluate(cfg, retriever)

In [40]:
# NOTE(review): the original TODO here asked "Why all zeros?". In the output recorded
# below the aggregate metrics are non-zero (e.g. hit_rate 0.5), so that question looks
# stale for this run — confirm and drop it, or restate which metric is still unexpected.
retrieval_eval_results_df

Unnamed: 0,retrievers,hit_rate,mrr,precision,recall,ap,ndcg
0,top_10_retrieval_eval,0.5,0.321726,0.05,0.5,0.321726,0.079969


In [41]:
retrieval_eval_results_full_df

Unnamed: 0,query,expected_ids,retrieved_texts,hit_rate,mrr,precision,recall,ap,ndcg
0,What are the parking options available near La...,[a0725bda-b426-4e7e-8757-ad47bb224c3d],"[LaFitte's is a new venture of A J Tusa, owner...",1.0,1.0,0.1,1.0,1.0,0.220092
1,Was the delivery of the items successful and u...,[e2cbc17a-773c-49bd-9430-d66e5554e3e8],[Delivery: Order was 45 minutes late and there...,0.0,0.0,0.0,0.0,0.0,0.0
2,What grocery items are popular in Nashville?,[f8ed749d-5b97-4a1a-a61a-4188242ad928],[Note: the food section of this review is for...,1.0,0.125,0.1,1.0,0.125,0.069431
3,What are some negative reviews about a restaur...,[730ff7c8-741e-4983-a359-5e7f93548501],[I'm really surprised by the negative reviews ...,0.0,0.0,0.0,0.0,0.0,0.0
4,What ice cream flavors are popular during winter?,[996c0ebb-3af2-444a-8eba-d4d2b17c2caa],[I felt that I didnt have enough time to sampl...,0.0,0.0,0.0,0.0,0.0,0.0
5,Where can I find ice cream in waffle cones in ...,[a152030d-54ed-48e2-b6ef-52d2bb6ef8c2],[Delicious ice cream in waffle cones. Refreshi...,1.0,1.0,0.1,1.0,1.0,0.220092
6,What are the sushi options available at Tuna Bar?,[5fb29114-756d-445c-9ab5-8ddffedd2018],[4.0 - 4.5 star Review. I compare all sushi re...,0.0,0.0,0.0,0.0,0.0,0.0
7,What are the best breakfast sandwiches in town?,[afc3a1b4-2892-43f8-b6ae-b857636f1486],[Possibly the best breakfast sandwich EVER. O...,0.0,0.0,0.0,0.0,0.0,0.0
8,What are the reasons customers have stopped us...,[567ae8af-966b-482b-9189-73935a365e4d],[I have only had one credit card previous to t...,0.0,0.0,0.0,0.0,0.0,0.0
9,What are the reviews for Tony's restaurant in ...,[44c12937-f4c9-4bcb-a223-9436e2740669],[Tony's is a fine restaurant in the Alton area...,0.0,0.0,0.0,0.0,0.0,0.0


#### Error Analysis

In [42]:
# Keep only the queries whose hit_rate is below 1 (the expected node was not retrieved),
# ordered worst-first across all metrics.
missed_mask = retrieval_eval_results_full_df['hit_rate'] < 1
metric_cols = ['hit_rate', 'mrr', 'precision', 'recall', 'ap', 'ndcg']
retrieval_eval_irrelevance_df = retrieval_eval_results_full_df[missed_mask].sort_values(metric_cols)
retrieval_eval_irrelevance_df

Unnamed: 0,query,expected_ids,retrieved_texts,hit_rate,mrr,precision,recall,ap,ndcg
1,Was the delivery of the items successful and u...,[e2cbc17a-773c-49bd-9430-d66e5554e3e8],[Delivery: Order was 45 minutes late and there...,0.0,0.0,0.0,0.0,0.0,0.0
3,What are some negative reviews about a restaur...,[730ff7c8-741e-4983-a359-5e7f93548501],[I'm really surprised by the negative reviews ...,0.0,0.0,0.0,0.0,0.0,0.0
4,What ice cream flavors are popular during winter?,[996c0ebb-3af2-444a-8eba-d4d2b17c2caa],[I felt that I didnt have enough time to sampl...,0.0,0.0,0.0,0.0,0.0,0.0
6,What are the sushi options available at Tuna Bar?,[5fb29114-756d-445c-9ab5-8ddffedd2018],[4.0 - 4.5 star Review. I compare all sushi re...,0.0,0.0,0.0,0.0,0.0,0.0
7,What are the best breakfast sandwiches in town?,[afc3a1b4-2892-43f8-b6ae-b857636f1486],[Possibly the best breakfast sandwich EVER. O...,0.0,0.0,0.0,0.0,0.0,0.0
8,What are the reasons customers have stopped us...,[567ae8af-966b-482b-9189-73935a365e4d],[I have only had one credit card previous to t...,0.0,0.0,0.0,0.0,0.0,0.0
9,What are the reviews for Tony's restaurant in ...,[44c12937-f4c9-4bcb-a223-9436e2740669],[Tony's is a fine restaurant in the Alton area...,0.0,0.0,0.0,0.0,0.0,0.0
14,What are the best gift ideas for men?,[8d977ad0-5575-4640-9b16-92797aa0c430],[I came in on March 11th because I needed a gi...,0.0,0.0,0.0,0.0,0.0,0.0
16,What are the best cafes with cats and coffee?,[809102e9-9e7f-4321-b767-b63f5fbb2b9a],[Cat cafes! It's all the rage. I have no pets ...,0.0,0.0,0.0,0.0,0.0,0.0
17,What activities can I do with my son that he w...,[0731c00d-5bae-428d-94ad-91a0755d62da],[He latter represented my son very well and I'...,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
# For every missed query, print the query, the expected context (fetched from Qdrant by
# node id) and the contexts the retriever actually returned.
for err_idx, row in retrieval_eval_irrelevance_df.reset_index(drop=True).iterrows():
    print(f"\n\n============Error #{err_idx+1}=============\n\n")
    # Use item access: attribute access on a row can collide with pandas method names.
    print(f"Query:\n{row['query']}\n")
    # The node text is stored as JSON under the `_node_content` payload key.
    expected_records = qdrantdb.retrieve(cfg.db_collection, ids=row['expected_ids'])
    expected_contexts = '\n\n'.join(
        json.loads(record.payload['_node_content'])['text'] for record in expected_records
    )
    print(f"Expected Contexts:\n{expected_contexts}\n")
    # A distinct index name keeps the inner loop from clobbering the outer error counter.
    contexts = "".join(
        f"\n\nContext #{ctx_idx}: {context}"
        for ctx_idx, context in enumerate(row['retrieved_texts'], start=1)
    )
    print(f"Retrieved Contexts:\n{contexts}\n")





Query:
Was the delivery of the items successful and undamaged?

Expected Contexts:
Received without damage. I was relieved n happy I had them handle.

Retrieved Contexts:


Context #1: Delivery: Order was 45 minutes late and there was an item missing. 

Context #2: The staff was every friendly and attentive. The wedding and reception was beautiful and a huge success. Everyone had a great time.

Context #3: I'm so happy it's been revived. Prime spot across from Vandy in Midtown!  Love it!  Way to go and will pray for continued success!!

Context #4: BIG thank you to China Dragon that catered lunch for our teachers and staff! It made our teacher appreciation lunch a huge success. 

Context #5: The little deli belongs in little Italy. Wonderful subs, pastas, desserts and deli items. TOP NOTCH subs.  Always busy so don't be a douche and give a 1 star review because your delivery took too long. Call ahead for pick up.


Context #6: Good delivery
.

Context #7: The gumbo was delicious an

# Response

In [44]:
from llama_index.core import get_response_synthesizer
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker

from src.features.append_reference.custom_query_engine import ManualAppendReferenceQueryEngine

In [45]:
# Post-processing chain applied to retrieved nodes: an optional similarity cutoff
# followed by the reranker.
# NOTE(review): the BM25 scores shown in the retrieval test above are unnormalised
# (roughly 4-6), so a cutoff tuned for vector similarity may not transfer — confirm.
similarity_cutoff = cfg.retrieval_cfg.retrieval_similarity_cutoff
node_postprocessors = (
    [SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)]
    if similarity_cutoff is not None
    else []
)

reranker = FlagEmbeddingReranker(
    model=cfg.retrieval_cfg.rerank_model_name,
    top_n=cfg.retrieval_cfg.rerank_top_k,
)
node_postprocessors.append(reranker)

# Query engine = BM25 retriever -> post-processors -> default response synthesizer.
response_synthesizer = get_response_synthesizer()
query_engine = ManualAppendReferenceQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=node_postprocessors,
)

## Test Query Engine

In [46]:
from llama_index.core.response.notebook_utils import (
    display_source_node,
    display_response,
)

In [47]:
# Smoke test: send one ad-hoc question through the query engine and render
# the answer together with its source nodes and their metadata.
question = "where to find good cold brew coffee?"
response = query_engine.query(question)
display_response(
    response,
    show_source=True,
    show_metadata=True,
    show_source_metadata=True,
)

**`Final Response:`** You can find good cold brew coffee at Helena Avenue Bakery, specifically in Santa Barbara, California.


Sources:
- [None](None)





#### Referenced Paragraphs
Article: **None**

> ...Pricey ( a ham and cheese croissant was $5.50)  as with anywhere in Santa Barbra but very delicious. Cute space ( a bit hard to find ). The green eggs and ham  breakfast biscuit is to die for.  ($10 I think) The Santa Barbra tri tip sandwich was excellent! Cold brew coffee was good. ...

---

**`Source Node 1/5`**

**Node ID:** c219b275-7042-42e0-9741-0e4e0e537912<br>**Similarity:** 0.7123664021492004<br>**Text:** Pricey ( a ham and cheese croissant was $5.50)  as with anywhere in Santa Barbra but very delicio...<br>**Metadata:** {'review_id': 'Q3fPo_x6xKxafAzy1hFITg', 'user_id': 'ha2Lv7WnxvdYnRsqwL1apQ', 'business_id': 'IDtLPgUrqorrpqSLdfMhZQ', 'review_stars': 4, 'useful': 0, 'funny': 0, 'cool': 0, 'date': '2018-07-17T00:20:41', 'biz_name': 'Helena Avenue Bakery', 'biz_address': '131 Anacapa St, Ste C', 'biz_city': 'Santa Barbara', 'biz_state': 'CA', 'biz_postal_code': '93101', 'biz_latitude': 34.4144445, 'biz_longitude': -119.6906718, 'biz_stars': 4.0, 'biz_review_count': 389, 'biz_is_open': 1, 'biz_attributes': {'AcceptsInsurance': None, 'Alcohol': "u'none'", 'Ambience': "{'touristy': False, 'hipster': True, 'romantic': False, 'divey': False, 'intimate': False, 'trendy': True, 'upscale': False, 'classy': False, 'casual': True}", 'BYOB': None, 'BestNights': None, 'BikeParking': 'True', 'BusinessAcceptsBitcoin': None, 'BusinessAcceptsCreditCards': 'True', 'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}", 'ByAppointmentOnly': None, 'Caters': 'True', 'CoatCheck': None, 'Corkage': None, 'DogsAllowed': 'True', 'DriveThru': None, 'GoodForDancing': None, 'GoodForKids': 'True', 'GoodForMeal': "{'dessert': False, 'latenight': False, 'lunch': True, 'dinner': False, 'brunch': True, 'breakfast': True}", 'HappyHour': 'False', 'HasTV': 'False', 'Music': None, 'NoiseLevel': "u'average'", 'OutdoorSeating': 'True', 'RestaurantsAttire': "'casual'", 'RestaurantsDelivery': 'None', 'RestaurantsGoodForGroups': 'True', 'RestaurantsPriceRange2': '2', 'RestaurantsReservations': 'False', 'RestaurantsTableService': 'False', 'RestaurantsTakeOut': 'True', 'Smoking': None, 'WheelchairAccessible': 'True', 'WiFi': "u'no'"}, 'biz_categories': ['Food', 'Restaurants', 'Salad', 'Coffee & Tea', 'Breakfast & Brunch', 'Sandwiches', 'Bakeries'], 'biz_hours': 
{'Friday': '8:0-14:0', 'Monday': '0:0-0:0', 'Saturday': '8:0-14:0', 'Sunday': '8:0-14:0', 'Thursday': '8:0-14:0', 'Tuesday': '8:0-14:0', 'Wednesday': '8:0-14:0'}}<br>

---

**`Source Node 2/5`**

**Node ID:** 3eeaa309-8f13-406c-b1d7-ac1d15413b62<br>**Similarity:** -1.094619631767273<br>**Text:** Service at the register was friendly. Cold brew was sooo good as were the sandwiches we got. Will...<br>**Metadata:** {'review_id': '-6iyNp8Vs9kF0IRGXZvsHA', 'user_id': 'oq6j9F5Oy51JyogMYh1P4g', 'business_id': 'IDtLPgUrqorrpqSLdfMhZQ', 'review_stars': 5, 'useful': 0, 'funny': 0, 'cool': 0, 'date': '2020-09-06T08:11:20', 'biz_name': 'Helena Avenue Bakery', 'biz_address': '131 Anacapa St, Ste C', 'biz_city': 'Santa Barbara', 'biz_state': 'CA', 'biz_postal_code': '93101', 'biz_latitude': 34.4144445, 'biz_longitude': -119.6906718, 'biz_stars': 4.0, 'biz_review_count': 389, 'biz_is_open': 1, 'biz_attributes': {'AcceptsInsurance': None, 'Alcohol': "u'none'", 'Ambience': "{'touristy': False, 'hipster': True, 'romantic': False, 'divey': False, 'intimate': False, 'trendy': True, 'upscale': False, 'classy': False, 'casual': True}", 'BYOB': None, 'BestNights': None, 'BikeParking': 'True', 'BusinessAcceptsBitcoin': None, 'BusinessAcceptsCreditCards': 'True', 'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}", 'ByAppointmentOnly': None, 'Caters': 'True', 'CoatCheck': None, 'Corkage': None, 'DogsAllowed': 'True', 'DriveThru': None, 'GoodForDancing': None, 'GoodForKids': 'True', 'GoodForMeal': "{'dessert': False, 'latenight': False, 'lunch': True, 'dinner': False, 'brunch': True, 'breakfast': True}", 'HappyHour': 'False', 'HasTV': 'False', 'Music': None, 'NoiseLevel': "u'average'", 'OutdoorSeating': 'True', 'RestaurantsAttire': "'casual'", 'RestaurantsDelivery': 'None', 'RestaurantsGoodForGroups': 'True', 'RestaurantsPriceRange2': '2', 'RestaurantsReservations': 'False', 'RestaurantsTableService': 'False', 'RestaurantsTakeOut': 'True', 'Smoking': None, 'WheelchairAccessible': 'True', 'WiFi': "u'no'"}, 'biz_categories': ['Food', 'Restaurants', 'Salad', 'Coffee & Tea', 'Breakfast & Brunch', 'Sandwiches', 'Bakeries'], 'biz_hours': 
{'Friday': '8:0-14:0', 'Monday': '0:0-0:0', 'Saturday': '8:0-14:0', 'Sunday': '8:0-14:0', 'Thursday': '8:0-14:0', 'Tuesday': '8:0-14:0', 'Wednesday': '8:0-14:0'}}<br>

---

**`Source Node 3/5`**

**Node ID:** f4c371ab-f4d1-4365-8c83-db3dfdd6c74c<br>**Similarity:** -1.8642116785049438<br>**Text:** Barista was fast at least. Good latte and cold brew. I should have just ordered a pastry though.<br>**Metadata:** {'review_id': 'LuLzCVNRkAPjVaRVA__V2A', 'user_id': '7gDOm5IJarR2QtOlbY695A', 'business_id': 'IDtLPgUrqorrpqSLdfMhZQ', 'review_stars': 3, 'useful': 2, 'funny': 0, 'cool': 0, 'date': '2018-03-04T17:37:14', 'biz_name': 'Helena Avenue Bakery', 'biz_address': '131 Anacapa St, Ste C', 'biz_city': 'Santa Barbara', 'biz_state': 'CA', 'biz_postal_code': '93101', 'biz_latitude': 34.4144445, 'biz_longitude': -119.6906718, 'biz_stars': 4.0, 'biz_review_count': 389, 'biz_is_open': 1, 'biz_attributes': {'AcceptsInsurance': None, 'Alcohol': "u'none'", 'Ambience': "{'touristy': False, 'hipster': True, 'romantic': False, 'divey': False, 'intimate': False, 'trendy': True, 'upscale': False, 'classy': False, 'casual': True}", 'BYOB': None, 'BestNights': None, 'BikeParking': 'True', 'BusinessAcceptsBitcoin': None, 'BusinessAcceptsCreditCards': 'True', 'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}", 'ByAppointmentOnly': None, 'Caters': 'True', 'CoatCheck': None, 'Corkage': None, 'DogsAllowed': 'True', 'DriveThru': None, 'GoodForDancing': None, 'GoodForKids': 'True', 'GoodForMeal': "{'dessert': False, 'latenight': False, 'lunch': True, 'dinner': False, 'brunch': True, 'breakfast': True}", 'HappyHour': 'False', 'HasTV': 'False', 'Music': None, 'NoiseLevel': "u'average'", 'OutdoorSeating': 'True', 'RestaurantsAttire': "'casual'", 'RestaurantsDelivery': 'None', 'RestaurantsGoodForGroups': 'True', 'RestaurantsPriceRange2': '2', 'RestaurantsReservations': 'False', 'RestaurantsTableService': 'False', 'RestaurantsTakeOut': 'True', 'Smoking': None, 'WheelchairAccessible': 'True', 'WiFi': "u'no'"}, 'biz_categories': ['Food', 'Restaurants', 'Salad', 'Coffee & Tea', 'Breakfast & Brunch', 'Sandwiches', 'Bakeries'], 'biz_hours': 
{'Friday': '8:0-14:0', 'Monday': '0:0-0:0', 'Saturday': '8:0-14:0', 'Sunday': '8:0-14:0', 'Thursday': '8:0-14:0', 'Tuesday': '8:0-14:0', 'Wednesday': '8:0-14:0'}}<br>

---

**`Source Node 4/5`**

**Node ID:** 5c81703a-7c5a-416b-8b4e-a85adfc7e7d4<br>**Similarity:** -2.0586793422698975<br>**Text:** If it's cold, come grab a coffee instead. They offer all the great styles and flavors of coffee a...<br>**Metadata:** {'review_id': 'GHAY9_Kpk-1hcq4Ehfmu1w', 'user_id': 'HpXm_E_MRQdN_Rv4cARviA', 'business_id': 'oaboaRBUgGjbo2kfUIKDLQ', 'review_stars': 5, 'useful': 1, 'funny': 0, 'cool': 1, 'date': '2016-02-07T16:02:23', 'biz_name': "Mike's Ice Cream", 'biz_address': '129 2nd Ave N', 'biz_city': 'Nashville', 'biz_state': 'TN', 'biz_postal_code': '37201', 'biz_latitude': 36.1626492, 'biz_longitude': -86.7759733, 'biz_stars': 4.5, 'biz_review_count': 593, 'biz_is_open': 1, 'biz_attributes': {'AcceptsInsurance': None, 'Alcohol': "u'none'", 'Ambience': "{'romantic': False, 'intimate': False, 'touristy': False, 'hipster': False, 'divey': False, 'classy': False, 'trendy': False, 'upscale': False, 'casual': True}", 'BYOB': None, 'BestNights': None, 'BikeParking': 'False', 'BusinessAcceptsBitcoin': None, 'BusinessAcceptsCreditCards': 'True', 'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}", 'ByAppointmentOnly': 'False', 'Caters': 'False', 'CoatCheck': None, 'Corkage': None, 'DogsAllowed': 'False', 'DriveThru': 'False', 'GoodForDancing': None, 'GoodForKids': 'True', 'GoodForMeal': None, 'HappyHour': None, 'HasTV': 'False', 'Music': None, 'NoiseLevel': "u'average'", 'OutdoorSeating': 'None', 'RestaurantsAttire': "'casual'", 'RestaurantsDelivery': 'None', 'RestaurantsGoodForGroups': 'True', 'RestaurantsPriceRange2': '2', 'RestaurantsReservations': 'False', 'RestaurantsTableService': None, 'RestaurantsTakeOut': 'True', 'Smoking': None, 'WheelchairAccessible': None, 'WiFi': "'free'"}, 'biz_categories': ['Ice Cream & Frozen Yogurt', 'Coffee & Tea', 'Restaurants', 'Sandwiches', 'Food'], 'biz_hours': {'Friday': '8:0-0:0', 'Monday': '8:0-23:0', 'Saturday': '8:0-0:30', 'Sunday': '8:0-23:0', 'Thursday': '8:0-23:0', 'Tuesday': 
'8:0-23:0', 'Wednesday': '8:0-23:0'}}<br>

---

**`Source Node 5/5`**

**Node ID:** 9594faec-4e66-4b78-9c35-42c8b37a33cb<br>**Similarity:** -2.2148277759552<br>**Text:** This review is for the delicious breakfast and coffee. The Green Breakfast Bowl and the Biscuit s...<br>**Metadata:** {'review_id': '4sGHeBRdn36yOX3KszhzCA', 'user_id': 'iGhGzpTg0c3J_tuTi9TOzQ', 'business_id': 'IDtLPgUrqorrpqSLdfMhZQ', 'review_stars': 5, 'useful': 1, 'funny': 1, 'cool': 1, 'date': '2018-07-28T19:12:02', 'biz_name': 'Helena Avenue Bakery', 'biz_address': '131 Anacapa St, Ste C', 'biz_city': 'Santa Barbara', 'biz_state': 'CA', 'biz_postal_code': '93101', 'biz_latitude': 34.4144445, 'biz_longitude': -119.6906718, 'biz_stars': 4.0, 'biz_review_count': 389, 'biz_is_open': 1, 'biz_attributes': {'AcceptsInsurance': None, 'Alcohol': "u'none'", 'Ambience': "{'touristy': False, 'hipster': True, 'romantic': False, 'divey': False, 'intimate': False, 'trendy': True, 'upscale': False, 'classy': False, 'casual': True}", 'BYOB': None, 'BestNights': None, 'BikeParking': 'True', 'BusinessAcceptsBitcoin': None, 'BusinessAcceptsCreditCards': 'True', 'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}", 'ByAppointmentOnly': None, 'Caters': 'True', 'CoatCheck': None, 'Corkage': None, 'DogsAllowed': 'True', 'DriveThru': None, 'GoodForDancing': None, 'GoodForKids': 'True', 'GoodForMeal': "{'dessert': False, 'latenight': False, 'lunch': True, 'dinner': False, 'brunch': True, 'breakfast': True}", 'HappyHour': 'False', 'HasTV': 'False', 'Music': None, 'NoiseLevel': "u'average'", 'OutdoorSeating': 'True', 'RestaurantsAttire': "'casual'", 'RestaurantsDelivery': 'None', 'RestaurantsGoodForGroups': 'True', 'RestaurantsPriceRange2': '2', 'RestaurantsReservations': 'False', 'RestaurantsTableService': 'False', 'RestaurantsTakeOut': 'True', 'Smoking': None, 'WheelchairAccessible': 'True', 'WiFi': "u'no'"}, 'biz_categories': ['Food', 'Restaurants', 'Salad', 'Coffee & Tea', 'Breakfast & Brunch', 'Sandwiches', 'Bakeries'], 'biz_hours': 
{'Friday': '8:0-14:0', 'Monday': '0:0-0:0', 'Saturday': '8:0-14:0', 'Sunday': '8:0-14:0', 'Thursday': '8:0-14:0', 'Tuesday': '8:0-14:0', 'Wednesday': '8:0-14:0'}}<br>

{'c219b275-7042-42e0-9741-0e4e0e537912': {'review_id': 'Q3fPo_x6xKxafAzy1hFITg',
  'user_id': 'ha2Lv7WnxvdYnRsqwL1apQ',
  'business_id': 'IDtLPgUrqorrpqSLdfMhZQ',
  'review_stars': 4,
  'useful': 0,
  'funny': 0,
  'cool': 0,
  'date': '2018-07-17T00:20:41',
  'biz_name': 'Helena Avenue Bakery',
  'biz_address': '131 Anacapa St, Ste C',
  'biz_city': 'Santa Barbara',
  'biz_state': 'CA',
  'biz_postal_code': '93101',
  'biz_latitude': 34.4144445,
  'biz_longitude': -119.6906718,
  'biz_stars': 4.0,
  'biz_review_count': 389,
  'biz_is_open': 1,
  'biz_attributes': {'AcceptsInsurance': None,
   'Alcohol': "u'none'",
   'Ambience': "{'touristy': False, 'hipster': True, 'romantic': False, 'divey': False, 'intimate': False, 'trendy': True, 'upscale': False, 'classy': False, 'casual': True}",
   'BYOB': None,
   'BestNights': None,
   'BikeParking': 'True',
   'BusinessAcceptsBitcoin': None,
   'BusinessAcceptsCreditCards': 'True',
   'BusinessParking': "{'garage': False, 'street': True, 'v

## Response Evaluation

In [48]:
from src.run.eval import ResponseEvaluator

In [49]:
# Single evaluator instance reused below to build the response-eval datasets,
# score the predictions and log the results to MLflow.
response_evaluator = ResponseEvaluator()

### Synthetic

In [50]:
# Obtain the synthetic response-eval dataset for `documents`.
# NOTE(review): per the log output this samples documents and may load a
# previously persisted dataset instead of regenerating it — confirm in
# src.run.eval.response which run directory it reads from.
response_eval_documents, response_synthetic_eval_dataset = response_evaluator.generate_synthetic_dataset(cfg, documents)

[32m2024-08-09 08:43:22.919[0m | [1mINFO    [0m | [36msrc.run.eval.response[0m:[36mgenerate_synthetic_dataset[0m:[36m39[0m - [1mSampling 20 documents for response evaluation...[0m
[32m2024-08-09 08:43:22.936[0m | [1mINFO    [0m | [36msrc.run.eval.response[0m:[36mgenerate_synthetic_dataset[0m:[36m95[0m - [1mLoading existing synthetic response eval dataset at data/006_rerun with question respon/response_synthetic_eval_dataset.json...[0m


In [51]:
# Answer every synthetic example with the query engine, in batches of
# `cfg.batch_size`. Top-level `await` works here because this is a notebook cell.
response_synthetic_eval_prediction_dataset = await response_synthetic_eval_dataset.amake_predictions_with(
    predictor=query_engine, batch_size=cfg.batch_size, show_progress=True
)

Batch processing of predictions: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.23it/s]


In [52]:
# Score the synthetic predictions against the labelled dataset with the
# configured judge LLM. Yields two frames: aggregated mean scores and the
# per-example scores (both displayed in the next cells).
response_synthetic_mean_scores_df, response_synthetic_deep_eval_df = response_evaluator.evaluate_labelled_rag_dataset(
    response_synthetic_eval_dataset,
    response_synthetic_eval_prediction_dataset,
    dataset_name="synthetic",
    judge_model=cfg.eval_cfg.response_eval_llm_model,
    cache_dp=cfg.notebook_cache_dp
)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:37<00:00,  3.75s/it]


In [53]:
# Mean correctness / relevancy / faithfulness scores on the synthetic dataset.
response_synthetic_mean_scores_df

rag,base_rag
metrics,Unnamed: 1_level_1
mean_correctness_score,3.0
mean_relevancy_score,0.8
mean_faithfulness_score,0.8


In [54]:
# Per-example scores, answers and retrieved contexts on the synthetic dataset.
response_synthetic_deep_eval_df

Unnamed: 0,query,answer,relevancy_score,correctness_score,faithfulness_score,contexts
0,What are the best Japanese restaurants in Nash...,\n\nIt seems that The Green Pheasant is a stan...,1.0,4.0,1.0,"[Best new restaurant in Nashville, hands down...."
1,What are the customer service experiences at R...,\n\nIt appears that the customer service exper...,1.0,3.0,1.0,[I was so disappointed by this Macaroni Grill!...
2,What are the best wine bars in New Orleans?,\nIt seems that Copper Vine is a highly-recomm...,1.0,4.0,0.0,[Wow!!! One of my top three new favorite place...
3,What are the best dishes to try at Copper Vine...,"\n\nBased on the rave reviews, it seems that t...",1.0,3.0,1.0,"[The food, the atmosphere, and the service was..."
4,What unique dessert options are available at M...,\n\nIt seems that Mike's Ice Cream in Nashvill...,0.0,3.0,0.0,[If you're looking for creamy ice cream to com...
5,What specialty drinks are available at ROAST C...,\n\nThe menu at ROAST Coffeehouse and Wine Bar...,1.0,3.0,1.0,[Hope everybody had a nice Valentine's Day yes...
6,What are the best places to eat near a sports ...,\n\nIf you're looking for a great dining exper...,1.0,2.0,1.0,[This is the best sports bar and restaurant yo...
7,What are the happy hour deals at Bar One in Ph...,"\n\nDecent discounts on drinks, and a fairly v...",1.0,2.0,1.0,[Vaguely remember coming here when the bar wen...
8,What are the best sports bars in Indianapolis?,\n\nIt seems that Twin Peaks has multiple loca...,0.0,2.0,1.0,[One of the best sports bars we've been too an...
9,What are the best nightlife spots in New Orleans?,\nNew Orleans is a vibrant city with a rich ni...,1.0,4.0,1.0,[New and cool spot in downtown New Orleans!\nG...


#### Error Analysis

In [55]:
# Index the synthetic eval examples by query text so the error analysis below
# can look up the reference answer and reference contexts of any query.
response_synthetic_eval_dataset_dict = {
    example.query: {
        "reference_answer": example.reference_answer,
        "reference_contexts": example.reference_contexts,
    }
    for example in response_synthetic_eval_dataset.examples
}

In [56]:
# Per-example scores in ascending order (relevancy, then correctness, then
# faithfulness), so the weakest answers are listed first.
response_synthetic_deep_eval_df.sort_values(['relevancy_score', 'correctness_score', 'faithfulness_score'])

Unnamed: 0,query,answer,relevancy_score,correctness_score,faithfulness_score,contexts
8,What are the best sports bars in Indianapolis?,\n\nIt seems that Twin Peaks has multiple loca...,0.0,2.0,1.0,[One of the best sports bars we've been too an...
4,What unique dessert options are available at M...,\n\nIt seems that Mike's Ice Cream in Nashvill...,0.0,3.0,0.0,[If you're looking for creamy ice cream to com...
6,What are the best places to eat near a sports ...,\n\nIf you're looking for a great dining exper...,1.0,2.0,1.0,[This is the best sports bar and restaurant yo...
7,What are the happy hour deals at Bar One in Ph...,"\n\nDecent discounts on drinks, and a fairly v...",1.0,2.0,1.0,[Vaguely remember coming here when the bar wen...
1,What are the customer service experiences at R...,\n\nIt appears that the customer service exper...,1.0,3.0,1.0,[I was so disappointed by this Macaroni Grill!...
3,What are the best dishes to try at Copper Vine...,"\n\nBased on the rave reviews, it seems that t...",1.0,3.0,1.0,"[The food, the atmosphere, and the service was..."
5,What specialty drinks are available at ROAST C...,\n\nThe menu at ROAST Coffeehouse and Wine Bar...,1.0,3.0,1.0,[Hope everybody had a nice Valentine's Day yes...
2,What are the best wine bars in New Orleans?,\nIt seems that Copper Vine is a highly-recomm...,1.0,4.0,0.0,[Wow!!! One of my top three new favorite place...
0,What are the best Japanese restaurants in Nash...,\n\nIt seems that The Green Pheasant is a stan...,1.0,4.0,1.0,"[Best new restaurant in Nashville, hands down...."
9,What are the best nightlife spots in New Orleans?,\nNew Orleans is a vibrant city with a rich ni...,1.0,4.0,1.0,[New and cool spot in downtown New Orleans!\nG...


In [57]:
# Keep only the synthetic examples whose answer was judged irrelevant
# (relevancy_score < 1), lowest scores first.
response_synthetic_eval_irrelevance_df = (
    response_synthetic_deep_eval_df
    .loc[lambda df: df['relevancy_score'].lt(1)]
    .sort_values(['relevancy_score', 'correctness_score', 'faithfulness_score'])
)

# Print each irrelevant answer next to its retrieved contexts and the
# reference answer/contexts from the eval dataset.
for i, row in response_synthetic_eval_irrelevance_df.reset_index(drop=True).iterrows():
    print(f"\n\n==============Error #{i+1}===============\n\n")
    # Bracket access: attribute access on a Series can collide with Series
    # attributes/methods of the same name.
    print(f"Query:\n{row['query']}\n")
    contexts = '\n\n'.join(row['contexts'])
    print(f"Context:\n{contexts}\n")
    print(f"Answer:\n{row['answer']}\n----\n")
    expected = response_synthetic_eval_dataset_dict.get(row['query'])
    if expected is None:
        # Name the dict that was actually searched so the log is actionable.
        logger.error(f"Could not find query {row['query']} in response_synthetic_eval_dataset_dict!")
        continue
    expected_answer = expected['reference_answer']
    print(f"Expected Answer:\n{expected_answer}\n")
    expected_contexts = expected['reference_contexts']
    print(f"Expected Contexts:\n{expected_contexts}\n")





Query:
What are the best sports bars in Indianapolis?

Context:
One of the best sports bars we've been too and always go to one if they have one in the town we are in

I'm a big sports bar guy and a sports bar food guy and don't expect a sports bar to be 5 star dining. Let me answer a question that people ask me when I said I ate there. "Is it okay to bring your wife and kids there to watch a game?" The servers do wear low cut shirts and shorts, but nothing that was any different than other themed sports bars. When I was there, there were groups of women together and also couples there. 

Typical bar food but good layout for watching sports.

Another "sports bar" that can't figure out what it is: a sports bar (it has gobs of TVS with sundry sports playing);  a horrible "music" bar (indeed horrible "music" was playing so loudly that you couldn't focus on the cold beer, sports, or girls); or a bootie bar (several of the waitresses were more interested in working the Customers, than w

### Manually Curated
Ref: https://docs.llamaindex.ai/en/stable/examples/llama_dataset/ragdataset_submission_template/#1c-creating-a-labelledragdataset-from-scratch-with-manually-constructed-examples

In [58]:
# Build the manually curated response-eval dataset; per the log output it is
# persisted under this run's data directory.
response_curated_eval_dataset = response_evaluator.generate_curated_dataset(cfg)

[32m2024-08-09 08:48:29.347[0m | [1mINFO    [0m | [36msrc.run.eval.response[0m:[36mgenerate_curated_dataset[0m:[36m124[0m - [1mPersisting curated response eval dataset at data/007_bm25_standalone/response_curated_eval_dataset.json...[0m


In [59]:
# Answer every curated example with the query engine, in batches of
# `cfg.batch_size`. Top-level `await` works here because this is a notebook cell.
response_curated_eval_prediction_dataset = await response_curated_eval_dataset.amake_predictions_with(
    predictor=query_engine, batch_size=cfg.batch_size, show_progress=True
)

Batch processing of predictions: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.30s/it]


In [60]:
# Score the curated predictions against the labelled dataset with the
# configured judge LLM. Yields two frames: aggregated mean scores and the
# per-example scores (both displayed in the next cells).
response_curated_mean_scores_df, response_curated_deep_eval_df = response_evaluator.evaluate_labelled_rag_dataset(
    response_curated_eval_dataset,
    response_curated_eval_prediction_dataset,
    dataset_name="curated",
    judge_model=cfg.eval_cfg.response_eval_llm_model,
    cache_dp=cfg.notebook_cache_dp
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:29<00:00, 29.09s/it]


In [61]:
# Mean correctness / relevancy / faithfulness scores on the curated dataset.
response_curated_mean_scores_df

rag,base_rag
metrics,Unnamed: 1_level_1
mean_correctness_score,2.0
mean_relevancy_score,1.0
mean_faithfulness_score,1.0


In [62]:
# Per-example scores, answers and retrieved contexts on the curated dataset.
response_curated_deep_eval_df

Unnamed: 0,query,answer,relevancy_score,correctness_score,faithfulness_score,contexts
0,What are the recommended restaurants nearby Sa...,"\nThe Funk Zone area, where this bakery is loc...",1.0,2.0,1.0,[Finally some good food in downtown Santa Barb...


#### Answers for target questions

In [63]:
from IPython.display import Markdown

In [64]:
from src.run.eval.manual_eval_dataset import MANUAL_EVAL_QA

In [65]:
# Run every manually curated question through the query engine and render the
# generated answer under a Markdown heading.
# Note: `expected_answer` is unpacked but not used in this cell; only the
# generated response is displayed.
for question, expected_answer in MANUAL_EVAL_QA:
    display(Markdown(f"\n\n### Question: {question}\n"))
    response = query_engine.query(question)
    display_response(response)



### Question: What are the recommended restaurants nearby Santa Barbara?


**`Final Response:`** The Funk Zone area, where this bakery is located, is a trendy spot with a few other restaurants and bars. It's also close to the wharf, which suggests there are likely other dining options in the area.


Sources:
- [None](None)

#### Error Analysis

In [66]:
# Index the curated eval examples by query text so the error analysis below
# can look up the reference answer and reference contexts of any query.
response_curated_eval_dataset_dict = {
    example.query: {
        "reference_answer": example.reference_answer,
        "reference_contexts": example.reference_contexts,
    }
    for example in response_curated_eval_dataset.examples
}

In [67]:
# Per-example scores in ascending order (relevancy, then correctness, then
# faithfulness), so the weakest answers are listed first.
response_curated_deep_eval_df.sort_values(['relevancy_score', 'correctness_score', 'faithfulness_score'])

Unnamed: 0,query,answer,relevancy_score,correctness_score,faithfulness_score,contexts
0,What are the recommended restaurants nearby Sa...,"\nThe Funk Zone area, where this bakery is loc...",1.0,2.0,1.0,[Finally some good food in downtown Santa Barb...


In [68]:
# Keep only the curated examples whose answer was judged irrelevant
# (relevancy_score < 1), lowest scores first.
response_curated_eval_irrelevance_df = (
    response_curated_deep_eval_df
    .loc[lambda df: df['relevancy_score'].lt(1)]
    .sort_values(['relevancy_score', 'correctness_score', 'faithfulness_score'])
)

# Print each irrelevant answer next to its retrieved contexts and the
# reference answer/contexts from the eval dataset.
for i, row in response_curated_eval_irrelevance_df.reset_index(drop=True).iterrows():
    print(f"\n\n==============Error #{i+1}===============\n\n")
    # Bracket access: attribute access on a Series can collide with Series
    # attributes/methods of the same name.
    print(f"Query:\n{row['query']}\n")
    contexts = '\n\n'.join(row['contexts'])
    print(f"Context:\n{contexts}\n")
    print(f"Answer:\n{row['answer']}\n----\n")
    expected = response_curated_eval_dataset_dict.get(row['query'])
    if expected is None:
        # The lookup is against the curated dict; the message previously
        # pointed at the synthetic one (copy-paste leftover).
        logger.error(f"Could not find query {row['query']} in response_curated_eval_dataset_dict!")
        continue
    expected_answer = expected['reference_answer']
    print(f"Expected Answer:\n{expected_answer}\n")
    expected_contexts = expected['reference_contexts']
    print(f"Expected Contexts:\n{expected_contexts}\n")

# Persist run metadata

In [69]:
from src.run.utils import parse_collect_log, flatten_dict

In [70]:
# Parse the log file at `collect_fp` into the [COLLECT] info that is logged
# as MLflow params in the next cell.
# NOTE(review): presumably a flat mapping, since it is passed straight to
# mlflow.log_params — confirm in src.run.utils.
collect_info = parse_collect_log(collect_fp)

In [71]:
# Persist this run's parameters and evaluation results to MLflow, but only
# when logging is enabled through ARGS.LOG_TO_MLFLOW.
if ARGS.LOG_TO_MLFLOW:
    # Imported only when logging is enabled; the clean-up cell's
    # `mlflow.end_run()` relies on this import being in scope.
    import mlflow

    logger.info("Logging [COLLECT] info to MLflow...")
    mlflow.log_params(collect_info)

    logger.info("Logging config to MLflow...")
    # NOTE(review): flatten_dict presumably turns the nested config dump into
    # flat "cfg.<key>" entries joined with '.' — confirm in src.run.utils.
    mlflow.log_params(flatten_dict(cfg.model_dump(), "cfg", sep='.'))

    logger.info("Logging Retrieval Synthetic Eval Results to MLflow...")
    retrieval_evaluator.log_to_mlflow(cfg)

    logger.info("Logging Response Eval Results to MLflow...")
    # One call per response-eval dataset: synthetic first, then curated.
    response_evaluator.log_to_mlflow(
        cfg,
        'synthetic',
        response_synthetic_mean_scores_df,
        response_synthetic_deep_eval_df,
    )
    response_evaluator.log_to_mlflow(
        cfg,
        'curated',
        response_curated_mean_scores_df,
        response_curated_deep_eval_df,
    )

[32m2024-08-09 08:49:55.594[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mLogging [COLLECT] info to MLflow...[0m
[32m2024-08-09 08:49:55.620[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mLogging config to MLflow...[0m
  Expected `int` but got `str` - serialized value may not be as expected
  Expected `int` but got `str` - serialized value may not be as expected
  Expected `int` but got `str` - serialized value may not be as expected
  Expected `int` but got `str` - serialized value may not be as expected
  Expected `int` but got `str` - serialized value may not be as expected
  Expected `int` but got `str` - serialized value may not be as expected
  return self.__pydantic_serializer__.to_python(
[32m2024-08-09 08:49:55.653[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mLogging Retrieval Synthetic Eval Results to MLflow...[0m
[32m2024-08-09 08:49:56.160[0m | [1mINFO    [0m | [36m__m

# Clean up

In [72]:
# End the active MLflow run. `mlflow` is in scope because the logging cell
# above imports it under the same ARGS.LOG_TO_MLFLOW guard, so that cell must
# have run first.
if ARGS.LOG_TO_MLFLOW:
    mlflow.end_run()

# Archive