In [1]:
import nest_asyncio

# Patch asyncio so event loops can be nested (the purpose of nest_asyncio.apply()).
# Presumably required because later cells `await` agent calls and build a fusion
# retriever with use_async=True from inside the notebook's already-running loop.
nest_asyncio.apply()

In [2]:
import os
import sys
import time

import gradio as gr
import qdrant_client
import Stemmer
import torch
from dotenv import load_dotenv
from llama_index.agent.openai import OpenAIAgent
from llama_index.core import (
    Settings,
    StorageContext,
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.core.base.llms.types import ChatMessage, MessageRole
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.response_synthesizers.type import ResponseMode
from llama_index.core.retrievers import QueryFusionRetriever, VectorIndexRetriever
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.llms.openai import OpenAI
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.vector_stores.qdrant import QdrantVectorStore
from loguru import logger

sys.path.insert(0, "..")


from src.features.append_reference.custom_query_engine import (
    ManualAppendReferenceQueryEngine,
)
from src.features.synthesize_recommendation.custom_tree_summarize import (
    CUSTOM_TREE_SUMMARIZE_PROMPT_SEL,
)
from src.run.args import RunInputArgs
from src.run.cfg import RunConfig
from src.run.orchestrator import RunOrchestrator

# Load variables from a local .env file into the process environment.
load_dotenv()

# Set to False to run everything on CPU.
USE_GPU = True

if not USE_GPU:
    # An empty CUDA_VISIBLE_DEVICES hides every GPU from this process.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

logger.info(f"{torch.cuda.is_available()=}")

# Run-level switches, grouped by purpose: identity, execution mode, tracking,
# and which persisted artefacts should be rebuilt.
ARGS = RunInputArgs(
    # --- identity ---
    EXPERIMENT_NAME="Review Rec Bot - Yelp Review Rec Bot",
    RUN_NAME="026_chatbot_interface",
    RUN_DESCRIPTION="""
# Objective

# Implementation

# Changelog
""",
    # --- execution mode ---
    TESTING=False,
    DEBUG=False,
    # --- tracking / tracing ---
    OBSERVABILITY=True,
    LOG_TO_MLFLOW=False,
    # --- rebuild flags (all off: reuse what earlier runs persisted) ---
    RECREATE_INDEX=False,
    RECREATE_RETRIEVAL_EVAL_DATASET=False,
    RECREATE_RESPONSE_EVAL_DATASET=False,
)

logger.info(ARGS)

# Start from the project defaults, then point this run at the artefacts produced
# by the earlier "018_finetuned_embedding_reindex" run.
cfg = RunConfig()

# Every on-disk artefact used below lives under this directory; both paths are
# derived from it so they cannot drift apart if the prefix changes.
dir_prefix = "../notebooks"
cfg.storage_context_persist_dp = (
    f"{dir_prefix}/data/018_finetuned_embedding_reindex/storage_context"
)
cfg.db_collection = "review_rec_bot__018_finetuned_embedding_reindex__huggingface____data_finetune_embedding_finetuned_model"
# Resolves to the same path the original hard-coded:
# "../notebooks/data/finetune_embedding/finetuned_model".
cfg.llm_cfg.embedding_model_name = (
    f"{dir_prefix}/data/finetune_embedding/finetuned_model"
)

# Overrides must be in place before init(); with OBSERVABILITY on, the recorded
# log shows init() starting the Phoenix observability server.
cfg.init(ARGS)

logger.info(cfg)

# setup_llm() returns the chat LLM and the embedding model described by cfg.llm_cfg.
llm, embed_model = cfg.setup_llm()

logger.info(cfg.llm_cfg.model_dump_json(indent=2))

# Register both as the LlamaIndex-wide defaults.
Settings.embed_model = embed_model
Settings.llm = llm

# One sync and one async client against the same local Qdrant instance.
qdrantdb = qdrant_client.QdrantClient(host="localhost", port=6333)
aqdrantdb = qdrant_client.AsyncQdrantClient(host="localhost", port=6333)

# Prepare the collection named in cfg (the recorded run reused an existing one).
RunOrchestrator.setup_db(cfg, qdrantdb)

# NOTE(review): `db_collection` is not read again in the visible cells; the call
# presumably doubles as a check that the collection exists -- confirm before removing.
db_collection = qdrantdb.get_collection(cfg.db_collection)

vector_store = QdrantVectorStore(
    collection_name=cfg.db_collection,
    client=qdrantdb,
    aclient=aqdrantdb,
    prefer_grpc=True,
    enable_hybrid=False,
)


logger.info(f"Loading Storage Context from {cfg.storage_context_persist_dp}...")
docstore = SimpleDocumentStore.from_persist_dir(
    persist_dir=cfg.storage_context_persist_dp
)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    docstore=docstore,
)

# Materialise every persisted node; the BM25 retriever is built from this list.
nodes = [*docstore.docs.values()]

logger.info(f"[COLLECT] {len(nodes)=}")

logger.info("Configuring Vector Retriever...")
# Wrap the existing Qdrant collection in an index without re-embedding anything.
index = VectorStoreIndex.from_vector_store(
    vector_store,
    storage_context=storage_context,
)

# Dense retriever in "mmr" query mode.
vector_retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=cfg.retrieval_cfg.retrieval_dense_top_k,
    vector_store_query_mode="mmr",
    # sparse_top_k=cfg.retrieval_cfg.retrieval_sparse_top_k,
)

logger.info("Configuring BM25 Retriever...")
# Keyword retriever over the nodes loaded from the docstore, with English stemming.
bm25_retriever = BM25Retriever.from_defaults(
    nodes=nodes,
    language="english",
    stemmer=Stemmer.Stemmer("english"),
    similarity_top_k=cfg.retrieval_cfg.retrieval_sparse_top_k,
)

logger.info("Configuring Query Fusion Retriever...")

# LLM used for query expansion, configured from the eval settings.
# NOTE(review): this rebinds the module-level `llm`; `Settings.llm` keeps the
# object assigned earlier in this cell.
llm = OpenAI(
    model=cfg.eval_cfg.response_eval_llm_model,
    **cfg.eval_cfg.response_eval_llm_model_config,
)

# Query-expansion prompt. It alternates a keyword-only rewrite with a fake review.
# The {num_queries} and {query} placeholders are presumably filled in by
# QueryFusionRetriever.
query_gen_prompt = """
You are a helpful assistant that expands an input query into new strings that aim to increase the recall of an information retrieval system. The strings can be queries or paragraphs or sentences.
You should apply different techniques to create new strings. Here are some example techniques:
- Technique 1 - Optimize for full-text search: Rephrase the input query to contain only important keywords. Remove all stopwords and low information words. Example input query: "What are some places to enjoy cold brew coffee in Hanoi?" -> Expected output:  "cold brew coffee hanoi"
- Technique 2 - Optimize for similarity-based vector retrieval: Create a fake user review that should contain the answer for the question. Example input query: "What are some good Pho restaurants in Singapore?" -> Expected output query: "I found So Pho offerring a variety of choices to enjoy not Pho but some other Vietnamese dishes like bun cha. The price is reasonable."

Generate at least {num_queries} new strings by iterating over the technique in order. For example, your first generated string should always use technique 1, second technique 2. If run of of techniques then re-iterate from the start.

Return one string on each line, related to the input query.

Only return the strings. Never include the chosen technique.

Input Query: {query}\n
New strings:\n
"""

# Fuse dense and BM25 results with reciprocal-rank fusion.
retriever = QueryFusionRetriever(
    [vector_retriever, bm25_retriever],
    llm=llm,
    query_gen_prompt=query_gen_prompt,
    num_queries=2,  # set this to 1 to disable query generation
    mode="reciprocal_rerank",
    similarity_top_k=cfg.retrieval_cfg.retrieval_top_k,
    use_async=True,
    verbose=True,
)

logger.info("Setting up Post-Retriever Processor...")

# Optional score filter: present only when a cutoff is configured.
# NOTE(review): the fusion retriever runs in "reciprocal_rerank" mode, so the scores
# reaching this filter are presumably fused rank scores rather than raw embedding
# similarities -- confirm the configured cutoff is on that scale.
node_postprocessors = (
    [
        SimilarityPostprocessor(
            similarity_cutoff=cfg.retrieval_cfg.retrieval_similarity_cutoff
        )
    ]
    if cfg.retrieval_cfg.retrieval_similarity_cutoff is not None
    else []
)

# The reranker always runs last and keeps the top `rerank_top_k` nodes.
reranker = FlagEmbeddingReranker(
    top_n=cfg.retrieval_cfg.rerank_top_k,
    model=cfg.retrieval_cfg.rerank_model_name,
    use_fp16=True,
)
node_postprocessors += [reranker]

# Tree-summarize synthesis driven by the project's custom summary prompt.
response_synthesizer = get_response_synthesizer(
    summary_template=CUSTOM_TREE_SUMMARIZE_PROMPT_SEL,
    response_mode=ResponseMode.TREE_SUMMARIZE,
)

# Retrieval -> post-processing -> synthesis, wrapped in the project's query engine.
query_engine = ManualAppendReferenceQueryEngine(
    retriever=retriever,
    node_postprocessors=node_postprocessors,
    response_synthesizer=response_synthesizer,
)

logger.info("Registerring Query Engine as Tool...")

# Expose the query engine to the agent as a tool; the description is what the
# agent sees when deciding whether to call it.
query_engine_tool = QueryEngineTool(
    metadata=ToolMetadata(
        description=(
            "useful for when you want to find places to visit"
            " based on end-user reviews. Takes input in a question"
            " format, e.g.: What are the best Vietnamese restaurants in Texas?"
        ),
        name="reco_review",
    ),
    query_engine=query_engine,
)

# The agent is built with exactly this one tool.
tools = [query_engine_tool]

logger.info("Launching Chat Demo UI...")

[32m2024-08-11 15:45:16.057[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1mtorch.cuda.is_available()=True[0m
[32m2024-08-11 15:45:16.058[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m70[0m - [1m{
  "EXPERIMENT_NAME": "Review Rec Bot - Yelp Review Rec Bot",
  "RUN_NAME": "026_chatbot_interface",
  "RUN_DESCRIPTION": "\n# Objective\n\n# Implementation\n\n# Changelog\n",
  "TESTING": false,
  "DEBUG": false,
  "OBSERVABILITY": true,
  "LOG_TO_MLFLOW": false,
  "RECREATE_RETRIEVAL_EVAL_DATASET": false,
  "RECREATE_RESPONSE_EVAL_DATASET": false,
  "RECREATE_INDEX": false
}[0m
[32m2024-08-11 15:45:16.058[0m | [1mINFO    [0m | [36msrc.run.cfg[0m:[36minit[0m:[36m148[0m - [1mStarting Observability server with Phoenix...[0m
INFO:phoenix.config:📋 Ensuring phoenix working directory: /home/dvquys/.phoenix
[32m2024-08-11 15:45:20.260[0m | [1mINFO    [0m | [36msrc.run.cfg[0m:[36minit[0m:[36m175[0m - [1mNotebook-generated

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


[32m2024-08-11 15:45:21.213[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m89[0m - [1m{
  "llm_provider": "openai",
  "llm_model_name": "gpt-4o-mini",
  "embedding_provider": "huggingface",
  "embedding_model_name": "../notebooks/data/finetune_embedding/finetuned_model",
  "embedding_model_dim": 768,
  "ollama__host": "192.168.100.14",
  "ollama__port": 11434
}[0m
[32m2024-08-11 15:45:21.248[0m | [1mINFO    [0m | [36msrc.run.orchestrator[0m:[36msetup_db[0m:[36m37[0m - [1mUse existing Qdrant collection: review_rec_bot__018_finetuned_embedding_reindex__huggingface____data_finetune_embedding_finetuned_model[0m
WARNI [llama_index.vector_stores.qdrant.base] Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.
[32m2024-08-11 15:45:21.254[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m109[0m - [1mLoading Storage Context from ../notebooks/data/018_finetuned_embedding_reindex/storage_cont

In [3]:
# System prompt for the agent: a place-recommendation assistant that must append
# the referenced paragraphs to every answer. Written as explicit pieces so the
# leading and trailing newlines are visible.
agent_system_prompt = (
    "\n"
    "You're a helpful assistant who excels at recommending places to go.\n"
    "\n"
    "Always return the referenced paragraphs at the end of your answer to users. Format them nicely if need to.\n"
)

In [20]:
# Build the function-calling agent around the single review-recommendation tool.
# `callback_manager` is deliberately not passed: the cell previously ended with a
# dangling `callback_manager=` (no value), which is a SyntaxError. Leaving it out
# lets `from_tools` fall back to its default.
# No `llm` is passed either, so the agent is expected to pick up `Settings.llm`
# -- TODO confirm.
agent = OpenAIAgent.from_tools(
    tools,
    verbose=True,
    system_prompt=agent_system_prompt,
)

In [32]:
# Display the most recent step result.
# NOTE(review): `step_output` is first assigned by the `agent.arun_step(...)` cell
# further down, so this cell fails on a fresh kernel when run top to bottom.
step_output

TaskStepOutput(output=AgentChatResponse(response='None', sources=[ToolOutput(content="\nBased on the provided reviews and context about the Vietnamese Food Truck in Tampa Bay, here are the recommendations ranked by relevance to the user's query about the best Vietnamese restaurants:\n\n### 1. **Vietnamese Food Truck**\n   - **Strengths**: \n     - Consistently high ratings (mostly 5 stars) from multiple reviewers who praise the pho for its authentic flavor and aromatic broth.\n     - Offers a variety of dishes including pho, egg rolls, and banh mi, with many reviewers noting the freshness and quality of the ingredients.\n     - Positive comments about the service and the husband-and-wife team running the food truck, adding a personal touch to the dining experience.\n   - **Comparison**: This option stands out as the most recommended based on the overwhelming positive feedback, especially for pho, which is a staple of Vietnamese cuisine. It is ideal for those seeking authentic flavors a

In [21]:
query = "find me some vietnamese restaurants"

# Register the query as a task so it can be advanced one step at a time.
task = agent.create_task(query)

# Run one step. In the recorded run this step called the `reco_review` tool and
# returned the tool output without a final answer yet.
step_output = await agent.arun_step(task.task_id)

Added user message to memory: find me some vietnamese restaurants
=== Calling Function ===
Calling function: reco_review with args: {"input":"What are the best Vietnamese restaurants?"}
Generated queries:
best Vietnamese restaurants
Got output: 
Based on the provided reviews and context about the Vietnamese Food Truck in Tampa Bay, here are the recommendations ranked by relevance to the user's query about the best Vietnamese restaurants:

### 1. **Vietnamese Food Truck**
   - **Strengths**: 
     - Consistently high ratings (mostly 5 stars) from multiple reviewers who praise the pho for its authentic flavor and aromatic broth.
     - Offers a variety of dishes including pho, egg rolls, and banh mi, with many reviewers noting the freshness and quality of the ingredients.
     - Positive comments about the service and the husband-and-wife team running the food truck, adding a personal touch to the dining experience.
   - **Comparison**: This option stands out as the most recommended base

In [27]:
# Display the step result; in the recorded output `response` is 'None' while the
# tool result sits in `sources`.
step_output

TaskStepOutput(output=AgentChatResponse(response='None', sources=[ToolOutput(content="\nBased on the provided reviews and context about the Vietnamese Food Truck in Tampa Bay, here are the recommendations ranked by relevance to the user's query about the best Vietnamese restaurants:\n\n### 1. **Vietnamese Food Truck**\n   - **Strengths**: \n     - Consistently high ratings (mostly 5 stars) from multiple reviewers who praise the pho for its authentic flavor and aromatic broth.\n     - Offers a variety of dishes including pho, egg rolls, and banh mi, with many reviewers noting the freshness and quality of the ingredients.\n     - Positive comments about the service and the husband-and-wife team running the food truck, adding a personal touch to the dining experience.\n   - **Comparison**: This option stands out as the most recommended based on the overwhelming positive feedback, especially for pho, which is a staple of Vietnamese cuisine. It is ideal for those seeking authentic flavors a

In [36]:
# Name of the tool that produced the first source ('reco_review' in the recorded run).
step_output.output.sources[0].tool_name

'reco_review'

In [38]:
# Exploratory listing of the attributes on the first source object.
# NOTE(review): leftover debugging -- consider removing before sharing the notebook.
dir(step_output.output.sources[0])

['Config',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__class_vars__',
 '__config__',
 '__custom_root_type__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__exclude_fields__',
 '__fields__',
 '__fields_set__',
 '__format__',
 '__ge__',
 '__get_validators__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__include_fields__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__json_encoder__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__post_root_validators__',
 '__pre_root_validators__',
 '__pretty__',
 '__private_attributes__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__repr_args__',
 '__repr_name__',
 '__repr_str__',
 '__rich_repr__',
 '__schema_cache__',
 '__setattr__',
 '__setstate__',
 '__signature__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__try_update_forward_refs__',
 '__validators__',
 '_abc_impl',
 '_calculate_keys',
 '_copy_and_set_values',
 '_decompose_class',
 '_enfo

In [39]:
# Arguments the agent passed to the tool for this call.
step_output.output.sources[0].raw_input

{'input': 'What are the best Vietnamese restaurants?'}

In [18]:
# Advance the same task by one more step; in the recorded run this step produced
# the final answer text in `response`.
# NOTE(review): relies on `task` from the `agent.create_task(...)` cell.
step_output = await agent.arun_step(task.task_id)

In [19]:
# Display the result of the step that was just run.
step_output

TaskStepOutput(output=AgentChatResponse(response="Here are some top recommendations for Vietnamese restaurants, particularly focusing on the **Vietnamese Food Truck**, which has received great reviews:\n\n### 1. **Vietnamese Food Truck**\n   - **Rating:** 5 stars (multiple reviews)\n   - **Strengths:**\n     - **Authenticity:** Many reviewers praised the pho for its authentic flavor, comparing it favorably to experiences in Vietnam.\n     - **Variety:** The menu includes a range of options such as pho, banh mi, and egg rolls, all of which have received high marks for taste and quality.\n     - **Freshness:** Reviewers noted the freshness of the ingredients, particularly the handmade egg rolls.\n     - **Convenience:** The food truck offers online ordering with timely updates, making it easy for customers to pick up their meals.\n   - **Ideal For:** Those seeking authentic Vietnamese cuisine with a focus on pho and a variety of other traditional dishes. Perfect for casual dining or take

In [12]:
query = "hello"

response = await agent.astream_chat(query)
# response.print_response_stream()

logger.info(f"{response.is_function}, {len(response.sources)=}")
if response.is_function or len(response.sources) > 0:
    logger.info(f"This is a function tool.")
    logger.info(f"{response.is_function}")
    logger.info(f"{response.sources=}")
printed = ""
for s in response.response_gen:
    # time.sleep(0.1)
    printed += s
    yield printed

Added user message to memory: hello
Added user message to memory: hello


Exception ignored in: <function BaseEventLoop.__del__ at 0x7678386051c0>
Traceback (most recent call last):
  File "/home/dvquys/frostmourne/study/vietai-genai03/final-project/review-rec-bot/.venv/lib/python3.11/asyncio/base_events.py", line 692, in __del__
    if not self.is_closed():
           ^^^^^^^^^^^^^^^^
  File "/home/dvquys/frostmourne/study/vietai-genai03/final-project/review-rec-bot/.venv/lib/python3.11/asyncio/base_events.py", line 689, in is_closed
    return self._closed
           ^^^^^^^^^^^^
AttributeError: '_UnixSelectorEventLoop' object has no attribute '_closed'
Exception ignored in: <function Task.__del__ at 0x7678385daf20>
Traceback (most recent call last):
  File "/home/dvquys/frostmourne/study/vietai-genai03/final-project/review-rec-bot/.venv/lib/python3.11/asyncio/tasks.py", line 141, in __del__
    self._loop.call_exception_handler(context)
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'call_exception_handler'
Except

Hello
!
 It
 seems
 like
 you're
 just
 saying
 hi
.
 If
 there's
 anything
 specific
 you'd
 like
 to
 know
 or
 talk
 about
,
 feel
 free
 to
 let
 me
 know
!
