# Imports

In [None]:
import packages
from configs import settings, const, components
from configs.settings import logger
import asyncio, os, time, yaml, json, datetime, copy, random
from typing import Any, AsyncGenerator, Generator, Callable, Literal, Optional, TypeAlias, Union
from tqdm import tqdm
from pprint import pprint

from toolkit.llm.llama_index import (
	agents, cores, deploys as dpls, evaluation, messages, models, 
	observability, types, utils as utils_llama_index, workflows as wfs
)
from toolkit.llm.llama_index.data import loading, querying, storing

from features.agents.car.tools import VehicleDB
from features.agents.tools import map

from toolkit.utils import utils, typer as t
from toolkit.utils.llm import measure_performance, main as utils_llm
from toolkit.utils.utils import rp_print

# Measure Performance

In [None]:
def run_query_engine(
    query: str,
    # Quoted so the annotation is not evaluated when the function is defined.
    query_engine: "querying.BaseQueryEngine",
    print_in_streaming=True,
):
    """Run ``query`` through ``query_engine`` and time the response.

    Args:
        query: Question passed to ``query_engine.query``.
        query_engine: Engine to run. Its ``_response_synthesizer._streaming``
            flag selects the streaming or the blocking path.
        print_in_streaming: On the streaming path, echo every token to stdout
            as soon as it arrives.

    Returns:
        A ``(response, first_token_time, total_time)`` tuple; both times are
        elapsed seconds measured from just before the query is issued.

        * Blocking path: ``response`` is whatever ``query_engine.query``
          returned and ``first_token_time`` is ``None``.
        * Streaming path: ``response`` is the concatenated token text and
          ``first_token_time`` is the delay until the first token (``None``
          if the stream produced no tokens).
    """
    # perf_counter is monotonic, so the measured durations cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    if not query_engine._response_synthesizer._streaming:
        response = query_engine.query(query)
        return response, None, time.perf_counter() - start_time

    response = query_engine.query(query)

    first_token_time = None
    tokens = []
    for token in response.response_gen:
        if first_token_time is None:
            first_token_time = time.perf_counter() - start_time

        tokens.append(token)
        if print_in_streaming:
            print(token, end="", flush=True)

    total_time = time.perf_counter() - start_time

    # Join once at the end instead of growing a string token by token.
    return "".join(tokens), first_token_time, total_time

# Example usage (kept commented out; uncomment to time one query by hand).
# Alternative sample question:
# query = "What did the author do growing up?"
query = "What are some notes before driving the car?"
# response, first_token_time, total_time = run_query_engine(query, query_engine, print_in_streaming=False)

# NOTE: run_query_engine returns first_token_time=None on the non-streaming
# path, so the first print below only works for a streaming engine.
# print(f"Time to first token: {first_token_time:.4f} seconds")
# print(f"Total response time: {total_time:.4f} seconds")

In [None]:
# Run the benchmark question through measure_performance.execute_query,
# tagging the run with the metadata below.
query = "What are some notes before driving the car?"

metadata = measure_performance.Metadata(
	dataset="car_manual",
	algorithm="basic",
	model=chosen_model,
	inference_server="Ollama",
)

is_run_loop = False # <-- Set to True to run the loop

# 20 repetitions when looping is enabled, a single run otherwise;
# `result` always ends up holding the outcome of the last call.
for _ in range(20 if is_run_loop else 1):
	result = measure_performance.execute_query(query_engine, run_query_engine, query, metadata=metadata)


In [None]:
# Hand the JSON performance log and the CSV target path to json_to_csv.
json_log_path = f"{packages.APP_PATH}/data/logs/performance_logs.json"
csv_log_path = f"{packages.APP_PATH}/data/logs/performance_logs.csv"

measure_performance.json_to_csv(json_file_path=json_log_path, csv_file_path=csv_log_path)

# Tmp

In [None]:
import packages

In [None]:
from toolkit.llm.langchain import document_loaders

# Load the manual PDF and print the second half of its text.
file_path = f"{packages.APP_PATH}/data/org/test/manual_toyota_corolla_cross_2023.pdf"
loader = document_loaders.UnstructuredPDFLoader(file_path)
documents = loader.load()

# Only the first loaded document is inspected; its text is cut at the midpoint.
docs_content = documents[0].page_content
midpoint = len(docs_content) // 2
docs_first_half, docs_second_half = docs_content[:midpoint], docs_content[midpoint:]

print(docs_second_half)

In [None]:
import packages

# Read the raw manual text and cut it into four consecutive parts.
# NOTE(review): no encoding is passed to open(), so decoding follows the
# platform default — confirm the file's actual encoding.
with open(f'{packages.APP_PATH}/data/org/raw-manual_toyota_corolla_cross_2023.txt', 'r') as raw_file:
	content = raw_file.read()

total_length = len(content)
part_size = total_length // 4

# Slice boundaries: 0, 1x, 2x and 3x part_size, then the end of the text, so
# the last part also absorbs the remainder of the integer division.
boundaries = [part_size * k for k in range(4)] + [total_length]
part1, part2, part3, part4 = (
	content[start:stop] for start, stop in zip(boundaries, boundaries[1:])
)

## Timing

### LLM

In [None]:
# LLM handle used by the cells below, pointed at a locally served endpoint.
llm_api_base = "http://localhost:8767/v1"
llm = models.OpenAI(api_base=llm_api_base)

In [None]:
token_generator = utils_llama_index.interact_model(
	llm=llm,
  prompt="Tell me a joke", mode="astream", user_query=None,
  measure_performance=True,
)

async for token in await token_generator:
  print(token, end="", flush=True)


In [None]:
# Send one user message through llm.chat and print the reply with rp_print.
#
# Alternatives left here for reference:
#   response = llm.complete("Tell me a joke")  # Uses completions endpoint
#   response = await utils_llama_index.interact_model(
#   	llm=llm,
#   	user_query="Tell me a joke",
#   	mode="chat"
#   )
greeting = messages.ChatMessage(role=messages.MessageRole.USER, content="Hello")
response = llm.chat([greeting])

rp_print(response)

### Pipeline

In [None]:
# Prompt templates for the car agent are taken from the project settings.
prompts_agent_car = settings.prompts_agent_car

# RAG prompt templates are read from a YAML file. The file is decoded
# explicitly as UTF-8 so that loading does not depend on the platform's
# default text encoding.
with open(f"{packages.APP_PATH}/use_cases/dev/features/rag/prompts.yaml", 'r', encoding="utf-8") as file:
	prompts_rag = yaml.safe_load(file)


#### user_query_category

In [None]:
prompt_categorize_query = prompts_agent_car["CategorizeQuestion"]["dev"]

# user_query = "Is the car trunk opened?"
user_query = "How can parents prevent children from accidentally opening doors or windows while driving?"

examples = await components.retriever_user_query_category.aretrieve(user_query)
examples = str(await utils_llama_index.extract_retriever_results(examples))

user_query_category: t.UserQueryCategory = await utils_llama_index.interact_model(
	prompt=prompt_categorize_query, mode="chat", user_query=user_query,
	examples=examples,
	llm=llm,
)
user_query_category = await utils_llm.post_process_llm_output(
	user_query_category, mode=["remove_quotes", "remove_brackets", "remove_tokens"],
)

rp_print(user_query_category)

#### RAG

In [None]:
queries = [
	"What are the main safety procedures to follow before driving the vehicle?",
	"What is the correct driving posture recommended in the manual?",
]

user_query = queries[0]

retrieved_data = await components.retriever_car_manual.aretrieve(user_query)
retrieved_data_texts = await utils_llama_index.extract_text(retrieved_data)

response = await utils_llama_index.interact_model(
	prompt=prompts_rag["generate_result"], system_prompt=prompts_rag["system"]["car_manual"],
	mode="achat", 
	user_question=user_query, retrieved_data=retrieved_data_texts,
	# llm=llm,
)

response = await utils_llm.post_process_llm_output(
	response, mode=["remove_tokens"],
)
rp_print(response)

In [None]:
# List the attributes of the retriever's vector-store client. The dir()
# builtin is used rather than calling __dir__() directly; it returns the
# same names as a sorted list.
dir(components.retriever_car_manual._vector_store.client)

In [None]:
# Print the attributes of the car-manual retriever. The dir() builtin is
# used rather than calling __dir__() directly; it returns the same names as
# a sorted list.
rp_print(dir(components.retriever_car_manual))

In [None]:
# Call apis_rag.do_querying with the user_query set by an earlier cell.
# NOTE(review): `apis_rag` is not imported in any visible cell — confirm
# where it is defined before running this.
await apis_rag.do_querying(user_query=user_query)

#### separate_tasks

In [None]:
user_query = "Is the car locked? Is the car trunk opened?. Increase front wiper speed"

prompt_separate_tasks = prompts_agent_car["control"]["SeparateTasks"]["dev"]

tasks = await utils_llama_index.interact_model(
	prompt=prompt_separate_tasks, mode="achat", user_query=user_query,
	llm=llm,
)
tasks = await utils_llm.post_process_llm_output(
	tasks, mode=["remove_tokens"],
)
tasks = await utils_llm.parse_json(tasks)

rp_print(tasks)

#### control

In [None]:
tools = agents.add_tools(
	[agents.FunctionTool.from_defaults(async_fn=VehicleDB.db_mongo_vehicle.process_user_query)],
)

llm_control = llm
llm_control.system_prompt = prompts_agent_car["control"]["System_VehicleDB"]["dev"]

# agent = agents.OpenAIAgent.from_tools(
# 	tools=tools,
# 	verbose=True,
# 	llm=llm,
# 	system_prompt=prompts_agent_car["control"]["System_VehicleDB"]["dev"], # System prompt
# )

agent = agents.AgentRunner.from_llm(
		llm=llm,
		tools=tools, # type: ignore
		verbose=True,
		context=prompts_agent_car["control"]["System_VehicleDB"]["dev"], # System prompt
	)

#*==============================================================================

user_query = "Is the car locked?"

await utils_llama_index.interact_agent(
	agent=agent,
	user_query=user_query,
	mode="achat",
)

In [None]:
# Print the `metadata` attribute of the llm object created in the LLM section.
rp_print(llm.metadata)

# Setup

In [None]:
"""
HF_API_KEY

docker run --runtime nvidia --gpus all \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HUGGING_FACE_HUB_TOKEN=HF_API_KEY" \
    -p 8000:8000 \
    --ipc=host \
    vllm/vllm-openai:latest \
    --model mistralai/Mistral-7B-v0.1
"""

# Sources

- **getting_started**
  - https://docs.llamaindex.ai/en/stable/#introduction ✅
  - https://docs.llamaindex.ai/en/stable/getting_started/starter_example/ ✅
  - https://docs.llamaindex.ai/en/stable/getting_started/concepts/ ✅
  - https://docs.llamaindex.ai/en/stable/getting_started/installation/ ✅
  - https://docs.llamaindex.ai/en/stable/getting_started/customization/ ⌛

- **Models**
  - **Understanding**
    - https://docs.llamaindex.ai/en/stable/understanding/using_llms/using_llms/ ✅
  - **Module Guides**
    - https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
    - https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings/
    - https://docs.llamaindex.ai/en/stable/module_guides/models/prompts/

- **Data**
  - **Understanding**
    - https://docs.llamaindex.ai/en/stable/understanding/loading/loading/ ✅
    - https://docs.llamaindex.ai/en/stable/module_guides/loading/ingestion_pipeline/ ⌛
    - https://docs.llamaindex.ai/en/stable/understanding/loading/llamahub/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/indexing/indexing/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/storing/storing/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/querying/querying/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/agent/llamaparse/ ✅

  - **Loading**: https://docs.llamaindex.ai/en/stable/module_guides/loading/
    - https://docs.llamaindex.ai/en/stable/module_guides/loading/documents_and_nodes/
      - https://docs.llamaindex.ai/en/stable/module_guides/loading/documents_and_nodes/usage_documents/
      - https://docs.llamaindex.ai/en/stable/module_guides/loading/documents_and_nodes/usage_nodes/
      - https://docs.llamaindex.ai/en/stable/module_guides/loading/documents_and_nodes/usage_metadata_extractor/
    - https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/
    - https://docs.llamaindex.ai/en/stable/module_guides/loading/connector/
      - https://docs.llamaindex.ai/en/stable/module_guides/loading/connector/usage_pattern/
      - https://docs.llamaindex.ai/en/stable/module_guides/loading/connector/llama_parse/
      - https://docs.llamaindex.ai/en/stable/module_guides/loading/connector/modules/
    - https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/
      - https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/modules/
    - https://docs.llamaindex.ai/en/stable/module_guides/loading/ingestion_pipeline/
      - https://docs.llamaindex.ai/en/stable/module_guides/loading/ingestion_pipeline/transformations/

  - **Indexing**: https://docs.llamaindex.ai/en/stable/module_guides/indexing/
    - https://docs.llamaindex.ai/en/stable/module_guides/indexing/index_guide/
    - https://docs.llamaindex.ai/en/stable/module_guides/indexing/vector_store_index/
    - https://docs.llamaindex.ai/en/stable/module_guides/indexing/lpg_index_guide/
    - https://docs.llamaindex.ai/en/stable/module_guides/indexing/document_management/
    - https://docs.llamaindex.ai/en/stable/module_guides/indexing/llama_cloud_index/
    - https://docs.llamaindex.ai/en/stable/module_guides/indexing/metadata_extraction/
    - https://docs.llamaindex.ai/en/stable/module_guides/indexing/modules/

  - **Storing**: https://docs.llamaindex.ai/en/stable/module_guides/storing/
    - https://docs.llamaindex.ai/en/stable/module_guides/storing/vector_stores/
    - https://docs.llamaindex.ai/en/stable/module_guides/storing/docstores/
    - https://docs.llamaindex.ai/en/stable/module_guides/storing/index_stores/
    - https://docs.llamaindex.ai/en/stable/module_guides/storing/chat_stores/
    - https://docs.llamaindex.ai/en/stable/module_guides/storing/kv_stores/
    - https://docs.llamaindex.ai/en/stable/module_guides/storing/save_load/
    - https://docs.llamaindex.ai/en/stable/module_guides/storing/customization/

  - **Querying**: https://docs.llamaindex.ai/en/stable/module_guides/querying/
    - **Query Engines**: https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/
      - https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/usage_pattern/
      - https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/response_modes/
      - https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/streaming/
      - https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/modules/
      - https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/supporting_modules/
    - **Chat Engines**: https://docs.llamaindex.ai/en/stable/module_guides/deploying/chat_engines/
      - https://docs.llamaindex.ai/en/stable/module_guides/deploying/chat_engines/usage_pattern/
      - https://docs.llamaindex.ai/en/stable/module_guides/deploying/chat_engines/modules/
    - **Retrieval**: https://docs.llamaindex.ai/en/stable/module_guides/querying/retriever/
      - https://docs.llamaindex.ai/en/stable/module_guides/querying/retriever/retrievers/
      - https://docs.llamaindex.ai/en/stable/module_guides/querying/retriever/retriever_modes/
    - **Node Postprocessors**: https://docs.llamaindex.ai/en/stable/module_guides/querying/node_postprocessors/
      - https://docs.llamaindex.ai/en/stable/module_guides/querying/node_postprocessors/node_postprocessors/
    - **Response Synthesis**: https://docs.llamaindex.ai/en/stable/module_guides/querying/response_synthesizers/
      - https://docs.llamaindex.ai/en/stable/module_guides/querying/response_synthesizers/response_synthesizers/
    - **Routing**: https://docs.llamaindex.ai/en/stable/module_guides/querying/router/
    - **Structured Outputs**: https://docs.llamaindex.ai/en/stable/module_guides/querying/structured_outputs/
      - https://docs.llamaindex.ai/en/stable/module_guides/querying/structured_outputs/output_parser/
      - https://docs.llamaindex.ai/en/stable/module_guides/querying/structured_outputs/pydantic_program/

- **Agents**
  - **Understanding**
    - https://docs.llamaindex.ai/en/stable/understanding/agent/basic_agent/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/agent/local_models/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/agent/rag_agent/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/agent/memory/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/agent/tools/ ✅
  - **Module Guides**
    - https://medium.com/llamaindex-blog/data-agents-eed797d7972f ⌛
    - https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/ ✅
    - https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/usage_pattern/ ✅
    - https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/agent_runner/ ✅
    - https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/modules/ ⌛
  - **Examples**
    - https://docs.llamaindex.ai/en/stable/examples/agent/openai_agent/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/agent/openai_agent_with_query_engine/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/agent/openai_agent_retrieval/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/agent/openai_agent_query_cookbook/ ⌛
    - https://docs.llamaindex.ai/en/stable/examples/agent/openai_agent_query_plan/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/agent/openai_agent_context_retrieval/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/query_engine/recursive_retriever_agents/ ⌛
    - https://docs.llamaindex.ai/en/stable/examples/agent/agent_builder/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/agent/openai_agent_parallel_function_calling/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/agent/mistral_agent/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/agent/react_agent/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/agent/react_agent_with_query_engine/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/agent/custom_agent/ ⌛
    - https://docs.llamaindex.ai/en/stable/examples/objects/object_index/ ⌛
    - https://docs.llamaindex.ai/en/stable/examples/agent/agent_runner/query_pipeline_agent/ ⌛
    - https://docs.llamaindex.ai/en/stable/examples/agent/agent_runner/agent_runner/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/agent/structured_planner/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/agent/agent_runner/agent_runner_rag_controllable/ ⌛
  - https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/tools/ ⌛

- **Workflows**
  - **Understanding**
    - https://docs.llamaindex.ai/en/stable/understanding/workflows/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/workflows/basic_flow/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/workflows/branches_and_loops/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/workflows/state/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/workflows/stream/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/workflows/concurrent_execution/ ⌛
    - https://docs.llamaindex.ai/en/stable/understanding/workflows/subclass/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/workflows/nested/ ✅
    - https://docs.llamaindex.ai/en/stable/understanding/workflows/observability/
    - https://docs.llamaindex.ai/en/stable/understanding/workflows/unbound_functions/
  - **Module Guides**
    - https://docs.llamaindex.ai/en/stable/module_guides/workflow/ ✅
  - **Examples**
    - https://docs.llamaindex.ai/en/stable/examples/workflow/workflows_cookbook/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/workflow/self_discover_workflow/ ⌛
  - **RAG**
    - https://docs.llamaindex.ai/en/stable/examples/workflow/rag/
    - https://docs.llamaindex.ai/en/stable/examples/workflow/citation_query_engine/
    - https://docs.llamaindex.ai/en/stable/examples/workflow/corrective_rag_pack/
    - https://docs.llamaindex.ai/en/stable/examples/workflow/advanced_text_to_sql/
    - https://docs.llamaindex.ai/en/stable/examples/workflow/JSONalyze_query_engine/
    - https://docs.llamaindex.ai/en/stable/examples/workflow/long_rag_pack/
    - https://docs.llamaindex.ai/en/stable/examples/workflow/multi_step_query_engine/
    - https://docs.llamaindex.ai/en/stable/examples/workflow/router_query_engine/
    - https://docs.llamaindex.ai/en/stable/examples/workflow/sub_question_query_engine/
  - **Agents**
    - https://docs.llamaindex.ai/en/stable/examples/workflow/function_calling_agent/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/workflow/react_agent/
  - **Techniques**
    - https://docs.llamaindex.ai/en/stable/examples/workflow/parallel_execution/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/workflow/human_in_the_loop_story_crafting/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/workflow/reflection/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/workflow/multi_strategy_workflow/
    - https://docs.llamaindex.ai/en/stable/examples/workflow/self_discover_workflow/

- **Llama Deploy**
  - **Module Guides**
    - https://docs.llamaindex.ai/en/stable/module_guides/llama_deploy/
    - https://docs.llamaindex.ai/en/stable/module_guides/llama_deploy/10_getting_started/ ✅
      - https://docs.llamaindex.ai/en/stable/module_guides/llama_deploy/20_core_components/ ✅
      - https://docs.llamaindex.ai/en/stable/module_guides/llama_deploy/30_manual_orchestration/ ✅
      - https://docs.llamaindex.ai/en/stable/module_guides/llama_deploy/40_python_sdk/ ✅
      - https://docs.llamaindex.ai/en/stable/module_guides/llama_deploy/50_llamactl/
  - **[git] run-llama/llama_deploy**
    - https://github.com/run-llama/llama_deploy
      - https://github.com/run-llama/llama_deploy/tree/main/examples
        - https://github.com/run-llama/llama_deploy/tree/main/examples/quick_start ✅
        - https://github.com/run-llama/llama_deploy/tree/main/examples/python_fullstack

  - **llama_agents**
    - https://www.llamaindex.ai/blog/introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems

- **understanding**: https://docs.llamaindex.ai/en/stable/understanding/
  - https://docs.llamaindex.ai/en/stable/understanding/putting_it_all_together/ ⌛
  - https://docs.llamaindex.ai/en/stable/understanding/tracing_and_debugging/tracing_and_debugging/ ⌛
  - https://docs.llamaindex.ai/en/stable/understanding/evaluating/evaluating/ ⌛

- **use_cases**: https://docs.llamaindex.ai/en/stable/use_cases/ ⌛
  - https://docs.llamaindex.ai/en/stable/use_cases/extraction/ ⌛
  - https://docs.llamaindex.ai/en/stable/use_cases/q_and_a/ ⌛
  - https://docs.llamaindex.ai/en/stable/use_cases/chatbots/ ⌛
  - https://docs.llamaindex.ai/en/stable/use_cases/agents/ ⌛
  - https://docs.llamaindex.ai/en/stable/use_cases/multimodal/ ⌛
  - https://docs.llamaindex.ai/en/stable/use_cases/fine_tuning/ ⌛

- **module_guides**: https://docs.llamaindex.ai/en/stable/module_guides/
  - **deploying**
    - https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/
    - https://docs.llamaindex.ai/en/stable/module_guides/deploying/chat_engines/
    - https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/
  - https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/settings/

- **Others**:
  - https://llamahub.ai/
  - https://docs.llamaindex.ai/en/stable/examples/index_structs/knowledge_graph/KnowledgeGraphDemo/

- **Third-parties**
  - **Qdrant**
    - https://docs.llamaindex.ai/en/stable/examples/vector_stores/QdrantIndexDemo/ ✅
    - https://docs.llamaindex.ai/en/stable/examples/vector_stores/qdrant_hybrid/ 
    - https://docs.llamaindex.ai/en/stable/examples/vector_stores/QdrantIndexDemo/