# Imports

In [None]:
import packages
from configs import settings, const, components
from configs.settings import logger
import asyncio, os, time, yaml, json, datetime, copy, random
from typing import Any, AsyncGenerator, Generator, Callable, Literal, Optional, TypeAlias, Union
from tqdm import tqdm
from pprint import pprint

from toolkit.llm.llama_index import (
	agents, cores, deploys as dpls, evaluation, messages, models, 
	observability, types, utils as utils_llama_index, workflows as wfs
)
from toolkit.llm.llama_index.data import loading, querying, storing

from features.agents.car.tools import VehicleDB
from features.agents.tools import map

from toolkit.utils import utils, typer as t
from toolkit.utils.llm import measure_performance, main as utils_llm
from toolkit.utils.utils import rp_print

# <span style="color: #f59e0b;">Dev</span>
<span style="color: red;">There is no need to run this unless you are testing the framework.</span>

In [None]:
def multiply(a: float, b: float) -> float:
	"""
	Multiply two float numbers and returns the product.
	"""
	# NOTE(review): this function is wrapped as an agent tool further down;
	# presumably the docstring doubles as the tool description, so its wording
	# is deliberately left untouched — confirm before editing it.
	product = a * b
	return product

def add(a: float, b: float) -> float:
	"""
	Add two float numbers and returns the sum.
	"""
	# NOTE(review): this function is wrapped as an agent tool further down;
	# presumably the docstring doubles as the tool description, so its wording
	# is deliberately left untouched — confirm before editing it.
	total = a + b
	return total

def useless(a: float, b: float) -> float:
	"""
	Toy useless function.
	"""
	# Intentionally does nothing: both arguments are ignored and the result is
	# None despite the `float` annotation.
	return None

# Wrap the toy functions as agent tools.
tool_multiply = agents.FunctionTool.from_defaults(fn=multiply)
tool_add = agents.FunctionTool.from_defaults(fn=add)

# 28 copies of the no-op tool, each registered under its own name
# (useless_0 ... useless_27).
tools_useless = [
	agents.FunctionTool.from_defaults(name=f"useless_{idx}", fn=useless)
	for idx in range(28)
]

# NOTE(review): `tools_useless` is already a list, so `[tools_useless]` passes
# a list nested inside a list. Whether `add_tools` flattens that depth is not
# visible here — confirm, otherwise `tools_map` below would see a list entry.
tools = agents.add_tools(
	tool_multiply,
	tool_add,
	[tools_useless],
	# query_plan_tool,
	# other agents
)

# Tool lookup by registered name.
tools_map = {entry.metadata.name: entry for entry in tools}

# Index the tools so the agent retrieves the 2 best matches per query
# rather than being handed every tool.
tools_obj_index = storing.ObjectIndex.from_objects(tools, index_cls=storing.VectorStoreIndex)
tool_retriever = tools_obj_index.as_retriever(similarity_top_k=2)

agent = agents.AgentRunner.from_llm(
	llm=cores.Settings.llm,
	# tools=tools, # use this
	tool_retriever=tool_retriever, # or this
	verbose=True,
)

# agent.achat()

# agent = agents.FunctionCallingAgent.from_tools(
# 	tools=tools,
# 	llm=cores.Settings.llm,
# 	verbose=True,
# 	system_prompt=None,
# 	allow_parallel_tool_calls=True,
# )

# worker = agents.FunctionCallingAgentWorker.from_tools(tools=tools, verbose=True)
# agent = agents.StructuredPlannerAgent(worker, tools=tools, verbose=True) # take longer to respond

# agent = agents.ReActAgent.from_tools(
#   tools=tools, 
# 	llm=cores.Settings.llm,
# 	verbose=True,
# 	context=None, # System prompt
# )

# agent_worker_openai = agents.OpenAIAgentWorker.from_tools(tools=tools, verbose=True)
# agent_openai = agents.AgentRunner(agent_worker_openai)

In [None]:
# Sample prompts for exercising the calculator tools; the cells below pick
# entries from this list by position, so the order matters.
queries = [
	"What is 2123 * 215123",
	"What is 20+(2*4)? Use a tool to calculate every step.",
	"What is (121 * 3) + 42?",
	"What is (121 * 3) + (5 * 8)?",
	(
		"What is 121 * 2? "
		"Once you have the answer, use that number to write a story about a group of mice."
	),
	"What's 212 multiplied by 122? Make sure to use Tools",
	"What's 212 added to 122 ? Make sure to use Tools",
]

In [None]:
# Usage example:
result = await utils_llama_index.interact_agent(
    agent=agent,
    user_query=queries[0],
    mode="achat",
)

final_result = await utils_llama_index.handle_agent_response(result)

In [None]:
# Step-Wise Execution
# Create a task for the third sample query and drive it one step at a time.
task = agent.create_task(queries[2])
step_output = agent.run_step(task.task_id)

# Keep stepping until the agent reports its last step. A single follow-up
# step is not enough for queries that need several tool calls, and the
# response should only be finalized once the last step has run.
while not step_output.is_last:
	step_output = agent.run_step(task.task_id)

result = agent.finalize_response(task.task_id)
result = str(result)

print(f"\n{'-'*80}\n")
pprint(result)

# print(f"Tasks: {task.list_tasks()}")
# print(f"Completed steps: {task.get_completed_steps(task.task_id)}")

## My Agent Worker

In [None]:
class AgentWorkerCustom(agents.CustomSimpleAgentWorker):
	"""
	Skeleton of a custom agent worker.

	Each step sends the pending input to `_get_response`, records the exchange
	in the task state and reports the task as finished. `_get_response` is
	still a placeholder, so no LLM or tool is called yet; change the
	`is_done` flag returned by `_run_step` once real retry logic exists.
	"""

	def __init__(
		self,
		tools: t.List[agents.BaseTool],
		**kwargs: Any,
	) -> None:
		"""
		Init params.

		Args:
			tools: Tools made available to the worker.
			**kwargs: Forwarded unchanged to the base worker constructor.
		"""
		# TODO: validate tools

		# Hand `tools` straight to the base constructor instead of assigning
		# `self.tools` first. NOTE(review): assuming the base class is
		# LlamaIndex's pydantic-based CustomSimpleAgentWorker, attribute
		# assignment before `super().__init__()` raises, and the base stores
		# `tools` itself — confirm.
		super().__init__(
			tools=tools,
			**kwargs,
		)

	def _initialize_state(
		self, task: agents.Task, **kwargs: t.Any,
	) -> t.Dict[str, t.Any]:
		"""
		Initialize state.

		Returns:
			A fresh per-task state: a step counter starting at 0 and an empty
			list of (role, text) reasoning entries.
		"""
		return {
			"count": 0,
			"current_reasoning": [],
		}

	def _run_step(
		self, state: t.Dict[str, t.Any], task: agents.Task,
		input: t.Optional[str] = None,
	) -> t.Tuple[agents.AgentChatResponse, bool]:
		"""
		Run step.

		Args:
			state: Mutable per-task state created by `_initialize_state`.
			task: Task being executed; `task.input` is used when the state
				carries no "new_input" override.
			input: Unused; kept for interface compatibility.

		Returns:
			Tuple of the chat response for this step and an `is_done` flag.
		"""
		new_input = state.get("new_input", task.input)

		# `_get_response` requires the query; calling it without arguments
		# raised a TypeError.
		response = self._get_response(new_input)

		# Append to current reasoning
		state["current_reasoning"].extend(
			[
				("user", new_input),
				("assistant", str(response)),
			]
		)
		state["count"] += 1

		# The placeholder response never needs a retry, so the task is
		# reported as done after a single step.
		return agents.AgentChatResponse(response=str(response)), True

	def _finalize_task(self, state: t.Dict[str, t.Any], **kwargs: t.Any) -> None:
		"""
		Finalize task.

		Nothing to clean up. NOTE(review): added because LlamaIndex's
		CustomSimpleAgentWorker declares this hook abstract — confirm the
		project wrapper exposes that same base class.
		"""
		return None

	def _get_response(
		self, user_query: str,
	) -> str:
		"""
		Produce the reply for `user_query`.

		Placeholder: ignores the query and always returns "Hello".
		"""
		return "Hello"
	


## JSON-based

### Basic dataset

In [None]:
path_data = f"{packages.APP_PATH}/data/org/simple"
path_persist = f"{packages.APP_PATH}/data/org/test/persist"

# NOTE(review): the "Car manual" cell persists to this same directory, so
# whichever cell runs first decides which index both cells load afterwards.
# Consider giving each dataset its own persist directory.

# Build the index when nothing has been persisted yet. A missing directory
# counts as "nothing persisted" instead of raising FileNotFoundError.
if not os.path.isdir(path_persist) or not os.listdir(path_persist):
	logger.info(f"Loading data from {path_data}")
	documents = loading.SimpleDirectoryReader(path_data).load_data()
	index = storing.VectorStoreIndex.from_documents(
		documents=documents, show_progress=True,
	)
	# Presumably creates `path_persist` when it does not exist — confirm.
	index.storage_context.persist(persist_dir=path_persist)
else:
	# Reuse the persisted index instead of re-reading the source documents.
	logger.info(f"Loading data from {path_persist}")
	storage_context = storing.StorageContext.from_defaults(persist_dir=path_persist)
	index = storing.load_index_from_storage(storage_context)

query_engine = index.as_query_engine()


### Car manual

In [None]:
file_path = f"{packages.APP_PATH}/data/org/test/manual_toyota_corolla_cross_2023.pdf"
path_persist = f"{packages.APP_PATH}/data/org/test/persist"

# NOTE(review): the "Basic dataset" cell persists to this same directory. If
# that cell ran first, the branch below loads its index, not the car
# manual. Consider giving each dataset its own persist directory.

# Build the index when nothing has been persisted yet. A missing directory
# counts as "nothing persisted" instead of raising FileNotFoundError.
if not os.path.isdir(path_persist) or not os.listdir(path_persist):
	logger.info(f"Loading data from {file_path}")
	loader = loading.PyMuPDFReader()
	documents = loader.load_data(file_path)
	index = storing.VectorStoreIndex.from_documents(
		documents=documents, show_progress=True,
	)
	# Presumably creates `path_persist` when it does not exist — confirm.
	index.storage_context.persist(persist_dir=path_persist)
else:
	# Reuse the persisted index instead of re-parsing the PDF.
	logger.info(f"Loading data from {path_persist}")
	storage_context = storing.StorageContext.from_defaults(persist_dir=path_persist)
	index = storing.load_index_from_storage(storage_context)

# Retrieval pipeline: fetch the 10 nearest nodes, apply the 0.4 similarity
# cutoff, then synthesize a streamed answer in COMPACT mode.
retriever = querying.VectorIndexRetriever(
	index=index, similarity_top_k=10,
)

node_postprocessors = [
	querying.SimilarityPostprocessor(similarity_cutoff=0.4),
]
response_synthesizer = querying.get_response_synthesizer(
	response_mode=querying.ResponseMode.COMPACT, streaming=True
)
query_engine = querying.RetrieverQueryEngine(
	retriever=retriever,
	node_postprocessors=node_postprocessors,
	response_synthesizer=response_synthesizer,
)

## Qdrant

### Read directory

In [None]:
path_data = f"{packages.APP_PATH}/data/org/test"
# Not referenced again within this cell.
path_persist = f"{packages.APP_PATH}/data/test/persist"

# LlamaParse client; the API key is read from the environment.
parser = loading.LlamaParse(
	result_type="markdown",
	api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
	verbose=True,
	# num_workers=4,
	# language="en",
)

# The same parser instance is registered for both extensions. It only takes
# effect once the `file_extractor` argument below is uncommented.
file_extractor = dict.fromkeys((".pdf", ".txt"), parser)

# Load every file found under `path_data`.
documents = loading.SimpleDirectoryReader(
	input_dir=path_data, # or
	# input_files=[""],
	# file_extractor=file_extractor,
).load_data()


### Read file

In [None]:
# PDF ingested when no index comes back for the collection.
file_path = f"{packages.APP_PATH}/data/org/test/manual_toyota_corolla_cross_2023.pdf"
# Author's note on possible values: car_manual_OpenAI, car_manual_HF
collection_name = settings.chosen_qdrant_collections["car_manual"]

# Look the index up first; the build branch only runs when the lookup
# returns None.
index = storing.get_index(
	client=settings.qdrant_client, collection_name=collection_name, type_index="qdrant",
)

if index is None:
	logger.info(f"Creating data for collection: `{collection_name}`")

	#* FILE_PATH -> DOCUMENTS
	reader = loading.MyReader(file_path)
	documents = reader.load_data()

	#* DOCUMENTS -> INDEX backed by the Qdrant vector store
	vector_store = storing.get_vector_store(
		type_store="qdrant", collection_name=collection_name, client=settings.qdrant_client,
	)
	storage_context = storing.StorageContext.from_defaults(vector_store=vector_store)
	index = storing.VectorStoreIndex.from_documents(
		documents=documents, storage_context=storage_context, show_progress=True,
	)

# Only the retriever entry of the toolkit is used in this cell.
query_toolkit = querying.get_query_toolkit_from_vector_store_index(vector_store_index=index)
retriever = query_toolkit["retriever"]

# Add more doc: index.insert(doc)

# <span style="color: #1AC38DFF;">App</span>

Application-specific development workspace below


## Car Manual

<span style="color:rgb(50, 99, 213);">You simply need to execute this for RAG.</span>


In [None]:
# Car manual PDF; only read when the collection has to be built.
file_path = f"{packages.APP_PATH}/data/org/test/manual_toyota_corolla_cross_2023.pdf"
# Author's note on possible values: car_manual_OpenAI, car_manual_HF
collection_name = settings.chosen_qdrant_collections["car_manual"]

# Reuse the index when the lookup returns one; otherwise build it below.
index = storing.get_index(
	collection_name=collection_name,
	client=settings.qdrant_client,
	type_index="qdrant",
)

if index is None:
	logger.info(f"Creating data for collection: `{collection_name}`")

	# Step 1: file path -> documents
	reader = loading.MyReader(file_path)
	documents = reader.load_data()

	# Step 2: documents -> index written through the Qdrant vector store
	vector_store = storing.get_vector_store(
		collection_name=collection_name,
		client=settings.qdrant_client,
		type_store="qdrant",
	)
	storage_context = storing.StorageContext.from_defaults(vector_store=vector_store)
	index = storing.VectorStoreIndex.from_documents(
		storage_context=storage_context,
		documents=documents,
		show_progress=True,
	)

# Expose the retriever from the query toolkit built on top of the index.
query_toolkit = querying.get_query_toolkit_from_vector_store_index(vector_store_index=index)
retriever = query_toolkit["retriever"]

# Add more doc: index.insert(doc)

## User Query Categorization

### Process

In [None]:
# Locations of the two QnA source files.
path_json_car_control = f"{packages.APP_PATH}/data/QnAs/car_control.json"
path_json_car_manual = f"{packages.APP_PATH}/data/QnAs/car_manual.json"

# Parsed contents, consumed by the next cell.
data_json_car_control = utils.read_json_file(path_json_car_control)
data_json_car_manual = utils.read_json_file(path_json_car_manual)

In [None]:
# Car control data: every question of every category becomes one
# "car_control" record.
result = [
    {"user_query": question, "user_query_category": "car_control"}
    for category_data in data_json_car_control
    for question in category_data['questions']
]

# Car manual data: one "car_manual" record per entry, appended after the
# car control records.
result.extend(
    {"user_query": manual_data['question'], "user_query_category": "car_manual"}
    for manual_data in data_json_car_manual
)

# Write the combined, labelled dataset to disk.
utils.save_to_json(result, f"{packages.APP_PATH}/data/QnAs/user_query_category.json")

### VectorDB

In [None]:
# Labelled query dataset; only read when the collection has to be built.
file_path = f"{packages.APP_PATH}/data/QnAs/user_query_category.json"
collection_name = settings.chosen_qdrant_collections["user_query_category"]

# Reuse the index when the lookup returns one; otherwise build it below.
index = storing.get_index(
	client=settings.qdrant_client, collection_name=collection_name, type_index="qdrant",
)

if index is None:
	logger.info(f"Creating data for collection: `{collection_name}`")

	# Each JSON record is stringified and becomes the text of its own document.
	data_lst = [str(record) for record in utils.read_json_file(file_path)]
	documents = [loading.Document(text=entry) for entry in data_lst]

	# Write the documents into an index backed by the Qdrant vector store.
	vector_store = storing.get_vector_store(
		type_store="qdrant", collection_name=collection_name, client=settings.qdrant_client,
	)
	storage_context = storing.StorageContext.from_defaults(vector_store=vector_store)
	index = storing.VectorStoreIndex.from_documents(
		documents=documents, storage_context=storage_context, show_progress=True,
	)

# Retriever returning the 5 closest labelled queries.
query_toolkit = querying.get_query_toolkit_from_vector_store_index(
	similarity_top_k=5, vector_store_index=index,
)
retriever = query_toolkit["retriever"]

# Add more doc: index.insert(doc)

In [None]:
# Extra labelled-query files to append to the current `index`.
file_paths = [
	# f"{packages.APP_PATH}/data/QnAs/user_query_category1.json",
	f"{packages.APP_PATH}/data/QnAs/user_query_category-[confirmation].json",
]

for file_path in file_paths:
	# Stringify every record and wrap each one in its own document.
	data_json = [str(record) for record in utils.read_json_file(file_path)]
	documents = [loading.Document(text=entry) for entry in data_json]

	# Insert one document at a time, with a progress bar.
	for doc in tqdm(documents):
		index.insert(doc)

In [None]:
# Display the first stringified record left in `data_json` by the insert loop
# above (a quick sanity check of the document text format).
data_json[0]

In [None]:
# One hand-written record, in the same stringified-dict form as the loaded
# dataset, inserted into the current `index`.
text = (
	"{'user_query': 'Is the car locked? Is the car trunk opened?', "
	"'user_query_category': 'car_control'}"
)
doc = loading.Document(text=text)
index.insert(doc)

In [None]:
# user_query = "Activate the AC mode. Increase front wiper speed. Help me Unlock doors"
user_query = "Yes"
# result = retriever.retrieve(user_query)
result = components.retriever_user_query_category.retrieve(user_query)

(await utils_llama_index.extract_retriever_results(result))

## Car Information Field Paths

In [None]:
# Fetch all field paths from the vehicle Mongo DB helper for "v123"
# (presumably a vehicle id — confirm); only the "field_paths" entry of the
# returned mapping is kept.
field_paths = VehicleDB.db_mongo_vehicle.get_all_field_paths("v123")["field_paths"]

In [None]:
collection_name = settings.chosen_qdrant_collections["car_info_field_paths"]

# Reuse the index when the lookup returns one; otherwise build it below.
index = storing.get_index(
	client=settings.qdrant_client, collection_name=collection_name, type_index="qdrant",
)

if index is None:
	logger.info(f"Creating data for collection: `{collection_name}`")

	# Each field path (from the previous cell) is stringified and becomes the
	# text of its own document.
	data_lst = [str(path) for path in field_paths]
	documents = [loading.Document(text=entry) for entry in data_lst]

	# Write the documents into an index backed by the Qdrant vector store.
	vector_store = storing.get_vector_store(
		type_store="qdrant", collection_name=collection_name, client=settings.qdrant_client,
	)
	storage_context = storing.StorageContext.from_defaults(vector_store=vector_store)
	index = storing.VectorStoreIndex.from_documents(
		documents=documents, storage_context=storage_context, show_progress=True,
	)

# Retriever returning the 5 closest field paths.
query_toolkit = querying.get_query_toolkit_from_vector_store_index(
	similarity_top_k=5, vector_store_index=index,
)
retriever = query_toolkit["retriever"]