# Imports

In [1]:
import os
import packages

from context.utils import typer as t

from toolkit.utils import utils
from toolkit.utils.utils import rp_print
from toolkit.utils.llm import main as utils_llm

import context.instances as inst
import context.consts as const
import context.settings.main as settings_main

from toolkit.llm.langchain.core import integration, utils as utils_lc
from toolkit.llm.langchain.data.indexing import (
  documents, document_loaders, text_splitters,
)
from toolkit.llm.langchain.execution import runnables, graphs, tools, agents
from toolkit.llm.langchain.models import prompts, llms, messages

[32m2025-02-12 07:51:12.918[0m | [1mINFO    [0m | [36mpackages[0m:[36m<module>[0m:[36m68[0m - [1mapps directory: /home/hieudt71/IvyEdge/apps[0m
[32m2025-02-12 07:51:12.919[0m | [1mINFO    [0m | [36mpackages[0m:[36m<module>[0m:[36m69[0m - [1mToolkit path: /home/hieudt71/IvyEdge/apps/toolkit[0m
[32m2025-02-12 07:51:12.920[0m | [1mINFO    [0m | [36mpackages[0m:[36m<module>[0m:[36m70[0m - [1mEnvironment files loaded:[0m
[32m2025-02-12 07:51:12.921[0m | [1mINFO    [0m | [36mpackages[0m:[36m<module>[0m:[36m72[0m - [1m  - /home/hieudt71/IvyEdge/apps/ports.env[0m
[32m2025-02-12 07:51:12.922[0m | [1mINFO    [0m | [36mpackages[0m:[36m<module>[0m:[36m72[0m - [1m  - /home/hieudt71/IvyEdge/apps/.env[0m


  from .autonotebook import tqdm as notebook_tqdm
[32m2025-02-12 07:51:17.183[0m | [1mINFO    [0m | [36mtoolkit.db.mongodb[0m:[36mconnect[0m:[36m436[0m - [1mSuccessfully connected to MongoDB database: [36mapp[0m[0m
[32m2025-02-12 07:51:17.185[0m | [1mINFO    [0m | [36mtoolkit.db.mongodb[0m:[36mget_collection[0m:[36m447[0m - [1mCreating manager for collection: [32mvehicle[0m[0m
[32m2025-02-12 07:51:17.187[0m | [1mINFO    [0m | [36mtoolkit.db.redix[0m:[36mconnect[0m:[36m415[0m - [1mSuccessfully connected to Redis database: [36m0[0m[0m
[32m2025-02-12 07:51:22.472[0m | [1mINFO    [0m | [36mtoolkit.llm.langchain.models.llms[0m:[36mcreate_llm[0m:[36m78[0m - [1m🔹 [38;5;208mLLM[0m [35mOpenAI[0m [32mgpt-4o-mini[0m[0m
[32m2025-02-12 07:51:22.678[0m | [1mINFO    [0m | [36mtoolkit.llm.langchain.models.llms[0m:[36mcreate_llm[0m:[36m78[0m - [1m🔹 [38;5;208mLLM[0m [35mGoogle[0m [32mgemini-2.0-flash-exp[0m[0m
[32m2025-02-12 

# Data

## Vehicle Owner's Manual

In [3]:
# Display the LD_PRELOAD environment variable of the kernel process (None when unset).
os.environ.get("LD_PRELOAD")

'/lib/aarch64-linux-gnu/libGLdispatch.so.0'

In [4]:
# Prompt template asking the LLM to infer a document's section headers and answer
# with a Python-list literal only. The bare token INPUT is a placeholder; the optional
# header-tagging cell below fills it with str.replace (this is not a str.format template).
prompt_extract_headers = """\
Analyze the following document and extract a list of its section headers or titles, inferring them from the content. Return the headers/titles as a Python list (e.g., ['header 1', 'header 2', 'header 3']) and nothing more.

Input: INPUT

Output:
"""

# Prompt template asking the LLM which of the already-extracted headers apply to one
# text snippet (an empty list when none fit). SNIPPET and HEADERS are placeholders
# filled with str.replace, in the same way as INPUT above.
prompt_assign_headers_to_snippet = """\
Given a snippet of text from a larger document and a list of existing headers from that document, analyze the text (no code) and determine which of the provided headers (or which combination of headers) best applies to the content of the snippet. Return the appropriate header(s) as a Python list (e.g., ['Header 1'] or ['Header 2', 'Header 3']) . If none of the provided headers is a good fit, return an empty list ([]).

Input Snippet: SNIPPET

Input Headers: HEADERS

Output:
"""

# Load the owner's manual PDF from the app's data directory.
loader = document_loaders.PyPDFLoader(file_path=f"{packages.APP_PATH}/data/projects/manual.pdf")
docs = loader.load()

# Split the loaded documents into overlapping chunks (chunk_size=1000, chunk_overlap=200).
# add_start_index=True presumably mirrors LangChain's option that records each chunk's
# start offset in its metadata — confirm against the toolkit wrapper.
text_splitter = text_splitters.RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
splits = text_splitter.split_documents(docs)

In [None]:
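# Optional (disabled): tag every split with LLM-inferred section headers before indexing. Costs one LLM call per split; uncomment the lines below to enable.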
# headers = inst.llm_main.invoke(prompt_extract_headers.replace("INPUT", docs[0].page_content))

# for split in utils.tqdm(splits):
# 	split_headers = inst.llm_main.invoke(prompt_assign_headers_to_snippet\
# 											.replace("SNIPPET", split.page_content)\
# 											.replace("HEADERS", headers.content))
# 	split.metadata["headers"] = split_headers.content

In [6]:
# Add the manual chunks to the "vehicle" vector store; document_ids holds whatever
# add_documents returns for this batch.
vehicle_store = inst.vector_stores_qdrant["vehicle"]
document_ids = vehicle_store.add_documents(documents=splits)

## User Query Category

### Pre-Process

In [7]:
base_path = f"{packages.APP_PATH}/data/projects/vehicle"

# Car-control questions: the file's "questions" entry is a list of categories, each with
# its own "questions" list. Flatten them into examples labelled "car_control".
data_control = utils.read_json_file(f"{base_path}/questions/control.json")["questions"]
control_result = [
    {"input": question, "output": "car_control"}
    for category in data_control
    for question in category["questions"]
]
utils.save_to_json(control_result, f"{base_path}/examples/user_query_category/control.json")

# Car-manual questions: keep only the entries that carry a "question" key and label
# them "car_manual".
data_manual = utils.read_json_file(f"{base_path}/questions/manual.json")
manual_result = []
for item in data_manual:
    if "question" not in item:
        continue
    manual_result.append({"input": item["question"], "output": "car_manual"})
utils.save_to_json(manual_result, f"{base_path}/examples/user_query_category/manual.json")

Data successfully saved to /home/hieudt71/IvyEdge/apps/data/projects/vehicle/examples/user_query_category/control.json
Data successfully saved to /home/hieudt71/IvyEdge/apps/data/projects/vehicle/examples/user_query_category/manual.json


### VectorDB

In [8]:
base_path = f"{packages.APP_PATH}/data/projects/vehicle/examples/user_query_category"

# Index only the JSON example files. os.listdir() returns every directory entry
# (sub-directories and hidden entries such as Jupyter's .ipynb_checkpoints included)
# in arbitrary order, so filter by extension and sort to keep the run deterministic.
file_paths = sorted(
    f"{base_path}/{file_name}"
    for file_name in os.listdir(base_path)
    if file_name.endswith(".json")
)

# NOTE: this loop rebinds `docs`, `splits` and `document_ids`, which the owner's-manual
# cells also use; re-run those cells before re-indexing the manual.
for file_path in utils.tqdm(file_paths):
    # Presumably each file is a list of {"input", "output"} records like the ones the
    # pre-process cell writes; str() turns each record into the text that gets indexed.
    data_lst = utils.read_json_file(file_path)
    data_lst = [str(item) for item in data_lst]
    # `text` (not `t`) so the comprehension does not shadow the `typer as t` import alias.
    docs = [documents.Document(page_content=text) for text in data_lst]

    # No splitting here: one document per example record.
    splits = docs

    # document_ids is overwritten on every iteration, so it ends up holding the result
    # for the last file only.
    document_ids = inst.vector_stores_qdrant["user_query_category"].add_documents(documents=splits)

100%|██████████| 5/5 [00:22<00:00,  4.58s/it]


In [None]:
# Test: query the "user_query_category" store through its default retriever.
retriever = inst.vector_stores_qdrant["user_query_category"].as_retriever()

# Sample queries; index 1 (a bare "Yes") is the one exercised below.
tests = [
    "Activate the AC mode. Increase front wiper speed. Help me Unlock doors",
    "Yes",
]
user_query = tests[1]

# Pretty-print whatever the retriever returns for the selected query.
result = retriever.invoke(user_query)
rp_print(result)

## Vehicle Properties Field Paths

In [9]:
from toolkit.db.mongodb import MongoDBCollectionManager, MongoDBDatabaseManager
import json
import asyncio

"""Test inserting vehicle data into MongoDB."""
print("\n=== Testing Vehicle Data Insert ===")

# Initialize database connection
db = MongoDBDatabaseManager(
		username="root",
		password="example",
		host="localhost",
		port="27017",
		db_name="app"
)

# Get the vehicles collection
vehicles_collection = db.get_collection("vehicles")

# Clean up: drop the collection first
print("\nCleaning up: dropping existing collection...")
drop_result = await vehicles_collection.drop_collection()
print(f"Drop collection result: {drop_result}")

# Insert the JSON file
print("\nInserting vehicle data...")
path = f"{packages.ROOT_PATH}/apps/data/projects/vehicle/vehicle_data.json"
result = await vehicles_collection.insert_json_file(path, doc_id="v123")
print(f"Insert result: {result}")

if result["success"]:
		# Verify the insertion
		print("\nVerifying insertion...")
		doc = await vehicles_collection.get_full_document(result["inserted_id"])
		print(f"\nVerification - Retrieved document:")
		print(json.dumps(doc, indent=2, default=MongoDBCollectionManager.json_encode))

field_paths = await vehicles_collection.get_all_field_paths("v123")
field_paths = field_paths["field_paths"]
data_lst = field_paths

data_lst = [str(item) for item in data_lst]
docs = [documents.Document(page_content=t) for t in data_lst]

splits = docs

document_ids = inst.vector_stores_qdrant["vehicle_properties_field_paths"].add_documents(documents=splits)

await db.close()

[32m2025-02-12 07:54:55.830[0m | [1mINFO    [0m | [36mtoolkit.db.mongodb[0m:[36mconnect[0m:[36m436[0m - [1mSuccessfully connected to MongoDB database: [36mapp[0m[0m
[32m2025-02-12 07:54:55.832[0m | [1mINFO    [0m | [36mtoolkit.db.mongodb[0m:[36mget_collection[0m:[36m447[0m - [1mCreating manager for collection: [32mvehicles[0m[0m
[32m2025-02-12 07:54:55.856[0m | [1mINFO    [0m | [36mtoolkit.db.mongodb[0m:[36mdrop_collection[0m:[36m21[0m - [1mSuccessfully dropped collection: vehicles[0m
[32m2025-02-12 07:54:55.858[0m | [1mINFO    [0m | [36mtoolkit.db.mongodb[0m:[36minsert_json_file[0m:[36m362[0m - [1mReading JSON file from: /home/hieudt71/IvyEdge/apps/data/projects/vehicle/vehicle_data.json[0m
[32m2025-02-12 07:54:55.879[0m | [1mINFO    [0m | [36mtoolkit.db.mongodb[0m:[36minsert_json_file[0m:[36m377[0m - [1mSuccessfully inserted document with ID: v123[0m
[32m2025-02-12 07:54:55.880[0m | [1mINFO    [0m | [36mtoolkit.db.m


=== Testing Vehicle Data Insert ===

Cleaning up: dropping existing collection...
Drop collection result: {'success': True, 'message': 'Dropped collection: vehicles'}

Inserting vehicle data...
Insert result: {'success': True, 'inserted_id': 'v123', 'message': 'Successfully inserted document from /home/hieudt71/IvyEdge/apps/data/projects/vehicle/vehicle_data.json'}

Verifying insertion...

Verification - Retrieved document:
{
  "success": true,
  "document": {
    "_id": "v123",
    "vin": "1HGCM82633A123456",
    "metadata": {
      "make": "Toyota",
      "model": "Camry",
      "year": 2024,
      "type": "sedan",
      "color": "silver",
      "features": [
        "hybrid",
        "automatic",
        "sunroof",
        "lane_assist",
        "adaptive_cruise"
      ],
      "registration": {
        "plate": "ABC123",
        "state": "CA",
        "expirationDate": "2024-12-31T00:00:00Z"
      }
    },
    "currentState": {
      "lastUpdated": "2024-02-05T10:30:00Z",
      "i

[32m2025-02-12 07:55:12.340[0m | [1mINFO    [0m | [36mtoolkit.db.mongodb[0m:[36mclose[0m:[36m455[0m - [1mMongoDB connection closed[0m
