In [1]:
import logging
import os
import asyncio

from ogmyrag.report_retrieval.report_retrieval import ReportRetrievalManager
from ogmyrag.report_retrieval.retrieval_storage import RetrievalAsyncStorageManager
from ogmyrag.report_scraper.models import ReportType
from ogmyrag.my_logging import configure_logger
from ogmyrag.storage import PineconeStorage
from ogmyrag.report_retrieval.report_chunker import rag_answer_with_company_detection

from dotenv import load_dotenv

# --- Logging ---------------------------------------------------------------
# Logger for the retrieval pipeline, configured with INFO level and the
# log file logs/retrieval.log.
retrieval_logger = configure_logger(name='retrieval', log_level=logging.INFO, log_file='logs/retrieval.log')
# Separator line so the start of each notebook session is easy to spot in the log.
retrieval_logger.info("\n" + "=" * 80)

# --- Credentials -----------------------------------------------------------
# override=True: values from .env take precedence over variables that are
# already present in the process environment.
load_dotenv(override=True)

mongo_db_uri = os.getenv("MONGO_DB_URI_JJ", "")
pinecone_api_key = os.getenv("PINECONE_API_KEY_JJ", "")
genai_api_key = os.getenv("GENAI_API_KEY_JJ", "")
openai_api_key = os.getenv("OPENAI_API_KEY", "")

# The "" defaults above would hide a missing variable until one of the clients
# built below fails. Report it immediately instead. This only logs a warning
# (no exception), so the cell still runs to the same point it did before.
_missing_env_vars = [
    env_name
    for env_name, env_value in (
        ("MONGO_DB_URI_JJ", mongo_db_uri),
        ("PINECONE_API_KEY_JJ", pinecone_api_key),
        ("GENAI_API_KEY_JJ", genai_api_key),
        ("OPENAI_API_KEY", openai_api_key),
    )
    if not env_value
]
if _missing_env_vars:
    retrieval_logger.warning(
        "Missing or empty environment variables: %s", ", ".join(_missing_env_vars)
    )

# --- Tunable constants -----------------------------------------------------
INDEX_NAME = "company-disclosures-index"
# NOTE(review): EMBED_MODEL and OPENAI_MODEL are not referenced anywhere else
# in this cell; confirm whether later cells or the library read them.
EMBED_MODEL = "text-embedding-3-small"
# Vector size handed to the Pinecone index below; presumably chosen to match
# EMBED_MODEL's output size -- confirm if the embedding model changes.
DIMENSION = 1536
GENAI_MODEL = "gemini-2.5-pro"
# Alternative: GENAI_MODEL = "gemini-2.5-flash"
OPENAI_MODEL = "gpt-5-nano"

# --- Storage and manager wiring -------------------------------------------
db_name = "FYP"
storage = RetrievalAsyncStorageManager(mongo_uri=mongo_db_uri, db_name=db_name)

pine = PineconeStorage(
    index_name=INDEX_NAME,
    pinecone_api_key=pinecone_api_key,
    pinecone_environment="us-east-1",
    pinecone_cloud="aws",
    pinecone_metric="cosine",
    pinecone_dimensions=DIMENSION,
    openai_api_key=openai_api_key,
)

# `dry_run` was left commented out in the original cell, so the manager's own
# default applies. NOTE(review): confirm what that default is.
manager = ReportRetrievalManager(
    storage=storage,
    pine=pine,
    genai_model=GENAI_MODEL,
    genai_api_key=genai_api_key,
    openai_api_key=openai_api_key,
)

2025-08-26 12:01:44,935 - retrieval - INFO - 
2025-08-26 12:01:44,945 - retrieval - INFO - Connected to MongoDB database: FYP


## Process Financial Reports (PDF)

In [2]:
await manager.parse_report(
    company = "FARM_FRESH_BERHAD",
    report_type = ReportType.ANNUAL,
    year = 2024,
    #forced_process = True
)

2025-08-26 10:17:08,733 - retrieval - INFO - Already processed and up to date.
2025-08-26 10:17:08,734 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 10:17:08,735 - retrieval - INFO - Extracting all the processed content.
2025-08-26 10:17:08,884 - retrieval - INFO - Combining all the processed content.
2025-08-26 10:17:08,885 - retrieval - INFO - Processed content ready.
2025-08-26 10:17:08,889 - retrieval - INFO - Saved processed report to ./processed_report/FARM_FRESH_BERHAD/FARM_FRESH_BERHAD_ANNUAL_2024.md


In [6]:
await manager.parse_report(
    company = "EDELTEQ HOLDINGS BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 10:21:32,314 - retrieval - INFO - Already processed and up to date.
2025-08-26 10:21:32,316 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 10:21:32,317 - retrieval - INFO - Extracting all the processed content.
2025-08-26 10:21:32,470 - retrieval - INFO - Combining all the processed content.
2025-08-26 10:21:32,471 - retrieval - INFO - Processed content ready.
2025-08-26 10:21:32,473 - retrieval - INFO - Saved processed report to ./processed_report/EDELTEQ_HOLDINGS_BERHAD/EDELTEQ_HOLDINGS_BERHAD_IPO.md


In [3]:
await manager.parse_report(
    company = "VETECE_HOLDINGS_BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 09:35:51,387 - retrieval - INFO - Already processed and up to date.
2025-08-26 09:35:51,388 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 09:35:51,388 - retrieval - INFO - Extracting all the processed content.
2025-08-26 09:35:51,533 - retrieval - INFO - Combining all the processed content.
2025-08-26 09:35:51,534 - retrieval - INFO - Processed content ready.
2025-08-26 09:35:51,538 - retrieval - INFO - Saved processed report to ./processed_report/VETECE_HOLDINGS_BERHAD/VETECE_HOLDINGS_BERHAD_IPO.md


In [4]:
await manager.parse_report(
    company = "CABNET_HOLDINGS_BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 09:35:58,782 - retrieval - INFO - Already processed and up to date.
2025-08-26 09:35:58,783 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 09:35:58,784 - retrieval - INFO - Extracting all the processed content.
2025-08-26 09:35:58,951 - retrieval - INFO - Combining all the processed content.
2025-08-26 09:35:58,951 - retrieval - INFO - Processed content ready.
2025-08-26 09:35:58,955 - retrieval - INFO - Saved processed report to ./processed_report/CABNET_HOLDINGS_BERHAD/CABNET_HOLDINGS_BERHAD_IPO.md


In [7]:
await manager.parse_report(
    company = "AUTOCOUNT_DOTCOM_BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 10:21:36,639 - retrieval - INFO - Already processed and up to date.
2025-08-26 10:21:36,640 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 10:21:36,641 - retrieval - INFO - Extracting all the processed content.
2025-08-26 10:21:36,791 - retrieval - INFO - Combining all the processed content.
2025-08-26 10:21:36,792 - retrieval - INFO - Processed content ready.
2025-08-26 10:21:36,794 - retrieval - INFO - Saved processed report to ./processed_report/AUTOCOUNT_DOTCOM_BERHAD/AUTOCOUNT_DOTCOM_BERHAD_IPO.md


In [8]:
await manager.parse_report(
    company = "ICT ZONE ASIA BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 10:21:37,748 - retrieval - INFO - Already processed and up to date.
2025-08-26 10:21:37,748 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 10:21:37,749 - retrieval - INFO - Extracting all the processed content.
2025-08-26 10:21:37,854 - retrieval - INFO - Combining all the processed content.
2025-08-26 10:21:37,855 - retrieval - INFO - Processed content ready.
2025-08-26 10:21:37,858 - retrieval - INFO - Saved processed report to ./processed_report/ICT_ZONE_ASIA_BERHAD/ICT_ZONE_ASIA_BERHAD_IPO.md


## Simple User Query

In [13]:
out = await rag_answer_with_company_detection(
    pine,
    query = "Who is Joyce Wong Ai May? What is his/her position?",
    top_k = 5
)

2025-08-26 12:06:16,903 - retrieval - INFO - RAG start | query='Who is Joyce Wong Ai May? What is his/her position?' | top_k=5
2025-08-26 12:06:17,518 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 12:06:21,739 - retrieval - INFO - Detected company: None
2025-08-26 12:06:21,741 - retrieval - INFO - Search query unchanged.
2025-08-26 12:06:21,741 - retrieval - INFO - Query filter: {}
2025-08-26 12:06:27,529 - retrieval - INFO - Final answer: 

Joyce Wong Ai May is the Independent Non-Executive Director of Edelteq Holdings Berhad, appointed to the Board on 1 August 2022.


2025-08-26 12:06:27,531 - retrieval - INFO - Token usage | detect={'prompt_tokens': 358, 'completion_tokens': 544, 'total_tokens': 902} | answer={'prompt_tokens': 1430, 'completion_tokens': 809, 'total_tokens': 2239} | total=3141


In [12]:
out = await rag_answer_with_company_detection(
    pine,
    query = "Who is Liew Soung Yue?",
    top_k = 5
)

2025-08-26 12:06:05,144 - retrieval - INFO - RAG start | query='Who is Liew Soung Yue?' | top_k=5
2025-08-26 12:06:05,721 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 12:06:11,125 - retrieval - INFO - Detected company: None
2025-08-26 12:06:11,126 - retrieval - INFO - Search query unchanged.
2025-08-26 12:06:11,127 - retrieval - INFO - Query filter: {}
2025-08-26 12:06:16,889 - retrieval - INFO - Final answer: 

Dr. Liew Soung Yue is an Independent Non-Executive Director of AutoCount Dotcom Berhad. He was appointed to the Board on 6 April 2022, serves as Chairman of the Nomination Committee, and is a member of the Audit and Risk Management Committee and Remuneration Committee. He holds a PhD in Information Engineering from The Chinese University of Hong Kong (1999).


2025-08-26 12:06:16,890 - retrieval - INFO - Token usage | 

In [9]:
out = await rag_answer_with_company_detection(
    pine,
    query = "Who are the board of directors of AutoCount? Tell more story or background about each of them.",
    top_k = 5
)

2025-08-26 12:05:12,416 - retrieval - INFO - RAG start | query='Who are the board of directors of AutoCount? Tell more story or background about each of them.' | top_k=5
2025-08-26 12:05:12,986 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 12:05:24,694 - retrieval - INFO - Detected company: 'AUTOCOUNT_DOTCOM_BERHAD'
2025-08-26 12:05:24,696 - retrieval - INFO - Search query normalized: 'Who are the board of directors of AutoCount? Tell more story or background about each of them.' → 'Who are the board of directors? Tell more story or background about each of them.'
2025-08-26 12:05:24,696 - retrieval - INFO - Query filter: {'from_company': 'AUTOCOUNT_DOTCOM_BERHAD'}
2025-08-26 12:05:37,620 - retrieval - INFO - Final answer: 

Board of Directors (names and roles per the documents)

- Choo Chin Peng (CCP) — Executive Director / Ch

In [10]:
out = await rag_answer_with_company_detection(
    pine,
    query = "What are the mission and vision of autocount dotcom berhad?",
    top_k = 5
)

2025-08-26 12:05:37,638 - retrieval - INFO - RAG start | query='What are the mission and vision of autocount dotcom berhad?' | top_k=5
2025-08-26 12:05:38,214 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 12:05:44,491 - retrieval - INFO - Detected company: 'AUTOCOUNT_DOTCOM_BERHAD'
2025-08-26 12:05:44,491 - retrieval - INFO - Search query normalized: 'What are the mission and vision of autocount dotcom berhad?' → 'What are the mission and vision?'
2025-08-26 12:05:44,492 - retrieval - INFO - Query filter: {'from_company': 'AUTOCOUNT_DOTCOM_BERHAD'}
2025-08-26 12:05:49,079 - retrieval - INFO - Final answer: 

Not found in provided documents.


2025-08-26 12:05:49,080 - retrieval - INFO - Token usage | detect={'prompt_tokens': 359, 'completion_tokens': 1186, 'total_tokens': 1545} | answer={'prompt_tokens': 1383, 'completion_token

In [11]:
out = await rag_answer_with_company_detection(
    pine,
    query = "What the company CABNET do?",
    top_k = 5
)

2025-08-26 12:05:49,090 - retrieval - INFO - RAG start | query='What the company CABNET do?' | top_k=5
2025-08-26 12:05:49,670 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 12:06:00,430 - retrieval - INFO - Detected company: 'CABNET_HOLDINGS_BERHAD'
2025-08-26 12:06:00,431 - retrieval - INFO - Search query normalized: 'What the company CABNET do?' → 'What the company do?'
2025-08-26 12:06:00,432 - retrieval - INFO - Query filter: {'from_company': 'CABNET_HOLDINGS_BERHAD'}
2025-08-26 12:06:05,128 - retrieval - INFO - Final answer: 

Cabnet Holdings Berhad provides building management solutions, including structured cabling works and ELV systems for buildings and facilities, and IT services. Their activities cover design, supply, build, testing and commissioning, plus project management, training, maintenance and aftersales servi

In [8]:
out = await rag_answer_with_company_detection(
    pine,
    query = "What is the vision and mission of farm fresh? Elaborate more.",
    top_k = 8
)

2025-08-26 12:04:26,908 - retrieval - INFO - RAG start | query='What is the vision and mission of farm fresh? Elaborate more.' | top_k=8
2025-08-26 12:04:27,469 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 12:04:39,390 - retrieval - INFO - Detected company: 'FARM_FRESH_BERHAD'
2025-08-26 12:04:39,392 - retrieval - INFO - Search query normalized: 'What is the vision and mission of farm fresh? Elaborate more.' → 'What is the vision and mission? Elaborate more.'
2025-08-26 12:04:39,394 - retrieval - INFO - Query filter: {'from_company': 'FARM_FRESH_BERHAD'}
2025-08-26 12:04:53,710 - retrieval - INFO - Final answer: 

Vision:
- To be a sustainable and honest food company that places the wellbeing of consumers first, culminating in strong brand love. The company also articulates this as a vision of being sustainable, honest, and co