In [1]:
import logging
import os
import asyncio

from ogmyrag.report_retrieval.report_retrieval import ReportRetrievalManager
from ogmyrag.report_retrieval.retrieval_embedder import RetrievalEmbedder
from ogmyrag.report_retrieval.retrieval_extractor import RetrievalExtractor
from ogmyrag.report_retrieval.retrieval_storage import RetrievalAsyncStorageManager
from ogmyrag.report_scraper.models import ReportType
from ogmyrag.my_logging import configure_logger
from ogmyrag.storage import PineconeStorage
from ogmyrag.report_retrieval.report_chunker import rag_answer_with_company_detection

from dotenv import load_dotenv

# Configure the 'retrieval' logger (INFO level, log file 'logs/retrieval.log')
# and write a separator line to mark the start of this session.
retrieval_logger = configure_logger(name='retrieval', log_level=logging.INFO, log_file='logs/retrieval.log')
retrieval_logger.info("\n" + "=" * 80)

# override=True: values from the .env file replace variables already set in
# the process environment.
load_dotenv(override=True)

# Credentials and connection strings. The "" default keeps this cell runnable
# when a variable is absent, so the check below reports what is missing.
mongo_db_uri = os.getenv("MONGO_DB_URI_JJ", "")
pinecone_api_key = os.getenv("PINECONE_API_KEY_JJ", "")
genai_api_key = os.getenv("GENAI_API_KEY_JJ", "")
openai_api_key = os.getenv("OPENAI_API_KEY", "")

# An empty credential would otherwise only show up later as an opaque client
# or connection error; surface it here without changing control flow.
_missing_env_vars = [
    env_name
    for env_name, env_value in (
        ("MONGO_DB_URI_JJ", mongo_db_uri),
        ("PINECONE_API_KEY_JJ", pinecone_api_key),
        ("GENAI_API_KEY_JJ", genai_api_key),
        ("OPENAI_API_KEY", openai_api_key),
    )
    if not env_value
]
if _missing_env_vars:
    retrieval_logger.warning(
        "Missing or empty environment variables: %s",
        ", ".join(_missing_env_vars),
    )

# MongoDB database name handed to the storage manager.
db_name = "FYP"

# Pinecone index name shared by the embedder, extractor and PineconeStorage.
INDEX_NAME = "company-disclosures-index"

# Embedding model and the vector dimension passed alongside it.
EMBED_MODEL = "text-embedding-3-small"
DIMENSION = 1536

# Generation / chat model names.
# "gemini-2.5-flash" was kept as a commented-out alternative for GENAI_MODEL.
GENAI_MODEL = "gemini-2.5-pro"
OPENAI_MODEL = "gpt-5-nano"
# Async storage manager bound to the MongoDB URI and database name set above.
storage = RetrievalAsyncStorageManager(
    mongo_uri=mongo_db_uri,
    db_name=db_name,
)

# Embedder: receives both API keys plus the embedding model, index name and
# vector dimension from the configuration above.
embedder = RetrievalEmbedder(
    openai_api_key=openai_api_key,
    pinecone_api_key=pinecone_api_key,
    embed_model=EMBED_MODEL,
    index_name=INDEX_NAME,
    dimension=DIMENSION,
)

# Extractor: same keys, embedding model and index name as the embedder
# (no dimension argument is passed here).
extractor = RetrievalExtractor(
    openai_api_key=openai_api_key,
    pinecone_api_key=pinecone_api_key,
    embed_model=EMBED_MODEL,
    index_name=INDEX_NAME,
)

# Pinecone storage handle for INDEX_NAME. Environment, cloud and metric are
# hard-coded here; the dimension reuses DIMENSION so it matches the embedder.
pine = PineconeStorage(
    index_name=INDEX_NAME,
    pinecone_api_key=pinecone_api_key,
    pinecone_environment="us-east-1",
    pinecone_cloud="aws",
    pinecone_metric="cosine",
    pinecone_dimensions=DIMENSION,
    openai_api_key=openai_api_key,
)

# Top-level manager wiring together storage, embedder, extractor and the
# Pinecone handle, plus the generation model name and API keys.
manager = ReportRetrievalManager(
    storage=storage,
    embedder=embedder,
    extractor=extractor,
    pine=pine,
    genai_model=GENAI_MODEL,
    genai_api_key=genai_api_key,
    openai_api_key=openai_api_key,
    # dry_run=False,  # optional argument left unset; semantics not visible here
)

2025-08-26 11:35:56,677 - retrieval - INFO - 
2025-08-26 11:35:56,686 - retrieval - INFO - Connected to MongoDB database: FYP


## Process Financial Reports (PDF)

In [2]:
await manager.parse_report(
    company = "FARM_FRESH_BERHAD",
    report_type = ReportType.ANNUAL,
    year = 2024,
    #forced_process = True
)

2025-08-26 10:17:08,733 - retrieval - INFO - Already processed and up to date.
2025-08-26 10:17:08,734 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 10:17:08,735 - retrieval - INFO - Extracting all the processed content.
2025-08-26 10:17:08,884 - retrieval - INFO - Combining all the processed content.
2025-08-26 10:17:08,885 - retrieval - INFO - Processed content ready.
2025-08-26 10:17:08,889 - retrieval - INFO - Saved processed report to ./processed_report/FARM_FRESH_BERHAD/FARM_FRESH_BERHAD_ANNUAL_2024.md


In [6]:
await manager.parse_report(
    company = "EDELTEQ HOLDINGS BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 10:21:32,314 - retrieval - INFO - Already processed and up to date.
2025-08-26 10:21:32,316 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 10:21:32,317 - retrieval - INFO - Extracting all the processed content.
2025-08-26 10:21:32,470 - retrieval - INFO - Combining all the processed content.
2025-08-26 10:21:32,471 - retrieval - INFO - Processed content ready.
2025-08-26 10:21:32,473 - retrieval - INFO - Saved processed report to ./processed_report/EDELTEQ_HOLDINGS_BERHAD/EDELTEQ_HOLDINGS_BERHAD_IPO.md


In [3]:
await manager.parse_report(
    company = "VETECE_HOLDINGS_BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 09:35:51,387 - retrieval - INFO - Already processed and up to date.
2025-08-26 09:35:51,388 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 09:35:51,388 - retrieval - INFO - Extracting all the processed content.
2025-08-26 09:35:51,533 - retrieval - INFO - Combining all the processed content.
2025-08-26 09:35:51,534 - retrieval - INFO - Processed content ready.
2025-08-26 09:35:51,538 - retrieval - INFO - Saved processed report to ./processed_report/VETECE_HOLDINGS_BERHAD/VETECE_HOLDINGS_BERHAD_IPO.md


In [4]:
await manager.parse_report(
    company = "CABNET_HOLDINGS_BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 09:35:58,782 - retrieval - INFO - Already processed and up to date.
2025-08-26 09:35:58,783 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 09:35:58,784 - retrieval - INFO - Extracting all the processed content.
2025-08-26 09:35:58,951 - retrieval - INFO - Combining all the processed content.
2025-08-26 09:35:58,951 - retrieval - INFO - Processed content ready.
2025-08-26 09:35:58,955 - retrieval - INFO - Saved processed report to ./processed_report/CABNET_HOLDINGS_BERHAD/CABNET_HOLDINGS_BERHAD_IPO.md


In [7]:
await manager.parse_report(
    company = "AUTOCOUNT_DOTCOM_BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 10:21:36,639 - retrieval - INFO - Already processed and up to date.
2025-08-26 10:21:36,640 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 10:21:36,641 - retrieval - INFO - Extracting all the processed content.
2025-08-26 10:21:36,791 - retrieval - INFO - Combining all the processed content.
2025-08-26 10:21:36,792 - retrieval - INFO - Processed content ready.
2025-08-26 10:21:36,794 - retrieval - INFO - Saved processed report to ./processed_report/AUTOCOUNT_DOTCOM_BERHAD/AUTOCOUNT_DOTCOM_BERHAD_IPO.md


In [8]:
await manager.parse_report(
    company = "ICT ZONE ASIA BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 10:21:37,748 - retrieval - INFO - Already processed and up to date.
2025-08-26 10:21:37,748 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 10:21:37,749 - retrieval - INFO - Extracting all the processed content.
2025-08-26 10:21:37,854 - retrieval - INFO - Combining all the processed content.
2025-08-26 10:21:37,855 - retrieval - INFO - Processed content ready.
2025-08-26 10:21:37,858 - retrieval - INFO - Saved processed report to ./processed_report/ICT_ZONE_ASIA_BERHAD/ICT_ZONE_ASIA_BERHAD_IPO.md


## Simple User Query

In [2]:
out = await rag_answer_with_company_detection(
    pine,
    query = "Who is Joyce Wong Ai May? What is his/her position?",
    top_k = 5
)

2025-08-26 11:36:01,495 - retrieval - INFO - RAG start | query='Who is Joyce Wong Ai May? What is his/her position?' | top_k=5
2025-08-26 11:36:03,777 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 11:36:10,143 - retrieval - INFO - Detected company: None
2025-08-26 11:36:10,144 - retrieval - INFO - Search query unchanged.
2025-08-26 11:36:10,145 - retrieval - INFO - Query filter: {}
2025-08-26 11:36:11,313 - retrieval - INFO - Top-5 chunks retrieved: 5
2025-08-26 11:36:11,314 - retrieval - INFO - Hit #1 | id=EDELTEQ_HOLDINGS_BERHAD_IPO_SECTION_5_CHUNK_115 | score=0.628 | company=EDELTEQ_HOLDINGS_BERHAD | section=5. INFORMATION ON PROMOTERS, SUBSTANTIAL SHAREHOLDERS, DIRECTORS AND KEY SENIOR MANAGEMENT | text=Subsequently, she co-founded Desa Janajaya Sdn Bhd, a company which is principally involved in agrotechnology solutions, i

In [3]:
out = await rag_answer_with_company_detection(
    pine,
    query = "Who is Liew Soung Yue?",
    top_k = 5
)

2025-08-26 11:36:23,114 - retrieval - INFO - RAG start | query='Who is Liew Soung Yue?' | top_k=5
2025-08-26 11:36:23,808 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 11:36:28,266 - retrieval - INFO - Detected company: None
2025-08-26 11:36:28,267 - retrieval - INFO - Search query unchanged.
2025-08-26 11:36:28,268 - retrieval - INFO - Query filter: {}
2025-08-26 11:36:30,624 - retrieval - INFO - Top-5 chunks retrieved: 5
2025-08-26 11:36:30,625 - retrieval - INFO - Hit #1 | id=AUTOCOUNT_DOTCOM_BERHAD_IPO_SECTION_9_CHUNK_111 | score=0.602 | company=AUTOCOUNT_DOTCOM_BERHAD | section=9. INFORMATION ON PROMOTERS, SUBSTANTIAL SHAREHOLDERS, DIRECTORS AND KEY SENIOR MANAGEMENT | text=and Chief Operating Officer where she led and drove the successful implementation of the company’s vision and strategy and provided oversight and guida

In [11]:
out = await rag_answer_with_company_detection(
    pine,
    query = "Who are the Board of Directors of AutoCount",
    top_k = 5
)

2025-08-26 11:42:59,475 - retrieval - INFO - RAG start | query='Who are the Board of Directors of AutoCount' | top_k=5
2025-08-26 11:43:01,507 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 11:43:18,719 - retrieval - INFO - Detected company: 'AUTOCOUNT_DOTCOM_BERHAD'
2025-08-26 11:43:18,720 - retrieval - INFO - Search query normalized: 'Who are the Board of Directors of AutoCount' → 'Who are the Board of Directors'
2025-08-26 11:43:18,721 - retrieval - INFO - Query filter: {'from_company': 'AUTOCOUNT_DOTCOM_BERHAD'}
2025-08-26 11:43:19,670 - retrieval - INFO - Top-5 chunks retrieved: 5
2025-08-26 11:43:19,671 - retrieval - INFO - Hit #1 | id=AUTOCOUNT_DOTCOM_BERHAD_IPO_SECTION_3_CHUNK_47 | score=0.577 | company=AUTOCOUNT_DOTCOM_BERHAD | section=3. PROSPECTUS SUMMARY | text=Our Directors and Key Senior Management are as follows: 

In [5]:
out = await rag_answer_with_company_detection(
    pine,
    query = "What are the mission and vision of autocount dotcom berhad?",
    top_k = 5
)

2025-08-26 11:37:12,686 - retrieval - INFO - RAG start | query='What are the mission and vision of autocount dotcom berhad?' | top_k=5
2025-08-26 11:37:13,241 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 11:37:21,210 - retrieval - INFO - Detected company: 'AUTOCOUNT_DOTCOM_BERHAD'
2025-08-26 11:37:21,212 - retrieval - INFO - Search query normalized: 'What are the mission and vision of autocount dotcom berhad?' → 'What are the mission and vision?'
2025-08-26 11:37:21,212 - retrieval - INFO - Query filter: {'from_company': 'AUTOCOUNT_DOTCOM_BERHAD'}
2025-08-26 11:37:22,145 - retrieval - INFO - Top-5 chunks retrieved: 5
2025-08-26 11:37:22,146 - retrieval - INFO - Hit #1 | id=AUTOCOUNT_DOTCOM_BERHAD_IPO_SECTION_3_CHUNK_10 | score=0.339 | company=AUTOCOUNT_DOTCOM_BERHAD | section=3. PROSPECTUS SUMMARY | text=As we emphasise on pro

In [6]:
out = await rag_answer_with_company_detection(
    pine,
    query = "What the company CABNET do?",
    top_k = 5
)

2025-08-26 11:37:30,489 - retrieval - INFO - RAG start | query='What the company CABNET do?' | top_k=5
2025-08-26 11:37:31,046 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 11:37:38,631 - retrieval - INFO - Detected company: 'CABNET_HOLDINGS_BERHAD'
2025-08-26 11:37:38,633 - retrieval - INFO - Search query normalized: 'What the company CABNET do?' → 'What the company do?'
2025-08-26 11:37:38,634 - retrieval - INFO - Query filter: {'from_company': 'CABNET_HOLDINGS_BERHAD'}
2025-08-26 11:37:44,161 - retrieval - INFO - Top-5 chunks retrieved: 5
2025-08-26 11:37:44,163 - retrieval - INFO - Hit #1 | id=CABNET_HOLDINGS_BERHAD_IPO_SECTION_2_CHUNK_23 | score=0.428 | company=CABNET_HOLDINGS_BERHAD | section=2. INFORMATION SUMMARY | text=Table: [Our subsidiary companies and their principal activities] (p. 4) - Subsidiary companies: Cabne

In [9]:
out = await rag_answer_with_company_detection(
    pine,
    query = "What is the vision and mission of farm fresh?",
    top_k = 8
)

2025-08-26 11:40:23,743 - retrieval - INFO - RAG start | query='What is the vision and mission of farm fresh?' | top_k=8
2025-08-26 11:40:25,972 - retrieval - INFO - Catalog companies (6): ['AUTOCOUNT_DOTCOM_BERHAD', 'CABNET_HOLDINGS_BERHAD', 'EDELTEQ_HOLDINGS_BERHAD', 'FARM_FRESH_BERHAD', 'ICT_ZONE_ASIA_BERHAD', 'VETECE_HOLDINGS_BERHAD']
2025-08-26 11:40:32,200 - retrieval - INFO - Detected company: 'FARM_FRESH_BERHAD'
2025-08-26 11:40:32,201 - retrieval - INFO - Search query normalized: 'What is the vision and mission of farm fresh?' → 'What is the vision and mission?'
2025-08-26 11:40:32,202 - retrieval - INFO - Query filter: {'from_company': 'FARM_FRESH_BERHAD'}
2025-08-26 11:40:33,635 - retrieval - INFO - Top-8 chunks retrieved: 8
2025-08-26 11:40:33,637 - retrieval - INFO - Hit #1 | id=FARM_FRESH_BERHAD_ANNUAL_2024_SECTION_1_CHUNK_34 | score=0.431 | company=FARM_FRESH_BERHAD | section=1. Overview of Farm Fresh Berhad | text=We have established and adhere to the following shared v

In [4]:
# Ask the manager directly, with the company given explicitly and the OpenAI
# chat model selected; the return value is displayed as the cell output.
# NOTE(review): this call is not awaited, unlike the other manager calls in
# this notebook. The recorded output shows a plain string result, so it looks
# synchronous; confirm against the current API.
manager.answer_query(
    company="FARM_FRESH_BERHAD",
    query="What is the mission and vision?",
    top_k=5,
    chat_model=OPENAI_MODEL,
)

2025-08-03 11:40:56,916 - retrieval - INFO - Embedding query: 'What is the mission and vision?'
2025-08-03 11:40:58,284 - retrieval - INFO - Retrieved chunks: ['**Vision & mission statement:**', '*   **Our Promise (Vision):** "TO BE A SUSTAINABLE AND HONEST FOOD COMPANY THAT HAS AN INGRAINED CULTURE OF PLACING THE WELL-BEING OF CONSUMERS FIRST, CULMINATING IN STRONG BRAND LOVE." (pg 12)', '*   **Our Action Plan (Mission):** Farm Fresh achieves its brand promise by:', "**Business strategy and outlook:** The Group's strategy is centered on three core pillars: Expansion, Innovation, and Reputation. (pg 62-63)", '## Leadership & Governance']
2025-08-03 11:40:59,134 - retrieval - INFO - Response generated: The vision of the company is to be a sustainable and honest food company that has an ingrained culture of placing the well-being of consumers first, culminating in strong brand love.

The mission focuses on achieving this promise by implementing specific actions, although the detailed act

"The vision of the company is to be a sustainable and honest food company that has an ingrained culture of placing the well-being of consumers first, culminating in strong brand love.\n\nThe mission focuses on achieving this promise by implementing specific actions, although the detailed action plan is outlined in the company's strategy document."