In [1]:
import logging
import os
import asyncio

from ogmyrag.report_retrieval.report_retrieval import ReportRetrievalManager
from ogmyrag.report_retrieval.retrieval_storage import RetrievalAsyncStorageManager
from ogmyrag.report_scraper.models import ReportType
from ogmyrag.my_logging import configure_logger
from ogmyrag.storage import PineconeStorage
from ogmyrag.report_retrieval.report_chunker import rag_answer_with_company_detection

from dotenv import load_dotenv

# --- Logging ---------------------------------------------------------------
# Logger named "retrieval" at INFO level; configure_logger is also given a
# log file path (logs/retrieval.log).
retrieval_logger = configure_logger(
    name='retrieval',
    log_level=logging.INFO,
    log_file='logs/retrieval.log',
)
# Emit a separator line so each notebook session is easy to spot in the log.
retrieval_logger.info("\n" + "=" * 80)

# --- Credentials -----------------------------------------------------------
# override=True lets values from the .env file replace variables that are
# already set in the process environment.
load_dotenv(override=True)

mongo_db_uri = os.getenv("MONGO_DB_URI_JJ", "")
pinecone_api_key = os.getenv("PINECONE_API_KEY_JJ", "")
genai_api_key = os.getenv("GENAI_API_KEY_JJ", "")
openai_api_key = os.getenv("OPENAI_API_KEY", "")

# Every credential silently falls back to "" when unset. Surface that here
# instead of letting it show up later as an opaque auth/connection failure.
# This only warns (does not raise) so existing workflows keep running.
_missing_env = [
    env_name
    for env_name, env_value in (
        ("MONGO_DB_URI_JJ", mongo_db_uri),
        ("PINECONE_API_KEY_JJ", pinecone_api_key),
        ("GENAI_API_KEY_JJ", genai_api_key),
        ("OPENAI_API_KEY", openai_api_key),
    )
    if not env_value
]
if _missing_env:
    retrieval_logger.warning(
        "Missing or empty environment variables: " + ", ".join(_missing_env)
    )

# --- Configuration constants -----------------------------------------------
INDEX_NAME = "company-disclosures-index"
EMBED_MODEL = "text-embedding-3-small"
# NOTE(review): presumably must match the output size of EMBED_MODEL - confirm.
DIMENSION = 1536
GENAI_MODEL = "gemini-2.5-pro"
# Alternative model, kept for quick switching:
#GENAI_MODEL = "gemini-2.5-flash"
OPENAI_MODEL = "gpt-5-nano"

# --- Storage back-ends -----------------------------------------------------
db_name = "FYP"
storage = RetrievalAsyncStorageManager(mongo_uri=mongo_db_uri, db_name=db_name)

pine = PineconeStorage(
    index_name=INDEX_NAME,
    pinecone_api_key=pinecone_api_key,
    pinecone_environment="us-east-1",
    pinecone_cloud="aws",
    pinecone_metric="cosine",
    pinecone_dimensions=DIMENSION,
    openai_api_key=openai_api_key,
)

# --- Retrieval manager -----------------------------------------------------
# Single entry point used by the cells below (manager.parse_report(...)).
manager = ReportRetrievalManager(
    storage=storage,
    pine=pine,
    genai_model=GENAI_MODEL,
    genai_api_key=genai_api_key,
    openai_api_key=openai_api_key,
    #dry_run = False
)

2025-08-27 11:33:06,142 - retrieval - INFO - 
2025-08-27 11:33:06,152 - retrieval - INFO - Connected to MongoDB database: FYP


## Process Financial Reports (PDF)

In [2]:
await manager.parse_report(
    company = "FARM_FRESH_BERHAD",
    report_type = ReportType.ANNUAL,
    year = 2024,
    #forced_process = True
)

2025-08-27 11:33:13,582 - retrieval - INFO - Already processed and up to date.
2025-08-27 11:33:13,583 - retrieval - INFO - Skipping processing, using existing content.
2025-08-27 11:33:13,583 - retrieval - INFO - Extracting all the processed content.
2025-08-27 11:33:13,749 - retrieval - INFO - Combining all the processed content.
2025-08-27 11:33:13,750 - retrieval - INFO - Processed content ready.
2025-08-27 11:33:13,755 - retrieval - INFO - Saved processed report to ./processed_report/FARM_FRESH_BERHAD/FARM_FRESH_BERHAD_ANNUAL_2024.md


In [6]:
await manager.parse_report(
    company = "EDELTEQ HOLDINGS BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 10:21:32,314 - retrieval - INFO - Already processed and up to date.
2025-08-26 10:21:32,316 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 10:21:32,317 - retrieval - INFO - Extracting all the processed content.
2025-08-26 10:21:32,470 - retrieval - INFO - Combining all the processed content.
2025-08-26 10:21:32,471 - retrieval - INFO - Processed content ready.
2025-08-26 10:21:32,473 - retrieval - INFO - Saved processed report to ./processed_report/EDELTEQ_HOLDINGS_BERHAD/EDELTEQ_HOLDINGS_BERHAD_IPO.md


In [3]:
await manager.parse_report(
    company = "VETECE_HOLDINGS_BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 09:35:51,387 - retrieval - INFO - Already processed and up to date.
2025-08-26 09:35:51,388 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 09:35:51,388 - retrieval - INFO - Extracting all the processed content.
2025-08-26 09:35:51,533 - retrieval - INFO - Combining all the processed content.
2025-08-26 09:35:51,534 - retrieval - INFO - Processed content ready.
2025-08-26 09:35:51,538 - retrieval - INFO - Saved processed report to ./processed_report/VETECE_HOLDINGS_BERHAD/VETECE_HOLDINGS_BERHAD_IPO.md


In [4]:
await manager.parse_report(
    company = "CABNET_HOLDINGS_BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 09:35:58,782 - retrieval - INFO - Already processed and up to date.
2025-08-26 09:35:58,783 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 09:35:58,784 - retrieval - INFO - Extracting all the processed content.
2025-08-26 09:35:58,951 - retrieval - INFO - Combining all the processed content.
2025-08-26 09:35:58,951 - retrieval - INFO - Processed content ready.
2025-08-26 09:35:58,955 - retrieval - INFO - Saved processed report to ./processed_report/CABNET_HOLDINGS_BERHAD/CABNET_HOLDINGS_BERHAD_IPO.md


In [7]:
await manager.parse_report(
    company = "AUTOCOUNT_DOTCOM_BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 10:21:36,639 - retrieval - INFO - Already processed and up to date.
2025-08-26 10:21:36,640 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 10:21:36,641 - retrieval - INFO - Extracting all the processed content.
2025-08-26 10:21:36,791 - retrieval - INFO - Combining all the processed content.
2025-08-26 10:21:36,792 - retrieval - INFO - Processed content ready.
2025-08-26 10:21:36,794 - retrieval - INFO - Saved processed report to ./processed_report/AUTOCOUNT_DOTCOM_BERHAD/AUTOCOUNT_DOTCOM_BERHAD_IPO.md


In [8]:
await manager.parse_report(
    company = "ICT ZONE ASIA BERHAD",
    report_type = ReportType.IPO,
    #forced_process = True
)

2025-08-26 10:21:37,748 - retrieval - INFO - Already processed and up to date.
2025-08-26 10:21:37,748 - retrieval - INFO - Skipping processing, using existing content.
2025-08-26 10:21:37,749 - retrieval - INFO - Extracting all the processed content.
2025-08-26 10:21:37,854 - retrieval - INFO - Combining all the processed content.
2025-08-26 10:21:37,855 - retrieval - INFO - Processed content ready.
2025-08-26 10:21:37,858 - retrieval - INFO - Saved processed report to ./processed_report/ICT_ZONE_ASIA_BERHAD/ICT_ZONE_ASIA_BERHAD_IPO.md
