In [1]:
import logging
import os
import asyncio
from ogmyrag.report_scraper.storage import AsyncStorageManager
from ogmyrag.report_scraper.manager import ScraperManager
from ogmyrag.report_scraper.models  import ReportType
from ogmyrag.my_logging import configure_logger
from dotenv import load_dotenv

# --- Logging -----------------------------------------------------------------
# Logger named "scraper" at INFO level, configured with the log file path
# logs/scraper.log.
scraper_logger = configure_logger(
    name="scraper",
    log_level=logging.INFO,
    log_file="logs/scraper.log",
)
# Visual separator marking the start of a new notebook session in the log.
scraper_logger.info("\n" + "=" * 80)

# --- Configuration -----------------------------------------------------------
# override=True: values from the .env file take precedence over variables that
# are already set in the process environment.
load_dotenv(override=True)

mongo_db_uri = os.getenv("MONGO_DB_URI_JJ", "")
if not mongo_db_uri:
    # Fail fast with an actionable message instead of handing an empty URI to
    # the storage layer, where the failure would be much harder to diagnose.
    raise RuntimeError(
        "Environment variable MONGO_DB_URI_JJ is not set. "
        "Add it to your .env file (or export it) before running this notebook."
    )

# --- Storage and scraper manager ---------------------------------------------
db_name = "FYP"
storage = AsyncStorageManager(mongo_uri=mongo_db_uri, db_name=db_name)

manager = ScraperManager(
    storage_manager=storage,
    max_workers=5,  # number of workers used to process announcement links
    dry_run=False,  # NOTE(review): presumably False means results are persisted - confirm
)

2025-08-24 16:46:50,986 - scraper - INFO - 
2025-08-24 16:46:50,995 - scraper - INFO - Connected to MongoDB database: FYP


2025-08-24 16:47:23,333 - scraper - INFO - Downloading 1._ICT_Zone_Asia_-_Prospectus_(Part_1).pdf
2025-08-24 16:47:29,464 - scraper - INFO - Uploaded PDF bytes for 1._ICT_Zone_Asia_-_Prospectus_(Part_1).pdf → GridFS ID 68aad1a0a100cb3b1867d24a
2025-08-24 16:47:29,491 - scraper - INFO - Inserted metadata for 1._ICT_Zone_Asia_-_Prospectus_(Part_1).pdf → 68aad1a1a100cb3b1867d256
2025-08-24 16:47:29,515 - scraper - INFO - Downloading 2._ICT_Zone_Asia_-_Prospectus_(Part_2).pdf
2025-08-24 16:47:35,083 - scraper - INFO - Uploaded PDF bytes for 2._ICT_Zone_Asia_-_Prospectus_(Part_2).pdf → GridFS ID 68aad1a6a100cb3b1867d257
2025-08-24 16:47:35,111 - scraper - INFO - Inserted metadata for 2._ICT_Zone_Asia_-_Prospectus_(Part_2).pdf → 68aad1a7a100cb3b1867d262
2025-08-24 16:47:35,136 - scraper - INFO - Downloading 3._Detailed_Procedures_for_Application.pdf
2025-08-24 16:47:39,673 - scraper - INFO - Uploaded PDF bytes for 3._Detailed_Procedures_for_Application.pdf → GridFS ID 68aad1aba100cb3b1867d26

## Scrape Annual & IPO Reports by Company Name & Year

In [None]:
# Annual report scrape: one company, one year.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
    company_name="Autocount Dotcom Berhad",  # None for all companies
)

In [2]:
# IPO document scrape for a single company (no year is passed for IPO runs).
manager.run_one(
    rtype=ReportType.IPO,
    company_name="Autocount Dotcom Berhad",
)

2025-08-16 16:29:46,862 - scraper - INFO - === Initial Public Offering ===
2025-08-16 16:29:51,675 - scraper - INFO - Using company code: 0276 (Autocount Dotcom Berhad)
2025-08-16 16:29:51,676 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: Autocount Dotcom Berhad)...
2025-08-16 16:30:00,272 - scraper - INFO - Found 1 announcement links
2025-08-16 16:30:00,274 - scraper - INFO - Processing 1 links with 5 workers
2025-08-16 16:30:09,716 - scraper - INFO - Original Initial Public Offering for AUTOCOUNT_DOTCOM_BERHAD (3 PDFs) is in the database


In [None]:
# IPO document scrape for a single company (no year is passed for IPO runs).
manager.run_one(
    rtype=ReportType.IPO,
    company_name="CABNET HOLDINGS BERHAD",
)

In [2]:
# IPO document scrape for a single company (no year is passed for IPO runs).
manager.run_one(
    rtype=ReportType.IPO,
    company_name="VETECE HOLDINGS BERHAD",
)

2025-08-07 09:01:25,277 - scraper - INFO - === Initial Public Offering ===
2025-08-07 09:01:29,804 - scraper - INFO - Using company code: 0319 (VETECE HOLDINGS BERHAD)
2025-08-07 09:01:29,804 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: VETECE HOLDINGS BERHAD)...
2025-08-07 09:01:38,403 - scraper - INFO - Found 1 announcement links
2025-08-07 09:01:38,404 - scraper - INFO - Processing 1 links with 5 workers
2025-08-07 09:02:01,990 - scraper - INFO - Original Initial Public Offering for VETECE_HOLDINGS_BERHAD (3 PDFs) is in the database


In [2]:
# IPO document scrape for a single company (no year is passed for IPO runs).
manager.run_one(
    rtype=ReportType.IPO,
    company_name="EDELTEQ HOLDINGS BERHAD",
)

2025-08-09 22:53:48,040 - scraper - INFO - === Initial Public Offering ===
2025-08-09 22:53:52,542 - scraper - INFO - Using company code: 0278 (EDELTEQ HOLDINGS BERHAD)
2025-08-09 22:53:52,543 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: EDELTEQ HOLDINGS BERHAD)...
2025-08-09 22:54:01,194 - scraper - INFO - Found 1 announcement links
2025-08-09 22:54:01,195 - scraper - INFO - Processing 1 links with 5 workers
2025-08-09 22:54:26,711 - scraper - INFO - Original Initial Public Offering for EDELTEQ_HOLDINGS_BERHAD (3 PDFs) is in the database


In [2]:
# IPO document scrape for a single company (no year is passed for IPO runs).
manager.run_one(
    rtype=ReportType.IPO,
    company_name="ICT ZONE ASIA BERHAD",
)

2025-08-24 16:47:01,069 - scraper - INFO - === Initial Public Offering ===
2025-08-24 16:47:05,672 - scraper - INFO - Using company code: 0358 (ICT ZONE ASIA BERHAD)
2025-08-24 16:47:05,673 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: ICT ZONE ASIA BERHAD)...
2025-08-24 16:47:14,383 - scraper - INFO - Found 1 announcement links
2025-08-24 16:47:14,384 - scraper - INFO - Processing 1 links with 5 workers
2025-08-24 16:47:39,704 - scraper - INFO - Original Initial Public Offering for ICT_ZONE_ASIA_BERHAD (3 PDFs) is in the database


In [None]:
# Annual report scrape: one company, one year.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
    company_name="FARM FRESH BERHAD",
)

In [5]:
# IPO document scrape for a single company (no year is passed for IPO runs).
manager.run_one(
    rtype=ReportType.IPO,
    company_name="FARM FRESH BERHAD",
)

2025-08-24 16:42:17,351 - scraper - INFO - === Initial Public Offering ===
2025-08-24 16:42:21,965 - scraper - INFO - Using company code: 5306 (FARM FRESH BERHAD)
2025-08-24 16:42:21,966 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: FARM FRESH BERHAD)...
2025-08-24 16:42:30,656 - scraper - INFO - Found 1 announcement links
2025-08-24 16:42:30,657 - scraper - INFO - Processing 1 links with 5 workers
2025-08-24 16:42:39,700 - scraper - INFO - Original Initial Public Offering for FARM_FRESH_BERHAD (4 PDFs) is in the database


In [None]:
# IPO document scrape for a single company (no year is passed for IPO runs).
# NOTE(review): this is the same call as the earlier CABNET HOLDINGS BERHAD
# IPO cell in this notebook - consider keeping only one of them.
manager.run_one(
    rtype=ReportType.IPO,
    company_name="CABNET HOLDINGS BERHAD",
)

In [None]:
# Annual report scrape: one company, one year.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2025,
    company_name="SKYWORLD DEVELOPMENT BERHAD",  # None for all companies
)

In [None]:
# Annual report scrape for 2024 with no company_name given.
# NOTE(review): presumably this covers all companies (the other annual cells
# note "None for all companies") - confirm before running; it may take long.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
)

## Delete Reports from MongoDB by Company, Year, and Report Type

In [None]:
# Destructive: removes stored reports matching the filters below.
# Top-level `await` relies on the notebook's built-in async support.
deleted_count = await manager.storage.delete_reports(
    company="Autocount Dotcom Berhad",
    report_type=ReportType.IPO,
    # year=2024,  # uncomment to also restrict the deletion to a single year
)
# Show the result so the effect of the deletion can be verified.
deleted_count
