In [None]:
import logging
import os
import asyncio
from ogmyrag.report_scraper.storage import AsyncStorageManager
from motor.motor_asyncio import AsyncIOMotorClient
from ogmyrag.base import MongoStorageConfig
from ogmyrag.report_scraper.manager import ScraperManager
from ogmyrag.report_scraper.models  import ReportType
from ogmyrag.my_logging import configure_logger
from dotenv import load_dotenv

# Scraper logger: INFO level, with 'logs/scraper.log' passed as the log file.
scraper_logger = configure_logger(
    name='scraper',
    log_level=logging.INFO,
    log_file='logs/scraper.log',
)
# Separator line so the start of each notebook session is easy to spot in the log.
scraper_logger.info("\n" + "=" * 80)

# override=True: values from .env take precedence over variables that are
# already present in the process environment.
load_dotenv(override=True)

mongo_db_uri = os.getenv("MONGO_DB_URI_JJ", "")
if not mongo_db_uri.strip():
    # Fail here with an actionable message instead of handing an empty
    # connection string to the Mongo client.
    raise RuntimeError(
        "MONGO_DB_URI_JJ is not set; define it in the environment or in .env "
        "before running this notebook."
    )
db_name = "FYP"

mongo_storage_config: MongoStorageConfig = {
    "database_name": db_name,
}

# Server selection gives up after 5000 ms instead of the driver default.
mongo_client = AsyncIOMotorClient(
    mongo_db_uri,
    serverSelectionTimeoutMS=5000,
)
storage = AsyncStorageManager(mongo_client, mongo_storage_config)

# Shared scraper entry point used by every cell below.
# NOTE(review): dry_run=False presumably means scraped PDFs are really written
# to storage — confirm in ScraperManager before running against production data.
manager = ScraperManager(
    storage_manager=storage,
    max_workers=5,
    dry_run=False,
)


2025-08-29 22:25:02,278 - scraper - INFO - 
2025-08-29 22:25:02,287 - scraper - INFO - Connected to MongoDB database: FYP


2025-08-29 22:26:09,614 - scraper - INFO - Downloading Vitrox_AR_2024_BURSA_Part1.pdf
2025-08-29 22:26:15,038 - scraper - INFO - Uploaded PDF bytes for Vitrox_AR_2024_BURSA_Part1.pdf → GridFS ID 68b1b886eddac1b4ec37e45e
2025-08-29 22:26:15,065 - scraper - INFO - Inserted metadata for Vitrox_AR_2024_BURSA_Part1.pdf → 68b1b887eddac1b4ec37e46b
2025-08-29 22:26:15,090 - scraper - INFO - Downloading Vitrox_AR_2024_BURSA_Part2.pdf
2025-08-29 22:26:20,266 - scraper - INFO - Uploaded PDF bytes for Vitrox_AR_2024_BURSA_Part2.pdf → GridFS ID 68b1b88beddac1b4ec37e46c
2025-08-29 22:26:20,296 - scraper - INFO - Inserted metadata for Vitrox_AR_2024_BURSA_Part2.pdf → 68b1b88ceddac1b4ec37e479
2025-08-29 22:26:20,323 - scraper - INFO - Downloading Vitrox_AR_2024_BURSA_Part3.pdf
2025-08-29 22:26:25,058 - scraper - INFO - Uploaded PDF bytes for Vitrox_AR_2024_BURSA_Part3.pdf → GridFS ID 68b1b890eddac1b4ec37e47a
2025-08-29 22:26:25,088 - scraper - INFO - Inserted metadata for Vitrox_AR_2024_BURSA_Part3.pd

## Scrape Annual Reports by Company Name & Year

In [2]:
# Annual reports for 2024, called with sector_name "TECHNOLOGY".
# A company_name argument can be added to narrow the run; leaving it out
# (None) targets all companies.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
    sector_name="TECHNOLOGY",
)

2025-08-29 13:36:11,930 - scraper - INFO - === Annual Report & CG Report ===
2025-08-29 13:36:11,931 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2024, Company: ALL)...
2025-08-29 13:36:20,506 - scraper - INFO - Found 20 announcement links
2025-08-29 13:36:20,508 - scraper - INFO - Processing 5 links with 5 workers
2025-08-29 13:36:29,239 - scraper - INFO - Amended Annual Report & CG Report for ARK_RESOURCES_HOLDINGS_BERHAD (1 PDFs) is in the database
2025-08-29 13:36:29,310 - scraper - INFO - Amended Annual Report & CG Report for MALAYSIAN_PACIFIC_INDUSTRIES_BERHAD (2 PDFs) is in the database
2025-08-29 13:36:29,396 - scraper - INFO - Amended Annual Report & CG Report for KEY_ASIC_BERHAD (2 PDFs) is in the database
2025-08-29 13:36:33,669 - scraper - INFO - Amended Annual Report & CG Report for SEE_HUP_CONSOLIDATED_BERHAD (1 PDFs) is in the database
2025-08-29 13:36:33,737 - scraper - INFO - Amended Annual Report & CG Report for CTOS_DIGITAL_BERHAD (1 PDFs) is in

In [None]:
# IPO documents for several companies in one call (company_name given as a list).
manager.run_one(
    rtype=ReportType.IPO,
    company_name=[
        "VETECE HOLDINGS BERHAD",
        "EDELTEQ HOLDINGS BERHAD",
        "CABNET HOLDINGS BERHAD",
        "Autocount Dotcom Berhad",
        "ICT ZONE ASIA BERHAD",
    ],
)

2025-08-27 23:59:38,305 - scraper - INFO - === Initial Public Offering ===
2025-08-27 23:59:38,306 - scraper - INFO - Fetching concurrently for 5 companies (up to 5 links each)...
2025-08-27 23:59:42,916 - scraper - INFO - Using company code: 0278 (EDELTEQ HOLDINGS BERHAD)
2025-08-27 23:59:42,935 - scraper - INFO - Using company code: 0358 (ICT ZONE ASIA BERHAD)
2025-08-27 23:59:42,936 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: EDELTEQ HOLDINGS BERHAD)...
2025-08-27 23:59:42,936 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: ICT ZONE ASIA BERHAD)...
2025-08-27 23:59:43,003 - scraper - INFO - Using company code: 0319 (VETECE HOLDINGS BERHAD)
2025-08-27 23:59:43,004 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: VETECE HOLDINGS BERHAD)...
2025-08-27 23:59:43,110 - scraper - INFO - Using company code: 0276 (Autocount Dotcom Berhad)
2025-08-27 23:59:43,110 - scraper - INFO - Fetching Initial

In [3]:
# 2024 annual reports for several companies in one call (company_name given as a list).
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
    company_name=[
        "VETECE HOLDINGS BERHAD",
        "EDELTEQ HOLDINGS BERHAD",
        "CABNET HOLDINGS BERHAD",
        "Autocount Dotcom Berhad",
        "ICT ZONE ASIA BERHAD",
    ],
)

2025-08-28 00:04:42,403 - scraper - INFO - === Annual Report & CG Report ===
2025-08-28 00:04:42,404 - scraper - INFO - Fetching concurrently for 5 companies (up to 5 links each)...
2025-08-28 00:04:46,948 - scraper - INFO - Using company code: 0191 (CABNET HOLDINGS BERHAD)
2025-08-28 00:04:46,949 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2024, Company: CABNET HOLDINGS BERHAD)...
2025-08-28 00:04:47,150 - scraper - INFO - Using company code: 0319 (VETECE HOLDINGS BERHAD)
2025-08-28 00:04:47,156 - scraper - INFO - Using company code: 0278 (EDELTEQ HOLDINGS BERHAD)
2025-08-28 00:04:47,157 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2024, Company: VETECE HOLDINGS BERHAD)...
2025-08-28 00:04:47,158 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2024, Company: EDELTEQ HOLDINGS BERHAD)...
2025-08-28 00:04:47,342 - scraper - INFO - Using company code: 0358 (ICT ZONE ASIA BERHAD)
2025-08-28 00:04:47,349 - scraper - INFO - Usin

In [2]:
# 2024 annual report for a single company.
# Leave company_name out (None) to target all companies instead.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
    company_name="VITROX CORPORATION BERHAD",
)

2025-08-29 22:25:47,087 - scraper - INFO - === Annual Report & CG Report ===
2025-08-29 22:25:51,772 - scraper - INFO - Using company code: 0097 (VITROX CORPORATION BERHAD)
2025-08-29 22:25:51,772 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2024, Company: VITROX CORPORATION BERHAD)...
2025-08-29 22:26:00,824 - scraper - INFO - Found 1 announcement links
2025-08-29 22:26:00,826 - scraper - INFO - Processing 1 links with 5 workers
2025-08-29 22:26:34,723 - scraper - INFO - Original Annual Report & CG Report for VITROX_CORPORATION_BERHAD (5 PDFs) is in the database


In [3]:
# IPO documents for a single company; no year is passed for this report type.
manager.run_one(rtype=ReportType.IPO, company_name="VITROX CORPORATION BERHAD")

2025-08-29 22:27:23,884 - scraper - INFO - === Initial Public Offering ===
2025-08-29 22:27:28,740 - scraper - INFO - Using company code: 0097 (VITROX CORPORATION BERHAD)
2025-08-29 22:27:28,741 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: VITROX CORPORATION BERHAD)...
2025-08-29 22:27:33,174 - scraper - INFO - Found 0 announcement links
2025-08-29 22:27:33,176 - scraper - INFO - No links for Initial Public Offering N/A


In [2]:
# 2024 annual report for a single company.
# Leave company_name out (None) to target all companies instead.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
    company_name="Autocount Dotcom Berhad",
)

2025-08-28 00:02:55,525 - scraper - INFO - === Annual Report & CG Report ===
2025-08-28 00:03:00,116 - scraper - INFO - Using company code: 0276 (Autocount Dotcom Berhad)
2025-08-28 00:03:00,117 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2024, Company: Autocount Dotcom Berhad)...
2025-08-28 00:03:08,764 - scraper - INFO - Found 1 announcement links
2025-08-28 00:03:08,767 - scraper - INFO - Processing 1 links with 5 workers
2025-08-28 00:03:17,366 - scraper - INFO - [DRY RUN] Original ANNUAL → 3 PDFs for AUTOCOUNT_DOTCOM_BERHAD
2025-08-28 00:03:17,367 - scraper - INFO -   - ADBAR2024_-_Part_1.pdf
2025-08-28 00:03:17,370 - scraper - INFO -   - ADBAR2024_-_Part_2.pdf
2025-08-28 00:03:17,370 - scraper - INFO -   - ADB_-_CG_Report_2024_.pdf


In [2]:
# IPO documents for a single company; no year is passed for this report type.
manager.run_one(rtype=ReportType.IPO, company_name="Autocount Dotcom Berhad")

2025-08-16 16:29:46,862 - scraper - INFO - === Initial Public Offering ===
2025-08-16 16:29:51,675 - scraper - INFO - Using company code: 0276 (Autocount Dotcom Berhad)
2025-08-16 16:29:51,676 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: Autocount Dotcom Berhad)...
2025-08-16 16:30:00,272 - scraper - INFO - Found 1 announcement links
2025-08-16 16:30:00,274 - scraper - INFO - Processing 1 links with 5 workers
2025-08-16 16:30:09,716 - scraper - INFO - Original Initial Public Offering for AUTOCOUNT_DOTCOM_BERHAD (3 PDFs) is in the database


In [None]:
# IPO documents for a single company; no year is passed for this report type.
manager.run_one(rtype=ReportType.IPO, company_name="CABNET HOLDINGS BERHAD")

In [2]:
# IPO documents for a single company; no year is passed for this report type.
manager.run_one(rtype=ReportType.IPO, company_name="VETECE HOLDINGS BERHAD")

2025-08-07 09:01:25,277 - scraper - INFO - === Initial Public Offering ===
2025-08-07 09:01:29,804 - scraper - INFO - Using company code: 0319 (VETECE HOLDINGS BERHAD)
2025-08-07 09:01:29,804 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: VETECE HOLDINGS BERHAD)...
2025-08-07 09:01:38,403 - scraper - INFO - Found 1 announcement links
2025-08-07 09:01:38,404 - scraper - INFO - Processing 1 links with 5 workers
2025-08-07 09:02:01,990 - scraper - INFO - Original Initial Public Offering for VETECE_HOLDINGS_BERHAD (3 PDFs) is in the database


In [2]:
# IPO documents for a single company; no year is passed for this report type.
manager.run_one(rtype=ReportType.IPO, company_name="EDELTEQ HOLDINGS BERHAD")

2025-08-09 22:53:48,040 - scraper - INFO - === Initial Public Offering ===
2025-08-09 22:53:52,542 - scraper - INFO - Using company code: 0278 (EDELTEQ HOLDINGS BERHAD)
2025-08-09 22:53:52,543 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: EDELTEQ HOLDINGS BERHAD)...
2025-08-09 22:54:01,194 - scraper - INFO - Found 1 announcement links
2025-08-09 22:54:01,195 - scraper - INFO - Processing 1 links with 5 workers
2025-08-09 22:54:26,711 - scraper - INFO - Original Initial Public Offering for EDELTEQ_HOLDINGS_BERHAD (3 PDFs) is in the database


In [2]:
# IPO documents for a single company; no year is passed for this report type.
manager.run_one(rtype=ReportType.IPO, company_name="ICT ZONE ASIA BERHAD")

2025-08-24 16:47:01,069 - scraper - INFO - === Initial Public Offering ===
2025-08-24 16:47:05,672 - scraper - INFO - Using company code: 0358 (ICT ZONE ASIA BERHAD)
2025-08-24 16:47:05,673 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: ICT ZONE ASIA BERHAD)...
2025-08-24 16:47:14,383 - scraper - INFO - Found 1 announcement links
2025-08-24 16:47:14,384 - scraper - INFO - Processing 1 links with 5 workers
2025-08-24 16:47:39,704 - scraper - INFO - Original Initial Public Offering for ICT_ZONE_ASIA_BERHAD (3 PDFs) is in the database


In [None]:
# 2024 annual report for a single company.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
    company_name="FARM FRESH BERHAD",
)

In [5]:
# IPO documents for a single company; no year is passed for this report type.
manager.run_one(rtype=ReportType.IPO, company_name="FARM FRESH BERHAD")

2025-08-24 16:42:17,351 - scraper - INFO - === Initial Public Offering ===
2025-08-24 16:42:21,965 - scraper - INFO - Using company code: 5306 (FARM FRESH BERHAD)
2025-08-24 16:42:21,966 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: FARM FRESH BERHAD)...
2025-08-24 16:42:30,656 - scraper - INFO - Found 1 announcement links
2025-08-24 16:42:30,657 - scraper - INFO - Processing 1 links with 5 workers
2025-08-24 16:42:39,700 - scraper - INFO - Original Initial Public Offering for FARM_FRESH_BERHAD (4 PDFs) is in the database


In [None]:
# IPO documents for CABNET HOLDINGS BERHAD.
# NOTE(review): this call is identical to an earlier cell in this notebook;
# consider keeping only one of them.
manager.run_one(rtype=ReportType.IPO, company_name="CABNET HOLDINGS BERHAD")

In [None]:
# 2025 annual report for a single company.
# Leave company_name out (None) to target all companies instead.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2025,
    company_name="SKYWORLD DEVELOPMENT BERHAD",
)

In [None]:
# 2024 annual reports with no company_name or sector_name filter.
# NOTE(review): presumably this targets every listed company — expect a long run.
manager.run_one(rtype=ReportType.ANNUAL, year=2024)

## Delete Reports from MongoDB by Company, Year, and Report Type

In [None]:
deleted_count = await manager.storage.delete_reports(
        company = "Autocount Dotcom Berhad",
        report_type = ReportType.IPO,
        #year = 2024
)
