In [None]:
import logging
import os
import asyncio
from ogmyrag.report_scraper.storage import AsyncStorageManager
from motor.motor_asyncio import AsyncIOMotorClient
from ogmyrag.base import MongoStorageConfig
from ogmyrag.report_scraper.manager import ScraperManager
from ogmyrag.report_scraper.models  import ReportType
from ogmyrag.my_logging import configure_logger
from dotenv import load_dotenv

# Logger named "scraper", configured at INFO level with logs/scraper.log as its log file.
scraper_logger = configure_logger(name='scraper', log_level=logging.INFO, log_file='logs/scraper.log')
# Visual separator so the start of each notebook session is easy to find in the log.
scraper_logger.info("\n" + "=" * 80)

# Load variables from a .env file; override=True lets values from .env replace
# variables that are already present in the process environment.
load_dotenv(override=True)

# The connection string is a secret, so it is read from the environment only.
mongo_db_uri = os.getenv("MONGO_DB_URI_JJ", "")
if not mongo_db_uri.strip():
    # Fail here with an actionable message rather than handing an empty URI to
    # the MongoDB driver, where the resulting error is much harder to interpret.
    raise RuntimeError(
        "Environment variable MONGO_DB_URI_JJ is not set or is empty; "
        "define it in the environment or in the .env file before running this notebook."
    )
db_name = "FYP"

mongo_storage_config: MongoStorageConfig = {
    "database_name": db_name
}

# serverSelectionTimeoutMS=5000: stop waiting for a reachable server after 5 seconds.
mongo_client = AsyncIOMotorClient(
    mongo_db_uri,
    serverSelectionTimeoutMS=5000
)
storage = AsyncStorageManager(mongo_client, mongo_storage_config)

# Single scraper entry point used by every cell below.
# NOTE(review): max_workers / dry_run semantics are defined by ScraperManager,
# which is not visible in this notebook — confirm there before changing them.
manager = ScraperManager(
    storage_manager=storage,
    max_workers=5,
    dry_run=False,
)


2025-09-23 16:35:44,206 - scraper - INFO - 
2025-09-23 16:35:44,216 - scraper - INFO - Connected to MongoDB database: FYP


2025-09-23 16:44:06,161 - scraper - INFO - Downloading 1._Notice_of_Prospectus.pdf
2025-09-23 16:44:10,579 - scraper - INFO - Uploaded PDF bytes for 1._Notice_of_Prospectus.pdf → GridFS ID 68d25dda7566b4eb720e1097
2025-09-23 16:44:10,611 - scraper - INFO - Inserted metadata for 1._Notice_of_Prospectus.pdf → 68d25dda7566b4eb720e1099
2025-09-23 16:44:10,636 - scraper - INFO - Downloading 2a._Greatech_Technology_-_Prospectus_(Part_1).pdf
2025-09-23 16:44:14,700 - scraper - INFO - Downloading UWC_Berhad_-_Detailed_Procedures_for_Application_and_Acceptance.pdf
2025-09-23 16:44:14,713 - scraper - INFO - Downloading Securemetric_-_Prospectus_dated_23_Oct_2018_(Part_1).pdf
2025-09-23 16:44:14,737 - scraper - INFO - Downloading Mi_Equipment_-_English_IPO_(Bursa_part_1).pdf
2025-09-23 16:44:14,770 - scraper - INFO - Downloading TechStore_Berhad_-_Detailed_Procedures_for_E-IPO_Share_Application_and_Acceptance.pdf
2025-09-23 16:44:15,808 - scraper - INFO - Uploaded PDF bytes for 2a._Greatech_Techn

In [2]:
# First batch of company names (nine entries). Combined with `company_2`
# for the IPO scrape further down.
company_1 = [
    "VETECE HOLDINGS BERHAD",
    "EDELTEQ HOLDINGS BERHAD",
    "CABNET HOLDINGS BERHAD",
    "Autocount Dotcom Berhad",
    "ICT ZONE ASIA BERHAD",
    "SFP TECH HOLDINGS BERHAD",
    "TT VISION HOLDINGS BERHAD",
    "RAMSSOL GROUP BERHAD",
    "ECA INTEGRATED SOLUTION BERHAD",
]

# Second batch of company names (ten entries). Used on its own for the
# per-year annual-report scrapes and together with `company_1` for IPOs.
company_2 = [
    "Greatech Technology Berhad",
    "Mi Technovation Berhad",
    "UWC Berhad",
    "Securemetric Berhad",
    "Techstore Berhad",
    "3REN Berhad",
    "Panda Eco System Berhad",
    "Cloudpoint Technology Berhad",
    "ITMAX System Berhad",
    "Infomina Berhad",
]

## Scrape Annual Reports by Company Name & Year

In [2]:
# Annual reports for 2024, selected by sector rather than by company.
# `company_name` is deliberately not passed, so the run is not restricted
# to specific companies.
manager.run_one(rtype=ReportType.ANNUAL, year=2024, sector_name="TECHNOLOGY")

2025-08-29 13:36:11,930 - scraper - INFO - === Annual Report & CG Report ===
2025-08-29 13:36:11,931 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2024, Company: ALL)...
2025-08-29 13:36:20,506 - scraper - INFO - Found 20 announcement links
2025-08-29 13:36:20,508 - scraper - INFO - Processing 5 links with 5 workers
2025-08-29 13:36:29,239 - scraper - INFO - Amended Annual Report & CG Report for ARK_RESOURCES_HOLDINGS_BERHAD (1 PDFs) is in the database
2025-08-29 13:36:29,310 - scraper - INFO - Amended Annual Report & CG Report for MALAYSIAN_PACIFIC_INDUSTRIES_BERHAD (2 PDFs) is in the database
2025-08-29 13:36:29,396 - scraper - INFO - Amended Annual Report & CG Report for KEY_ASIC_BERHAD (2 PDFs) is in the database
2025-08-29 13:36:33,669 - scraper - INFO - Amended Annual Report & CG Report for SEE_HUP_CONSOLIDATED_BERHAD (1 PDFs) is in the database
2025-08-29 13:36:33,737 - scraper - INFO - Amended Annual Report & CG Report for CTOS_DIGITAL_BERHAD (1 PDFs) is in

In [5]:
# IPO documents for both company batches in one run; no year is passed.
manager.run_one(rtype=ReportType.IPO, company_name=[*company_1, *company_2])

2025-09-23 16:43:11,293 - scraper - INFO - === Initial Public Offering ===
2025-09-23 16:43:11,295 - scraper - INFO - Fetching concurrently for 19 companies (up to 5 links each)...
2025-09-23 16:43:15,977 - scraper - INFO - Using company code: 0276 (Autocount Dotcom Berhad)
2025-09-23 16:43:15,991 - scraper - INFO - Using company code: 0319 (VETECE HOLDINGS BERHAD)
2025-09-23 16:43:15,991 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: Autocount Dotcom Berhad)...
2025-09-23 16:43:15,991 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: VETECE HOLDINGS BERHAD)...
2025-09-23 16:43:16,095 - scraper - INFO - Using company code: 0191 (CABNET HOLDINGS BERHAD)
2025-09-23 16:43:16,096 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: CABNET HOLDINGS BERHAD)...
2025-09-23 16:43:16,277 - scraper - INFO - Using company code: 0358 (ICT ZONE ASIA BERHAD)
2025-09-23 16:43:16,278 - scraper - INFO - Fetching Initi

In [6]:
# Annual reports for the `company_2` batch, year 2025.
manager.run_one(rtype=ReportType.ANNUAL, year=2025, company_name=company_2)

2025-09-23 16:48:27,131 - scraper - INFO - === Annual Report & CG Report ===
2025-09-23 16:48:27,132 - scraper - INFO - Fetching concurrently for 10 companies (up to 5 links each)...
2025-09-23 16:48:31,751 - scraper - INFO - Using company code: 5292 (UWC Berhad)
2025-09-23 16:48:31,752 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2025, Company: UWC Berhad)...
2025-09-23 16:48:32,000 - scraper - INFO - Using company code: 0203 (Securemetric Berhad)
2025-09-23 16:48:32,005 - scraper - INFO - Using company code: 0208 (Greatech Technology Berhad)
2025-09-23 16:48:32,015 - scraper - INFO - Using company code: 5286 (Mi Technovation Berhad)
2025-09-23 16:48:32,029 - scraper - INFO - Using company code: 0343 (Techstore Berhad)
2025-09-23 16:48:32,030 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2025, Company: Securemetric Berhad)...
2025-09-23 16:48:32,030 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2025, Company: Greatech Tec

In [7]:
# Annual reports for the `company_2` batch, year 2024.
manager.run_one(rtype=ReportType.ANNUAL, year=2024, company_name=company_2)

2025-09-23 16:48:45,736 - scraper - INFO - === Annual Report & CG Report ===
2025-09-23 16:48:45,737 - scraper - INFO - Fetching concurrently for 10 companies (up to 5 links each)...
2025-09-23 16:48:50,283 - scraper - INFO - Using company code: 0343 (Techstore Berhad)
2025-09-23 16:48:50,284 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2024, Company: Techstore Berhad)...
2025-09-23 16:48:50,392 - scraper - INFO - Using company code: 5292 (UWC Berhad)
2025-09-23 16:48:50,393 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2024, Company: UWC Berhad)...
2025-09-23 16:48:50,495 - scraper - INFO - Using company code: 0208 (Greatech Technology Berhad)
2025-09-23 16:48:50,496 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2024, Company: Greatech Technology Berhad)...
2025-09-23 16:48:50,685 - scraper - INFO - Using company code: 0203 (Securemetric Berhad)
2025-09-23 16:48:50,686 - scraper - INFO - Fetching Annual Report & CG Report

In [8]:
# Annual reports for the `company_2` batch, year 2023.
manager.run_one(rtype=ReportType.ANNUAL, year=2023, company_name=company_2)

2025-09-23 16:49:57,624 - scraper - INFO - === Annual Report & CG Report ===
2025-09-23 16:49:57,625 - scraper - INFO - Fetching concurrently for 10 companies (up to 5 links each)...
2025-09-23 16:50:02,179 - scraper - INFO - Using company code: 0208 (Greatech Technology Berhad)
2025-09-23 16:50:02,179 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2023, Company: Greatech Technology Berhad)...
2025-09-23 16:50:02,328 - scraper - INFO - Using company code: 5286 (Mi Technovation Berhad)
2025-09-23 16:50:02,351 - scraper - INFO - Using company code: 5292 (UWC Berhad)
2025-09-23 16:50:02,387 - scraper - INFO - Using company code: 0343 (Techstore Berhad)
2025-09-23 16:50:02,388 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2023, Company: Mi Technovation Berhad)...
2025-09-23 16:50:02,388 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2023, Company: UWC Berhad)...
2025-09-23 16:50:02,389 - scraper - INFO - Fetching Annual Report & 

In [9]:
# Annual reports for the `company_2` batch, year 2022.
manager.run_one(rtype=ReportType.ANNUAL, year=2022, company_name=company_2)

2025-09-23 16:51:05,468 - scraper - INFO - === Annual Report & CG Report ===
2025-09-23 16:51:05,469 - scraper - INFO - Fetching concurrently for 10 companies (up to 5 links each)...
2025-09-23 16:51:10,065 - scraper - INFO - Using company code: 5292 (UWC Berhad)
2025-09-23 16:51:10,140 - scraper - INFO - Using company code: 0343 (Techstore Berhad)
2025-09-23 16:51:10,141 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2022, Company: UWC Berhad)...
2025-09-23 16:51:10,142 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2022, Company: Techstore Berhad)...
2025-09-23 16:51:10,199 - scraper - INFO - Using company code: 5286 (Mi Technovation Berhad)
2025-09-23 16:51:10,199 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2022, Company: Mi Technovation Berhad)...
2025-09-23 16:51:10,338 - scraper - INFO - Using company code: 0208 (Greatech Technology Berhad)
2025-09-23 16:51:10,351 - scraper - INFO - Using company code: 0203 (Securemet

In [10]:
# Annual reports for the `company_2` batch, year 2021.
manager.run_one(rtype=ReportType.ANNUAL, year=2021, company_name=company_2)

2025-09-23 16:51:54,607 - scraper - INFO - === Annual Report & CG Report ===
2025-09-23 16:51:54,608 - scraper - INFO - Fetching concurrently for 10 companies (up to 5 links each)...
2025-09-23 16:51:59,316 - scraper - INFO - Using company code: 5286 (Mi Technovation Berhad)
2025-09-23 16:51:59,317 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2021, Company: Mi Technovation Berhad)...
2025-09-23 16:51:59,461 - scraper - INFO - Using company code: 0203 (Securemetric Berhad)
2025-09-23 16:51:59,462 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2021, Company: Securemetric Berhad)...
2025-09-23 16:51:59,740 - scraper - INFO - Using company code: 0208 (Greatech Technology Berhad)
2025-09-23 16:51:59,741 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2021, Company: Greatech Technology Berhad)...
2025-09-23 16:51:59,805 - scraper - INFO - Using company code: 5292 (UWC Berhad)
2025-09-23 16:51:59,806 - scraper - INFO - Fetching Annu

In [11]:
# Annual reports for the `company_2` batch, year 2020.
manager.run_one(rtype=ReportType.ANNUAL, year=2020, company_name=company_2)

2025-09-23 16:52:39,269 - scraper - INFO - === Annual Report & CG Report ===
2025-09-23 16:52:39,271 - scraper - INFO - Fetching concurrently for 10 companies (up to 5 links each)...
2025-09-23 16:52:43,833 - scraper - INFO - Using company code: 0203 (Securemetric Berhad)
2025-09-23 16:52:43,834 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2020, Company: Securemetric Berhad)...
2025-09-23 16:52:44,029 - scraper - INFO - Using company code: 0343 (Techstore Berhad)
2025-09-23 16:52:44,033 - scraper - INFO - Using company code: 5292 (UWC Berhad)
2025-09-23 16:52:44,041 - scraper - INFO - Using company code: 0208 (Greatech Technology Berhad)
2025-09-23 16:52:44,042 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2020, Company: Techstore Berhad)...
2025-09-23 16:52:44,042 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2020, Company: UWC Berhad)...
2025-09-23 16:52:44,042 - scraper - INFO - Fetching Annual Report & CG Report links 

In [12]:
# Annual reports for the `company_2` batch, year 2019.
manager.run_one(rtype=ReportType.ANNUAL, year=2019, company_name=company_2)

2025-09-23 16:53:21,697 - scraper - INFO - === Annual Report & CG Report ===
2025-09-23 16:53:21,699 - scraper - INFO - Fetching concurrently for 10 companies (up to 5 links each)...
2025-09-23 16:53:26,387 - scraper - INFO - Using company code: 0208 (Greatech Technology Berhad)
2025-09-23 16:53:26,408 - scraper - INFO - Using company code: 0343 (Techstore Berhad)
2025-09-23 16:53:26,408 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2019, Company: Greatech Technology Berhad)...
2025-09-23 16:53:26,409 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2019, Company: Techstore Berhad)...
2025-09-23 16:53:26,530 - scraper - INFO - Using company code: 5286 (Mi Technovation Berhad)
2025-09-23 16:53:26,535 - scraper - INFO - Using company code: 0203 (Securemetric Berhad)
2025-09-23 16:53:26,535 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2019, Company: Mi Technovation Berhad)...
2025-09-23 16:53:26,536 - scraper - INFO - Fetching A

In [13]:
# Annual reports for the `company_2` batch, year 2018.
manager.run_one(rtype=ReportType.ANNUAL, year=2018, company_name=company_2)

2025-09-23 16:54:04,729 - scraper - INFO - === Annual Report & CG Report ===
2025-09-23 16:54:04,730 - scraper - INFO - Fetching concurrently for 10 companies (up to 5 links each)...
2025-09-23 16:54:09,243 - scraper - INFO - Using company code: 5286 (Mi Technovation Berhad)
2025-09-23 16:54:09,244 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2018, Company: Mi Technovation Berhad)...
2025-09-23 16:54:09,350 - scraper - INFO - Using company code: 5292 (UWC Berhad)
2025-09-23 16:54:09,403 - scraper - INFO - Using company code: 0208 (Greatech Technology Berhad)
2025-09-23 16:54:09,403 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2018, Company: UWC Berhad)...
2025-09-23 16:54:09,404 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2018, Company: Greatech Technology Berhad)...
2025-09-23 16:54:09,558 - scraper - INFO - Using company code: 0203 (Securemetric Berhad)
2025-09-23 16:54:09,563 - scraper - INFO - Using company code: 03

In [14]:
# Annual reports for the `company_2` batch, year 2017.
manager.run_one(rtype=ReportType.ANNUAL, year=2017, company_name=company_2)

2025-09-23 16:54:43,409 - scraper - INFO - === Annual Report & CG Report ===
2025-09-23 16:54:43,410 - scraper - INFO - Fetching concurrently for 10 companies (up to 5 links each)...
2025-09-23 16:54:48,009 - scraper - INFO - Using company code: 5286 (Mi Technovation Berhad)
2025-09-23 16:54:48,010 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2017, Company: Mi Technovation Berhad)...
2025-09-23 16:54:48,231 - scraper - INFO - Using company code: 0208 (Greatech Technology Berhad)
2025-09-23 16:54:48,243 - scraper - INFO - Using company code: 0203 (Securemetric Berhad)
2025-09-23 16:54:48,245 - scraper - INFO - Using company code: 5292 (UWC Berhad)
2025-09-23 16:54:48,245 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2017, Company: Greatech Technology Berhad)...
2025-09-23 16:54:48,246 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2017, Company: Securemetric Berhad)...
2025-09-23 16:54:48,247 - scraper - INFO - Fetching Annu

## Delete reports from MongoDB according to Company, Year, ReportType

In [None]:
deleted_count = await manager.storage.delete_reports(
        company = "Autocount Dotcom Berhad",
        report_type = ReportType.IPO,
        #year = 2024
)


In [None]:
# Batch of ten company names.
# NOTE(review): the entries are identical to `company_2` defined near the top of
# this notebook, and the name `companies_2` is bound again (with different
# contents) in the following cell — whichever cell ran last wins. No cell visible
# in this notebook reads this list; confirm whether it is still needed.
companies_2 = [
    "Greatech Technology Berhad",
    "Mi Technovation Berhad",
    "UWC Berhad",
    "Securemetric Berhad",
    "Techstore Berhad",
    "3REN Berhad",
    "Panda Eco System Berhad",
    "Cloudpoint Technology Berhad",
    "ITMAX System Berhad",
    "Infomina Berhad",
]

In [None]:
# Company-name batches. No cell visible in this notebook reads these lists —
# presumably they are reference batches for ad-hoc runs; TODO confirm.
# Entries tagged "# this" were marked by the original author; the meaning of
# the marker is not recorded here.

# Batch of ten names.
companies_1 = [
    "Cabnet Holdings Berhad",
    "Aemulus Holdings Berhad",
    "Autocount Dotcom Berhad",
    "Edelteq Holdings Berhad",
    "ICT Zone Asia Berhad",
    "ViTrox Corporation Berhad",
    "VETECE HOLDINGS BERHAD",
    "Censof Holdings Berhad",
    "Edaran BHD",
    "AIMFLEX Berhad",
]

# Batch of ten names (unnumbered, unlike its siblings).
companies = [
    "Inari Amertron Berhad",
    "IFCA MSC Berhad",
    "EVD Berhad",
    "HeiTech Padu BHD",
    "Greatech Technology Berhad", # this
    "MMAG Holdings Berhad",
    "Mi Technovation Berhad",
    "Kronologi Asia Berhad",
    "JHM Consolidation BHD",
    "AwanBiru Technology Berhad",
]

# Batch of ten names.
# NOTE(review): this re-binds `companies_2`, which the previous cell already
# defines with different contents; after this cell runs the earlier list is lost.
companies_2 = [
    "Pineapple Resources BHD",
    "Pentamaster Corporation Berhad",
    "Malaysian Pacific Industries",
    "ManagePay Systems Berhad",
    "UWC Berhad",
    "SMTrack Berhad",
    "Securemetric Berhad",
    "Radiant Globaltech Berhad", # this
    "Revenue Group Berhad", # this
    "Techstore Berhad",
]

# Batch of ten names.
companies_3 = [
    "3REN Berhad",
    "Panda Eco System Berhad",
    "Cloudpoint Technology Berhad",
    "Oppstar Berhad",
    "TT Vision Holdings Berhad",
    "NationGate Holdings Berhad",
    "ITMAX System Berhad",
    "Infomina Berhad",
    "ECA Integrated Solution Berhad",
    "UMediC Group Berhad",
]

# Final batch of six names.
companies_4 = [
    "LGMS Berhad",
    "Cnergenz Berhad",
    "Ramssol Group Berhad",
    "CTOS Digital Berhad",
    "Hong Seng Consolidated Berhad",
    "VSTECS Berhad",
]