In [1]:
import logging
import os
import asyncio
from ogmyrag.report_scraper.storage import AsyncStorageManager
from ogmyrag.report_scraper.manager import ScraperManager
from ogmyrag.report_scraper.models  import ReportType
from ogmyrag.my_logging import configure_logger
from dotenv import load_dotenv

# Configure the shared "scraper" logger at INFO level with logs/scraper.log as
# its log file, then emit a separator line so each notebook session is easy to
# find in the log.
scraper_logger = configure_logger(name='scraper', log_level=logging.INFO, log_file='logs/scraper.log')
scraper_logger.info("\n" + "=" * 80)

# override=True lets values from .env replace variables already present in the
# process environment.
load_dotenv(override=True)

# Fail fast with a clear message instead of handing an empty URI to the
# storage layer, where a missing variable would only surface as an opaque
# connection error.
mongo_db_uri = os.getenv("MONGO_DB_URI_JJ", "")
if not mongo_db_uri:
    raise RuntimeError(
        "MONGO_DB_URI_JJ is not set; define it in the environment or in .env "
        "before running this notebook."
    )

db_name = "FYP"
storage = AsyncStorageManager(mongo_uri=mongo_db_uri, db_name=db_name)

# max_workers is the worker count used when processing announcement links.
# NOTE(review): dry_run=False presumably means PDFs are really downloaded and
# written to MongoDB — confirm against ScraperManager.
manager = ScraperManager(
    storage_manager=storage,
    max_workers=5,
    dry_run=False,
)

2025-08-07 09:01:22,063 - scraper - INFO - 
2025-08-07 09:01:22,072 - scraper - INFO - Connected to MongoDB database: FYP


2025-08-07 09:01:47,320 - scraper - INFO - Downloading 01._VETECE_Holdings_Berhad_-_Prospectus_(Part_1).pdf
2025-08-07 09:01:52,407 - scraper - INFO - Uploaded PDF bytes for 01._VETECE_Holdings_Berhad_-_Prospectus_(Part_1).pdf → GridFS ID 6893fb0095dba616e9fcaddb
2025-08-07 09:01:52,429 - scraper - INFO - Inserted metadata for 01._VETECE_Holdings_Berhad_-_Prospectus_(Part_1).pdf → 6893fb0095dba616e9fcade5
2025-08-07 09:01:52,448 - scraper - INFO - Downloading 02._VETECE_Holdings_Berhad_-_Prospectus_(Part_2).pdf
2025-08-07 09:01:57,438 - scraper - INFO - Uploaded PDF bytes for 02._VETECE_Holdings_Berhad_-_Prospectus_(Part_2).pdf → GridFS ID 6893fb0595dba616e9fcade6
2025-08-07 09:01:57,472 - scraper - INFO - Inserted metadata for 02._VETECE_Holdings_Berhad_-_Prospectus_(Part_2).pdf → 6893fb0595dba616e9fcadef
2025-08-07 09:01:57,498 - scraper - INFO - Downloading 03._VETECE_Holdings_Berhad_-_Detailed_Procedures_for_Application.pdf
2025-08-07 09:02:01,966 - scraper - INFO - Uploaded PDF by

## Scrape Reports (Annual / IPO) by Company Name & Year

In [None]:
# Annual report run for a single company and year.
# Pass company_name=None to cover all companies.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
    company_name="Autocount Dotcom Berhad",
)

In [None]:
# IPO document run for a single company; no year is passed for IPO runs.
manager.run_one(
    rtype=ReportType.IPO,
    company_name="Autocount Dotcom Berhad",
)

In [None]:
# IPO document run for CABNET HOLDINGS BERHAD.
manager.run_one(
    rtype=ReportType.IPO,
    company_name="CABNET HOLDINGS BERHAD",
)

In [2]:
# IPO document run for VETECE HOLDINGS BERHAD. The recorded output of this
# cell shows the PDFs being downloaded, uploaded to GridFS and their metadata
# inserted.
manager.run_one(
    rtype=ReportType.IPO,
    company_name="VETECE HOLDINGS BERHAD",
)

2025-08-07 09:01:25,277 - scraper - INFO - === Initial Public Offering ===
2025-08-07 09:01:29,804 - scraper - INFO - Using company code: 0319 (VETECE HOLDINGS BERHAD)
2025-08-07 09:01:29,804 - scraper - INFO - Fetching Initial Public Offering links (Year: N/A, Company: VETECE HOLDINGS BERHAD)...
2025-08-07 09:01:38,403 - scraper - INFO - Found 1 announcement links
2025-08-07 09:01:38,404 - scraper - INFO - Processing 1 links with 5 workers
2025-08-07 09:02:01,990 - scraper - INFO - Original Initial Public Offering for VETECE_HOLDINGS_BERHAD (3 PDFs) is in the database


In [None]:
# Annual report run for FARM FRESH BERHAD, year 2024.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
    company_name="FARM FRESH BERHAD",
)

In [None]:
# IPO document run for FARM FRESH BERHAD.
manager.run_one(
    rtype=ReportType.IPO,
    company_name="FARM FRESH BERHAD",
)

In [None]:
# IPO document run for CABNET HOLDINGS BERHAD.
# NOTE(review): same arguments as an earlier cell in this notebook — one of
# the two can probably be removed.
manager.run_one(
    rtype=ReportType.IPO,
    company_name="CABNET HOLDINGS BERHAD",
)

In [None]:
# Annual report run for SKYWORLD DEVELOPMENT BERHAD, year 2025.
# Pass company_name=None to cover all companies.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2025,
    company_name="SKYWORLD DEVELOPMENT BERHAD",
)

In [None]:
# Annual report run for 2024 with no company_name supplied.
# NOTE(review): presumably this falls back to all companies — confirm
# run_one's default for company_name.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
)

## Delete Reports from MongoDB by Company, Year, and Report Type

In [None]:
deleted_count = await manager.storage.delete_reports(
        company = "Autocount Dotcom Berhad",
        report_type = ReportType.IPO,
        #year = 2024
)
