In [1]:
import logging
import os
import asyncio
from ogmyrag.report_scraper.storage import AsyncStorageManager
from ogmyrag.report_scraper.manager import ScraperManager
from ogmyrag.report_scraper.models  import ReportType
from ogmyrag.my_logging import configure_logger
from dotenv import load_dotenv

# Configure the shared "scraper" logger (INFO level, log file at
# logs/scraper.log) and emit a separator line so each notebook session is
# easy to tell apart in the log.
scraper_logger = configure_logger(
    name="scraper",
    log_level=logging.INFO,
    log_file="logs/scraper.log",
)
scraper_logger.info("\n" + "=" * 80)

# Load variables from the .env file; override=True lets values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Fail fast with a clear message when the connection string is missing,
# instead of handing an empty URI to the storage layer.
mongo_db_uri = os.getenv("MONGO_DB_URI_JJ", "")
if not mongo_db_uri:
    raise RuntimeError(
        "Environment variable MONGO_DB_URI_JJ is not set; "
        "define it in the environment or in the .env file."
    )

# Target MongoDB database for scraped reports.
db_name = "FYP"
storage = AsyncStorageManager(mongo_uri=mongo_db_uri, db_name=db_name)

# Scraper entry point used by the cells below.
# NOTE(review): dry_run=False presumably means real downloads/writes are
# performed -- confirm against ScraperManager.
manager = ScraperManager(
    storage_manager=storage,
    max_workers=5,
    dry_run=False,
)

2025-08-03 11:33:43,498 - scraper - INFO - 
2025-08-03 11:33:43,509 - scraper - INFO - Connected to MongoDB database: FYP


2025-08-03 11:34:11,552 - scraper - INFO - Downloading Farm_Fresh_Berhad_-_Integrated_Annual_Report_2024_(Part_1).pdf
2025-08-03 11:34:16,555 - scraper - INFO - Uploaded PDF bytes for Farm_Fresh_Berhad_-_Integrated_Annual_Report_2024_(Part_1).pdf → GridFS ID 688ed8b7a8704f0db2b14496
2025-08-03 11:34:16,589 - scraper - INFO - Inserted metadata for Farm_Fresh_Berhad_-_Integrated_Annual_Report_2024_(Part_1).pdf → 688ed8b8a8704f0db2b1449e
2025-08-03 11:34:16,610 - scraper - INFO - Downloading Farm_Fresh_Berhad_-_Integrated_Annual_Report_2024_(Part_2).pdf
2025-08-03 11:34:21,995 - scraper - INFO - Uploaded PDF bytes for Farm_Fresh_Berhad_-_Integrated_Annual_Report_2024_(Part_2).pdf → GridFS ID 688ed8bda8704f0db2b1449f
2025-08-03 11:34:22,023 - scraper - INFO - Inserted metadata for Farm_Fresh_Berhad_-_Integrated_Annual_Report_2024_(Part_2).pdf → 688ed8bda8704f0db2b144ac
2025-08-03 11:34:22,044 - scraper - INFO - Downloading Farm_Fresh_Berhad_-_Integrated_Annual_Report_2024_(Part_3).pdf
2025

## Scrape Annual Reports by Company Name & Year

In [None]:
# Scrape the 2024 annual report for a single company.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
    company_name="Autocount Dotcom Berhad",  # None for all companies
)

In [None]:
# Scrape the IPO report for a single company (no year is passed here).
manager.run_one(
    rtype=ReportType.IPO,
    company_name="Autocount Dotcom Berhad",
)

In [2]:
# Scrape the 2024 annual report for Farm Fresh Berhad.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
    company_name="FARM FRESH BERHAD",
)

2025-08-03 11:33:48,730 - scraper - INFO - === Annual Report & CG Report ===
2025-08-03 11:33:54,265 - scraper - INFO - Using company code: 5306 (FARM FRESH BERHAD)
2025-08-03 11:33:54,265 - scraper - INFO - Fetching Annual Report & CG Report links (Year: 2024, Company: FARM FRESH BERHAD)...
2025-08-03 11:34:02,842 - scraper - INFO - Found 1 announcement links
2025-08-03 11:34:02,844 - scraper - INFO - Processing 1 links with 5 workers
2025-08-03 11:34:36,578 - scraper - INFO - Original Annual Report & CG Report for FARM_FRESH_BERHAD (5 PDFs) is in the database


In [None]:
# Scrape the IPO report for Farm Fresh Berhad (no year is passed here).
manager.run_one(
    rtype=ReportType.IPO,
    company_name="FARM FRESH BERHAD",
)

In [None]:
# Scrape 2024 annual reports without a company filter.
# NOTE(review): omitting company_name presumably targets every company --
# confirm against ScraperManager.run_one before running, as this may be slow.
manager.run_one(
    rtype=ReportType.ANNUAL,
    year=2024,
)

## Delete reports from MongoDB according to Company, Year, ReportType

In [None]:
deleted_count = await manager.storage.delete_reports(
        company = "FARM_FRESH_BERHAD",
        report_type = ReportType.ANNUAL,
        year = 2024
)
