## Initializing Project

In [1]:
import logging
import os
from dotenv import load_dotenv
from pathlib import Path
from pprint import pprint

In [2]:
# Configure the two loggers used by this notebook. Both log at DEBUG level to
# their own file under logs/; the 'mongodb' logger is created with
# to_console=False.
from ogmyrag.my_logging import configure_logger

app_logger = configure_logger(
    name='og-myrag',
    log_level=logging.DEBUG,
    log_file='logs/app.log',
)
test_logger = configure_logger(
    name='mongodb',
    log_level=logging.DEBUG,
    log_file='logs/mongodb.log',
    to_console=False,
)

In [3]:
# Load variables from a .env file into the process environment. Per the
# python-dotenv documentation, override=True lets values from .env replace
# variables that are already set. This must run before any cell that reads
# configuration through os.getenv.
load_dotenv(override=True)

True

In [4]:
# Connect to MongoDB and select the database/collection used by later cells.
#
# A failure here is logged and then re-raised: continuing would leave `mongo`
# undefined (fresh kernel) or pointing at a stale connection from an earlier
# run, and the cells below would fail later with a less helpful error.

from ogmyrag.storage import MongoDBStorage

mongo_db_uri = os.getenv("MONGO_DB_URI", "")

try:
    if not mongo_db_uri:
        # Fail with an actionable message instead of handing an empty URI to
        # the storage layer.
        raise ValueError(
            "MONGO_DB_URI is not set; define it in .env or the environment"
        )
    mongo = MongoDBStorage(mongo_db_uri)
    mongo.use_database("ogmyrag")
    mongo.use_collection("company_disclosures")
except Exception as e:
    app_logger.error(f"Could not connect to MongoDB: {str(e)}")
    # Stop "Run All" here rather than carrying on without a usable client.
    raise

## Uploading Documents to MongoDB

In [None]:
# Upload every .txt file in the prospectus folder to MongoDB, skipping files
# whose formatted document name is already present in the collection.
#
# Each file is handled independently: any error while reading, formatting,
# looking up or inserting one file is logged and the loop moves on to the next
# file instead of aborting the whole batch.
from ogmyrag.util import get_formatted_company_data

folder_name = "adb_prospectus"
folder_path = Path.cwd() / folder_name
document_type = "prospectus"
company_name = "Autocount Dotcom Berhad"

if not folder_path.is_dir():
    app_logger.info(f"Folder '{folder_name}' not found in project root.")
    app_logger.info("Project is terminated")
else:
    # glob() yields entries in filesystem order; sort so runs are reproducible.
    txt_files = sorted(folder_path.glob("*.txt"))
    if not txt_files:
        app_logger.info("No .txt files found in the folder.")
    else:
        for txt_file in txt_files:
            try:
                current_data = get_formatted_company_data(
                    txt_file.read_text(encoding='utf-8'),
                    txt_file.stem,
                    document_type,
                    company_name,
                )
                document_name = current_data["name"]

                # Skip the insert when a document with this name already exists.
                if mongo.read_documents({"name": document_name}):
                    app_logger.info(f"Document with name '{document_name}' already exists in the database.")
                    continue

                document_id = mongo.create_document(current_data)
                app_logger.info(f"Inserted document with name '{document_name}' and id '{document_id}' into the database.")
            except Exception as e:
                # Report against the file name: the document name may not be
                # known yet if reading or formatting is what failed.
                app_logger.error(f"Error processing file '{txt_file.name}': {str(e)}")

2025-04-15 12:59:30,084 - og-myrag - INFO - Document with name 'ADB_PROSPECTUS_SECTION_1' already exists in the database.
2025-04-15 12:59:30,166 - og-myrag - INFO - Document with name 'ADB_PROSPECTUS_SECTION_10' already exists in the database.
2025-04-15 12:59:30,236 - og-myrag - INFO - Document with name 'ADB_PROSPECTUS_SECTION_2' already exists in the database.
2025-04-15 12:59:30,328 - og-myrag - INFO - Document with name 'ADB_PROSPECTUS_SECTION_3' already exists in the database.
2025-04-15 12:59:31,035 - og-myrag - INFO - Document with name 'ADB_PROSPECTUS_SECTION_4' already exists in the database.
2025-04-15 12:59:31,748 - og-myrag - INFO - Document with name 'ADB_PROSPECTUS_SECTION_5' already exists in the database.
2025-04-15 12:59:31,801 - og-myrag - INFO - Document with name 'ADB_PROSPECTUS_SECTION_6' already exists in the database.
2025-04-15 12:59:31,978 - og-myrag - INFO - Document with name 'ADB_PROSPECTUS_SECTION_7A' already exists in the database.
2025-04-15 12:59:32,09