# Pipeline of Automated Ontology Construction

## Initialization

In [None]:
import logging
import os
import nest_asyncio
from dotenv import load_dotenv

In [None]:
from ogmyrag.my_logging import configure_logger
from ogmyrag.util import get_formatted_ontology
from ogmyrag.base import MongoStorageConfig
from ogmyrag.ontology_construction import OntologyConstructionSystem

In [None]:
# Create one logger per subsystem, each writing to its own file under logs/.
# The OpenAI and MongoDB loggers are created with to_console=False.

ontology_construction_logger = configure_logger(
    name="ontology_construction",
    log_level=logging.DEBUG,
    log_file="logs/ontology_construction.log",
)
openai_logger = configure_logger(
    name="openai",
    log_level=logging.INFO,
    log_file="logs/openai.log",
    to_console=False,
)
mongo_logger = configure_logger(
    name="mongodb",
    log_level=logging.DEBUG,
    log_file="logs/mongodb.log",
    to_console=False,
)

In [None]:
# Patch the event loop with nest_asyncio so it can be re-entered when this
# notebook runs inside Jupyter.

nest_asyncio.apply()

In [None]:
# Load environment variables. override=True lets values from the dotenv file
# replace variables that are already set in the process environment.

load_dotenv(override=True)

# Every setting falls back to an empty string so the checks below can rely on
# simple truthiness.
mongo_db_uri = os.getenv("MONGO_DB_URI", "")

openai_api_key = os.getenv("OPENAI_API_KEY", "")

neo4j_uri = os.getenv("NEO4J_URI", "")
neo4j_username = os.getenv("NEO4J_USERNAME", "")
neo4j_password = os.getenv("NEO4J_PASSWORD", "")

# Missing settings are reported through the logger; execution is not aborted.
if not mongo_db_uri:
    ontology_construction_logger.error("Please set the MONGO_DB_URI environment variable.")

if not openai_api_key:
    ontology_construction_logger.error("Please set the OPENAI_API_KEY environment variable.")

# The message must name the variables exactly as they are read above
# (NEO4J_*, with the letter O), otherwise it points the user at the wrong keys.
if not (neo4j_uri and neo4j_username and neo4j_password):
    ontology_construction_logger.error("Please set the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variables.")

## Setup Construction Pipeline

### Initialize Ontology Construction System

In [None]:
# Storage settings handed to the ontology construction system: the connection
# URI comes from the environment, the database and collection names are fixed.
ontology_config: MongoStorageConfig = {
    "connection_uri": mongo_db_uri,
    "database_name": "ogmyrag",
    "collection_name": "ontology_v2",
}

In [None]:
# Purpose statement passed to the ontology construction system. It is built
# from adjacent string literals purely for readability; the resulting text is
# a single string with three embedded newlines.
_ONTOLOGY_PURPOSE = (
    "The ontology aims to model the interrelationships and operational "
    "dynamics of companies listed on Malaysia's Main and ACE Markets. "
    "It is constructed exclusively from structured and semi-structured "
    "data found in Bursa Malaysia's listing requirements, company "
    "prospectuses, and official reports.\n"
    "Its primary goal is to support retail investors in performing "
    "qualitative fundamental analysis by capturing a broad range of "
    "business-relevant elements. "
    "These include strategic factors such as market positioning, "
    "partnerships, supply chains, and competition; operational structures "
    "such as executive roles, board committees, and business activities; "
    "as well as regulatory aspects including shareholder ownership, "
    "corporate governance, and compliance practices.\n"
    "Rather than limiting the scope to predefined categories, the ontology "
    "should be designed to adaptively extract and organize any concept or "
    "relationship from the source documents that can contribute to a deeper "
    "understanding of a company's structure, behavior, or risk profile.\n"
    "The ontology drives a unidirectional, relationship-centric knowledge "
    "graph, which is used as the core schema for graph-based "
    "Retrieval-Augmented Generation (RAG) systems focused on "
    "investor-oriented analysis."
)

# Build the system; a failure is logged rather than raised, in which case
# `onto_system` is left unassigned.
try:
    onto_system = OntologyConstructionSystem(
        ontology_purpose=_ONTOLOGY_PURPOSE,
        ontology_config=ontology_config,
    )
except Exception as exc:
    ontology_construction_logger.error(
        f"Error while creating ontology construction system: {exc}"
    )

### Construct Ontology

In [None]:
files = [
    # "adb_prospectus_section_1.txt",
    "adb_prospectus_section_2.txt",
    # "adb_prospectus_section_9b.txt",
    # "adb_prospectus_section_10.txt",
    # "adb_prospectus_section_11.txt",
    # "adb_prospectus_section_12.txt",
]

for filename in files:
    file_path = f"adb_prospectus/{filename}"

    with open(file_path, "r") as file:
        source_text = file.read()

    await onto_system.handle_request(source_text=source_text)

In [None]:

# Fetch the current ontology, format it, and write it to the pipeline log.
current_ontology = onto_system.get_current_onto()
formatted_ontology = get_formatted_ontology(data=current_ontology)
ontology_construction_logger.info(formatted_ontology)