# Pipeline of Automated Ontology Construction

## Initialize Environment

In [None]:
import logging
import os
import json
import nest_asyncio
from dotenv import load_dotenv
from motor.motor_asyncio import AsyncIOMotorClient
from ogmyrag.my_logging import configure_logger
from ogmyrag.util import get_formatted_ontology, fetch_reports_along_with_constraints
from ogmyrag.base import MongoStorageConfig
from ogmyrag.llm import OpenAIAsyncClient
from ogmyrag.storage import AsyncMongoDBStorage
from ogmyrag.ontology_construction import OntologyConstructionSystem

# Setup logging: one named logger per subsystem, each writing to its own file
# under logs/. The OpenAI and MongoDB loggers are created with to_console=False.
ontology_construction_logger = configure_logger(
    name="ontology_construction",
    log_level=logging.DEBUG,
    log_file="logs/ontology_construction.log",
)
openai_logger = configure_logger(
    name="openai",
    log_level=logging.INFO,
    log_file="logs/openai.log",
    to_console=False,
)
mongo_logger = configure_logger(
    name="mongodb",
    log_level=logging.DEBUG,
    log_file="logs/mongodb.log",
    to_console=False,
)

# Patch event loop to support re-entry in Jupyter
nest_asyncio.apply()

# Load environment variables; override=True lets values from the .env file
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Missing variables default to "" so the checks below can report them.
mongo_db_uri = os.getenv("MONGO_DB_URI", "")
mongo_db_uri_reports = os.getenv("MONGO_DB_URI_REPORTS", "")
openai_api_key = os.getenv("OPENAI_API_KEY", "")
neo4j_uri = os.getenv("NEO4J_URI", "")
neo4j_username = os.getenv("NEO4J_USERNAME", "")
neo4j_password = os.getenv("NEO4J_PASSWORD", "")

# Report (but do not abort on) any missing configuration value.
if not mongo_db_uri:
    ontology_construction_logger.error("Please set the MONGO_DB_URI environment variable.")

if not mongo_db_uri_reports:
    ontology_construction_logger.error("Please set the MONGO_DB_URI_REPORTS environment variable.")

if not openai_api_key:
    ontology_construction_logger.error("Please set the OPENAI_API_KEY environment variable.")

# The message must spell the names exactly as they are read above (NEO4J with
# the letter O), otherwise the hint points at variables that do not exist.
if not neo4j_uri or not neo4j_username or not neo4j_password:
    ontology_construction_logger.error("Please set the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variables.")

## Set Up Ontology Construction Pipeline

### Initialize Variables for Database Connection

In [None]:
# --- Storage targets -------------------------------------------------------
# Each config names the database and the collection to use.

# Where the ontology and its evaluations are kept.
ontology_config: MongoStorageConfig = {
    "database_name": "ogmyrag",
    "collection_name": "ontology_test",
}
ontology_evaluation_config: MongoStorageConfig = {
    "database_name": "ogmyrag",
    "collection_name": "ontology_evaluation_test",
}

# Where the source reports and their constraints are read from.
company_disclosures_config: MongoStorageConfig = {
    "database_name": "FYP",
    "collection_name": "company_disclosures",
}
constraints_config: MongoStorageConfig = {
    "database_name": "FYP",
    "collection_name": "constraints",
}

# --- MongoDB clients -------------------------------------------------------
# Both clients use the same 5000 ms server selection timeout.
mongo_client_onto = AsyncIOMotorClient(mongo_db_uri, serverSelectionTimeoutMS=5000)
mongo_client_reports = AsyncIOMotorClient(
    mongo_db_uri_reports, serverSelectionTimeoutMS=5000
)

# Storage wrapper around the reports client, used when fetching reports.
async_mongo_storage_reports = AsyncMongoDBStorage(client=mongo_client_reports)

### Initialize Ontology Construction System

In [None]:
# Build the ontology construction system. A failure is logged instead of
# raised, so if construction fails `onto_system` is left unassigned by this cell.
try:
    onto_system = OntologyConstructionSystem(
        mongo_client=mongo_client_onto,
        # The purpose statement lists two dimensions (strategic and operational),
        # so the text announces "two core dimensions" to match the list.
        ontology_purpose="The ontology aims to model the interrelationships and operational dynamics of companies listed on Malaysia’s Main and ACE Markets. Its primary objective is to capture and represent relational knowledge that enables inference of implicit insights from explicit data, supporting retail investors in conducting fundamental qualitative analysis.\nSpecifically, the ontology will cover two core dimensions:\n1. Strategic Aspects – such as partnerships, supply chain dependencies, market positioning, and competitive relationships.\n2. Operational Aspects – including executive roles, board and committee structures, business segments, and core activities.\nThe ontology is designed to power a knowledge graph where relationships between entities are prioritized, enabling multi-layered reasoning about company strategy and operations context to aid investment decision-making.",
        ontology_config=ontology_config,
        ontology_evaluation_config=ontology_evaluation_config,
        llm_client=OpenAIAsyncClient(api_key=openai_api_key),
        # All agents share the same settings. The dict literal is evaluated once
        # per agent, so every agent receives its own independent config objects.
        agent_configs={
            agent_name: {
                "model": "o4-mini",
                "text": {"format": {"type": "text"}},
                "reasoning": {"effort": "medium"},
                "max_output_tokens": 100000,
            }
            for agent_name in (
                "OntologyConstructionAgent",
                "OntologyEvaluationAgent",
                "OntologyEnhancementAgent",
            )
        },
    )

except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, which
    # makes construction failures diagnosable from the log file alone.
    ontology_construction_logger.exception(
        f"Error while creating ontology construction system: {e}"
    )

### Extend the Ontology

In [None]:
# Fetch the reports to be processed
files_to_process = await fetch_reports_along_with_constraints(
    async_mongo_storage_reports=async_mongo_storage_reports,
    company_disclosures_config=company_disclosures_config,
    constraints_config=constraints_config,
    from_company="CABNET_HOLDINGS_BERHAD",
    type="PROSPECTUS",
    published_at="2017",
)

ontology_construction_logger.info(
    f"Files to be processed:\n{json.dumps(files_to_process,indent=2)}"
)

for file_name, file_content in files_to_process["files"].items():
    ontology_construction_logger.info(f"Extending ontology using: {file_name}")
    try:
        await onto_system.extend_ontology(
            source_text=file_content,
            source_text_constraints=files_to_process["constraints"],
            openai_model="o4-mini",
        )
    except Exception as e:
        ontology_construction_logger.error(f"Error while extending ontology: {e}")

### Enhance the Ontology

In [7]:
try:
    await onto_system.enhance_ontology_via_loop(openai_model="o4-mini")
except Exception as e:
    ontology_construction_logger.error(f"Error while enhancing ontology: {e}")

2025-09-08 10:40:05,462 - ontology_construction - INFO - OntologyConstructionSystem:
Enhancing ontology via loop. Current iteration 1
2025-09-08 10:40:05,625 - ontology_construction - INFO - OntologyEvaluationAgent is called
2025-09-08 10:40:05,627 - ontology_construction - DEBUG - System Prompt:


You are an ontology evaluation agent. Your task is to evaluate the given ontology according to the criteria defined below.

Guidelines:
   1. Evaluation Principles
      - You must evaluate the given ontology from two perspectives:
         1. High-Level Evaluation (ontology as a whole)
            - Goals (priority order):
               1. Purpose-oriented: Every entity and relationship must support the ontology’s stated purpose.
               2. Compact: No redundant or overlapping entity/relationship types. Avoid bidirectional duplication. Ensure each entity type is connected to at least one relationship. Remove any entity not connected to a relationship.
               3. Robust: Flexi

### Log the Ontology

In [8]:
current_onto = await onto_system.get_current_onto()
ontology_construction_logger.info(get_formatted_ontology(data=current_onto))

2025-09-08 10:49:01,277 - ontology_construction - INFO - Entities:
1. Company
- definition: A publicly listed corporate entity on Malaysia’s Main or ACE Market.
- llm-guidance: When to use: Referencing companies listed on Bursa Malaysia’s Main or ACE Market. Format: Full company name.
- examples: Autocount Dotcom Berhad

2. Person
- definition: A natural person who interacts with a company, including corporate officers, directors, advisors, investors, or other individuals.
- llm-guidance: When to use: Identifying any individual related to a listed company by name. Format: Full personal name.
- examples: Choo Chin Peng, Ng Wan Peng

3. Committee
- definition: A formal committee established by a company’s board to oversee specific functions such as audit, remuneration, or nomination.
- llm-guidance: When to use: Referring to board committees by their official names. Format: Full committee name.
- examples: Audit and Risk Management Committee, Remuneration Committee

4. Market
- definitio