# Pipeline of Automated Ontology Construction

## Initialization

In [1]:
import logging
import os
import nest_asyncio
from dotenv import load_dotenv

In [None]:
from ogmyrag.my_logging import configure_logger
from ogmyrag.base import MongoStorageConfig
from ogmyrag.ontology_construction import OntologyConstructionSystem

In [None]:
# Set up one logger per subsystem, each writing to its own file under logs/.
# Only the ontology-construction logger leaves `to_console` at
# configure_logger's default; the other two pass to_console=False.

ontology_construction_logger = configure_logger(
    name='ontology_construction',
    log_level=logging.DEBUG,
    log_file='logs/ontology_construction.log',
)
openai_logger = configure_logger(
    name='openai',
    log_level=logging.INFO,
    log_file='logs/openai.log',
    to_console=False,
)
mongo_logger = configure_logger(
    name='mongodb',
    log_level=logging.DEBUG,
    log_file='logs/mongodb.log',
    to_console=False,
)

In [4]:
# Patch the asyncio event loop so it can be re-entered while Jupyter's own
# loop is already running.

nest_asyncio.apply()

In [5]:
# Load environment variables. override=True lets values from the .env file
# replace variables that are already set in the process environment.

load_dotenv(override=True)

# Every setting falls back to "" so that a missing variable is simply falsy
# in the checks below.
mongo_db_uri = os.getenv("MONGO_DB_URI", "")

openai_api_key = os.getenv("OPENAI_API_KEY", "")

neo4j_uri = os.getenv("NEO4J_URI", "")
neo4j_username = os.getenv("NEO4J_USERNAME", "")
neo4j_password = os.getenv("NEO4J_PASSWORD", "")

# Missing settings are only reported through the logger; execution continues.
if not mongo_db_uri:
    ontology_construction_logger.error("Please set the MONGO_DB_URI environment variable.")

if not openai_api_key:
    ontology_construction_logger.error("Please set the OPENAI_API_KEY environment variable.")

# The message must name the variables exactly as they are read above
# (NEO4J_*, letter O and letter J) so the user knows what to set.
if not (neo4j_uri and neo4j_username and neo4j_password):
    ontology_construction_logger.error("Please set the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variables.")

## Setup Construction Pipeline

### Initialize Ontology Construction System

In [None]:
# The three storage configs point at the same MongoDB connection and database
# and differ only in the collection they use.
_shared_mongo_settings = {
    'connection_uri': mongo_db_uri,
    'database_name': 'ogmyrag',
}

ontology_config: MongoStorageConfig = {
    **_shared_mongo_settings,
    'collection_name': 'ontology',
}

cq_config: MongoStorageConfig = {
    **_shared_mongo_settings,
    'collection_name': 'competency_questions',
}

feedback_config: MongoStorageConfig = {
    **_shared_mongo_settings,
    'collection_name': 'feedback',
}

In [7]:
# Purpose statement passed to the ontology construction system.
ontology_purpose_statement = "to model the interrelationships and dynamics of companies listed on Malaysia's Main and ACE Markets, using data solely from Bursa Malaysia's listing requirements, prospectuses, and reports. The model emphasizes strategic and operational activities—including executives, directors, board committees, competitors, business partners, suppliers, products, services, trademarks, and technology utilization—as well as location-specific details (e.g., exports, imports, operations) and regulatory aspects (e.g., corporate governance, shareholder ownership, and compliance). The ontology drives a unidirectional, relationship-based knowledge graph for graph-based Retrieval-Augmented Generation (RAG), supporting retail investors in qualitative fundamental analysis and regulatory compliance assessment."

# Build the system from the purpose statement and the three storage configs.
# Any failure is logged and not re-raised.
# NOTE(review): if construction fails, `onto_system` is not assigned by this
# cell, so the later cells that use it will presumably fail — confirm that
# logging-only is the intended behaviour.
try:
    onto_system = OntologyConstructionSystem(
        ontology_purpose=ontology_purpose_statement,
        ontology_config=ontology_config,
        cq_config=cq_config,
        feedback_config=feedback_config,
    )
except Exception as creation_error:
    ontology_construction_logger.error(f"Error while creating ontology construction system: {creation_error}")

### Generate Competency Questions

In [None]:
await onto_system.generate_competency_questions(
   personality_num=1,
   task_num=10,
   question_num=4,
)

### Construct Ontology

In [None]:
files = {
   "Section 8": "adb_prospectus_section_8.txt",
   "Section 9a": "adb_prospectus_section_9a.txt",
   "Section 9b": "adb_prospectus_section_9b.txt",
   "Section 10": "adb_prospectus_section_10.txt",
   "Section 11": "adb_prospectus_section_11.txt",
   "Section 12": "adb_prospectus_section_12.txt",
}

for chapter_name, filename in files.items():
    file_path = f"adb_prospectus/{filename}"
    
    with open(file_path, 'r') as file:
        source_text = file.read()

    source_text_desc = f"{chapter_name} of the prospectus of Autocount Dotcom Berhad"

    await onto_system.handle_request(
        source_text=source_text,
        document_desc=source_text_desc,
        requires_reconstruct=True
    )
    await onto_system.minimize_ontology()


In [None]:
# Run the minimization step on its own; the construction loop above also
# calls it after every section.
await onto_system.minimize_ontology()

In [None]:
# Invoke the system's clarity-enhancement step.
# NOTE(review): what this step changes is defined by OntologyConstructionSystem
# and is not visible in this notebook.
await onto_system.enhance_ontology_clarity()

In [8]:
from ogmyrag.util import get_formatted_ontology

# Fetch the current ontology, format it with "is_stable" passed as the field
# to exclude for both entities and relationships, and log the result at INFO.
current_ontology = onto_system.get_current_onto()
formatted_ontology = get_formatted_ontology(
    data=current_ontology,
    exclude_entity_fields="is_stable",
    exclude_relationship_fields="is_stable",
)
ontology_construction_logger.info(formatted_ontology)

2025-05-14 15:36:34,521 - ontology_construction - INFO - Entities:
  1. Person
  - definition: An individual associated with a company, including directors, officers, employees, or advisors.
  - llm-guidance: Extract full personal names in the order presented; remove honorifics (e.g., 'Mr.', 'Dr.') and post-nominal titles (e.g., 'Ph.D.', 'CPA').
  - examples: Tan Hui Mei, John Lim, Siti Aminah

  2. Company
  - definition: A legal entity or organization mentioned in official company documents, such as annual reports, prospectuses, or filings.
  - llm-guidance: Extract the full registered company name exactly as it appears in official filings; omit registration numbers, internal codes, or informal abbreviations.
  - examples: ABC Berhad, XYZ Management Company, Trustee Services Sdn Bhd

  3. BoardCommittee
  - definition: A committee formed by a company's board of directors to oversee a specific governance or operational area.
  - llm-guidance: Extract the exact names of board committee