# Pipeline of Automated Ontology Construction

## Initialization

In [1]:
import logging
import os
import nest_asyncio
from dotenv import load_dotenv

In [2]:
from ogmyrag.my_logging import configure_logger
from ogmyrag.ontology_construction import (
   StorageConfig,
   OntologyConstructionSystem
)

In [3]:
# Logger for the ontology construction pipeline itself, at DEBUG level.
ontology_construction_logger = configure_logger(
    name='ontology_construction',
    log_level=logging.DEBUG,
    log_file='logs/ontology_construction.log',
)

# Separate logger (and log file) registered under the name 'openai', at INFO level.
openai_logger = configure_logger(
    name='openai',
    log_level=logging.INFO,
    log_file='logs/openai.log',
)

In [4]:
# Patch the event loop with nest_asyncio so that it can be re-entered while
# Jupyter's own loop is already running. This must run before the awaited
# pipeline calls further down the notebook.

nest_asyncio.apply()

In [5]:
# Load environment variables from the local .env file. override=True asks
# python-dotenv to let values from .env take precedence over variables that
# are already set in the process environment.

load_dotenv(override=True)

# Each setting falls back to an empty string so the checks below can treat
# "unset" and "set but empty" the same way.
mongo_db_uri = os.getenv("MONGO_DB_URI","")

openai_api_key = os.getenv("OPENAI_API_KEY","")

neo4j_uri = os.getenv("NEO4J_URI","")
neo4j_username = os.getenv("NEO4J_USERNAME","")
neo4j_password = os.getenv("NEO4J_PASSWORD","")

# Missing settings are reported through the logger only; this cell does not
# raise, so check the log before running the cells that follow.
if not mongo_db_uri:
    ontology_construction_logger.error("Please set the MONGO_DB_URI environment variable.")

if not openai_api_key:
    ontology_construction_logger.error("Please set the OPENAI_API_KEY environment variable.")

# The message must spell the variable names exactly as they are read above
# (NEO4J with the letter O), otherwise it points at variables that do not exist.
if not neo4j_uri or not neo4j_username or not neo4j_password:
    ontology_construction_logger.error("Please set the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variables.")

## Setup Construction Pipeline

### Initialize Ontology Construction System

In [6]:
# The three stores share one MongoDB connection and database; only the
# collection differs, so the common part is written once and unpacked.
shared_storage_settings = {
    'connection_uri': mongo_db_uri,
    'database_name': 'ogmyrag',
}

# Collection holding the ontology.
ontology_config: StorageConfig = {**shared_storage_settings, 'collection_name': 'ontology'}

# Collection holding the generated competency questions.
cq_config: StorageConfig = {**shared_storage_settings, 'collection_name': 'competency_questions'}

# Collection holding feedback.
feedback_config: StorageConfig = {**shared_storage_settings, 'collection_name': 'feedback'}

In [None]:
# Build the ontology construction system from the purpose statement and the
# three storage configurations defined above.
try:
   onto_system = OntologyConstructionSystem(
      ontology_purpose="to model the interrelationships and dynamics of companies listed on Malaysia's Main and ACE Markets, using data solely from Bursa Malaysia's listing requirements, prospectuses, and reports. The model emphasizes strategic and operational activities—including executives, directors, board committees, competitors, business partners, suppliers, products, services, trademarks, and technology utilization—as well as location-specific details (e.g., exports, imports, operations) and regulatory aspects (e.g., corporate governance, shareholder ownership, and compliance). The ontology drives a unidirectional, relationship-based knowledge graph for graph-based Retrieval-Augmented Generation (RAG), supporting retail investors in qualitative fundamental analysis and regulatory compliance assessment.",
      ontology_config=ontology_config,
      cq_config=cq_config,
      feedback_config=feedback_config
   )
except Exception as e:
   ontology_construction_logger.error(f"Error while creating ontology construction system: {e}")
   # Re-raise after logging: if construction fails, `onto_system` is never
   # bound and every later cell would otherwise die with an unrelated NameError.
   raise

### Generate Competency Questions

In [None]:
await onto_system.generate_competency_questions(
   personality_num=1,
   task_num=10,
   question_num=4,
)

### Construct Ontology

In [None]:
files = {
   "Section 2": "adb_prospectus_section_2.txt",
   "Section 3": "adb_prospectus_section_3.txt",
   "Section 4": "adb_prospectus_section_4.txt",
   "Section 5": "adb_prospectus_section_5.txt",
   "Section 6": "adb_prospectus_section_6.txt",
   "Section 7a": "adb_prospectus_section_7a.txt",
   "Section 7b": "adb_prospectus_section_7b.txt",
   "Section 7c": "adb_prospectus_section_7c.txt",
}


for chapter_name, filename in files.items():
    file_path = f"adb_prospectus/{filename}"
    
    with open(file_path, 'r') as file:
        source_text = file.read()

    source_text_desc = f"{chapter_name} of the prospectus of Autocount Dotcom Berhad"

    await onto_system.handle_request(
        source_text=source_text,
        document_desc=source_text_desc,
        requires_reconstruct=True
    )
    await onto_system.minimize_ontology()


In [9]:
# Run one more standalone minimization pass; the saved log output of this cell
# shows it invoking OntologyComplexityReductionAgent.
# NOTE(review): this cell's execution count (9) is higher than that of the cell
# after it (8), so the saved outputs were not produced in top-to-bottom order.
await onto_system.minimize_ontology()

2025-05-13 22:04:27,224 - ontology_construction - INFO - OntologyComplexityReductionAgent is called
2025-05-13 22:04:27,227 - openai - INFO - Sending query to o4-mini using ResponsesAPI
2025-05-13 22:07:12,988 - openai - INFO - Received response from ResponsesAPI:
 Response(id='resp_6823516dcad8819899d584b4fd8d9e1e0571acbf1a26d837', created_at=1747145070.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='o4-mini-2025-04-16', object='response', output=[ResponseReasoningItem(id='rs_6823518ecf2c8198820863becc9515e30571acbf1a26d837', summary=[], type='reasoning', status=None), ResponseOutputMessage(id='msg_6823520d2a8c8198ba431dd9ba5414510571acbf1a26d837', content=[ResponseOutputText(annotations=[], text='{"updated_ontology":{"entities":{"Person":{"definition":"An individual named in company documents, such as directors, officers, employees, or advisors.","llm-guidance":"Extract full personal names in the order presented, excluding honorifics (e.g., \'Mr.\', \'D

In [8]:
# Run the clarity enhancement step; the saved log output of this cell shows it
# invoking OntologyClarityEnhancementAgent.
await onto_system.enhance_ontology_clarity()

2025-05-13 22:00:47,198 - ontology_construction - INFO - OntologyClarityEnhancementAgent is called
2025-05-13 22:00:47,651 - openai - INFO - Sending query to o4-mini using ResponsesAPI
2025-05-13 22:02:42,040 - openai - INFO - Received response from ResponsesAPI:
 Response(id='resp_6823509011e48198b1d9da48be0e48ee0da0fb934beb5d96', created_at=1747144848.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='o4-mini-2025-04-16', object='response', output=[ResponseReasoningItem(id='rs_68235090ff6c819899e70f3c7c78c6280da0fb934beb5d96', summary=[], type='reasoning', status=None), ResponseOutputMessage(id='msg_682350d75668819882f215b511984ef40da0fb934beb5d96', content=[ResponseOutputText(annotations=[], text='{\n  "updated_ontology": {\n    "entities": {\n      "Person": {\n        "definition": "An individual named in company documents, such as directors, officers, employees, or advisors.",\n        "llm-guidance": "Extract full personal names in the order presented

In [None]:
from ogmyrag.util import get_formatted_ontology

# Format the ontology currently held by the system, then write it to the
# pipeline log at INFO level.
formatted_ontology = get_formatted_ontology(onto_system.get_current_onto())
ontology_construction_logger.info(formatted_ontology)

In [None]:
current_onto = onto_system.get_current_onto()

current_onto["relationships"]["exportsTo"] = {
   "source": "Company",
   "target": "Location",
   "llm-guidance": "Apply when a company is explicitly stated to export products or services to a named location (e.g., 'ABC Berhad exports semiconductors to Japan') in prospectuses, annual reports, or regulatory filings.",
   "is_stable": "FALSE",
   "examples":  ["ABC Berhad exportsTo Japan", "XYZ Berhad exportsTo Singapore"]
}

current_onto["note"] = "Manual modification facilitated by Grok"
await onto_system.create_onto_manually(new_onto=current_onto)