# Pipeline of Automated Ontology Construction

## Initialization

In [None]:
import logging
import os
import nest_asyncio
from dotenv import load_dotenv
from motor.motor_asyncio import AsyncIOMotorClient


In [None]:
from ogmyrag.my_logging import configure_logger
from ogmyrag.util import get_formatted_ontology
from ogmyrag.base import MongoStorageConfig
from ogmyrag.ontology_construction import OntologyConstructionSystem

In [None]:
# Set up one logger per subsystem, each writing to its own file under logs/.
# The pipeline logger and the MongoDB logger record at DEBUG level, the OpenAI
# logger at INFO. The OpenAI and MongoDB loggers pass to_console=False
# (presumably file-only output; confirm against configure_logger).

ontology_construction_logger = configure_logger(
    name="ontology_construction",
    log_level=logging.DEBUG,
    log_file="logs/ontology_construction.log",
)
openai_logger = configure_logger(
    name="openai",
    log_level=logging.INFO,
    log_file="logs/openai.log",
    to_console=False,
)
mongo_logger = configure_logger(
    name="mongodb",
    log_level=logging.DEBUG,
    log_file="logs/mongodb.log",
    to_console=False,
)

In [None]:
# Patch asyncio so the event loop can be re-entered. Jupyter already runs an
# event loop, so this is applied once before any async pipeline call is made.

nest_asyncio.apply()

In [None]:
# Load environment variables from the .env file. override=True lets values
# from .env replace variables already present in the process environment.

load_dotenv(override=True)

# Each setting falls back to an empty string so the checks below can treat
# "unset" and "set but empty" the same way.
mongo_db_uri = os.getenv("MONGO_DB_URI", "")

openai_api_key = os.getenv("OPENAI_API_KEY", "")

neo4j_uri = os.getenv("NEO4J_URI", "")
neo4j_username = os.getenv("NEO4J_USERNAME", "")
neo4j_password = os.getenv("NEO4J_PASSWORD", "")

# Missing settings are only logged here; nothing is raised, so later cells
# still run and will fail on their own if a required value is absent.
if not mongo_db_uri:
    ontology_construction_logger.error("Please set the MONGO_DB_URI environment variable.")

if not openai_api_key:
    ontology_construction_logger.error("Please set the OPENAI_API_KEY environment variable.")

if not neo4j_uri or not neo4j_username or not neo4j_password:
    # The names in this message must match the variables read above
    # (letter "O", not the digit zero).
    ontology_construction_logger.error("Please set the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variables.")

## Setup Construction Pipeline

### Initialize Ontology Construction System

In [None]:
# MongoDB locations (database + collection) handed to the construction system:
# one for the ontology itself, one for its evaluation data. Both live in the
# same "ogmyrag" database.
ontology_config: MongoStorageConfig = {
    "database_name": "ogmyrag",
    "collection_name": "ontology_v3",
}

ontology_evaluation_config: MongoStorageConfig = {
    "database_name": "ogmyrag",
    "collection_name": "ontology_evaluation",
}

In [None]:
# Async MongoDB client; serverSelectionTimeoutMS caps server selection at 5000 ms.
mongo_client = AsyncIOMotorClient(mongo_db_uri, serverSelectionTimeoutMS=5000)

# Build the ontology construction system. A failure is logged rather than
# raised, in which case `onto_system` stays undefined for the cells below.
try:
    onto_system = OntologyConstructionSystem(
        mongo_client=mongo_client,
        # One long prompt string, split into adjacent literals for readability;
        # the concatenated text is unchanged.
        ontology_purpose=(
            "The ontology aims to model the interrelationships and operational dynamics of companies listed on Malaysia’s Main and ACE Markets. "
            "Its primary objective is to capture and represent relational knowledge that enables inference of implicit insights from explicit data, supporting retail investors in conducting fundamental qualitative analysis.\n"
            "Specifically, the ontology will cover three core dimensions:\n"
            "1. Strategic Aspects – such as partnerships, supply chain dependencies, market positioning, and competitive relationships.\n"
            "2. Operational Aspects – including executive roles, board and committee structures, business segments, and core activities.\n"
            "The ontology is designed to power a knowledge graph where relationships between entities are prioritized, enabling multi-layered reasoning about company strategy and operations context to aid investment decision-making."
        ),
        ontology_config=ontology_config,
        ontology_evaluation_config=ontology_evaluation_config,
    )
except Exception as exc:
    ontology_construction_logger.error(f"Error while creating ontology construction system: {exc}")

### Construct Ontology

In [None]:
# Prospectus sections to process, in order. Each name is resolved relative to
# the adb_prospectus/ directory by the loop further down. Only the
# uncommented entries are processed; uncomment a line to include that section.
files = [
    "adb_prospectus_section_1.txt",
    # "adb_prospectus_section_2.txt",
    # "adb_prospectus_section_3.txt",
    # "adb_prospectus_section_4.txt",
    # "adb_prospectus_section_5.txt",
    # "adb_prospectus_section_6.txt",
    # "adb_prospectus_section_7a.txt",
    # "adb_prospectus_section_7b.txt",
    # "adb_prospectus_section_7c.txt",
    # "adb_prospectus_section_8.txt",
    # "adb_prospectus_section_9a.txt",
    # "adb_prospectus_section_9b.txt",
    # "adb_prospectus_section_10.txt",
    # "adb_prospectus_section_11.txt",
    # "adb_prospectus_section_12a.txt",
    # "adb_prospectus_section_12b.txt",
    # "adb_prospectus_section_12c.txt",
]

source_text_constraints = """
The following key-value pairs aid in interpreting the source text. Apply these mappings when extracting and storing entities and relationships to maintain consistency and accuracy. This means that if your extraction involves translating a key into its representative value—for example, if the key is `CYT` and the value is `Choo Yan Tiee, the Promoter, Specified Shareholder, major shareholder, Executive Director and Managing Director of our Company`—then instead of extracting `CYT` as the entity name, you should extract `Choo Yan Tiee` as the entity name.
	1. ADB: Autocount Dotcom Berhad (Registration No.: 202201006885 (1452582-U))
	2. COMPANY: Autocount Dotcom Berhad (Registration No.: 202201006885 (1452582-U))
	3. ELECTRONIC PROSPECTUS: Copy of this Prospectus that is issued, circulated or disseminated via the internet and/or an electronic storage medium
	4. Bursa Securities: Bursa Malaysia Securities Berhad (Registration No.: 200301033577 (635998-W))
	5. Website: www.bursamalaysia.com
	6. Malacca Securities: Malacca Securities Sdn Bhd (Registration No.: 197301002760 (16121-H))
	7. IPO: Initial public offering comprising the Public Issue and Offer for Sale, collectively
	8. SHARES: Ordinary shares in ADB
	9. GROUP: ADB and its subsidiaries, namely ACSB, ACSPL, AOTGSB and ASSB, collectively
	10. SC: Securities Commission Malaysia
	11. CMSA: Capital Markets and Services Act 2007
	12. Third Party Internet Sites: Third party internet sites referenced in this Prospectus
	13. we: Company or Group or any member of Group
	14. us: Company or Group or any member of Group
	15. our: Company or Group or any member of Group
	16. ourselves: Company or Group or any member of Group
	17. Management: Executive Directors and Key Senior Management as disclosed in this Prospectus
	18. Government: Government of Malaysia
	19. RM: Ringgit Malaysia
	20. sen: Lawful currency of Malaysia (sen)
	21. ACSB: Auto Count Sdn Bhd (Registration No.: 200601031841 (751600-A))
	22. ACSPL: Autocount (S) Pte Ltd (Registration No.: 201713604G)
	23. ADB Group: ADB and its subsidiaries, namely ACSB, ACSPL, AOTGSB and ASSB, collectively
	24. AOTGSB: Autocount On The Go Sdn Bhd (Registration No.: 201601008185 (1179113-V))
	25. ASSB: Autocount Software Sdn Bhd (Registration No.: 202001018079 (1374399-V))
	26. ACE Market: ACE Market of Bursa Securities
	27. Acquisition of ACSB: Acquisition by ADB of the entire equity interest of ACSB from the previous shareholders of ACSB i.e. CCP, CYT, Liaw Huah Seng, Lim Kim Seng, Lee Chern Siong, Tey Wah Sheng and Ng Boon Thye for a purchase consideration of RM8,007,509.00, satisfied via 456,914,998 ADB Shares, which was completed on 20 June 2022
	28. Acquisition of AOTGSB: Acquisition by ACSB of the entire equity interest of AOTGSB from the previous shareholders of AOTGSB, i.e. CCP, CYT, Tan Yeow Sing and Lai Guan Siong for a purchase consideration of RM300,000.00, satisfied via cash, which was completed on 15 April 2022
	29. Acquisition of ASSB: Acquisition by ADB of the entire equity interest of ASSB from the previous shareholders of ASSB, i.e. CCP and CYT for a purchase consideration of RM5,500.00, satisfied via cash, which was completed on 9 May 2022
	30. Acquisitions: Collectively, the Acquisition of ACSB, the Acquisition of AOTGSB and the Acquisition of ASSB
	31. Act: Companies Act 2016
	32. ADA: Authorised Depository Agent, a person appointed by Bursa Depository under the Rules
	33. ADB Shares: Ordinary shares in ADB
	34. AMCL: Autocount (Myanmar) Company Limited (Registration No.: 111547688)
	35. Application: Application for our IPO Shares by way of Application Form, Electronic Share Application and/or Internet Share Application
	36. Application Form(s): Printed application form(s) for the application of our IPO Shares accompanying this Prospectus
	37. ASEAN: Association of Southeast Asian Nations
	38. ATM: Automated teller machine
	39. Authorised Dealer(s): Any person or entity who has a valid dealer agreement with the our Group
	40. Authorised Financial Institution(s): Authorised financial institution(s) participating in the Internet Share Application in respect of the payments for the IPO Shares
	41. Board: Board of Directors of ADB
	42. Bursa Depository: Bursa Malaysia Depository Sdn Bhd (Registration No.: 198701006854 (165570-W))
	43. CA 1965: Companies Act 1965
	44. CAGR: Compounded annual growth rate
	45. CCC: Certificate of Completion and Compliance
	46. CCM: Companies Commission of Malaysia
	47. CCP: Choo Chin Peng, the Promoter, Specified Shareholder, major shareholder, Executive Director and Chairman of our Company
	48. CDS: Central Depository System
	49. CDS Account: An account established by Bursa Depository for a depositor for the recording of securities and for dealing in such securities by the depositor
	50. COS: Cost of sales
	51. Constitution: Constitution of ADB
	52. COVID-19: Novel coronavirus disease 2019, an infectious respiratory disease which first broke out in 2019
	53. CYT: Choo Yan Tiee, the Promoter, Specified Shareholder, major shareholder, Executive Director and Managing Director of our Company
	54. Depositor: A holder of the CDS Account
	55. Director(s): Member(s) of our Board and within the meaning given in Section 2(1) of the CMSA
	56. Disposal of AMCL: Disposal by ACSPL and CYT of the entire equity interest of AMCL held by them for a total disposal consideration of USD50,000.00, satisfied via cash which was completed on 12 May 2022
	57. EBIT: Earnings before interest and tax
	58. EBITDA: Earnings before interest, taxation, depreciation and amortisation
	59. Electronic Share Application: Application for our IPO Shares through a Participating Financial Institution's ATM
	60. Eligible Persons: Eligible employees of our Group and persons who have contributed to the success of our Group, collectively
	61. EPS: Earnings per Share
	62. F&B: Food and beverage
	63. Financial Years Under Review: FYE 2019, FYE 2020, FYE 2021 and FYE 2022
	64. FYE: Financial year ended/ending 31 December, as the case may be
	65. GP: Gross profit
	66. GST: Malaysian Goods and Services Tax
	67. HR: Human resource
	68. ICT: Information and Communications Technology
	69. IMR: Smith Zander International Sdn Bhd (Registration No.: 201301028298 (1058128-V))
	70. Smith Zander: Smith Zander International Sdn Bhd (Registration No.: 201301028298 (1058128-V))
	71. IMR Report: Independent Market Research Report on the financial management software industry prepared by Smith Zander
	72. Internet Participating Financial Institution(s): Participating financial institution(s) for the Internet Share Application, as listed in Section 15 of this Prospectus
	73. Internet Share Application: Application for IPO Shares through an online share application service provided by the Internet Participating Financial Institutions
	74. IPO Price: Issue / Offer Price of RM0.33 per Share under our Public Issue and Offer for Sale
	75. IPO Share(s): Collectively, the Issue Share(s) and the Offer Share(s)
	76. Issue Share(s): New Share(s) to be issued under the Public Issue
	77. Issuing House: Malaysian Issuing House Sdn Bhd (Registration No.: 199301003608 (258345-X))
	78. IT: Information Technology
	79. Key Senior Management: Key senior management and key technical personnel as set out in Section 9.3 of this Prospectus
	80. Listing: Admission of ADB to the Official List of Bursa Securities and the listing of and quotation for our entire enlarged issued share capital comprising 550,500,000 Shares on the ACE Market
	81. Listing Requirements: ACE Market Listing Requirements of Bursa Securities, as amended from time to time
	82. LPD: 15 March 2023, being the latest practicable date prior to the issuance of this Prospectus
	83. MAICSA: Malaysian Institute of Chartered Secretaries and Administrators
	84. Principal Adviser: Malacca Securities Sdn Bhd (Registration No.: 197301002760 (16121-H))
	85. Sponsor: Malacca Securities Sdn Bhd (Registration No.: 197301002760 (16121-H))
	86. Underwriter: Malacca Securities Sdn Bhd (Registration No.: 197301002760 (16121-H))
	87. Placement Agent: Malacca Securities Sdn Bhd (Registration No.: 197301002760 (16121-H))
	88. Malaysian Public: Citizens of Malaysia and companies, societies, co-operatives and institutions incorporated or organised under the laws of Malaysia but excludes Directors of our Group, our substantial shareholders and persons connected with them
	89. Market Day: Any day on which Bursa Securities is open for trading of securities
	90. MCCG: Malaysian Code on Corporate Governance
	91. MCO: Movement control order under the Prevention and Control of Infectious Diseases Act 1988 and the Police Act 1967
	92. MDEC: Malaysia Digital Economy Corporation
	93. MFRS: Malaysian Financial Reporting Standards
	94. MSC: Multimedia Super Corridor
	95. NA: Net assets
	96. NBV: Net book value
	97. OASIS Office: Our office bearing postal address at B2-3A01 & B2-3A02, Meritus Tower @ Oasis Corporate Park, Oasis Damansara (Pusat Korporat Oasis) No. 2, Jalan PJU 1A/2, Ara Damansara, 47301 Petaling Jaya, Selangor Darul Ehsan
	98. Offer for Sale: Offer for sale by the Selling Shareholders of 44,040,000 Offer Shares at the IPO Price
	99. Offer Share(s): The existing Shares to be offered by the Selling Shareholders pursuant to the Offer for Sale
	100. Official List: A list specifying all securities which have been admitted for listing on Bursa Securities and not removed
	101. Participating Financial Institutions: Participating financial institutions for the Electronic Share Application as listed in Section 15 of this Prospectus
	102. PAT: Profit after taxation
	103. PBT: Profit before taxation
	104. PE Multiple: Price-to-earnings multiple
	105. Pink Form Allocation: Allocation of 11,010,000 IPO Shares for subscription by the Eligible Persons
	106. Promoters: Collectively, CCP and CYT
	107. Prospectus: This Prospectus dated 14 April 2023 in relation to our IPO
	108. Prospectus Guidelines: Prospectus Guidelines issued by the SC
	109. Public Issue: Public issue of 93,585,000 Issue Shares at the IPO Price
	110. R&D: Research and development
	111. Rules: Rules of Bursa Depository as issued under the SICDA
	112. Selling Shareholders: Collectively, CCP, CYT, Ng Boon Thye, Lim Kim Seng, Liaw Huah Seng, Tey Wah Sheng and Lee Chern Siong
	113. SICDA: Securities Industry (Central Depositories) Act 1991
	114. SME: Small and medium enterprises
	115. SOP: Standard operating procedure
	116. Specified Shareholder(s): CCP and CYT
	117. SST: Sales and service tax
	118. Technology Solutions Provider: A local technology company (at least 51.00% Malaysian owned) identified by MDEC to support Malaysian SMEs in digital adoption under the SME Business Digitalisation Grant announced in the Budget 2020
	119. Underwriting Agreement: Underwriting agreement dated 20 March 2023 entered into between our Company and the Underwriter pursuant to our IPO
	120. AUD: Australian Dollar
	121. BND: Brunei Dollar
	122. SGD: Singapore Dollar
	123. USD: United States Dollar
	124. %: Per centum
	125. Application Programming Interface: A software intermediary that enables integration between different software and/or IT systems
	126. API: A software intermediary that enables integration between different software and/or IT systems
	127. Back-end: A core component of a software that stores and processes data input by users, through its processing mechanisms to perform the intended functions of the software
	128. Cloud-native software: Cloud native computing is an approach in software development that utilises cloud computing to build and run scalable applications in modern and dynamic environments
	129. Dongle: A USB device used to store a software for use by users, whereas users are required to plug the dongle into a computer device to access the software
	130. Electronic commerce: The sale and purchase of products and/or services over the internet
	131. e-commerce: The sale and purchase of products and/or services over the internet
	132. Electronic wallet: An application that runs on mobile devices to access electronic money and enable financial transactions to be made online
	133. e-wallet: An application that runs on mobile devices to access electronic money and enable financial transactions to be made online
	134. File-based system: A data storage tool in a software where each component of the software has its own storage medium to store data separately. It is suitable for storage of data but has limited complex functions such as, amongst others, handling large data size and complex data or preventing duplication of data, hence resulting in inefficiency in data consolidation, and may cause data inconsistency which burden the storage capacity
	135. Financial management software: A computer program designed to manage a business' financial related activities such as keeping track of income, expenses, assets, liabilities and payroll, as well as managing financial transactions with internal parties (e.g., employees) and external parties (e.g. customers and suppliers)
	136. Front-end: A component of a software that serves as a platform for users to use the software (e.g., input data)
	137. Hardware: A physical component that is used to run software
	138. Off-the-shelf software: A standard software that is available for sale and use immediately and is not custom-developed
	139. On-premise software: A type of software which is installed and operated from an in-house server or computing infrastructure. The software can be accessed through a closed network within the in-house server or computing infrastructure
	140. Parameter: A variable or set of rules in data analytics and data presentation that analyse data to produce a meaningful outcome
	141. POS: POS system is a computerised network that consists of the main computer linked with several checkout terminals and supported by different hardware features starting from barcode scanners and ending with card payment terminals
	142. Software-as-a-service: A software distribution model where the software is managed and hosted by software principals and users subscribe for the usage of the software and access it through the internet
	143. SaaS: A software distribution model where the software is managed and hosted by software principals and users subscribe for the usage of the software and access it through the internet
	144. Software: A set of instructions and/or programs that are readable by hardware such as computers, to perform its functions
	145. Software licence identifier: A set of alphanumeric codes generated by software principals and is used for software installations on computer devices by customers
	146. Structured Query Language: A computer language used to communicate with data stored in the relational database management system. It allows users to instruct the database on how it should store and manage (e.g., insert, delete, and update) the data through a query mechanism
	147. SQL: A computer language used to communicate with data stored in the relational database management system. It allows users to instruct the database on how it should store and manage (e.g., insert, delete, and update) the data through a query mechanism
	148. Troubleshooting: A form of problem solving, often applied to identify and fix technical issues found in machines, computers and software systems
	149. User interface: A space in which interactions between users and computer systems occur
   150. Vertical solution: A software solution specifically designed to suit different business nature and needs of customers in each industry
"""

for filename in files:
    file_path = f"adb_prospectus/{filename}"

    with open(file_path, "r") as file:
        source_text = file.read()

    try:
        await onto_system.extend_ontology(
            source_text=source_text,
            source_text_constraints=source_text_constraints,
            openai_model="o4-mini",
        )
    except Exception as e:
        ontology_construction_logger.error(f"Error while extending ontology: {e}")

In [None]:
try:
    evaluation_report = await onto_system.get_ontology_evaluation_report(
        openai_model="gpt-5-mini",
    )
    
    await onto_system.enhance_ontology(evaluation_feedback=evaluation_report, openai_model="gpt-5-mini")
except Exception as e:
    ontology_construction_logger.error(f"Error while enhancing ontology: {e}")

### Log Current Ontology

In [None]:
# Fetch the current ontology from the system, format it with
# get_formatted_ontology, and write the result to the pipeline log at INFO level.
ontology_construction_logger.info(get_formatted_ontology(data=onto_system.get_current_onto()))