# Automated Ontology Construction Pipeline

## Initialization

In [1]:
import logging
import os
import time
import asyncio
import nest_asyncio
from dotenv import load_dotenv

In [2]:
from ogmyrag.prompts import PROMPT
from ogmyrag.llm import fetch_responses_openai
from ogmyrag.my_logging import configure_logger
from ogmyrag.ontology_construction import (
   OntologyStorageConfig,
   CompetencyQuestionsStorageConfig,
   OntologyConstructionSystem
)

In [3]:
# Create the two loggers used by this notebook, each with its own log file:
# DEBUG level for the ontology construction pipeline, INFO level for the
# logger named "openai".
ontology_construction_logger = configure_logger(
    name="ontology_construction",
    log_level=logging.DEBUG,
    log_file="logs/ontology_construction.log",
)
openai_logger = configure_logger(
    name="openai",
    log_level=logging.INFO,
    log_file="logs/openai.log",
)

In [4]:
# Patch the event loop so it can be re-entered: Jupyter already runs a loop,
# and the async calls made later in this notebook need to run inside it.

nest_asyncio.apply()

In [5]:
# Load environment variables

# Read settings from a .env file; override=True lets values from that file
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Every setting falls back to "" so the checks below can use truthiness.
mongo_db_uri = os.getenv("MONGO_DB_URI", "")

openai_api_key = os.getenv("OPENAI_API_KEY", "")

neo4j_uri = os.getenv("NEO4J_URI", "")
neo4j_username = os.getenv("NEO4J_USERNAME", "")
neo4j_password = os.getenv("NEO4J_PASSWORD", "")

# Missing settings are reported through the logger; execution continues.
if not mongo_db_uri:
    ontology_construction_logger.error("Please set the MONGO_DB_URI environment variable.")

if not openai_api_key:
    ontology_construction_logger.error("Please set the OPENAI_API_KEY environment variable.")

# The variable names in this message must match the names read above
# (letter "O", not digit "0"), otherwise the hint points at the wrong variables.
if not all((neo4j_uri, neo4j_username, neo4j_password)):
    ontology_construction_logger.error("Please set the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variables.")

In [None]:
# Read the source document and echo it for a quick visual check.
# The encoding is pinned so the read does not depend on the platform's default
# locale encoding. NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open('main_market_listing_requirements/chapter_2.txt', 'r', encoding='utf-8') as file:
    content = file.read()
    print(content)

In [None]:
# Render the ontology-construction prompt template for Chapter 2, starting
# from an empty ontology and without document constraints.
# The purpose statement is written as adjacent string literals purely for
# readability; they concatenate into one unbroken sentence block.
ontology_construction_prompt = PROMPT["ONTOLOGY_CONSTRUCTION"].format(
    document_desc="Chapter 2 of the Main Market Listing Requirements of Bursa Malaysia",
    ontology_purpose=(
        "to model the interrelationships and dynamics of companies listed on "
        "the Main and ACE Markets in Malaysia, using only information sourced "
        "from listing requirements, company prospectuses, and company reports "
        "available through Bursa Malaysia. The model emphasizes strategic "
        "aspects, including—but not limited to—key stakeholders, major "
        "competitors, business partners, suppliers, products and services, as "
        "well as location-specific details such as export, import, operations, "
        "and other relevant activities. This ontology powers a knowledge graph "
        "that enables graph-based Retrieval-Augmented Generation (RAG) to "
        "assist retail investors in conducting fundamental analysis"
    ),
    ontology="Empty",
    document_constraints="None",
)

In [None]:
# Show the fully rendered ontology-construction prompt for manual inspection.
print(ontology_construction_prompt)

In [None]:
# Render the competency-question generation prompt template with one
# personality, ten tasks and four questions.
# The purpose statement is written as adjacent string literals purely for
# readability; they concatenate into one unbroken sentence block.
ontology_q_generation_prompt = PROMPT["ONTOLOGY_CQ_GENERATION"].format(
    ontology_purpose=(
        "to model the interrelationships and dynamics of companies listed on "
        "the Main and ACE Markets in Malaysia, using only information sourced "
        "from listing requirements, company prospectuses, and company reports "
        "available through Bursa Malaysia. It focuses on strategic aspects, "
        "including—but not limited to—key stakeholders, major competitors, "
        "business partners, suppliers, and the products and services offered. "
        "This ontology powers a knowledge graph that enables graph-based "
        "Retrieval-Augmented Generation (RAG) to assist retail investors in "
        "conducting fundamental analysis"
    ),
    personality_num=1,
    task_num=10,
    question_num=4,
)

In [None]:
# Send the rendered competency-question prompt to the pipeline logger at INFO level.
ontology_construction_logger.info(ontology_q_generation_prompt)

In [None]:
async def test_concurrent_calls(num_tasks: int = 10) -> None:
    """Fire several OpenAI requests concurrently and print the elapsed time.

    Each request runs in its own coroutine and all of them are awaited
    together with ``asyncio.gather``. A request that raises is reported on
    stdout and does not stop the others.

    Args:
        num_tasks: How many concurrent requests to launch. Defaults to 10.
    """
    start_time = time.perf_counter()

    async def call_function(i):
        print(f"Task {i} started")
        try:
            result = await fetch_responses_openai(
                model="gpt-4o-mini",
                user_prompt=f"Hello from task {i}",
                system_prompt="You are a helpful assistant.",
                max_output_tokens=100,
                temperature=0.7,
            )
            print(f"Task {i} completed with result: {result.output_text}")
        except Exception as e:
            # Smoke test: report the failure and let the remaining tasks run.
            print(f"Task {i} failed: {e}")

    # Launch all tasks at once and wait for every one of them to finish.
    await asyncio.gather(*(call_function(i) for i in range(num_tasks)))

    total_time = time.perf_counter() - start_time
    print(f"Total time for {num_tasks} tasks: {total_time:.2f} seconds")

In [None]:
# Run the concurrency smoke test defined above. Top-level `await` is used here,
# so this statement needs an environment that supports it (e.g. a notebook cell).
await test_concurrent_calls()

## Set Up the Construction Pipeline

### Initialize Competency Questions

In [None]:
# Purpose statement handed to the ontology construction system below.
# Written as adjacent string literals for readability; they concatenate into
# a single string.
ontology_purpose = (
    "to model the interrelationships and dynamics of companies listed on "
    "the Main and ACE Markets in Malaysia, using only information sourced "
    "from listing requirements, company prospectuses, and company reports "
    "available through Bursa Malaysia. The model emphasizes strategic "
    "aspects, including—but not limited to—executives, directors, "
    "management team members, major competitors, business partners, "
    "suppliers, products and services, as well as location-specific "
    "details such as exports, imports, operations, and other relevant "
    "activities. This ontology powers a knowledge graph that enables "
    "graph-based Retrieval-Augmented Generation (RAG) to assist retail "
    "investors in conducting fundamental analysis."
)

In [None]:
# Storage settings for the ontology: the MongoDB URI read from the
# environment plus the database and collection names.
ontology_config: OntologyStorageConfig = {
    "connection_uri": mongo_db_uri,
    "database_name": "ogmyrag",
    "collection_name": "ontology",
}

In [None]:
# Storage settings for the competency questions: same MongoDB URI and
# database as the ontology, with a dedicated collection.
cq_config: CompetencyQuestionsStorageConfig = {
    "connection_uri": mongo_db_uri,
    "database_name": "ogmyrag",
    "collection_name": "competency_questions",
}

In [None]:
# Build the ontology construction system from the purpose statement and the
# two storage configurations defined above.
try:
    onto_system = OntologyConstructionSystem(
        ontology_purpose=ontology_purpose,
        ontology_config=ontology_config,
        cq_config=cq_config,
    )
except Exception as e:
    ontology_construction_logger.error(f"Error while creating ontology construction system: {e}")
    # Re-raise after logging: the code that follows awaits a method on
    # `onto_system`, so continuing would either fail with an unrelated
    # NameError or silently reuse an object left over from an earlier run.
    raise

In [None]:
await onto_system.generate_competency_questions(
   personality_num=1,
   task_num=10,
   question_num=4,
   model="gpt-4.1-mini"
)