# Pipeline of Graph Construction

## Initialization

In [1]:
import logging
import os
import nest_asyncio
from dotenv import load_dotenv

In [2]:
from ogmyrag.my_logging import configure_logger
from ogmyrag.base import MongoStorageConfig, Neo4jStorageConfig
from ogmyrag.graph_construction import GraphConstructionSystem

In [3]:
# Patch the event loop so it can be re-entered from inside Jupyter.
# NOTE(review): presumably needed because library code starts its own loop
# while the notebook's loop is already running — confirm against ogmyrag.

nest_asyncio.apply()

In [4]:
# Logging setup: one DEBUG-level logger per subsystem, each writing to
# logs/<logger name>.log.

# Pipeline loggers: configured with configure_logger's default console setting.
graph_construction_logger, ontology_construction_logger = (
    configure_logger(
        name=logger_name,
        log_level=logging.DEBUG,
        log_file=f"logs/{logger_name}.log",
    )
    for logger_name in ("graph_construction", "ontology_construction")
)

# Client/driver loggers: file only (to_console=False).
openai_logger, mongo_logger, pinecone_logger, neo4j_logger = (
    configure_logger(
        name=logger_name,
        log_level=logging.DEBUG,
        log_file=f"logs/{logger_name}.log",
        to_console=False,
    )
    for logger_name in ("openai", "mongodb", "pinecone", "neo4j")
)

In [5]:
# Load environment variables.
# override=True lets values from the .env file replace variables that are
# already set in the process environment.
load_dotenv(override=True)

# MongoDB
mongo_db_uri = os.getenv("MONGO_DB_URI","")

# OpenAI
openai_api_key = os.getenv("OPENAI_API_KEY","")

# Pinecone (all values are kept as the raw strings read from the environment;
# pinecone_dimensions is None rather than "" when unset)
pinecone_api_key = os.getenv("PINECONE_API_KEY","")
pinecone_environment = os.getenv("PINECONE_ENVIRONMENT","")
pinecone_cloud = os.getenv("PINECONE_CLOUD","")
pinecone_metric = os.getenv("PINECONE_METRIC", "")
pinecone_dimensions = os.getenv("PINECONE_DIMENSIONS")

# Neo4j
neo4j_uri = os.getenv("NEO4J_URI","")
neo4j_username = os.getenv("NEO4J_USERNAME","")
neo4j_password = os.getenv("NEO4J_PASSWORD","")

# Validation: missing settings are only logged here; execution continues, so
# later cells may still fail if a required value is absent.
if not mongo_db_uri:
    graph_construction_logger.error("Please set the MONGO_DB_URI environment variable.")

if not openai_api_key:
    graph_construction_logger.error("Please set the OPENAI_API_KEY environment variable.")

if not pinecone_api_key or not pinecone_environment or not pinecone_cloud or not pinecone_metric or not pinecone_dimensions:
    graph_construction_logger.error("Please set the PINECONE_API_KEY, PINECONE_ENVIRONMENT, PINECONE_CLOUD, PINECONE_METRIC, and PINECONE_DIMENSIONS environment variables.")

if not neo4j_uri or not neo4j_username or not neo4j_password:
    # The message names the exact variables read above (NEO4J_*, letter "O").
    graph_construction_logger.error("Please set the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variables.")

## Construction Pipeline

### Initialize Graph Construction System

In [6]:
# All MongoDB-backed stores share one connection URI and one database;
# only the collection name differs between them.
_mongo_common = {
    "connection_uri": mongo_db_uri,
    "database_name": "ogmyrag",
}

# Ontology definitions.
ontology_config: MongoStorageConfig = {**_mongo_common, "collection_name": "ontology_v2"}

# Extracted entities.
entity_config: MongoStorageConfig = {**_mongo_common, "collection_name": "entities_v2"}

# Extracted relationships.
relationship_config: MongoStorageConfig = {**_mongo_common, "collection_name": "relationships_v2"}

# Source company disclosure documents.
disclosure_config: MongoStorageConfig = {**_mongo_common, "collection_name": "company_disclosures"}

# Neo4j graph database credentials, taken from the environment values loaded earlier.
graphdb_config: Neo4jStorageConfig = {
    "uri": neo4j_uri,
    "user": neo4j_username,
    "password": neo4j_password,
}

In [7]:
# Build the graph construction system from the storage configs defined above.
# A failure is logged rather than raised, so `graph_system` stays undefined
# if construction fails.
try:
    graph_system = GraphConstructionSystem(
        # MongoDB-backed stores
        ontology_config=ontology_config,
        entity_config=entity_config,
        relationship_config=relationship_config,
        disclosure_config=disclosure_config,
        # Neo4j graph store
        graphdb_config=graphdb_config,
    )
except Exception as init_error:
    graph_construction_logger.error(
        "GraphConstructionSystem\n"
        f"Error while creating graph construction system: {init_error}"
    )

### Process unparsed documents

In [8]:
try:
    await graph_system.insert_entities_relationships_from_unparsed_documents_into_mongodb(
        from_company="Autocount Dotcom Berhad",
        type="PROSPECTUS",
        published_at="14-April-2023",
        exclude_documents=[
            # "ADB_PROSPECTUS_SECTION_1",
            # "ADB_PROSPECTUS_SECTION_2",
            # "ADB_PROSPECTUS_SECTION_3",
            "ADB_PROSPECTUS_SECTION_4",
            "ADB_PROSPECTUS_SECTION_5",
            "ADB_PROSPECTUS_SECTION_6",
            "ADB_PROSPECTUS_SECTION_7A",
            "ADB_PROSPECTUS_SECTION_7B",
            "ADB_PROSPECTUS_SECTION_7C",
            "ADB_PROSPECTUS_SECTION_8",
            "ADB_PROSPECTUS_SECTION_9A",
            "ADB_PROSPECTUS_SECTION_9B",
            "ADB_PROSPECTUS_SECTION_10",
        ],
    )
except Exception as e:
    graph_construction_logger.error(
        f"GraphConstructionSystem\nError while inserting entities and relationships into MongoDB: {e}"
    )

2025-07-24 00:26:30,623 - graph_construction - INFO - GraphConstructionSystem
Preparing ontology...
2025-07-24 00:26:30,688 - graph_construction - INFO - GraphConstructionSystem
Preparing constraints...
2025-07-24 00:26:31,593 - graph_construction - INFO - GraphConstructionSystem
Constraints used for extraction

The following key-value pairs aid in interpreting the source text. Apply these mappings when extracting and storing entities and relationships to maintain consistency and accuracy.
	1. ADB: Autocount Dotcom Berhad (Registration No.: 202201006885 (1452582-U))
	2. COMPANY: Autocount Dotcom Berhad (Registration No.: 202201006885 (1452582-U))
	3. ELECTRONIC PROSPECTUS: Copy of this Prospectus that is issued, circulated or disseminated via the internet and/or an electronic storage medium
	4. Bursa Securities: Bursa Malaysia Securities Berhad (Registration No.: 200301033577 (635998-W))
	5. Website: www.bursamalaysia.com
	6. Malacca Securities: Malacca Securities Sdn Bhd (Registration

[Error] JSON parsing failed: substring not found
[Error] JSON parsing failed: substring not found


2025-07-24 00:29:09,937 - graph_construction - INFO - GraphConstructionSystem
Successfully inserted 115 entity(ies) and 98 relationship(s) into MongoDB.
2025-07-24 00:29:09,937 - graph_construction - INFO - GraphConstructionSystem
Updating the 'is_parsed' status of documents...
2025-07-24 00:29:10,188 - graph_construction - INFO - GraphConstructionSystem
Updated the 'is_parsed' status of 3 documents.


In [9]:
# Push the entities and relationships stored in MongoDB into Neo4j.
# Entities are uploaded before relationships.
# NOTE(review): presumably relationships need their endpoint nodes to exist
# first — confirm before reordering these calls.
await graph_system.insert_entities_into_neo4j()
await graph_system.insert_relationships_into_neo4j()

2025-07-24 00:30:46,807 - graph_construction - INFO - GraphConstructionSystem
Reading entities from MongoDB...
2025-07-24 00:30:46,906 - graph_construction - INFO - GraphConstructionSystem
Read 115 entity(ies) that have not been uploaded to Neo4j.
2025-07-24 00:30:46,911 - graph_construction - INFO - GraphConstructionSystem
Inserting 115 entity(ies) into Neo4j...
2025-07-24 00:30:52,454 - graph_construction - INFO - GraphConstructionSystem
Successfully inserted 115 entity(ies) into Neo4j.
2025-07-24 00:30:52,472 - graph_construction - INFO - GraphConstructionSystem
Updating 115 entity(ies) with 'inserted_into_graphdb_at' field.
2025-07-24 00:30:54,748 - graph_construction - INFO - GraphConstructionSystem
Updated 115 entity(ies) with 'inserted_into_graphdb_at' field.
2025-07-24 00:30:54,764 - graph_construction - INFO - GraphConstructionSystem
Reading relationships from MongoDB...
2025-07-24 00:30:54,821 - graph_construction - INFO - GraphConstructionSystem
Read 98 relationship(s) that 