# Pipeline of Graph Construction

## Initialization

In [1]:
import logging
import os
import nest_asyncio
from dotenv import load_dotenv

In [2]:
from ogmyrag.my_logging import configure_logger
from ogmyrag.base import MongoStorageConfig, Neo4jStorageConfig, PineconeStorageConfig
from ogmyrag.graph_construction import GraphConstructionSystem

In [3]:
# Patch event loop to support re-entry in Jupyter.
# nest_asyncio.apply() is called once here, before any of the cells further
# down that use top-level `await` on the graph construction system.

nest_asyncio.apply()

In [4]:
# Logging configuration.
# Pipeline-level loggers: DEBUG level, each writing to its own file under logs/.
graph_construction_logger = configure_logger(
    name='graph_construction',
    log_file='logs/graph_construction.log',
    log_level=logging.DEBUG,
)
ontology_construction_logger = configure_logger(
    name='ontology_construction',
    log_file='logs/ontology_construction.log',
    log_level=logging.DEBUG,
)

# Client-library loggers: same level and per-logger files, created with
# to_console=False.
openai_logger = configure_logger(
    name='openai',
    log_file='logs/openai.log',
    log_level=logging.DEBUG,
    to_console=False,
)
mongo_logger = configure_logger(
    name='mongodb',
    log_file='logs/mongodb.log',
    log_level=logging.DEBUG,
    to_console=False,
)
pinecone_logger = configure_logger(
    name='pinecone',
    log_file='logs/pinecone.log',
    log_level=logging.DEBUG,
    to_console=False,
)
neo4j_logger = configure_logger(
    name='neo4j',
    log_file='logs/neo4j.log',
    log_level=logging.DEBUG,
    to_console=False,
)

In [5]:
# Load environment variables
#
# Settings are read from the process environment after python-dotenv has been
# asked to load a .env file with override=True. Each group of settings is then
# checked; a missing value is reported through graph_construction_logger but
# does not stop the cell.

load_dotenv(override=True)

# MongoDB
mongo_db_uri = os.getenv("MONGO_DB_URI", "")

# OpenAI
openai_api_key = os.getenv("OPENAI_API_KEY", "")

# Pinecone
pinecone_api_key = os.getenv("PINECONE_API_KEY", "")
pinecone_environment = os.getenv("PINECONE_ENVIRONMENT", "")
pinecone_cloud = os.getenv("PINECONE_CLOUD", "")
pinecone_metric = os.getenv("PINECONE_METRIC", "")
# No default here, so this is None when the variable is unset.
# NOTE(review): os.getenv yields a str, so the dimension count is passed on as
# text — confirm PineconeStorageConfig / its consumer converts it to an int.
pinecone_dimensions = os.getenv("PINECONE_DIMENSIONS")

# Neo4j
neo4j_uri = os.getenv("NEO4J_URI", "")
neo4j_username = os.getenv("NEO4J_USERNAME", "")
neo4j_password = os.getenv("NEO4J_PASSWORD", "")

if not mongo_db_uri:
    graph_construction_logger.error("Please set the MONGO_DB_URI environment variable.")

if not openai_api_key:
    graph_construction_logger.error("Please set the OPENAI_API_KEY environment variable.")

if not pinecone_api_key or not pinecone_environment or not pinecone_cloud or not pinecone_metric or not pinecone_dimensions:
    graph_construction_logger.error("Please set the PINECONE_API_KEY, PINECONE_ENVIRONMENT, PINECONE_CLOUD, PINECONE_METRIC, and PINECONE_DIMENSIONS environment variables.")

if not neo4j_uri or not neo4j_username or not neo4j_password:
    # The message names the variables exactly as they are read above (NEO4J_*,
    # letter O), so they can be copied straight from the log into .env.
    graph_construction_logger.error("Please set the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variables.")

## Construction Pipeline

### Initialize Graph Construction System

In [6]:
def _mongo_collection_config(collection_name: str) -> MongoStorageConfig:
    """Return the Mongo settings for one collection of the "ogmyrag" database."""
    return {
        "connection_uri": mongo_db_uri,
        "database_name": "ogmyrag",
        "collection_name": collection_name,
    }


# MongoDB: one config per collection, all sharing the same URI and database.
ontology_config: MongoStorageConfig = _mongo_collection_config("ontology_v2")
entity_config: MongoStorageConfig = _mongo_collection_config("entities_v2")
relationship_config: MongoStorageConfig = _mongo_collection_config("relationships_v2")
disclosure_config: MongoStorageConfig = _mongo_collection_config("company_disclosures")

# Neo4j: connection details taken from the environment values loaded earlier.
graphdb_config: Neo4jStorageConfig = {
    "uri": neo4j_uri,
    "user": neo4j_username,
    "password": neo4j_password,
}

# Pinecone: index settings for entity vectors, plus the OpenAI key that is
# carried in the same config.
entity_vector_config: PineconeStorageConfig = {
    "index_name": "ogmyrag",
    "pinecone_api_key": pinecone_api_key,
    "pinecone_environment": pinecone_environment,
    "pinecone_cloud": pinecone_cloud,
    "pinecone_metric": pinecone_metric,
    "pinecone_dimensions": pinecone_dimensions,
    "openai_api_key": openai_api_key,
}
    

In [7]:
# Build the GraphConstructionSystem from the six storage configs.
#
# A construction failure is logged and then re-raised. Swallowing it would
# leave `graph_system` undefined on a fresh kernel, or silently bound to the
# object from an earlier run, and every later cell awaits methods on it.
try:
    graph_system = GraphConstructionSystem(
        ontology_config=ontology_config,
        disclosure_config=disclosure_config,
        entity_config=entity_config,
        relationship_config=relationship_config,
        entity_vector_config=entity_vector_config,
        graphdb_config=graphdb_config,
    )
except Exception as e:
    graph_construction_logger.error(
        f"GraphConstructionSystem\nError while creating graph construction system: {e}"
    )
    # Stop the notebook here instead of continuing with a missing/stale system.
    raise

### Process unparsed documents

In [None]:
try:
    await graph_system.insert_entities_relationships_from_unparsed_documents_into_mongodb(
        from_company="Autocount Dotcom Berhad",
        type="PROSPECTUS",
        published_at="14-April-2023",
        exclude_documents=[
            # "ADB_PROSPECTUS_SECTION_1",
            # "ADB_PROSPECTUS_SECTION_2",
            "ADB_PROSPECTUS_SECTION_3",
            "ADB_PROSPECTUS_SECTION_4",
            "ADB_PROSPECTUS_SECTION_5",
            "ADB_PROSPECTUS_SECTION_6",
            "ADB_PROSPECTUS_SECTION_7A",
            "ADB_PROSPECTUS_SECTION_7B",
            "ADB_PROSPECTUS_SECTION_7C",
            "ADB_PROSPECTUS_SECTION_8",
            "ADB_PROSPECTUS_SECTION_9A",
            "ADB_PROSPECTUS_SECTION_9B",
            "ADB_PROSPECTUS_SECTION_10",
        ],
    )
except Exception as e:
    graph_construction_logger.error(
        f"GraphConstructionSystem\nError while inserting entities and relationships into MongoDB: {e}"
    )

In [None]:
# Push entities into Neo4j first, then relationships.
# NOTE(review): presumably both calls read what the MongoDB step stored, and
# relationships need their endpoint entities to exist already — confirm
# against GraphConstructionSystem before reordering.
await graph_system.insert_entities_into_neo4j()
await graph_system.insert_relationships_into_neo4j()

In [None]:
# Push entities into Pinecone so they can be searched by similarity.
# NOTE(review): presumably uses the entity_vector_config index — confirm.
await graph_system.insert_entities_into_pinecone()

In [11]:
result = await graph_system.get_formatted_similar_results_from_pinecone(query_texts=["soung yue", "chee seng"], top_k=10)
graph_construction_logger.info(result)

2025-07-24 02:30:41,243 - graph_construction - INFO - Target: soung yue
Found:
1. Liew Soung Yue (0.715411901 similarity score)
2. Liew Soung Yue (0.715411901 similarity score)
3. Choo Yan Tiee (0.406549633 similarity score)
4. Choo Yan Tiee (0.406011641 similarity score)
5. Choo Yan Tiee (0.406011641 similarity score)
6. Chin Chee Seng (0.392994672 similarity score)
7. Chin Chee Seng (0.392994672 similarity score)
8. Wong Youn Kim (0.365910381 similarity score)
9. Wong Youn Kim (0.365890861 similarity score)
10. Choo Chin Peng (0.339348882 similarity score)

Target: chee seng
Found:
1. Chin Chee Seng (0.763325155 similarity score)
2. Chin Chee Seng (0.763325155 similarity score)
3. Choo Chin Peng (0.539525211 similarity score)
4. Choo Chin Peng (0.539441645 similarity score)
5. Choo Chin Peng (0.539436281 similarity score)
6. Liew Soung Yue (0.459509522 similarity score)
7. Liew Soung Yue (0.459509522 similarity score)
8. Ng Wan Peng (0.411478966 similarity score)
9. Ng Wan Peng (0.41