# Pipeline of Graph Construction

## Initialization

In [1]:
import logging
import os
import nest_asyncio
from dotenv import load_dotenv

In [2]:
from ogmyrag.my_logging import configure_logger
from ogmyrag.base import MongoStorageConfig
from ogmyrag.graph_construction import GraphConstructionSystem

In [3]:
# Patch asyncio so the event loop can be re-entered in Jupyter.
# This cell must run before the cells below that await the pipeline.

nest_asyncio.apply()

In [4]:
# Logging setup: one logger per subsystem, each configured with its own
# log_file path under logs/ and the DEBUG level.

# Pipeline logger; to_console is left at configure_logger's default.
graph_construction_logger = configure_logger(
    name='graph_construction',
    log_level=logging.DEBUG,
    log_file='logs/graph_construction.log',
)

# Client-library loggers are configured with to_console=False.
openai_logger = configure_logger(
    name='openai',
    log_level=logging.DEBUG,
    log_file='logs/openai.log',
    to_console=False,
)
mongo_logger = configure_logger(
    name='mongodb',
    log_level=logging.DEBUG,
    log_file='logs/mongodb.log',
    to_console=False,
)
pinecone_logger = configure_logger(
    name='pinecone',
    log_level=logging.DEBUG,
    log_file='logs/pinecone.log',
    to_console=False,
)
neo4j_logger = configure_logger(
    name='neo4j',
    log_level=logging.DEBUG,
    log_file='logs/neo4j.log',
    to_console=False,
)

In [5]:
# Load environment variables
#
# load_dotenv(override=True) lets values from a .env file replace variables
# already present in the process environment. Every setting is then read with
# os.getenv. Missing settings are only logged as errors here; nothing is
# raised, so the cells below still execute.

load_dotenv(override=True)

mongo_db_uri = os.getenv("MONGO_DB_URI", "")

openai_api_key = os.getenv("OPENAI_API_KEY", "")

pinecone_api_key = os.getenv("PINECONE_API_KEY", "")
pinecone_environment = os.getenv("PINECONE_ENVIRONMENT", "")
pinecone_cloud = os.getenv("PINECONE_CLOUD", "")
pinecone_metric = os.getenv("PINECONE_METRIC", "")
# No default here: the value is None when unset, otherwise a string.
# NOTE(review): convert to int where a numeric dimension is required.
pinecone_dimensions = os.getenv("PINECONE_DIMENSIONS")

neo4j_uri = os.getenv("NEO4J_URI", "")
neo4j_username = os.getenv("NEO4J_USERNAME", "")
neo4j_password = os.getenv("NEO4J_PASSWORD", "")

if not mongo_db_uri:
    graph_construction_logger.error("Please set the MONGO_DB_URI environment variable.")

if not openai_api_key:
    graph_construction_logger.error("Please set the OPENAI_API_KEY environment variable.")

if not pinecone_api_key or not pinecone_environment or not pinecone_cloud or not pinecone_metric or not pinecone_dimensions:
    graph_construction_logger.error("Please set the PINECONE_API_KEY, PINECONE_ENVIRONMENT, PINECONE_CLOUD, PINECONE_METRIC, and PINECONE_DIMENSIONS environment variables.")

if not neo4j_uri or not neo4j_username or not neo4j_password:
    # The message names the variables exactly as they are read above (NEO4J_*).
    graph_construction_logger.error("Please set the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variables.")

## Construction Pipeline

### Initialize Graph Construction System

In [6]:
# MongoDB storage configurations.
# All four configs share the connection URI and database name and differ
# only in the collection they name.
MONGO_DATABASE_NAME = "ogmyrag"

ontology_config: MongoStorageConfig = {
    "connection_uri": mongo_db_uri,
    "database_name": MONGO_DATABASE_NAME,
    "collection_name": "ontology",
}

disclosure_config: MongoStorageConfig = {
    "connection_uri": mongo_db_uri,
    "database_name": MONGO_DATABASE_NAME,
    "collection_name": "company_disclosures",
}

entity_config: MongoStorageConfig = {
    "connection_uri": mongo_db_uri,
    "database_name": MONGO_DATABASE_NAME,
    "collection_name": "entities",
}

relationship_config: MongoStorageConfig = {
    "connection_uri": mongo_db_uri,
    "database_name": MONGO_DATABASE_NAME,
    "collection_name": "relationships",
}

In [7]:
# Build the GraphConstructionSystem from the four MongoDB storage configs.
#
# graph_system is reset to None before the attempt. Without the reset, a
# failed construction on a re-run of this cell would leave a graph_system
# object from an earlier run bound in the kernel, and later cells would
# silently use that stale instance.
# Construction errors are logged, not raised.
graph_system = None
try:
    graph_system = GraphConstructionSystem(
        ontology_config=ontology_config,
        disclosure_config=disclosure_config,
        entity_config=entity_config,
        relationship_config=relationship_config,
    )
except Exception as e:
    graph_construction_logger.error(f"GraphConstructionSystem\nError while creating graph construction system: {e}")

### Process unparsed documents

In [None]:
try:
   await graph_system.insert_entities_relationships_from_unparsed_documents_into_mongodb(
   from_company="Autocount Dotcom Berhad",
   type="PROSPECTUS",
   published_at="14-April-2023",
   exclude_documents= [
      "ADB_PROSPECTUS_SECTION_2",
      "ADB_PROSPECTUS_SECTION_3",
      "ADB_PROSPECTUS_SECTION_4",
      "ADB_PROSPECTUS_SECTION_5",
      "ADB_PROSPECTUS_SECTION_6",
      "ADB_PROSPECTUS_SECTION_7A",
      "ADB_PROSPECTUS_SECTION_7B",
      "ADB_PROSPECTUS_SECTION_7C",
      "ADB_PROSPECTUS_SECTION_8",
      "ADB_PROSPECTUS_SECTION_9A",
      "ADB_PROSPECTUS_SECTION_9B",
      "ADB_PROSPECTUS_SECTION_10"
   ]
)
except Exception as e:
   graph_construction_logger.error(f"GraphConstructionSystem\nError while inserting entities and relationships into MongoDB: {e}")