# Pipeline of Graph Retrieval

## Initialization

In [1]:
import logging
import os
import gradio as gr
import nest_asyncio
from dotenv import load_dotenv
from motor.motor_asyncio import AsyncIOMotorClient

In [2]:
from ogmyrag.my_logging import configure_logger
from ogmyrag.base import MongoStorageConfig, Neo4jStorageConfig, PineconeStorageConfig
from ogmyrag.graph_retrieval import GraphRetrievalSystem

In [3]:
# Patch event loop to support re-entry in Jupyter.
# The notebook kernel already runs an asyncio event loop; nest_asyncio.apply()
# patches asyncio so that loop can be used in a nested fashion.
nest_asyncio.apply()

In [4]:
# Setup logging: one named logger for the whole notebook, at DEBUG verbosity.
# NOTE(review): log_file is presumably where configure_logger writes records;
# confirm that the "logs/" directory is created by the helper.
graph_retrieval_logger = configure_logger(
    name="graph_retrieval",
    log_level=logging.DEBUG,
    log_file="logs/graph_retrieval.log",
)

In [5]:
# Load environment variables.
# override=True makes values from the .env file take precedence over variables
# that are already set in the process environment.
load_dotenv(override=True)

mongo_db_uri = os.getenv("MONGO_DB_URI", "")

openai_api_key = os.getenv("OPENAI_API_KEY", "")

pinecone_entities_api_key = os.getenv("PINECONE_ENTITIES_API_KEY", "")
pinecone_entities_environment = os.getenv("PINECONE_ENTITIES_ENVIRONMENT", "")
pinecone_entities_cloud = os.getenv("PINECONE_ENTITIES_CLOUD", "")
pinecone_entities_metric = os.getenv("PINECONE_ENTITIES_METRIC", "")

# os.getenv always returns a string, while rag_vector_config passes its
# dimension as a plain int (1536); convert here so both configs agree.
pinecone_entities_dimensions_raw = os.getenv("PINECONE_ENTITIES_DIMENSIONS", "")
try:
    pinecone_entities_dimensions = (
        int(pinecone_entities_dimensions_raw)
        if pinecone_entities_dimensions_raw
        else None
    )
except ValueError:
    graph_retrieval_logger.error(
        f"PINECONE_ENTITIES_DIMENSIONS must be an integer, got {pinecone_entities_dimensions_raw!r}."
    )
    pinecone_entities_dimensions = None

neo4j_uri = os.getenv("NEO4J_URI", "")
neo4j_username = os.getenv("NEO4J_USERNAME", "")
neo4j_password = os.getenv("NEO4J_PASSWORD", "")

rag_pinecone_api_key = os.getenv("PINECONE_API_KEY_JJ", "")


# Missing settings are reported through the logger rather than raised, so every
# problem is listed in a single run of this cell.
if not mongo_db_uri:
    graph_retrieval_logger.error("Please set the MONGO_DB_URI environment variable.")

if not openai_api_key:
    graph_retrieval_logger.error("Please set the OPENAI_API_KEY environment variable.")

if (
    not pinecone_entities_api_key
    or not pinecone_entities_environment
    or not pinecone_entities_cloud
    or not pinecone_entities_metric
    or not pinecone_entities_dimensions
):
    graph_retrieval_logger.error(
        "Please set the PINECONE_ENTITIES_API_KEY, PINECONE_ENTITIES_ENVIRONMENT, PINECONE_ENTITIES_CLOUD, PINECONE_ENTITIES_METRIC, and PINECONE_ENTITIES_DIMENSIONS environment variables."
    )

if not neo4j_uri or not neo4j_username or not neo4j_password:
    # The variable names must match the os.getenv keys above (letter O, not zero).
    graph_retrieval_logger.error(
        "Please set the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variables."
    )

if not rag_pinecone_api_key:
    graph_retrieval_logger.error(
        "Please set the PINECONE_API_KEY_JJ environment variable."
    )

## Initialize Graph Retrieval System

In [6]:
# --- MongoDB: where the ontology is stored ---
ontology_config: MongoStorageConfig = {
    "database_name": "ogmyrag",
    "collection_name": "ontology_v2",
}

# --- Neo4j: graph database credentials read from the environment ---
graphdb_config: Neo4jStorageConfig = {
    "uri": neo4j_uri,
    "user": neo4j_username,
    "password": neo4j_password,
}

# --- Pinecone: entity index, fully configured from the environment ---
entity_vector_config: PineconeStorageConfig = {
    "index_name": "ogmyrag",
    "pinecone_api_key": pinecone_entities_api_key,
    "pinecone_environment": pinecone_entities_environment,
    "pinecone_cloud": pinecone_entities_cloud,
    "pinecone_metric": pinecone_entities_metric,
    "pinecone_dimensions": pinecone_entities_dimensions,
    "openai_api_key": openai_api_key,
}

# --- Pinecone: RAG index; only the API key comes from the environment,
# the remaining settings are fixed in this notebook ---
rag_vector_config: PineconeStorageConfig = {
    "index_name": "company-disclosures-index",
    "pinecone_api_key": rag_pinecone_api_key,
    "pinecone_environment": "us-east-1",
    "pinecone_cloud": "aws",
    "pinecone_metric": "cosine",
    "pinecone_dimensions": 1536,
    "openai_api_key": openai_api_key,
}
    

In [7]:
# Async MongoDB client shared with the retrieval system.
# serverSelectionTimeoutMS is given in milliseconds (5000 ms = 5 s).
mongo_client = AsyncIOMotorClient(
    mongo_db_uri,
    serverSelectionTimeoutMS=5000,
)

# Build the retrieval system from the storage configs defined above.
try:
    graph_system = GraphRetrievalSystem(
        mongo_client=mongo_client,
        ontology_config=ontology_config,
        entity_vector_config=entity_vector_config,
        graphdb_config=graphdb_config,
        rag_vector_config=rag_vector_config,
    )
except Exception as e:
    # .exception records the traceback alongside the message.
    graph_retrieval_logger.exception(
        f"GraphRetrievalSystem\nError while creating graph retrieval system: {e}"
    )
    # Re-raise: if construction failed, `graph_system` is undefined and every
    # later cell would otherwise fail with an unrelated NameError.
    raise

In [9]:
chunks = []
async for chunk in graph_system.rag_query(
    user_request="what is the mission and vision for autocount?",
    top_k_for_similarity=15,
    similarity_threshold=0.5,
):
    chunks.append(chunk)

res = "\n\n".join(chunks)   # or pick the last chunk, or parse it

print(res)

2025-09-01 15:27:15,009 - graph_retrieval - INFO - RAGAgent is called
2025-09-01 15:27:15,011 - graph_retrieval - DEBUG - RAGAgent
Queries used:
['what is the mission and vision for autocount?']
2025-09-01 15:27:35,332 - graph_retrieval - INFO - RAGAgent: completed RAG for query='what is the mission and vision for autocount?'


## Calling RAGAgent...

- Mission: To provide cost-effective business software to SMEs in Asia.
- Vision: To be the No.1 business software provider in Asia. This vision represents our commitment to achieving market dominance, driving innovation, ensuring customer satisfaction, and influencing industry standards. We aim to position ourselves as the foremost provider of high-quality, comprehensive business software across Asia.


## Setting Up User Interface

In [8]:
async def respond(message: str, history: list):
    """Stream the answer to a chat message for the Gradio chat UI.

    Each chunk produced by ``graph_system.rag_query`` is appended (followed by
    a blank line) to the text accumulated so far, and the whole accumulated
    text is yielded after every chunk.

    Args:
        message: The user's request, forwarded as ``user_request``.
        history: Previous chat messages supplied by the UI; not used here.

    Yields:
        The accumulated response text after each chunk, or a generic error
        message if the query raises.
    """
    accumulated_content = ""
    try:
        async for chunk in graph_system.rag_query(
            user_request=message, top_k_for_similarity=15, similarity_threshold=0.5
        ):
            accumulated_content += chunk + "\n\n"
            yield accumulated_content
    except Exception as e:
        # The failure happens while answering a request, not while building
        # the system, so the log message must say so. The traceback is
        # attached by .exception().
        graph_retrieval_logger.exception(
            "GraphRetrievalSystem\nError while answering user request: %s", e
        )
        yield "Unexpected error occurred. Please contact the developer"


# Chat UI whose replies are produced by the streaming `respond` generator.
demo = gr.ChatInterface(
    respond, type="messages", title="Ontology-Grounded Graph-Based RAG"
)

# Start serving the interface.
demo.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




2025-09-08 17:11:25,710 - graph_retrieval - INFO - RAGAgent is called
2025-09-08 17:11:25,711 - graph_retrieval - DEBUG - RAGAgent
Queries used:
['what does cabnet holdings do?']
2025-09-08 17:11:48,646 - graph_retrieval - INFO - RAGAgent: completed RAG for query='what does cabnet holdings do?'
2025-09-08 17:12:48,023 - graph_retrieval - INFO - RAGAgent is called
2025-09-08 17:12:48,025 - graph_retrieval - DEBUG - RAGAgent
Queries used:
['who are the board directors of autocount?']
2025-09-08 17:13:08,912 - graph_retrieval - INFO - RAGAgent: completed RAG for query='who are the board directors of autocount?'
2025-09-08 17:13:24,561 - graph_retrieval - INFO - RAGAgent is called
2025-09-08 17:13:24,562 - graph_retrieval - DEBUG - RAGAgent
Queries used:
['who are the board directors of cabnet?']
2025-09-08 17:13:42,221 - graph_retrieval - INFO - RAGAgent: completed RAG for query='who are the board directors of cabnet?'
