# Pipeline of Graph Retrieval


## Initialize Environment


In [4]:
import logging
import os
import gradio as gr
import nest_asyncio
from dotenv import load_dotenv
from motor.motor_asyncio import AsyncIOMotorClient
from ogmyrag.my_logging import configure_logger
from ogmyrag.base import MongoStorageConfig, Neo4jStorageConfig, PineconeStorageConfig, BaseLLMClient
from ogmyrag.llm import OpenAIAsyncClient
from ogmyrag.graph_retrieval import GraphRetrievalSystem

# Patch the event loop so it can be re-entered from inside Jupyter
nest_asyncio.apply()

# Setup logging: DEBUG level, written to logs/graph_retrieval.log
graph_retrieval_logger = configure_logger(
    name="graph_retrieval", log_level=logging.DEBUG, log_file="logs/graph_retrieval.log"
)

# Load environment variables (override=True lets .env values replace
# variables that are already present in the process environment)
load_dotenv(override=True)

# Every setting falls back to "" (or None for the dimension) when unset, so a
# missing variable never raises here.
mongo_db_uri = os.getenv("MONGO_DB_URI", "")

openai_api_key = os.getenv("OPENAI_API_KEY", "")

pinecone_entities_api_key = os.getenv("PINECONE_ENTITIES_API_KEY", "")
pinecone_entities_environment = os.getenv("PINECONE_ENTITIES_ENVIRONMENT", "")
pinecone_entities_cloud = os.getenv("PINECONE_ENTITIES_CLOUD", "")
pinecone_entities_metric = os.getenv("PINECONE_ENTITIES_METRIC", "")

# os.getenv always returns a string, but a vector dimension is a number, so
# convert it once here. None means "unset or unusable".
_raw_entities_dimensions = os.getenv("PINECONE_ENTITIES_DIMENSIONS")
try:
    pinecone_entities_dimensions = (
        int(_raw_entities_dimensions) if _raw_entities_dimensions else None
    )
except ValueError:
    graph_retrieval_logger.error(
        "PINECONE_ENTITIES_DIMENSIONS must be an integer, got "
        f"{_raw_entities_dimensions!r}."
    )
    pinecone_entities_dimensions = None

neo4j_uri = os.getenv("NEO4J_URI", "")
neo4j_username = os.getenv("NEO4J_USERNAME", "")
neo4j_password = os.getenv("NEO4J_PASSWORD", "")

rag_pinecone_api_key = os.getenv("PINECONE_API_KEY_JJ", "")

# Report missing configuration. These checks only log an error; they do not
# raise or stop the notebook.
if not mongo_db_uri:
    graph_retrieval_logger.error("Please set the MONGO_DB_URI environment variable.")

if not openai_api_key:
    graph_retrieval_logger.error("Please set the OPENAI_API_KEY environment variable.")

# All five Pinecone entity settings are required together.
if not all(
    (
        pinecone_entities_api_key,
        pinecone_entities_environment,
        pinecone_entities_cloud,
        pinecone_entities_metric,
        pinecone_entities_dimensions,
    )
):
    graph_retrieval_logger.error(
        "Please set the PINECONE_ENTITIES_API_KEY, PINECONE_ENTITIES_ENVIRONMENT, PINECONE_ENTITIES_CLOUD, PINECONE_ENTITIES_METRIC, and PINECONE_ENTITIES_DIMENSIONS environment variables."
    )

if not (neo4j_uri and neo4j_username and neo4j_password):
    # The message spells the names exactly as they are read from the
    # environment (NEO4J_*, letter O).
    graph_retrieval_logger.error(
        "Please set the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variables."
    )

## Setup Graph Retrieval Pipeline


### Initialize Variables for Database Connection


In [2]:
# MongoDB client; server selection gives up after 5000 ms.
mongo_client = AsyncIOMotorClient(mongo_db_uri, serverSelectionTimeoutMS=5000)

# MongoDB database/collection used for the ontology.
ontology_config: MongoStorageConfig = {
    "database_name": "ogmyrag",
    "collection_name": "ontology",
}

# Neo4j connection settings, taken from the environment.
graphdb_config: Neo4jStorageConfig = {
    "uri": neo4j_uri,
    "user": neo4j_username,
    "password": neo4j_password,
}

# Pinecone index for entities; every Pinecone setting comes from the
# PINECONE_ENTITIES_* environment variables.
entity_vector_config: PineconeStorageConfig = {
    "index_name": "ogmyrag",
    "pinecone_api_key": pinecone_entities_api_key,
    "pinecone_environment": pinecone_entities_environment,
    "pinecone_cloud": pinecone_entities_cloud,
    "pinecone_metric": pinecone_entities_metric,
    "pinecone_dimensions": pinecone_entities_dimensions,
    "openai_api_key": openai_api_key,
}

# Pinecone index for the RAG documents; only the API key is read from the
# environment, the remaining settings are fixed here.
rag_vector_config: PineconeStorageConfig = {
    "index_name": "company-disclosures-index",
    "pinecone_api_key": rag_pinecone_api_key,
    "pinecone_environment": "us-east-1",
    "pinecone_cloud": "aws",
    "pinecone_metric": "cosine",
    "pinecone_dimensions": 1536,
    "openai_api_key": openai_api_key,
}

### Initialize Graph Retrieval System


In [3]:
def _reasoning_agent_config(model: str, effort: str) -> dict:
    """Build a fresh config dict for an agent that uses a reasoning model.

    A new dict (including the nested ones) is created on every call so that
    no two agents share mutable state.
    """
    return {
        "model": model,
        "text": {"format": {"type": "text"}},
        "reasoning": {"effort": effort},
        "max_output_tokens": 100000,
    }


try:
    graph_system = GraphRetrievalSystem(
        mongo_client=mongo_client,
        ontology_config=ontology_config,
        entity_vector_config=entity_vector_config,
        graphdb_config=graphdb_config,
        rag_vector_config=rag_vector_config,
        llm_client=OpenAIAsyncClient(api_key=openai_api_key),
        agent_configs={
            "ChatAgent": _reasoning_agent_config("o4-mini", "medium"),
            "RequestDecompositionAgent": _reasoning_agent_config(
                "gpt-5-mini", "medium"
            ),
            "QueryAgent": _reasoning_agent_config("o4-mini", "high"),
            "Text2CypherAgent": _reasoning_agent_config("o4-mini", "high"),
            # The compilation agent has no "reasoning" entry and a smaller
            # output budget than the other agents.
            "RetrievalResultCompilationAgent": {
                "model": "gpt-4.1-mini",
                "text": {"format": {"type": "text"}},
                "max_output_tokens": 32768,
            },
        },
    )
except Exception as exc:
    # Construction failures are logged rather than raised; graph_system is
    # left unassigned in that case.
    graph_retrieval_logger.error(
        f"GraphRetrievalSystem\nError while creating graph retrieval system: {exc}"
    )

## Set Up User Interface


In [None]:
async def respond(message: str, history: list):
    """Stream the graph retrieval system's answer for one chat message.

    Args:
        message: The user's request, forwarded to ``graph_system.query``.
        history: Accepted as part of the chat handler signature; not used here.

    Yields:
        The full text accumulated so far after each chunk (every chunk is
        followed by a blank line), so each yielded value supersedes the
        previous one. If anything raises, the error is logged and a single
        generic error message is yielded instead.
    """
    accumulated_content = ""
    try:
        async for chunk in graph_system.query(
            user_request=message,
            top_k_for_similarity=20,
            similarity_threshold=0.6,
            max_tool_call=4,
        ):
            accumulated_content += chunk + "\n\n"
            yield accumulated_content
    except Exception as e:
        # Keep internal details in the log; show the user a generic message.
        graph_retrieval_logger.error(
            f"GraphRetrievalSystem\nError while processing user request: {e}"
        )
        yield "**Unexpected error occurred. Please contact the developer.**"


# Chat UI backed by the `respond` handler.
demo = gr.ChatInterface(
    respond,
    title="Ontology-Grounded Graph-Based RAG",
    type="messages",
)

# Start the Gradio app.
demo.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




2025-09-13 12:09:31,045 - graph_retrieval - INFO - ChatAgent is called
2025-09-13 12:09:31,045 - graph_retrieval - DEBUG - ChatAgent
System prompt used:

You are the ChatAgent, operating in a Hybrid RAG system for Malaysian listed companies. Your responsibilities are:
   [1] Interact with users.
   [2] Call the appropriate tool(s) to retrieve relevant information.
   [3] Generate responses strictly based on retrieved results, or when instructed to stop.

Guidelines
   [1] Interaction Logic
      - First, determine the nature of the user request.
      
      - If the request is a read query and potentially related to Malaysian listed companies:
         - Call the appropriate tool to retrieve relevant information.
         
         - After retrieval, decide whether another tool call is required.
      
         - When generating the response, ensure the retrieved information is, in order of priority:
            1. Relevant: aligns with the user’s request.

            2. Decision-rea