In [1]:
from dotenv import load_dotenv
load_dotenv()

from typing import Annotated
from typing_extensions import TypedDict

from langgraph.graph import StateGraph,START,END
from langgraph.graph.message import add_messages

In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI
from google.generativeai.types import HarmCategory, HarmBlockThreshold

import os

os.environ["GOOGLE_API_KEY"] = os.environ["GOOGLE_API_KEY4"]
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.8,
    max_tokens=None,
    timeout=None,
    max_retries=1,
    safety_settings={
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
    }
)

In [3]:
class State(TypedDict):
    # Messages have the type "list". The `add_messages` function
    # in the annotation defines how this state key should be updated
    # (in this case, it appends messages to the list, rather than overwriting them)
    question:str
    messages:Annotated[list,add_messages]
    loop_count:int
    answer:str

In [4]:
# Wait 60 seconds before connecting using these details, or login to https://console.neo4j.io to validate the Aura Instance is available
NEO4J_URI="neo4j+s://2d5e8539.databases.neo4j.io"
NEO4J_USERNAME="neo4j"
NEO4J_PASSWORD="xn8iCGEj2vymA3-43-57qlL63CD70SthzTE_Mt8QfG0"
NEO4J_DATABASE="neo4j"
AURA_INSTANCEID="2d5e8539"
AURA_INSTANCENAME="Instance01"


from langchain_neo4j import Neo4jGraph
enhanced_graph_db = Neo4jGraph(
    url=NEO4J_URI,
    username="neo4j",
    password=NEO4J_PASSWORD,
    driver_config={
        "max_connection_lifetime": 300,  # 5 minutes
        "keep_alive": True,
        "max_connection_pool_size": 50
    },
    enhanced_schema=True)

graph_db = Neo4jGraph(
    url=NEO4J_URI,
    username="neo4j",
    password=NEO4J_PASSWORD,
    driver_config={
        "max_connection_lifetime": 300,  # 5 minutes
        "keep_alive": True,
        "max_connection_pool_size": 50
    },
    enhanced_schema=False)

In [15]:
print(graph_db.schema)

Node properties:
Order {ORDERNUMBER: INTEGER, ORDERDATE: STRING, STATUS: STRING, QTR_ID: INTEGER, MONTH_ID: INTEGER, YEAR_ID: INTEGER}
Product {PRODUCTCODE: STRING, MSRP: FLOAT, PRODUCTLINE: STRING}
Customer {CUSTOMERNAME: STRING, PHONE: STRING, ADDRESSLINE1: STRING, CITY: STRING, POSTALCODE: STRING, COUNTRY: STRING, TERRITORY: STRING, CONTACTLASTNAME: STRING, CONTACTFIRSTNAME: STRING, ADDRESSLINE2: STRING, STATE: STRING}
Relationship properties:
CONTAINS {PRICEEACH: FLOAT, DEALSIZE: STRING, QUANTITYORDERED: INTEGER, ORDERLINENUMBER: INTEGER, SALES: FLOAT}
The relationships:
(:Order)-[:CONTAINS]->(:Product)
(:Order)-[:PLACED_BY]->(:Customer)


In [5]:
from langraph_neo4j3 import AgentState, run_agent_workflow
from langchain_core.tools import tool

@tool
def query_tool(query):
    """This tool can query data from graph database. Query must be in english only."""
    state: AgentState = {
            "question": query,
            "next_action": "",
            "cypher_errors": [],
            "database_records": [],
            "steps": [],
            "answer": "",
            "cypher_statement": ""
        }
    result = run_agent_workflow(state,enhanced_graph_db)
    return result["answer"]

tools=[query_tool]
llm_with_tool=llm.bind_tools(tools)
llm_with_tool


RunnableBinding(bound=ChatGoogleGenerativeAI(model='models/gemini-2.5-flash', google_api_key=SecretStr('**********'), temperature=0.8, max_retries=1, safety_settings={<HarmCategory.HARM_CATEGORY_HARASSMENT: 7>: <HarmBlockThreshold.BLOCK_NONE: 4>, <HarmCategory.HARM_CATEGORY_HATE_SPEECH: 8>: <HarmBlockThreshold.BLOCK_NONE: 4>, <HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: 9>: <HarmBlockThreshold.BLOCK_NONE: 4>, <HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: 10>: <HarmBlockThreshold.BLOCK_NONE: 4>}, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001705CC4EE90>, default_metadata=()), kwargs={'tools': [{'type': 'function', 'function': {'name': 'query_tool', 'description': 'This tool can query data from graph database. Query must be in english only.', 'parameters': {'properties': {'query': {}}, 'required': ['query'], 'type': 'object'}}}]}, config={}, config_factories=[])

In [6]:
# 1. The question will not be a direct question. 
# 2. You need to identify the target & the problem from the question first.
# 3. Then find all the possible data sources i.e. nodes and relations affecting the target with the problem using the graph schema provided.
# 4. Then develop a probable reason (hypothesis) for the causing the problem and verify the contribution of that reason in causing the problem using the data queried with the query_tool.
# 5. Step by step you will have to generate hypothesis and verify it using query_tool. Improvise results with each step and reach conclusion.

# Note
# 1. query_tool accepts clear and specific instructions to query the graph in plane english language only, donot generate cypher by yourself.
# 2. Try to get as much data as possible in one single query_tool using words like 'and', 'respectively' etc.
# 3. You can only use the query_tool 6 times.

In [7]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages(
        [
            (
"system",
"""
You are a expert data analyst. Your job is to do the root cause analysis for the provided question. 
You will be provided with a graph database schema and a graph query_tool to query the data from graph database. 
Note:
1. query_tool accepts instructions in english language only.
"""
            ),
            (
"human",
"""
### Graph database schema (Use it for understanding relations)
---
{schema}
---
### Conversation History:
---
{conversation}
---
User Question:
{question}

"""
            ),
        ]
    )
chain = prompt | llm_with_tool

In [8]:
graph_builder = StateGraph(State)

# Modification: tell the LLM which tools it can call
llm_with_tools = llm.bind_tools(tools)

def chatbot(state: State):
    return {"messages": [chain.invoke({"schema":graph_db.schema,"conversation":state['messages'], "question": state['question'],"loop_count":state["loop_count"]+1})]}

graph_builder.add_node("chatbot", chatbot)

<langgraph.graph.state.StateGraph at 0x1706d6d5550>

In [9]:
from langchain_core.prompts import ChatPromptTemplate
summary_prompt = ChatPromptTemplate.from_messages(
        [
            ("system","You job is to summarize the conversation and frame an answer for the main question asked in the conversation."),
            ("human","{conversation}")
        ]
    )
summary_chain = summary_prompt | llm
def summary(state: State): 
    return {"answer":summary_chain.invoke({"conversation":state['messages']})}

graph_builder.add_node("summary", summary)

<langgraph.graph.state.StateGraph at 0x1706d6d5550>

In [10]:
import json

from langchain_core.messages import ToolMessage

class BasicToolNode:
    """A node that runs the tools requested in the last AIMessage."""

    def __init__(self, tools: list) -> None:
        self.tools_by_name = {tool.name: tool for tool in tools}

    def __call__(self, inputs: dict):
        if messages := inputs.get("messages", []):
            message = messages[-1]
        else:
            raise ValueError("No message found in input")
        outputs = []
        for tool_call in message.tool_calls:
            tool_result = self.tools_by_name[tool_call["name"]].invoke(
                tool_call["args"]
            )
            outputs.append(
                ToolMessage(
                    content=json.dumps(tool_result),
                    name=tool_call["name"],
                    tool_call_id=tool_call["id"],
                )
            )
        return {"messages": outputs}

tool_node = BasicToolNode(tools=[query_tool])
graph_builder.add_node("tools", tool_node)

<langgraph.graph.state.StateGraph at 0x1706d6d5550>

In [11]:
def route_tools(state: State,):
    if state['loop_count']>6:
        return "summary"
    """
    Use in the conditional_edge to route to the ToolNode if the last message
    has tool calls. Otherwise, route to the end.
    """
    if isinstance(state, list):
        ai_message = state[-1]
    elif messages := state.get("messages", []):
        ai_message = messages[-1]
    else:
        raise ValueError(f"No messages found in input state to tool_edge: {state}")
    if hasattr(ai_message, "tool_calls") and len(ai_message.tool_calls) > 0:
        return "tools"
    return END

In [12]:
# The `tools_condition` function returns "tools" if the chatbot asks to use a tool, and "END" if
# it is fine directly responding. This conditional routing defines the main agent loop.
graph_builder.add_conditional_edges(
    "chatbot",
    route_tools,
    # The following dictionary lets you tell the graph to interpret the condition's outputs as a specific node
    # It defaults to the identity function, but if you
    # want to use a node named something else apart from "tools",
    # You can update the value of the dictionary to something else
    # e.g., "tools": "my_tools"
    {"tools": "tools", END: END, "summary":"summary"},
)
# Any time a tool is called, we return to the chatbot to decide the next step
graph_builder.add_edge("tools", "chatbot")
graph_builder.add_edge(START, "chatbot")
graph_builder.add_edge("summary",END)
graph = graph_builder.compile()

In [13]:
def stream_graph_updates(user_input: str):
    initial_state = {
        "question": user_input,
        "messages": [],
        "loop_count": 0,
        "answer": "",
    }

    print(f"\nüß† Processing user input: {user_input}\n")

    final_state = None

    for event in graph.stream(initial_state):
        for value in event.values():
            final_state = value
            messages = value.get("messages", [])
            if not messages:
                continue

            last_msg = messages[-1]
            role = getattr(last_msg, "type", "ai")
            content = getattr(last_msg, "content", "")

            # üßç User messages
            if role in ["human", "user"]:
                print(f"üßç‚Äç‚ôÇÔ∏è User: {content}")

            # ü§ñ AI messages (can include tool calls)
            elif role in ["ai", "assistant"]:
                if content.strip():
                    print(f"ü§ñ AI: {content}")
                else:
                    tool_calls = getattr(last_msg, "tool_calls", None)
                    if tool_calls:
                        for call in tool_calls:
                            tool_args = call.get("args", {})
                            query_text = tool_args.get("query", "").strip()
                            if query_text:  # ‚úÖ only print if query exists
                                print(f"üîç Querying... '{query_text}'")
                    # skip if no tool_calls or empty args ‚Äî don‚Äôt print anything

            # üß∞ Tool message (rarely used here, but safe fallback)
            elif role == "tool":
                tool_args = getattr(last_msg, "args", {})
                query_text = tool_args.get("query", "").strip()
                if query_text:
                    print(f"üîç Querying... '{query_text}'")

            else:
                print(f"{role.capitalize()}: {content}")

    print("\n" + "-" * 60 + "\n")

    return final_state


# Example usage
user_input = "Why is the sale decreasing from 2004 to 2005?"
final_state = stream_graph_updates(user_input)

print("üß© Final Agent State Keys:", list(final_state.keys()))
print("üí¨ Final Answer:", final_state.get("answer", "No answer found"))



üß† Processing user input: Why is the sale decreasing from 2004 to 2005?

üîç Querying... 'Get total sales for each year from 2004 to 2005'
üîç Querying... 'Get the count of orders for each status in 2004'
üîç Querying... 'Get the count of orders for each status in 2005'
ü§ñ AI: The significant decrease in sales from 2004 to 2005 can be attributed to a sharp decline in the volume of successfully completed orders.

Here's a breakdown of the contributing factors:

*   **Reduced Number of Successful Orders:** In 2004, a total of 140 orders were successfully completed (139 Shipped and 1 Resolved), directly contributing to the sales figures. In contrast, 2005 saw a drastic reduction to only 47 orders reaching a successful status (45 Shipped and 2 Resolved). This substantial drop in completed transactions directly impacts total sales.
*   **Overall Decrease in Orders:** The total number of orders placed also saw a significant decline from 144 in 2004 to 59 in 2005. Fewer orders being i