In [1]:
! pip install -r requirements.txt --quiet

# Subgraph for Querying Structured Data

This notebook demonstrates how to use a **subgraph** to write and execute queries against a structured data source.

- Use a Parquet file stored in an **Azure Storage Account** as the structured dataset.
- Generate Python code dynamically to filter a **pandas DataFrame** based on user queries.
- Show how agents can reason over structured data sources to extract precise, relevant information.
- Demonstrate how to use a custom subgraph builder (`langchain_07b_dataframe_subgraph_builder.py`) to easily construct child agents.

This approach highlights how agents can interact with tabular data in real-time through code generation and execution.

🔗 [Subgraphs in LangGraph](https://langchain-ai.github.io/langgraph/concepts/low_level/#subgraphs)


In [1]:
from dotenv import load_dotenv
from os import environ
from langchain_openai import AzureChatOpenAI
from typing_extensions import TypedDict,Literal
from langgraph.types import Command
from langgraph.graph import MessagesState, StateGraph, START, END
from IPython.display import Image, display
from  user_tools import get_weather_tool
from user_functions import vector_search,get_datafame_from_storage
from utils import pretty_print_messages
from langchain_core.messages import HumanMessage,SystemMessage
from langgraph.prebuilt import create_react_agent
from langchain_core.tools import tool


# Load settings from a .env file into the process environment. override=True lets
# values from the file replace variables that are already set, so edits to .env
# take effect when this cell is re-run.
load_dotenv(override=True)


True

## Subgraph: Interface with Data Center Data (Azure Storage Dataframe)

In [2]:
from langchain_07b_dataframe_subgraph_builder import DataFrameQuerySubgraphBuilder

# Chat model shared by the supervisor, the weather agent and the datacenter
# subgraph. The deployment name and API version come from the environment; a
# missing variable raises KeyError here instead of failing on the first request.
llm = AzureChatOpenAI(
    temperature=0,
    azure_deployment=environ["AZURE_OPENAI_MODEL"],
    api_version=environ["AZURE_OPENAI_API_VERSION"],
)

# Usage dataset handed to the subgraph builder.
# NOTE(review): presumably a pandas DataFrame read from the Azure Storage
# Account mentioned in the introduction — confirm in user_functions.
df = get_datafame_from_storage('data/usage.parquet')

# Deliberately not called `builder`: that name is assigned to the parent
# StateGraph further down, and reusing it for two unrelated objects makes
# out-of-order cell execution error-prone.
datacenter_subgraph_builder = DataFrameQuerySubgraphBuilder(llm, df)
subgraph_datacenter = datacenter_subgraph_builder.build()

# Build Parent Graph to leverage Data Center Graph when needed

In [3]:
# Worker nodes the supervisor is allowed to dispatch to.
members = ["weather", "datacenter"]

# Every value the router may return: a worker name or the FINISH sentinel.
options = [*members, "FINISH"]

# System message placed ahead of the conversation on each supervisor turn.
# The worker list is interpolated using Python's list repr.
system_prompt = " ".join(
    [
        "You are a supervisor tasked with managing a conversation between the",
        f"following workers: {members}. Given the following user request,",
        "respond with the worker to act next. Each worker will perform a",
        "task and respond with their results and status. When finished,",
        "respond with FINISH.",
    ]
)

# Structured-output schema for the supervisor's routing decision: the supervisor
# passes this class to llm.with_structured_output() and reads the "next" key.
# NOTE(review): the docstring below is presumably forwarded to the model as the
# schema description — confirm before rewording it.
class Router(TypedDict):
    """Worker to route to next. If no workers needed, route to FINISH."""

    # Restricted to the entries of `options` (the worker names plus "FINISH").
    # Star-unpacking inside a subscript requires Python 3.11+.
    next: Literal[*options]

In [4]:
# Prebuilt ReAct-style agent built on `llm` with the weather tool as its only tool.
# NOTE(review): despite the name this is an agent, not a tool-bound LLM — the
# weather node invokes it with the full graph state and reads result["messages"].
llm_with_tools = create_react_agent(llm, tools=[get_weather_tool])

In [5]:
def weather(state: MessagesState) -> Command[Literal["supervisor"]]:
    """Run the weather agent on the current state and report back to the supervisor.

    Only the content of the agent's last message is kept. It is appended to the
    conversation as a HumanMessage named "weather", and control always returns
    to the "supervisor" node.
    """
    agent_output = llm_with_tools.invoke(state)
    final_reply = agent_output["messages"][-1]

    # Re-wrap the answer so the supervisor sees it attributed to this worker.
    worker_report = HumanMessage(content=final_reply.content, name="weather")

    return Command(goto="supervisor", update={"messages": [worker_report]})

def supervisor(state: MessagesState) -> Command[Literal[*members, "__end__"]]:
    messages = [
        {"role": "system", "content": system_prompt},
    ] + state["messages"]
    response = llm.with_structured_output(Router).invoke(messages)
    goto = response["next"]
    if goto == "FINISH":
        goto = END

    return Command(goto=goto, update={"next": goto})



In [6]:

# Assemble the parent graph. The state is the plain message list shared by the
# supervisor and both workers.
builder = StateGraph(MessagesState)

# Worker nodes plus the routing node. "datacenter" is the object returned by
# DataFrameQuerySubgraphBuilder.build(), registered directly as a node.
builder.add_node("weather", weather)
builder.add_node("datacenter", subgraph_datacenter)
builder.add_node("supervisor", supervisor)

# Every run starts at the supervisor. No static edges are declared: the
# supervisor and weather nodes pick their successor via Command(goto=...).
# NOTE(review): nothing routes out of "datacenter", and the recorded runs stop
# right after that node — confirm this is the intended behaviour.
builder.set_entry_point("supervisor")

graph = builder.compile()


In [7]:
# Aggregate question over the usage data; print each node's update as it arrives.
for step in graph.stream(
    {
        "messages": [
            {"role": "user", "content": "Return Average Data Center Temperatures?"},
        ]
    }
):
    pretty_print_messages(step)

Update from node supervisor:




Update from node datacenter:



Return Average Data Center Temperatures?

You are a Python data assistant.

DataFrame schema:
timestamp: datetime64[ns]
data_center_id: object
zone: object
power_draw_kw: float64
it_load_kw: float64
cooling_load_kw: float64
pue: float64
temperature_c: float64
humidity_percent: float64
ups_load_percent: float64
battery_backup_status: object
grid_energy_source: object
co2_emissions_kg: float64
alarm_status: object
operator_notes: object

Sample rows:
|    | timestamp                  | data_center_id   | zone   |   power_draw_kw |   it_load_kw |   cooling_load_kw |   pue |   temperature_c |   humidity_percent |   ups_load_percent | battery_backup_status   | grid_energy_source   |   co2_emissions_kg | alarm_status   | operator_notes          |
|---:|:---------------------------|:-----------------|:-------|----------------:|-------------:|------------------:|------:|----------------:|-------------------:|-------------------:|

In [8]:
# Counting question over the usage data; print each node's update as it arrives.
for step in graph.stream(
    {
        "messages": [
            {"role": "user", "content": "how many data centers are there?"},
        ]
    }
):
    pretty_print_messages(step)

Update from node supervisor:




Update from node datacenter:



how many data centers are there?

You are a Python data assistant.

DataFrame schema:
timestamp: datetime64[ns]
data_center_id: object
zone: object
power_draw_kw: float64
it_load_kw: float64
cooling_load_kw: float64
pue: float64
temperature_c: float64
humidity_percent: float64
ups_load_percent: float64
battery_backup_status: object
grid_energy_source: object
co2_emissions_kg: float64
alarm_status: object
operator_notes: object

Sample rows:
|    | timestamp                  | data_center_id   | zone   |   power_draw_kw |   it_load_kw |   cooling_load_kw |   pue |   temperature_c |   humidity_percent |   ups_load_percent | battery_backup_status   | grid_energy_source   |   co2_emissions_kg | alarm_status   | operator_notes          |
|---:|:---------------------------|:-----------------|:-------|----------------:|-------------:|------------------:|------:|----------------:|-------------------:|-------------------:|:-------

In [9]:
# Weather question; print each node's update as it arrives.
for step in graph.stream(
    {
        "messages": [
            {"role": "user", "content": "what is the weather in sf"},
        ]
    }
):
    pretty_print_messages(step)

Update from node supervisor:




Update from node weather:


Name: weather

The weather in San Francisco is currently 60°F and foggy.


Update from node supervisor:




