In [None]:
# 2025/12/13
# zhangzhong
# https://docs.langchain.com/oss/python/langgraph/streaming

What’s possible with LangGraph streaming:
- Stream graph state — get state updates / values with updates and values modes.
- Stream subgraph outputs — include outputs from both the parent graph and any nested subgraphs.
- Stream LLM tokens — capture token streams from anywhere: inside nodes, subgraphs, or tools.
- Stream custom data — send custom updates or progress signals directly from tool functions.
- Use multiple streaming modes — choose from values (full state), updates (state deltas), messages (LLM tokens + metadata), custom (arbitrary user data), or debug (detailed traces).

| Mode | Description |
| --- | --- |
| `values` | Streams the full value of the state after each step of the graph. |
| `updates` | Streams the updates to the state after each step of the graph. If multiple updates are made in the same step (e.g., multiple nodes are run), those updates are streamed separately. |
| `custom` | Streams custom data from inside your graph nodes. |
| `messages` | Streams 2-tuples (LLM token, metadata) from any graph nodes where an LLM is invoked. |
| `debug` | Streams as much information as possible throughout the execution of the graph. |

In [7]:
import os
from langchain_openai import ChatOpenAI

# Read the key from the environment so it never lands in the notebook itself;
# a missing BIGMODEL_API_KEY raises KeyError here, before any request is made.
api_key = os.environ["BIGMODEL_API_KEY"]

# The GLM model is reached through ChatOpenAI by pointing base_url at the
# BigModel endpoint.
model = ChatOpenAI(
    model="glm-4.6",
    base_url="https://open.bigmodel.cn/api/paas/v4/",
    api_key=api_key,
    temperature=0.6,
)


In [4]:
## Basic usage example

from typing import TypedDict
from langgraph.graph import StateGraph, START, END

class State(TypedDict):
    """Graph state shared by the ``refine_topic`` and ``generate_joke`` nodes."""

    # Subject of the joke; provided in the graph input and rewritten by refine_topic.
    topic: str
    # Joke text; written by generate_joke.
    joke: str

def refine_topic(state: State):
    """Node: extend the current topic with the suffix " and cats".

    Returns a partial state update that contains only the ``topic`` key.
    """
    refined = state["topic"] + " and cats"
    return {"topic": refined}

def generate_joke(state: State):
    """Node: build a placeholder joke string from the current topic.

    Returns a partial state update that contains only the ``joke`` key.
    """
    topic = state["topic"]
    joke_text = f"This is a joke about {topic}"
    return {"joke": joke_text}

# Linear pipeline: START -> refine_topic -> generate_joke -> END.
# Node names are spelled out so they visibly match the edge endpoints below.
graph = (
    StateGraph(State)
    .add_node("refine_topic", refine_topic)
    .add_node("generate_joke", generate_joke)
    .add_edge(START, "refine_topic")
    .add_edge("refine_topic", "generate_joke")
    .add_edge("generate_joke", END)
    .compile()
)

# for mode, chunk in graph.stream(inputs, stream_mode=["updates", "custom"]):
#
# The stream() method returns an iterator that yields streamed outputs
# for chunk in graph.stream(  
#     {"topic": "ice cream"},
#     # Set stream_mode="updates" to stream only the updates to the graph state after each node
#     # Other stream modes are also available. See supported stream modes for details
#     stream_mode="updates",  
# ):
#     print(chunk)


# Passing a list of modes makes every streamed item a (mode, chunk) tuple,
# so "updates" and "values" events can be told apart while they interleave.
for mode, chunk in graph.stream(
    {"topic": "ice cream"},
    stream_mode=["updates", "values"],
):
    print(mode, chunk)

values {'topic': 'ice cream'}
updates {'refine_topic': {'topic': 'ice cream and cats'}}
values {'topic': 'ice cream and cats'}
updates {'generate_joke': {'joke': 'This is a joke about ice cream and cats'}}
values {'topic': 'ice cream and cats', 'joke': 'This is a joke about ice cream and cats'}


In [None]:
## Stream multiple modes
# You can pass a list as the stream_mode parameter to stream multiple modes at once.
# The streamed outputs will be tuples of (mode, chunk) where mode is the name of the stream mode and chunk is the data streamed by that mode.


In [None]:
## Stream graph state
# Use the stream modes updates and values to stream the state of the graph as it executes.



In [None]:
## Stream subgraph outputs
# TODO: I haven't studied subgraphs yet; in short, streaming subgraph outputs is supported.

In [5]:
## Debugging
# Use the debug streaming mode to stream as much information as possible throughout the execution of the graph. 
# The streamed outputs include the name of the node as well as the full state.

# NOTE: this uses the two-node `graph` compiled in the "Basic usage" cell; a
# later cell rebinds `graph`, so re-run that cell first when executing out of order.
# Each debug chunk is printed as-is, one event per line.
for chunk in graph.stream({"topic": "ice cream"}, stream_mode="debug"):
    print(chunk)


{'step': 1, 'timestamp': '2025-12-13T15:05:51.929842+00:00', 'type': 'task', 'payload': {'id': '80920c77-19d1-5ea8-602a-2916f8dba386', 'name': 'refine_topic', 'input': {'topic': 'ice cream'}, 'triggers': ('branch:to:refine_topic',)}}
{'step': 1, 'timestamp': '2025-12-13T15:05:51.930129+00:00', 'type': 'task_result', 'payload': {'id': '80920c77-19d1-5ea8-602a-2916f8dba386', 'name': 'refine_topic', 'error': None, 'result': {'topic': 'ice cream and cats'}, 'interrupts': []}}
{'step': 2, 'timestamp': '2025-12-13T15:05:51.930207+00:00', 'type': 'task', 'payload': {'id': 'af369478-7758-9d96-283e-284431d4ad1d', 'name': 'generate_joke', 'input': {'topic': 'ice cream and cats'}, 'triggers': ('branch:to:generate_joke',)}}
{'step': 2, 'timestamp': '2025-12-13T15:05:51.930290+00:00', 'type': 'task_result', 'payload': {'id': 'af369478-7758-9d96-283e-284431d4ad1d', 'name': 'generate_joke', 'error': None, 'result': {'joke': 'This is a joke about ice cream and cats'}, 'interrupts': []}}


In [None]:
## LLM tokens
# Use the messages streaming mode to stream Large Language Model (LLM) outputs token by token from any part of your graph, including nodes, tools, subgraphs, or tasks.
# This is great: it can be used to watch the LLM's output as it is generated.
# The streamed output from messages mode is a tuple (message_chunk, metadata) where:
# - message_chunk: the token or message segment from the LLM.
# - metadata: a dictionary containing details about the graph node and LLM invocation.

from dataclasses import dataclass

from langchain.chat_models import init_chat_model
from langgraph.graph import StateGraph, START


@dataclass
class MyState:
    """Graph state for the LLM joke example, read by ``call_model``."""

    # Subject the model is asked to joke about.
    topic: str
    # Joke returned by the model; defaults to "" so the graph input only needs `topic`.
    joke: str = ""


def call_model(state: MyState):
    """Ask the chat model for a joke about ``state.topic``.

    Returns a partial state update with the model's reply stored under ``joke``.
    """
    # Message events are emitted even though the LLM is run with .invoke
    # rather than .stream.
    prompt = {"role": "user", "content": f"Generate a joke about {state.topic}"}
    reply = model.invoke([prompt])
    return {"joke": reply.content}

# Single-node graph: START -> call_model.
# NOTE: this rebinds `graph`, replacing the two-node graph compiled earlier
# in the notebook.
graph = (
    StateGraph(MyState)
    .add_node("call_model", call_model)
    .add_edge(START, "call_model")
    .compile()
)

print("start stream...")

# In "messages" mode every streamed item is a (message_chunk, metadata) tuple:
# message_chunk is the token emitted by the LLM, and metadata is a dictionary
# describing the graph node where the LLM was called, among other details.
for message_chunk, metadata in graph.stream(
    {"topic": "ice cream"},
    stream_mode="messages",
):
    if not message_chunk.content:
        # Providers can emit empty heartbeat chunks; only token text is shown.
        # Uncomment to debug non-token events (start/end markers, etc.):
        # print(f"(empty chunk: {metadata})")
        continue
    print(message_chunk.content, end="|", flush=True)

# Notes on GLM:
# 1. It is too slow.
# 2. It does not support structured output.


start stream...
content='' additional_kwargs={} response_metadata={'model_provider': 'openai'} id='lc_run--019b184c-5fc3-7531-b387-a0f1bccb19b6'
content='' additional_kwargs={} response_metadata={'model_provider': 'openai'} id='lc_run--019b184c-5fc3-7531-b387-a0f1bccb19b6'
content='' additional_kwargs={} response_metadata={'model_provider': 'openai'} id='lc_run--019b184c-5fc3-7531-b387-a0f1bccb19b6'
content='' additional_kwargs={} response_metadata={'model_provider': 'openai'} id='lc_run--019b184c-5fc3-7531-b387-a0f1bccb19b6'
content='' additional_kwargs={} response_metadata={'model_provider': 'openai'} id='lc_run--019b184c-5fc3-7531-b387-a0f1bccb19b6'
content='' additional_kwargs={} response_metadata={'model_provider': 'openai'} id='lc_run--019b184c-5fc3-7531-b387-a0f1bccb19b6'
content='' additional_kwargs={} response_metadata={'model_provider': 'openai'} id='lc_run--019b184c-5fc3-7531-b387-a0f1bccb19b6'
content='' additional_kwargs={} response_metadata={'model_provider': 'openai'} id

In [None]:
## Filter by node
# The "messages" stream mode returns a tuple of (message_chunk, metadata)
# where message_chunk is the token streamed by the LLM and metadata is a dictionary
# with information about the graph node where the LLM was called and other information

# for msg, metadata in graph.stream(
#     inputs,
#     stream_mode="messages",  
# ):
#     # Filter the streamed tokens by the langgraph_node field in the metadata
#     # to only include the tokens from the specified node
#     if msg.content and metadata["langgraph_node"] == "some_node_name":
#         ...

In [None]:
## Filter by LLM invocation

# from langchain.chat_models import init_chat_model

# # model_1 is tagged with "joke"
# model_1 = init_chat_model(model="gpt-4o-mini", tags=['joke'])
# # model_2 is tagged with "poem"
# model_2 = init_chat_model(model="gpt-4o-mini", tags=['poem'])

# graph = ... # define a graph that uses these LLMs

# # The stream_mode is set to "messages" to stream LLM tokens
# # The metadata contains information about the LLM invocation, including the tags
# async for msg, metadata in graph.astream(
#     {"topic": "cats"},
#     stream_mode="messages",  
# ):
#     # Filter the streamed tokens by the tags field in the metadata to only include
#     # the tokens from the LLM invocation with the "joke" tag
#     if metadata["tags"] == ["joke"]:
#         print(msg.content, end="|", flush=True)

In [None]:
## Stream custom data

# To send custom user-defined data from inside a LangGraph node or tool, follow these steps:
# - Use get_stream_writer to access the stream writer and emit custom data.
# - Set stream_mode="custom" when calling .stream() or .astream() to get the custom data in the stream. You can combine multiple modes (e.g., ["updates", "custom"]), but at least one must be "custom".