# Streaming

## Using LangGraph API

Explain the server part of LangGraph Studio and the preferred approach to building graphs.

In [None]:
from langgraph_sdk import get_client

# Base URL of the LangGraph API server on localhost.
# NOTE(review): the port is hard-coded — presumably it has to match the port of
# the currently running server; confirm before executing.
URL = "http://localhost:61693"
client = get_client(url=URL)

# Search all hosted graphs
assistants = await client.assistants.search()
# Bare expression so the notebook displays the search result.
assistants

In [None]:
# Show the ID of the first assistant in the search result.
assistants[0]["assistant_id"]

In [None]:
from langchain_core.messages import HumanMessage

# Create a new thread
thread = await client.threads.create()

# Start a run of the assistant on that thread and await its result.
# NOTE(review): assistant_id is hard-coded — presumably one of the IDs returned
# by client.assistants.search(); confirm it exists on your server.
# NOTE(review): the input uses a "question" key here, while the streaming
# example later in this notebook sends "messages" to the same assistant_id —
# verify which key the hosted graph expects.
final_state = await client.runs.wait(
    thread_id=thread["thread_id"],
    assistant_id="8a4ac7a4-50eb-5206-98cc-4a72345cb1f7",
    input={"question": "Hi, I’m working on a Python project, and I’m stuck with handling API responses."}
)

# Bare expression so the notebook displays the whole result.
final_state

In [None]:
# Second run on the same thread (the thread_id is reused), asking about the
# previous question.
final_state = await client.runs.wait(
    thread_id=thread["thread_id"],
    assistant_id="8a4ac7a4-50eb-5206-98cc-4a72345cb1f7",
    input={"question": "Sorry what was my previous question?"}
)

# Display only the "answer" entry of the result.
final_state["answer"]

In [None]:
# Third run on the same thread, continuing the conversation.
final_state = await client.runs.wait(
    thread_id=thread["thread_id"],
    assistant_id="8a4ac7a4-50eb-5206-98cc-4a72345cb1f7",
    input={"question": "Ahh, yeah right! So I’m mostly struggling with parsing JSON responses. Sometimes the structure isn’t what I expect, and it breaks my code."}
)

# Bare expression so the notebook displays the whole result.
final_state

## Streaming

Observe the difference between constructing a graph manually and using LangGraph Studio.

### Define chatbot graph

In [None]:
from langchain_openai import ChatOpenAI
from IPython.display import Image, display
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import MessagesState, StateGraph, START, END
from langchain_core.messages import HumanMessage, SystemMessage, RemoveMessage


# OPENAI_API_KEY environment variable must be set
llm = ChatOpenAI(model="gpt-4o-mini")

# System message that the chatbot node puts in front of the conversation on
# every model call. The prompt previously ended with a stray closing quote
# character (a copy-paste artifact) that was sent to the model; it is removed.
chatbot_system_message = SystemMessage(content="""
You are a helpful and knowledgeable chatbot assistant. 
Your goal is to provide clear and accurate answers to user questions based on the information they provide. 
Stay focused, concise, and ensure your responses are relevant to the context of the conversation. 
If you don’t have enough information, ask for clarification.
""")


# Nodes
def chatbot(state: MessagesState) -> MessagesState:
    """Generate the model's reply for the current conversation.

    Puts the module-level ``chatbot_system_message`` in front of the messages
    stored in ``state`` and sends the combined list to ``llm``.

    Args:
        state: Graph state; only its ``"messages"`` list is read.

    Returns:
        A state whose ``"messages"`` list contains just the new model
        response.
    """
    response = llm.invoke([chatbot_system_message] + state["messages"])
    return MessagesState(messages=[response])


# Graph: a single-node workflow over MessagesState.
workflow = StateGraph(MessagesState)
# The node is registered without an explicit name; the edges below refer to it
# as "chatbot".
workflow.add_node(chatbot)

# START -> chatbot -> END
workflow.add_edge(START, "chatbot")
workflow.add_edge("chatbot", END)


# Compile with a MemorySaver checkpointer; the streaming cells pass a
# thread_id in their config when calling this graph.
memory = MemorySaver()
graph = workflow.compile(checkpointer=memory)
# Render the compiled graph as a PNG image in the notebook output.
display(Image(graph.get_graph().draw_mermaid_png()))

Streaming modes:

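- updates (exposes only the new data produced by each step)
- values (always shows the whole state after each step)
- messages (streams LLM output token by token, together with metadata)
- debug (emits as much detail as possible about the execution)
- custom (emits user-defined data sent from inside nodes)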

### Stream_mode=updates

In [None]:
# Create a thread: the thread_id in the config identifies the conversation.
config = {"configurable": {"thread_id": "1"}}

user_input = HumanMessage(content="Hi, I’m working on a Python project, and I’m stuck with handling API responses.")
# stream_mode="updates": print every streamed event as-is.
for event in graph.stream({"messages": [user_input]}, config, stream_mode="updates"):
    print(event)

In [None]:
# `config` is not redefined here, so this call reuses the one from the
# previous cell (thread "1").
user_input = HumanMessage(content="Sorry what was my previous question?")
for event in graph.stream({"messages": [user_input]}, config, stream_mode="updates"):
    # Each event is indexed by the "chatbot" node key; pretty-print the
    # messages found under it.
    for m in event['chatbot']['messages']:
        m.pretty_print()

### Stream_mode=values

In [None]:
# Use a separate thread ("2") for the values-mode example.
config = {"configurable": {"thread_id": "2"}}

user_input = HumanMessage(content="Hi, I’m working on a Python project, and I’m stuck with handling API responses.")
# stream_mode="values": print every streamed event as-is.
for event in graph.stream({"messages": [user_input]}, config, stream_mode="values"):
    print(event)

In [None]:
# Same thread ("2") and same question as the plain values-mode example; this
# time every message of every event is pretty-printed.
config = {"configurable": {"thread_id": "2"}}

user_input = HumanMessage(content="Hi, I’m working on a Python project, and I’m stuck with handling API responses.")
for event in graph.stream(
    {"messages": [user_input]},
    config,
    stream_mode="values",
):
    for m in event['messages']:
        m.pretty_print()
    # Visual separator between events: two blank lines, a 100-character rule,
    # two blank lines (same output as three separate print calls).
    print("\n\n" + "#" * 100 + "\n\n")

### Streaming deeper (updates inside Node) - a.k.a. "streaming LLM tokens from a specific node"

In [None]:
# Use a separate thread ("4") for the messages-mode example.
config = {"configurable": {"thread_id": "4"}}

user_input = HumanMessage(content="Hi, I’m working on a Python project, and I’m stuck with handling API responses.")
# stream_mode="messages": print every streamed event as-is to inspect its shape.
for event in graph.stream({"messages": [user_input]}, config, stream_mode="messages"):
    print(event)

# So each event gives us a message (with content) together with its metadata.

In [None]:
# Continue on thread "4", printing only the text produced by the chatbot node.
config = {"configurable": {"thread_id": "4"}}

user_input = HumanMessage(content="Ahh, yeah right! So I’m mostly struggling with parsing JSON responses. Sometimes the structure isn’t what I expect, and it breaks my code.")
for msg, metadata in graph.stream({"messages": [user_input]}, config, stream_mode="messages"):
    # Skip anything that did not come from the chatbot node.
    if metadata['langgraph_node'] != 'chatbot':
        continue
    # end="" keeps consecutive pieces on one line.
    print(msg.content, end="")

# Same style of outputting data as in a chat app (token by token).

## Streaming with LangGraph API

In [None]:
from langgraph_sdk import get_client

# Base URL of the LangGraph API server on localhost.
# NOTE(review): the port is hard-coded — presumably it has to match the port of
# the currently running server; confirm before executing.
URL = "http://localhost:61693"
client = get_client(url=URL)

# List the assistants available on the server and display them.
assistants = await client.assistants.search()
assistants

In [None]:
thread = await client.threads.create()

input_message = HumanMessage(content="Hi, I’m working on a Python project, and I’m stuck with handling API responses.")

async for part in client.runs.stream(
        thread["thread_id"], 
        assistant_id="8a4ac7a4-50eb-5206-98cc-4a72345cb1f7", 
        input={"messages": [input_message]}, 
        stream_mode="messages"):
    print(part)

# check event types

In [None]:
from langchain_core.messages import convert_to_messages

thread = await client.threads.create()
input_message = HumanMessage(content="Should I invest in Tesla stocks?")

async for event in client.runs.stream(
            thread["thread_id"], 
            assistant_id="b7480eb0-6390-53a5-9bc4-29bf27cbd1c4", 
            input={"messages": [input_message]}, 
            stream_mode="values"):
    messages = event.data.get('messages',None)
    if messages:
        print(convert_to_messages(messages)[-1])

# display content only with convert_to_messages util