In [None]:
import os, getpass

def _set_env(var: str):
    """Make sure the environment variable ``var`` has a value.

    If ``var`` is missing from ``os.environ`` (or set to an empty string),
    the value is read with ``getpass.getpass`` using the prompt
    ``"<var>: "`` and stored in the process environment. A non-empty
    existing value is left untouched.
    """
    if os.environ.get(var):
        # Already configured; nothing to ask for.
        return
    os.environ[var] = getpass.getpass(f"{var}: ")

# Prompt for the OpenAI API key unless it is already set in the environment.
_set_env("OPENAI_API_KEY")

In [None]:
from IPython.display import Image, display
from typing import Literal

from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage, RemoveMessage
from langchain_core.runnables import RunnableConfig

from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import StateGraph, START, END
from langgraph.graph import MessagesState

# LLM: module-level chat model (gpt-4o, temperature=0) invoked by
# call_model and summarize_conversation.
model = ChatOpenAI(model="gpt-4o", temperature=0) 

# State: the graph's state schema
class State(MessagesState):
    """Graph state: everything from ``MessagesState`` plus a summary field."""

    # Summary text written by summarize_conversation and read by call_model.
    summary: str

# Define the logic to call the model
def call_model(state: State, config: RunnableConfig):
    """Invoke the chat model on the conversation, prefixed by any summary.

    Args:
        state: Graph state. ``messages`` is read; ``summary`` is optional.
        config: Runnable config passed straight through to ``model.invoke``.

    Returns:
        A state update whose ``messages`` entry is the model's response.
    """
    prior_summary = state.get("summary", "")
    history = state["messages"]

    if prior_summary:
        # Put the earlier summary in a system message ahead of the
        # messages currently held in state.
        preamble = SystemMessage(
            content=f"Summary of conversation earlier: {prior_summary}"
        )
        history = [preamble] + history

    reply = model.invoke(history, config)
    return {"messages": reply}

def summarize_conversation(state: State):
    """Summarize the conversation and trim the stored message history.

    The model is asked either to create a summary or, when one already
    exists in state, to extend it with the newer messages.

    Args:
        state: Graph state. ``messages`` is read; ``summary`` is optional.

    Returns:
        A state update with the new ``summary`` text and, under
        ``messages``, one ``RemoveMessage`` per message except the two
        most recent.
    """
    existing = state.get("summary", "")

    # Default to a fresh summary; switch to "extend" when one exists.
    instruction = "Create a summary of the conversation above:"
    if existing:
        instruction = (
            f"This is summary of the conversation to date: {existing}\n\n"
            "Extend the summary by taking into account the new messages above:"
        )

    # The instruction goes last so that "above" refers to the history.
    history = state["messages"]
    prompt = history + [HumanMessage(content=instruction)]
    response = model.invoke(prompt)

    # Everything except the last two messages is marked for removal.
    removals = [RemoveMessage(id=msg.id) for msg in history[:-2]]
    return {"summary": response.content, "messages": removals}

# Determine whether to end or summarize the conversation
def should_continue(state: State) -> Literal["summarize_conversation", END]:
    """Return the next node to execute.

    Routes to ``"summarize_conversation"`` when the state holds more than
    six messages; otherwise returns ``END``.
    """
    needs_summary = len(state["messages"]) > 6
    return "summarize_conversation" if needs_summary else END

# Define a new graph over the State schema
workflow = StateGraph(State)
# Register call_model under the explicit node name "conversation"
workflow.add_node("conversation", call_model)
# No explicit name is passed here; the edge below refers to this node as
# "summarize_conversation"
workflow.add_node(summarize_conversation)

# Set the entrypoint as conversation
workflow.add_edge(START, "conversation")
# After each model call, should_continue chooses the next step
workflow.add_conditional_edges("conversation", should_continue)
workflow.add_edge("summarize_conversation", END)

# Compile with a MemorySaver checkpointer
memory = MemorySaver()
graph = workflow.compile(checkpointer=memory)
# Render the compiled graph as a Mermaid PNG in the notebook output
display(Image(graph.get_graph().draw_mermaid_png()))

### Streaming full state

Agora, vamos falar sobre maneiras de transmitir o estado do nosso grafo.

`.stream` e `.astream` são métodos síncronos e assíncronos para transmitir resultados.

O LangGraph suporta alguns modos diferentes de transmissão do estado do grafo.

* `values`: Transmite o estado completo do grafo após cada chamada de nó.

* `updates`: Transmite as atualizações do estado do grafo após cada chamada de nó.

Vamos testar primeiro o modo `stream_mode="updates"`.

In [None]:
# Create a thread
config = {"configurable": {"thread_id": "1"}}

# Start the conversation and print every streamed update chunk
for chunk in graph.stream(
    {"messages": [HumanMessage(content="hi! I'm Lance")]},
    config,
    stream_mode="updates",
):
    print(chunk)

Imprimindo apenas a atualização de estado (state update):

In [None]:
# Pretty-print only the message produced by the "conversation" node.
for chunk in graph.stream(
    {"messages": [HumanMessage(content="hi! I'm Lance")]},
    config,
    stream_mode="updates",
):
    # In "updates" mode each chunk is keyed by the node that produced it, so
    # a chunk emitted by "summarize_conversation" has no "conversation"
    # entry; indexing it directly would raise KeyError once the thread grows
    # past the summarization threshold.
    update = chunk.get("conversation")
    if update is not None:
        update["messages"].pretty_print()

Agora, utilizando `stream_mode="values"`.

Este é o estado completo do grafo depois que o nó `conversation` é chamado.

In [None]:
# Create a thread. The thread id must live under the "configurable" key;
# that is where the checkpointer looks it up.
config = {"configurable": {"thread_id": "2"}}

input_message = HumanMessage(content="hi! I'm Lance")
for event in graph.stream({"messages": [input_message]}, config, stream_mode="values"):
    # In "values" mode every event is the full state, so print all messages.
    for m in event["messages"]:
        m.pretty_print()
    print("----" * 25)

### Streaming tokens

Frequentemente, queremos transmitir mais do que apenas o estado do grafo.

Em particular, com chamadas do modelo de chat, é comum transmitir os tokens à medida que são gerados.

Podemos fazer isso usando o método `.astream_events`, que transmite eventos conforme eles ocorrem dentro dos nós!

Cada evento é um dicionário com algumas chaves:

* `event`: Este é o tipo de evento que está sendo emitido.

* `name`: Este é o nome do evento.

* `data`: Estes são os dados associados ao evento.

* `metadata`: Contém `langgraph_node`, o nó que emitiu o evento.

In [None]:

config = {"configurable": {"thread_id": "3"}}
input_message = HumanMessage(content="Tell me about the 49ers NFL team")
async for event in graph.astream_events({"messages": [input_message]}, config, version="v2"):
    print(f"Node: {event['metadata'].get('langgraph_node','')}. Type: {event['event']}. Name: {event['name']}")

O ponto central é que os tokens dos modelos de chat dentro do seu grafo têm o tipo `on_chat_model_stream`.

Podemos usar `event['metadata']['langgraph_node']` para selecionar o nó do qual transmitir os dados.

E podemos usar `event['data']` para obter os dados reais de cada evento; neste caso, é um dicionário cuja chave `chunk` contém um `AIMessageChunk`.

In [None]:
node_to_stream = 'conversation'
config = {"configurable": {"thread_id": "4"}}
input_message = HumanMessage(content="Tell me about the 49ers NFL team")
async for event in graph.astream_events({"messages": [input_message]}, config, version="v2"):
    # Get chat model tokens from a particular node 
    if event["event"] == "on_chat_model_stream" and event['metadata'].get('langgraph_node','') == node_to_stream:
        print(event["data"])

In [None]:
config = {"configurable": {"thread_id": "5"}}
input_message = HumanMessage(content="Tell me about the 49ers NFL team")
async for event in graph.astream_events({"messages": [input_message]}, config, version="v2"):
    # Get chat model tokens from a particular node 
    if event["event"] == "on_chat_model_stream" and event['metadata'].get('langgraph_node','') == node_to_stream:
        data = event["data"]
        print(data["chunk"].content, end="|")

### Streaming with LangGraph API

In [None]:
from langgraph_sdk import get_client

# This is the URL of the local development server
URL = "http://127.0.0.1:2024"
client = get_client(url=URL)

# Search all hosted graphs
assistants = await client.assistants.search()


#Os objetos transmitidos possuem:
#evento: Tipo
#dados: Estado

# Create a new thread
thread = await client.threads.create()
# Input message
input_message = HumanMessage(content="Multiply 2 and 3")
async for event in client.runs.stream(thread["thread_id"], 
                                      assistant_id="agent", 
                                      input={"messages": [input_message]}, 
                                      stream_mode="values"):
    print(event)

In [None]:
from langchain_core.messages import convert_to_messages
thread = await client.threads.create()
input_message = HumanMessage(content="Multiply 2 and 3")
async for event in client.runs.stream(thread["thread_id"], assistant_id="agent", input={"messages": [input_message]}, stream_mode="values"):
    messages = event.data.get('messages',None)
    if messages:
        print(convert_to_messages(messages)[-1])
    print('='*25)