# Architecture considerations

- Trade-Off Agency-Reliability
- Latency
- Autonomy
- Variance

# Architecture Techniques

- Streaming / Intermediate Outputs
- Structured Outputs
- Human in the Loop
- Double texting modes

### Structured output

- Prompting
- Tool calling
- JSON Mode

In [19]:
from pydantic import BaseModel, Field

# Target schema for the structured-output demo below: the chat model is
# wrapped with `with_structured_output(Joke)` so its reply comes back as a
# `Joke` instance. Documented with comments rather than a class docstring so
# the schema pydantic derives from this class stays exactly as it was.
class Joke(BaseModel):
    # Text that sets the joke up.
    setup: str = Field(description="Setup of the joke")
    # Text that delivers the punchline.
    punchline: str = Field(description="Punchline to the joke")

In [20]:
from langchain_ollama import ChatOllama

# Local Ollama chat model, wrapped so that replies are returned as `Joke` objects.
model = ChatOllama(model="llama3.2:1b").with_structured_output(Joke)

# Last expression of the cell, so the notebook displays the returned object.
model.invoke("Tell me a joke about cats")

Joke(setup='cat', punchline='Why did the cat join a band?')

- Low temperatures
- Validation errors

### Intermediate output

#### ReAct agent

In [21]:
import ast
from typing import Annotated, TypedDict

#from langchain_community.tools import DuckDuckGoSearchResults
from langchain_core.tools import tool
from langchain_ollama import ChatOllama, OllamaLLM

from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

In [22]:
def _safe_arithmetic(expression: str) -> str:
    """Evaluate a plain arithmetic expression without calling ``eval``.

    The calculator tools receive text written by the language model, so the
    text is parsed with ``ast`` and only number literals, parentheses, unary
    ``+``/``-`` and the binary operators ``+ - * / // %`` are evaluated.
    Everything else (names, calls, attribute access, ``**``, ...) is rejected
    instead of being executed.

    Args:
        expression: Arithmetic expression, e.g. ``"(123+5) - (2*3)"``.

    Returns:
        The result rendered with ``str``, matching the tools' ``-> str``
        annotation.

    Raises:
        ValueError: If the text is not valid syntax or uses an unsupported
            construct.
        ZeroDivisionError: On division or modulo by zero.
    """

    def _evaluate(node):
        if isinstance(node, ast.Expression):
            return _evaluate(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is a subclass of int; only genuine numbers are accepted.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise ValueError(f"Unsupported literal: {value!r}")
            return value
        if isinstance(node, ast.UnaryOp):
            operand = _evaluate(node.operand)
            if isinstance(node.op, ast.USub):
                return -operand
            if isinstance(node.op, ast.UAdd):
                return +operand
        if isinstance(node, ast.BinOp):
            left = _evaluate(node.left)
            right = _evaluate(node.right)
            if isinstance(node.op, ast.Add):
                return left + right
            if isinstance(node.op, ast.Sub):
                return left - right
            if isinstance(node.op, ast.Mult):
                return left * right
            if isinstance(node.op, ast.Div):
                return left / right
            if isinstance(node.op, ast.FloorDiv):
                return left // right
            if isinstance(node.op, ast.Mod):
                return left % right
        raise ValueError(
            f"Unsupported expression element: {type(node).__name__}"
        )

    try:
        # strip(): unlike eval(), ast.parse rejects leading whitespace.
        tree = ast.parse(expression.strip(), mode="eval")
    except SyntaxError as err:
        raise ValueError(
            f"Not a valid arithmetic expression: {expression!r}"
        ) from err
    return str(_evaluate(tree))


# NOTE: the docstrings of the tools below are left byte-for-byte unchanged on
# purpose: `@tool` exposes them to the model as the tool descriptions, so they
# are runtime text, not documentation. As before, every tool evaluates whatever
# arithmetic expression it is given; only the name/description differ.

@tool
def suma(query: str) -> str:
    """A simple calculator tool. Input should be a sum expression."""
    print(query)  # echo the expression the model sent
    return _safe_arithmetic(query)

@tool
def resta(query: str) -> str:
    """A simple calculator tool. Input should be a substract expression."""
    print(query)  # echo the expression the model sent
    return _safe_arithmetic(query)

@tool
def multiply(query: str) -> str:
    """A simple calculator tool. Input should be a multiply expression."""
    print(query)  # echo the expression the model sent
    return _safe_arithmetic(query)

@tool
def division(query: str) -> str:
    """A simple calculator tool. Input should be a division expression."""
    print(query)  # echo the expression the model sent
    return _safe_arithmetic(query)

#search = DuckDuckGoSearchResults()
# Calculator tools: bound to the chat model below and also handed to the
# graph's ToolNode.
tools = [suma, resta, multiply,division]

############### TEST ONLINE MODEL ##################
# Disabled alternative: an OpenAI-compatible hosted model whose key, model name
# and base URL are read from environment variables (loaded via dotenv).
#import dotenv
#import os
#from langchain_openai.chat_models import ChatOpenAI
#
#dotenv.load_dotenv()
#apikey = os.getenv("AI-API-KEY")
#model_sel = os.getenv("MODEL")
#url = os.getenv("BASE-URL")

#model = ChatOpenAI(
#    base_url=url,
#    api_key=apikey,
#    model=model_sel
#).bind_tools(tools)

####################################################
# Active model: local Ollama llama3.2:1b at temperature 0 with the tools bound.
model = ChatOllama(model="llama3.2:1b", temperature=0).bind_tools(tools)

class State(TypedDict):
    """Graph state passed between nodes: the conversation's message list."""
    # `add_messages` is attached as the annotation for this key.
    # NOTE(review): presumably langgraph uses it as the reducer that merges
    # node output into the existing list — confirm in the langgraph docs.
    messages: Annotated[list, add_messages]

def model_node(state: State) -> State:
    """Call the tool-bound chat model on the conversation so far.

    Args:
        state: Current graph state; only its ``"messages"`` entry is read.

    Returns:
        A state update whose ``"messages"`` value is the model's reply.
    """
    return {"messages": model.invoke(state["messages"])}

In [23]:
# Wire the agent loop: START -> "model", then a conditional hop out of
# "model", and "tools" always hands control back to "model".
builder = StateGraph(State)
builder.add_node("model", model_node)
builder.add_node("tools", ToolNode(tools))
builder.add_edge(START, "model")
# `tools_condition` picks the next step after each model turn.
# NOTE(review): presumably routes to "tools" when the reply carries tool calls
# and ends the run otherwise — confirm in the langgraph docs.
builder.add_conditional_edges("model", tools_condition)
builder.add_edge("tools", "model")

# Compiled without a checkpointer; later cells recompile with MemorySaver.
graph = builder.compile()

In [24]:
from langchain_core.messages import HumanMessage
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because the following cells reuse it.
input = {
    "messages": [
        HumanMessage("""Calculate (123+5) - (2*3)""")
    ]
}
# Stream the run with the default mode; as the cell output shows, each item is
# a dict keyed by the node that just ran ('model' or 'tools').
for c in graph.stream(input):
    print(c)

{'model': {'messages': AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23T13:01:01.8918304Z', 'done': True, 'done_reason': 'stop', 'total_duration': 7238069500, 'load_duration': 26877300, 'prompt_eval_count': 314, 'prompt_eval_duration': 4857409200, 'eval_count': 29, 'eval_duration': 2353041500, 'model_name': 'llama3.2:1b'}, id='run--7bcae866-cd57-4146-84df-90caf27d8d05-0', tool_calls=[{'name': 'suma', 'args': {'query': '(123+5) - (2*3)'}, 'id': 'b77879e1-913b-4024-a07f-96fa766fdf3e', 'type': 'tool_call'}], usage_metadata={'input_tokens': 314, 'output_tokens': 29, 'total_tokens': 343})}}
(123+5) - (2*3)
{'tools': {'messages': [ToolMessage(content='122', name='suma', id='f7642880-e864-4f48-a585-06cae9e58e03', tool_call_id='b77879e1-913b-4024-a07f-96fa766fdf3e')]}}
{'model': {'messages': AIMessage(content='The calculation result is 122.', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-2

In [25]:
# Same run with the stream mode given explicitly. Modes noted by the author:
# updates, values, debug, or a combination of them.
for c in graph.stream(input, stream_mode='updates'):
    print(c)

{'model': {'messages': AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23T13:01:12.736005Z', 'done': True, 'done_reason': 'stop', 'total_duration': 8653884300, 'load_duration': 70837200, 'prompt_eval_count': 314, 'prompt_eval_duration': 6394846800, 'eval_count': 29, 'eval_duration': 2184776700, 'model_name': 'llama3.2:1b'}, id='run--9853fc04-9e5f-4a35-a3d7-2a85ed1b7c90-0', tool_calls=[{'name': 'suma', 'args': {'query': '(123+5) - (2*3)'}, 'id': '837b1d8a-78cd-4ac5-882e-c0934f45ed65', 'type': 'tool_call'}], usage_metadata={'input_tokens': 314, 'output_tokens': 29, 'total_tokens': 343})}}
(123+5) - (2*3)
{'tools': {'messages': [ToolMessage(content='122', name='suma', id='c39852fc-bc1d-43bb-840d-69dc63d0b479', tool_call_id='837b1d8a-78cd-4ac5-882e-c0934f45ed65')]}}
{'model': {'messages': AIMessage(content='The calculation result is 122.', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23

### Stream token by token

In [26]:
# Event stream for the run (events API version 'v2'); it is consumed with
# `async for` in the next cell.
output = graph.astream_events(input, version='v2')

In [27]:
async for event in output:
    print(event)
    if event["event"] == "on_chat_model_stream":
        content = event["data"]["chunk"].content
        if content:
            print(content)

{'event': 'on_chain_start', 'data': {'input': {'messages': [HumanMessage(content='Calculate (123+5) - (2*3)', additional_kwargs={}, response_metadata={}, id='aa039626-644f-4ca2-a2b5-30c154b429cf')]}}, 'name': 'LangGraph', 'tags': [], 'run_id': '1d6db3f5-ba84-4a10-823a-d38c4ac0fde0', 'metadata': {}, 'parent_ids': []}
{'event': 'on_chain_start', 'data': {'input': {'messages': [HumanMessage(content='Calculate (123+5) - (2*3)', additional_kwargs={}, response_metadata={}, id='aa039626-644f-4ca2-a2b5-30c154b429cf')]}}, 'name': 'model', 'tags': ['graph:step:1'], 'run_id': '31236848-c8ee-484d-8086-1a8e4dab5692', 'metadata': {'langgraph_step': 1, 'langgraph_node': 'model', 'langgraph_triggers': ('branch:to:model',), 'langgraph_path': ('__pregel_pull', 'model'), 'langgraph_checkpoint_ns': 'model:2e4d72bd-f108-e75a-3302-71667315aa7d'}, 'parent_ids': ['1d6db3f5-ba84-4a10-823a-d38c4ac0fde0']}
{'event': 'on_chain_start', 'data': {'input': {'messages': [HumanMessage(content='Calculate (123+5) - (2*3)

### Human in the loop

In [45]:
from langgraph.checkpoint.memory import MemorySaver
# Recompile the same builder with an in-memory checkpointer so runs are stored
# per thread_id; the human-in-the-loop cells below rely on this.
graph = builder.compile(checkpointer=MemorySaver())

#### Interrupt pattern

In [46]:
import asyncio
from contextlib import aclosing
# Flag checked by the streaming loop below to decide whether to stop early.
event = asyncio.Event()

In [47]:
# Run configuration: identifies the conversation thread used with the checkpointer.
config = {"configurable":{"thread_id":"1"}}

In [48]:
async with aclosing(graph.astream(input, config)) as stream:
    async for chunk in stream:
        if event.is_set():
            break
        else:
            print(chunk)
event.set()

{'model': {'messages': AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23T13:25:34.5928106Z', 'done': True, 'done_reason': 'stop', 'total_duration': 7709399800, 'load_duration': 28057300, 'prompt_eval_count': 314, 'prompt_eval_duration': 5673627600, 'eval_count': 29, 'eval_duration': 2004545000, 'model_name': 'llama3.2:1b'}, id='run--311a740b-d4c1-4b18-8a1b-9cbbb2dedaeb-0', tool_calls=[{'name': 'suma', 'args': {'query': '(123+5) - (2*3)'}, 'id': '922dcc76-0884-4405-88a4-7878e345714f', 'type': 'tool_call'}], usage_metadata={'input_tokens': 314, 'output_tokens': 29, 'total_tokens': 343})}}
(123+5) - (2*3)
{'tools': {'messages': [ToolMessage(content='122', name='suma', id='53f6d8b5-6807-4c43-aa89-52d4cd534287', tool_call_id='922dcc76-0884-4405-88a4-7878e345714f')]}}
{'model': {'messages': AIMessage(content='The calculation result is 122.', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-2

#### Authorize pattern

> Stop before tools

In [54]:
# Start a run on thread "1" with a freshly compiled graph (new MemorySaver).
config = {"configurable": {"thread_id": "1"}}
graph = builder.compile(checkpointer=MemorySaver())
# interrupt_before=['tools']: as the cell output shows, the stream ends with an
# `__interrupt__` entry right after the model's tool call, before any
# ToolMessage is produced.
output = graph.astream(input, config, interrupt_before=['tools'])

async for c in output:
    print(c)

{'model': {'messages': AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23T13:28:47.6694365Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2080032600, 'load_duration': 27871400, 'prompt_eval_count': 314, 'prompt_eval_duration': 64784600, 'eval_count': 29, 'eval_duration': 1985814500, 'model_name': 'llama3.2:1b'}, id='run--13146437-6b7f-4c86-9019-025dfcd2726d-0', tool_calls=[{'name': 'suma', 'args': {'query': '(123+5) - (2*3)'}, 'id': '6529ceff-1eba-4eb9-8ed2-561a68ba1792', 'type': 'tool_call'}], usage_metadata={'input_tokens': 314, 'output_tokens': 29, 'total_tokens': 343})}}
{'__interrupt__': ()}


> Continue (Resume case)
> Restart option

In [55]:
# Same thread as the interrupted run.
config = {"configurable": {"thread_id": "1"}}

# Passing None instead of a new input continues the stored run: the cell
# output starts with the pending tool execution and ends with the model's answer.
output = graph.astream(None, config, interrupt_before=['tools'])

async for c in output:
    print(c)

(123+5) - (2*3)
{'tools': {'messages': [ToolMessage(content='122', name='suma', id='7a72d11c-8273-4853-b3fe-a9f4c9ee63e3', tool_call_id='6529ceff-1eba-4eb9-8ed2-561a68ba1792')]}}
{'model': {'messages': AIMessage(content='The calculation result is 122.', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23T13:29:53.6063291Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1675564300, 'load_duration': 43434100, 'prompt_eval_count': 105, 'prompt_eval_duration': 1155776200, 'eval_count': 8, 'eval_duration': 474161700, 'model_name': 'llama3.2:1b'}, id='run--bf3c783e-3a8f-4f84-b848-2b18a444246e-0', usage_metadata={'input_tokens': 105, 'output_tokens': 8, 'total_tokens': 113})}}


> Edit state

In [108]:
# Recreate the interrupted situation for the "edit state" demo: a freshly
# compiled graph (new MemorySaver) and a run that stops before the tools node.
config = {"configurable": {"thread_id": "1"}}
graph = builder.compile(checkpointer=MemorySaver())
output = graph.astream(input, config, interrupt_before=['tools'])

async for c in output:
    print(c)

{'model': {'messages': AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23T13:59:22.4918075Z', 'done': True, 'done_reason': 'stop', 'total_duration': 10025798600, 'load_duration': 1787152400, 'prompt_eval_count': 314, 'prompt_eval_duration': 6048696200, 'eval_count': 29, 'eval_duration': 2188800600, 'model_name': 'llama3.2:1b'}, id='run--9e7a4ec3-0d63-457e-a991-190262b3e98a-0', tool_calls=[{'name': 'suma', 'args': {'query': '(123+5) - (2*3)'}, 'id': 'eab580ec-5e19-4335-b39a-b2bfc2d33593', 'type': 'tool_call'}], usage_metadata={'input_tokens': 314, 'output_tokens': 29, 'total_tokens': 343})}}
{'__interrupt__': ()}


In [None]:
# Read the checkpointed state of the paused thread and take its last message
# (the model's pending tool call, per the previous cell's output).
state = graph.get_state(config)
last_message = state.values['messages'][-1]
# Overwrite the tool chosen by the model: call "resta" instead.
last_message.tool_calls[0]['name'] = "resta"

In [110]:
# Write the edited message back into the thread's state.
# NOTE(review): presumably it replaces the stored message because it keeps the
# same id — confirm against `add_messages` semantics.
update = {"messages":last_message}
# Last expression of the cell: the notebook displays the returned config,
# which carries the new checkpoint_id.
graph.update_state(config, update)

{'configurable': {'thread_id': '1',
  'checkpoint_ns': '',
  'checkpoint_id': '1f080297-1e43-616c-8002-6031e13cefea'}}

In [113]:
# Continue the paused run (None = no new input); the cell output shows the
# tool message now coming from 'resta'.
output = graph.astream(None, config, interrupt_before=['tools'])

async for c in output:
    print(c)

(123+5) - (2*3)
{'tools': {'messages': [ToolMessage(content='122', name='resta', id='2990b6cc-d51d-4c3f-87dd-c5ccbaf69643', tool_call_id='eab580ec-5e19-4335-b39a-b2bfc2d33593')]}}
{'model': {'messages': AIMessage(content='The calculation results in 122.', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23T14:00:38.6361074Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1271923600, 'load_duration': 28464500, 'prompt_eval_count': 106, 'prompt_eval_duration': 801029300, 'eval_count': 8, 'eval_duration': 441120300, 'model_name': 'llama3.2:1b'}, id='run--ea5f7a07-174d-4ae9-a545-ee52d35d4228-0', usage_metadata={'input_tokens': 106, 'output_tokens': 8, 'total_tokens': 114})}}


> Fork (Return to a past state)

In [115]:
# Materialise every state snapshot recorded for this thread.
history = list(graph.get_state_history(config))

# replay a past state: invoke with no new input, starting from the config of
# the snapshot at index 3.
# NOTE(review): which step index 3 corresponds to depends on the history
# order — confirm before reusing.
graph.invoke(None, history[3].config)

{'messages': [HumanMessage(content='Calculate (123+5) - (2*3)', additional_kwargs={}, response_metadata={}, id='aa039626-644f-4ca2-a2b5-30c154b429cf'),
  AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23T13:59:22.4918075Z', 'done': True, 'done_reason': 'stop', 'total_duration': 10025798600, 'load_duration': 1787152400, 'prompt_eval_count': 314, 'prompt_eval_duration': 6048696200, 'eval_count': 29, 'eval_duration': 2188800600, 'model_name': 'llama3.2:1b'}, id='run--9e7a4ec3-0d63-457e-a991-190262b3e98a-0', tool_calls=[{'name': 'resta', 'args': {'query': '(123+5) - (2*3)'}, 'id': 'eab580ec-5e19-4335-b39a-b2bfc2d33593', 'type': 'tool_call'}], usage_metadata={'input_tokens': 314, 'output_tokens': 29, 'total_tokens': 343}),
  ToolMessage(content='122', name='resta', id='2990b6cc-d51d-4c3f-87dd-c5ccbaf69643', tool_call_id='eab580ec-5e19-4335-b39a-b2bfc2d33593'),
  AIMessage(content='The calculation results in 122.', additional_kwa

In [116]:
# Bare expression: the notebook displays the collected StateSnapshot list.
history

[StateSnapshot(values={'messages': [HumanMessage(content='Calculate (123+5) - (2*3)', additional_kwargs={}, response_metadata={}, id='aa039626-644f-4ca2-a2b5-30c154b429cf'), AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23T13:59:22.4918075Z', 'done': True, 'done_reason': 'stop', 'total_duration': 10025798600, 'load_duration': 1787152400, 'prompt_eval_count': 314, 'prompt_eval_duration': 6048696200, 'eval_count': 29, 'eval_duration': 2188800600, 'model_name': 'llama3.2:1b'}, id='run--9e7a4ec3-0d63-457e-a991-190262b3e98a-0', tool_calls=[{'name': 'suma', 'args': {'query': '(123+5) - (2*3)'}, 'id': 'eab580ec-5e19-4335-b39a-b2bfc2d33593', 'type': 'tool_call'}], usage_metadata={'input_tokens': 314, 'output_tokens': 29, 'total_tokens': 343}), ToolMessage(content='122', name='suma', id='85cb6cff-cacb-4d48-b820-b49f37b8ef54', tool_call_id='eab580ec-5e19-4335-b39a-b2bfc2d33593'), AIMessage(content='The calculation result is 122.', a