# Architecture considerations

- Trade-off: Agency vs. Reliability
- Latency
- Autonomy
- Variance

# Architecture Techniques

- Streaming / Intermediate Outputs
- Structured Outputs
- Human in the Loop
- Double texting modes

### Structured output

- Prompting
- Tool calling
- JSON Mode

In [1]:
from pydantic import BaseModel, Field

# Schema for the structured-output demo: a joke split into two text fields.
# It is passed to `with_structured_output(Joke)` in the next cell.
# NOTE(review): the `description=` strings are presumably forwarded to the
# model as part of the schema - confirm before rewording them.
class Joke(BaseModel):
    # Opening part of the joke.
    setup: str = Field(description="Setup of the joke")
    # Closing part of the joke.
    punchline: str = Field(description="Punchline to the joke")

In [2]:
from langchain_ollama import ChatOllama

# Build the local Ollama chat model and immediately wrap it so that replies
# are returned as `Joke` instances instead of free-form text.
model = ChatOllama(model="llama3.2:1b").with_structured_output(Joke)

# Last expression of the cell, so the notebook displays the resulting object.
model.invoke("Tell me a joke about cats")

Joke(setup='cat', punchline="Because they're purr-fectly lazy and have nine lives, but only when it comes to picking up after themselves.")

- Low temperatures
- Validation errors

### Intermediate output

#### ReAct agent

In [1]:
import ast
from typing import Annotated, TypedDict

#from langchain_community.tools import DuckDuckGoSearchResults
from langchain_core.tools import tool
from langchain_ollama import ChatOllama, OllamaLLM

from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

In [8]:
def _safe_arithmetic(expression: str) -> str:
    """Evaluate a basic arithmetic expression without calling ``eval``.

    The expression text is produced by the language model, so it is treated
    as untrusted: it is parsed with ``ast`` and only numeric literals,
    parentheses, unary ``+``/``-`` and the binary operators ``+``, ``-``,
    ``*`` and ``/`` are evaluated. Anything else (names, calls, attribute
    access, strings, ...) is rejected instead of being executed.

    Args:
        expression: Arithmetic expression, e.g. ``"(123+5) - (2*3)"``.

    Returns:
        The result converted with ``str`` (e.g. ``"122"``), matching the
        ``-> str`` annotation of the tools that call this helper.

    Raises:
        ValueError: If the expression uses an unsupported construct.
        SyntaxError: If the text is not a valid Python expression.
        ZeroDivisionError: If the expression divides by zero.
    """
    # Local imports keep this cell runnable on its own.
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
    }
    unary_ops = {
        ast.UAdd: operator.pos,
        ast.USub: operator.neg,
    }

    def _evaluate(node):
        if isinstance(node, ast.Expression):
            return _evaluate(node.body)
        if isinstance(node, ast.Constant):
            # Exact type check: bool is an int subclass and must not pass.
            if type(node.value) in (int, float):
                return node.value
        elif isinstance(node, ast.BinOp):
            handler = binary_ops.get(type(node.op))
            if handler is not None:
                return handler(_evaluate(node.left), _evaluate(node.right))
        elif isinstance(node, ast.UnaryOp):
            handler = unary_ops.get(type(node.op))
            if handler is not None:
                return handler(_evaluate(node.operand))
        raise ValueError(f"Unsupported arithmetic expression: {expression!r}")

    return str(_evaluate(ast.parse(expression.strip(), mode="eval")))


# NOTE: the docstrings of the tools below are used by `@tool` as the tool
# descriptions shown to the model, so they are kept verbatim; maintainer
# notes live in `#` comments instead.

# Tool for additions. The expression is printed so each tool call is visible
# in the cell output, then evaluated by the restricted arithmetic helper.
@tool
def suma(query: str) -> str:
    """A simple calculator tool. Input should be a sum expression."""
    print(query)
    return _safe_arithmetic(query)


# Tool for subtractions; same echo-then-evaluate behaviour as `suma`.
@tool
def resta(query: str) -> str:
    """A simple calculator tool. Input should be a substract expression."""
    print(query)
    return _safe_arithmetic(query)


# Tool for multiplications; same echo-then-evaluate behaviour as `suma`.
@tool
def multiply(query: str) -> str:
    """A simple calculator tool. Input should be a multiply expression."""
    print(query)
    return _safe_arithmetic(query)


# Tool for divisions; same echo-then-evaluate behaviour as `suma`.
# Division by zero propagates as ZeroDivisionError.
@tool
def division(query: str) -> str:
    """A simple calculator tool. Input should be a division expression."""
    print(query)
    return _safe_arithmetic(query)

#search = DuckDuckGoSearchResults()
# Tools made available to the model and to the graph's ToolNode.
tools = [
    suma,
    resta,
    multiply,
    division,
]

############### TEST ONLINE MODEL ##################
#import dotenv
#import os
#from langchain_openai.chat_models import ChatOpenAI
#
#dotenv.load_dotenv()
#apikey = os.getenv("AI-API-KEY")
#model_sel = os.getenv("MODEL")
#url = os.getenv("BASE-URL")

#model = ChatOpenAI(
#    base_url=url,
#    api_key=apikey,
#    model=model_sel
#).bind_tools(tools)

####################################################
# Local Ollama chat model with temperature 0, bound to the calculator tools
# so its replies can contain tool calls.
model = ChatOllama(
    model="llama3.2:1b",
    temperature=0,
).bind_tools(tools)

class State(TypedDict):
    """Graph state shared by the nodes: the conversation so far."""

    # Message list annotated with `add_messages`; LangGraph presumably uses
    # the annotation as the reducer that merges node updates into this key
    # (confirm against the LangGraph docs).
    messages: Annotated[list, add_messages]

def model_node(state: State) -> State:
    """Call the tool-bound chat model on the conversation held in the state.

    Reads ``state["messages"]``, passes it to the module-level ``model`` and
    returns a state update that stores the model's reply under ``"messages"``.
    """
    history = state["messages"]
    reply = model.invoke(history)
    return dict(messages=reply)

In [9]:
# Assemble the agent loop as a LangGraph state graph over `State`.
builder = StateGraph(State)
# "model" runs the tool-bound LLM; "tools" executes the tools it requests.
builder.add_node("model", model_node)
builder.add_node("tools", ToolNode(tools))
# Every run starts at the model node.
builder.add_edge(START, "model")
# After the model, `tools_condition` picks the next step (presumably "tools"
# when the reply contains tool calls, otherwise the end of the run - confirm
# against the LangGraph docs).
builder.add_conditional_edges("model", tools_condition)
# Tool results are always handed back to the model.
builder.add_edge("tools", "model")

# Compile the builder into the runnable graph used by the cells below.
graph = builder.compile()

In [10]:
from langchain_core.messages import HumanMessage
# Initial graph state: a single user message asking for a calculation.
# NOTE(review): `input` shadows the builtin of the same name. It is kept
# because later cells pass `input` to graph.stream / graph.astream_events;
# rename it in all of those cells together.
input = {
    "messages": [
        HumanMessage("""Calculate (123+5) - (2*3)""")
    ]
}
# Print every chunk the graph yields while it runs.
for c in graph.stream(input):
    print(c)

{'model': {'messages': AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23T12:49:57.7616696Z', 'done': True, 'done_reason': 'stop', 'total_duration': 7466736100, 'load_duration': 50417000, 'prompt_eval_count': 314, 'prompt_eval_duration': 5269612200, 'eval_count': 29, 'eval_duration': 2144016900, 'model_name': 'llama3.2:1b'}, id='run--77215243-ca51-4b19-921d-3e5081cbf51c-0', tool_calls=[{'name': 'suma', 'args': {'query': '(123+5) - (2*3)'}, 'id': '8c4486fd-d9f9-474a-bb23-fe02292672b8', 'type': 'tool_call'}], usage_metadata={'input_tokens': 314, 'output_tokens': 29, 'total_tokens': 343})}}
(123+5) - (2*3)
{'tools': {'messages': [ToolMessage(content='122', name='suma', id='e807f640-0e8d-4960-ac3c-a67d1e777aba', tool_call_id='8c4486fd-d9f9-474a-bb23-fe02292672b8')]}}
{'model': {'messages': AIMessage(content='The calculation result is 122.', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-2

In [None]:
# Same run, with the stream mode spelled out explicitly.
for c in graph.stream(input, stream_mode='updates'):  # modes noted by the author: updates, values, debug, or a combination of them
    print(c)

{'model': {'messages': AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-23T12:51:23.2165677Z', 'done': True, 'done_reason': 'stop', 'total_duration': 7987617900, 'load_duration': 33843900, 'prompt_eval_count': 314, 'prompt_eval_duration': 5721575900, 'eval_count': 29, 'eval_duration': 2229520300, 'model_name': 'llama3.2:1b'}, id='run--16236fd3-73b7-4e58-94a3-99cea143e284-0', tool_calls=[{'name': 'suma', 'args': {'query': '(123+5) - (2*3)'}, 'id': '9ff9846f-752d-4175-a4fd-cdd0ca6b8333', 'type': 'tool_call'}], usage_metadata={'input_tokens': 314, 'output_tokens': 29, 'total_tokens': 343})}}
(123+5) - (2*3)
{'tools': {'messages': [ToolMessage(content='122', name='suma', id='cc902f9d-e619-409f-bc70-28b45c7f872b', tool_call_id='9ff9846f-752d-4175-a4fd-cdd0ca6b8333')]}}
{'model': {'messages': AIMessage(content='The calculation result is 122.', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2025-08-2

### Stream token by token

In [17]:
# Event stream (version 'v2') for the same input; it is consumed by the
# `async for` loop in the next cell.
output = graph.astream_events(input, version='v2')

In [18]:
async for event in output:
    print(event)
    if event["event"] == "on_chat_model_stream":
        content = event["data"]["chunk"].content
        if content:
            print(content)

{'event': 'on_chain_start', 'data': {'input': {'messages': [HumanMessage(content='Calculate (123+5) - (2*3)', additional_kwargs={}, response_metadata={}, id='ad51f9b5-d6d5-4112-b83a-022bdadcf511')]}}, 'name': 'LangGraph', 'tags': [], 'run_id': 'ec4eb3c3-77df-4efe-a172-62786fb0f53e', 'metadata': {}, 'parent_ids': []}
{'event': 'on_chain_start', 'data': {'input': {'messages': [HumanMessage(content='Calculate (123+5) - (2*3)', additional_kwargs={}, response_metadata={}, id='ad51f9b5-d6d5-4112-b83a-022bdadcf511')]}}, 'name': 'model', 'tags': ['graph:step:1'], 'run_id': '9be77cc3-9310-4d82-849c-2c38d8ba71f5', 'metadata': {'langgraph_step': 1, 'langgraph_node': 'model', 'langgraph_triggers': ('branch:to:model',), 'langgraph_path': ('__pregel_pull', 'model'), 'langgraph_checkpoint_ns': 'model:85786ad3-6a75-aaea-f115-e1f803fa7f13'}, 'parent_ids': ['ec4eb3c3-77df-4efe-a172-62786fb0f53e']}
{'event': 'on_chain_start', 'data': {'input': {'messages': [HumanMessage(content='Calculate (123+5) - (2*3)