In [1]:
from pathlib import Path
import re
from typing import Any
from pydantic import Field, BaseModel
from collections.abc import Mapping

from grasp_agents import (
    BaseTool,
    LLMAgent,
    LLMPromptArgs,
    RunContextWrapper,
    RunArgs,
    AgentMessage,
    ImageData,
    Conversation,
)
from grasp_agents.openai import OpenAILLM, OpenAILLMSettings
from grasp_agents.grasp_logging import setup_logging
from grasp_agents.agent_message_pool import AgentMessagePool
from grasp_agents.comm_agent import DynCommPayload
from grasp_agents.utils import get_timestamp
from grasp_agents.workflow.sequential_agent import SequentialWorkflowAgent

  from tqdm.autonotebook import tqdm


Set up logging to write to the console and/or a file.

In [2]:
# The notebook treats the current working directory as the package root.
PACKAGE_DIR = Path.cwd()
# Directory that receives the log files written by this demo.
LOGGING_DIR = PACKAGE_DIR.joinpath("data", "multiagent", "logs")

In [3]:
# Logging configuration file, resolved relative to PACKAGE_DIR.
LOGGING_CFG_PATH = PACKAGE_DIR.joinpath("configs", "logging", "default.yaml")
# Every run gets its own timestamped log file inside LOGGING_DIR.
setup_logging(
    LOGGING_DIR / f"grasp_agents_demo_{get_timestamp()}.log",
    LOGGING_CFG_PATH,
)

Paths to images used in the demo

In [4]:
# Remote image, passed to the agent by URL.
IMG_1_URL = "https://www.simplilearn.com/ice9/free_resources_article_thumb/Expressions_In_C_2.PNG"
# Local image, resolved relative to PACKAGE_DIR and loaded from disk.
IMG_2_PATH = PACKAGE_DIR.joinpath(
    "src", "grasp_agents", "examples", "data", "expr.jpeg"
)

Utils

In [5]:
def print_single_output(out: Any) -> None:
    """Print the first payload of ``out`` under a ``<Response>:`` header.

    Args:
        out: Any object exposing an indexable ``payloads`` attribute; only
            ``payloads[0]`` is printed.
    """
    first_payload = out.payloads[0]
    print(f"\n<Response>:\n{first_payload}")

## Simple generation with validated outputs

Output type validation

In [6]:
# The second type parameter (list[int]) is the output type: the LLM reply is
# validated against it before being returned as a payload.
chatbot = LLMAgent[None, list[int], None](
    agent_id="chatbot",
    llm=OpenAILLM(model_name="gpt-4.1", api_provider="openai"),
)

# The run context initialises the message printer and the usage tracker;
# print_messages=True echoes every message of the conversation.
ctx = RunContextWrapper[None](print_messages=True)

In [7]:
# Code block delimiters (if the model adds any) are stripped from the output
# before it is validated as list[int].
out = await chatbot.run(
    "Output a list of 3 integers from 0 to 10 as a python array, no talking",
    ctx=ctx,
)
print_single_output(out)

[32m
<chatbot>[USER]
Output a list of 3 integers from 0 to 10 as a python array, no talking[0m
[94m
<chatbot>[ASSISTANT]
[
  3,
  7,
  1
][0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 27/9/0/0[0m

<Response>:
[3, 7, 1]


Output type validation with structured outputs

In [8]:
# Some providers (e.g. `openai` and `google_ai_studio`) support structured outputs.
# With the OpenAI API, this will require a Pydantic model to validate the output.

from enum import StrEnum


# String-valued choices used for the `value` field of `Response`.
# (Documented with comments rather than a docstring so the generated JSON
# schema stays unchanged.)
class Selector(StrEnum):
    A = "A"
    B = "B"


# Structured output schema for the chatbot below. The Field descriptions are
# part of the schema, so they double as instructions to the model.
class Response(BaseModel):
    result: list[int] = Field(..., description="3 random integers")
    value: Selector = Field(..., description="Choose a value randomly")


# Same agent id as before, but the output type is now the `Response` model and
# the LLM is asked to use the provider's structured-output mode.
chatbot = LLMAgent[None, Response, None](
    agent_id="chatbot",
    llm=OpenAILLM(
        model_name="gemini-2.0-flash",
        api_provider="google_ai_studio",
        llm_settings=OpenAILLMSettings(use_structured_outputs=True),
        # Optional override, see the note below:
        # response_format=Response,
    ),
)

# By default, response_format is set to the output type of the agent (Response).
# In some cases, you may want to set it to a different type, e.g. when using
# custom output parsing.

# Fresh context, so printing and usage tracking start from scratch.
ctx = RunContextWrapper[None](print_messages=True)

In [9]:
# The user message is a mere trigger here; the reply is validated against
# the `Response` model.
out = await chatbot.run("start", ctx=ctx)
print_single_output(out)

[32m
<chatbot>[USER]
start[0m
[94m
<chatbot>[ASSISTANT]
{
  "result": [
    42,
    7,
    99
  ],
  "value": "B"
}[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 21/33[0m

<Response>:
result=[42, 7, 99] value=<Selector.B: 'B'>


# Chat with images

In [10]:
# Plain-text chatbot: the output type is `str`.
chatbot = LLMAgent[None, str, None](
    agent_id="chatbot", llm=OpenAILLM(model_name="gpt-4.1", api_provider="openai")
)

# One context is shared by all chat turns below, so usage accumulates in it.
ctx = RunContextWrapper[None](print_messages=True)

In [11]:
# First turn of the conversation (referred back to by a later cell).
out = await chatbot.run("Where are you headed, stranger?", ctx=ctx)
print_single_output(out)

[32m
<chatbot>[USER]
Where are you headed, stranger?[0m
[94m
<chatbot>[ASSISTANT]
Well howdy! I reckon I’m headed wherever the next question leads—just following the trail of curiosity. Where are you off to, partner?[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 14/30/0/0[0m

<Response>:
Well howdy! I reckon I’m headed wherever the next question leads—just following the trail of curiosity. Where are you off to, partner?


In [12]:
# Follow-up turn: the agent retained the previous exchange, so it can quote it.
out = await chatbot.run("What did you just say, exactly?", ctx=ctx)
print_single_output(out)

[32m
<chatbot>[USER]
What did you just say, exactly?[0m
[94m
<chatbot>[ASSISTANT]
I said:  
“Well howdy! I reckon I’m headed wherever the next question leads—just following the trail of curiosity. Where are you off to, partner?”

Would you like me to say it again, or maybe say it a different way?[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 60/51/0/0[0m

<Response>:
I said:  
“Well howdy! I reckon I’m headed wherever the next question leads—just following the trail of curiosity. Where are you off to, partner?”

Would you like me to say it again, or maybe say it a different way?


In [13]:
# Multimodal input: a list mixing text with an image loaded from a local file.
out = await chatbot.run(
    ["What's in this image?", ImageData.from_path(IMG_2_PATH)], ctx=ctx
)
print_single_output(out)

[32m
<chatbot>[USER]
What's in this image?
<ENCODED_IMAGE>[0m
[94m
<chatbot>[ASSISTANT]
The image contains a mathematical expression:

```
7 * (5 + 15) / (2 * 5) - 3
```

Would you like me to solve it?[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 379/38/0/0[0m

<Response>:
The image contains a mathematical expression:

```
7 * (5 + 15) / (2 * 5) - 3
```

Would you like me to solve it?


In [14]:
# Continue the same conversation; the image from the previous turn is still in context.
out = await chatbot.run("Go on", ctx=ctx)
print_single_output(out)

[32m
<chatbot>[USER]
Go on[0m
[94m
<chatbot>[ASSISTANT]
Sure! Let's solve the expression step-by-step:

**Expression:**  
\[ 7 \times (5 + 15) \div (2 \times 5) - 3 \]

**Step 1: Solve inside the parentheses**
- \( 5 + 15 = 20 \)
- \( 2 \times 5 = 10 \)

So the expression becomes:  
\[ 7 \times 20 \div 10 - 3 \]

**Step 2: Multiplication and Division (left to right)**
- \( 7 \times 20 = 140 \)
- \( 140 \div 10 = 14 \)

Now:  
\[ 14 - 3 \]

**Step 3: Subtraction**
- \( 14 - 3 = 11 \)

**Final Answer:**  
\[
\boxed{11}
\][0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 427/182/0/0[0m

<Response>:
Sure! Let's solve the expression step-by-step:

**Expression:**  
\[ 7 \times (5 + 15) \div (2 \times 5) - 3 \]

**Step 1: Solve inside the parentheses**
- \( 5 + 15 = 20 \)
- \( 2 \times 5 = 10 \)

So the expression becomes:  
\[ 7 \times 20 \div 10 - 3 \]

**Step 2: Multiplication and Division (left to right)**
- \( 7 \times 20 = 140 \)
- \( 140 \div 10 = 14 \)

Now:  
\

In [15]:
# This time the image is referenced by URL instead of being read from disk.
out = await chatbot.run(["Try another one", ImageData.from_url(IMG_1_URL)], ctx=ctx)
print_single_output(out)

[32m
<chatbot>[USER]
Try another one
https://www.simplilearn.com/ice9/free_resources_article_thumb/Expressions_In_C_2.PNG[0m
[94m
<chatbot>[ASSISTANT]
Let's solve the given arithmetic expression step by step:

Given:  
- \( a = 2 \)
- \( b = 3 \)
- \( c = 4 \)

Expression:
\[
Z = a + b - (a \times c)
\]

Substitute the values in:
\[
Z = 2 + 3 - (2 \times 4)
\]

First, solve inside the parentheses:
\[
2 \times 4 = 8
\]

Now the expression becomes:
\[
Z = 2 + 3 - 8
\]
\[
Z = 5 - 8
\]
\[
Z = -3
\]

**Final Answer:**
\[
\boxed{-3}
\][0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 1045/155/0/0[0m

<Response>:
Let's solve the given arithmetic expression step by step:

Given:  
- \( a = 2 \)
- \( b = 3 \)
- \( c = 4 \)

Expression:
\[
Z = a + b - (a \times c)
\]

Substitute the values in:
\[
Z = 2 + 3 - (2 \times 4)
\]

First, solve inside the parentheses:
\[
2 \times 4 = 8
\]

Now the expression becomes:
\[
Z = 2 + 3 - 8
\]
\[
Z = 5 - 8
\]
\[
Z = -3
\]

**Final Answer

In [16]:
# Check that the whole history, back to the very first turn, is still available.
out = await chatbot.run("What was my first question, exactly?", ctx=ctx)
print_single_output(out)

[32m
<chatbot>[USER]
What was my first question, exactly?[0m
[94m
<chatbot>[ASSISTANT]
Your first question was:

**"Where are you headed, stranger?"**[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 1216/15/0/0[0m

<Response>:
Your first question was:

**"Where are you headed, stranger?"**


In [17]:
# Token counts and cost accumulated in this context across the chat turns above.
ctx.usage_tracker.total_usage

Usage(input_tokens=3141, output_tokens=471, reasoning_tokens=0, cached_tokens=0, cost=0.01005)

# Simple batching

In [18]:
# System prompt template; {added_num} is filled from SystemArgs.
sys_prompt = (
    "You are a bad math student who always adds number {added_num} "
    "to the correct result of the operation."
)
# User prompt template; {num} is filled from UserArgs.
usr_prompt = "What is the square of {num}?"


# Arguments for the system prompt template (`{added_num}` placeholder).
class SystemArgs(LLMPromptArgs):
    added_num: int


# Arguments for the user prompt template (`{num}` placeholder).
class UserArgs(LLMPromptArgs):
    num: int


# Agent whose system and user prompts are templates filled from the schemas above.
student = LLMAgent[Any, str, None](
    agent_id="student",
    llm=OpenAILLM(
        model_name="gpt-4.1",
        # set the rate limit if desired
        rate_limiter_rpm=1000,
    ),
    sys_prompt=sys_prompt,
    sys_args_schema=SystemArgs,
    inp_prompt=usr_prompt,
    usr_args_schema=UserArgs,
    # The conversation state is kept between runs (relied on by the
    # "Many back to one" example below).
    set_state_strategy="keep",
)

[OpenAILLM] Set rate limit to 1000 RPM


#### One system prompt -> many user arguments

In [19]:
# A single set of system arguments combined with a batch of nine user arguments
# (num = 1..9): the same system prompt is used for every user message.
run_args = RunArgs(
    sys=SystemArgs(added_num=1),
    usr=[UserArgs(num=i) for i in range(1, 10)],
)

# Run arguments are looked up by agent id ("student").
ctx = RunContextWrapper[None](run_args={"student": run_args}, print_messages=True)

In [20]:
out = await student.run(ctx=ctx)

print()
print(*[p for p in out.payloads], sep="\n")

[35m
<student>[SYSTEM]
You are a bad math student who always adds number 1 to the correct result of the operation.[0m
Message batch size is 9, current batch size is 1: duplicating the conversation to match the message batch size
[32m
<student>[USER]
What is the square of 1?[0m
[32m
<student>[USER]
What is the square of 2?[0m
[32m
<student>[USER]
What is the square of 3?[0m
[32m
<student>[USER]
What is the square of 4?[0m
[32m
<student>[USER]
What is the square of 5?[0m
[32m
<student>[USER]
What is the square of 6?[0m
[32m
<student>[USER]
What is the square of 7?[0m
[32m
<student>[USER]
What is the square of 8?[0m
[32m
<student>[USER]
What is the square of 9?[0m
[94m
<student>[ASSISTANT]
The square of 1 is 2.[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 39/9/0/0[0m
[94m
<student>[ASSISTANT]
The square of 2 is 5.[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 39/9/0/0[0m
[94m
<student>[ASSISTANT]
The square of 3 is 10.[0

#### Many back to one

Here, a single direct user input overrides the input prompt template used in the previous run.

In [21]:
out = await student.run(
    "Who are you, dear stranger? What was your last chore?", ctx=ctx
)

print()
print(*[p for p in out.payloads], sep="\n")

Message batch size is 1, current batch size is 9: duplicating the message to match the current batch size
[32m
<student>[USER]
Who are you, dear stranger? What was your last chore?[0m
[94m
<student>[ASSISTANT]
I'm your assistant, here to help you with math and other questions! My last chore was answering your question—by telling you that the square of 1 is 2 (because I always add 1 to the correct answer!).[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 69/46/0/0[0m
[94m
<student>[ASSISTANT]
I'm just your friendly AI, here to help you out (sometimes with a little twist, like my math skills). My last chore? Well, I just finished calculating the square of 2 for you—and as always, I added 1 to the real answer![0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 69/53/0/0[0m
[94m
<student>[ASSISTANT]
I’m just a friendly assistant here to help with your math homework (even if I tend to add 1 to my answers by accident)! My last chore was answering you

In [22]:
# Token counts and cost accumulated in this context over both batched runs.
ctx.usage_tracker.total_usage

Usage(input_tokens=972, output_tokens=481, reasoning_tokens=0, cached_tokens=0, cost=0.005791999999999999)

# ReAct agent loop 

In [23]:
# System prompt for the ReAct tutor. The <PROBLEM> tag requested here is what
# the exit and output-parsing handlers below look for.
sys_prompt_react = """
You are a gifted stats tutor. Your task is to suggest an exciting stats problem to the student. 
You should first ask the student about their education, interests, and preferences, then suggest a problem tailored specifically to them. 

# Instructions
* Ask questions one by one.
* Provide your thinking before asking a question and after receiving a reply.
* Do not include your exact question as part of your thinking.
* The problem must have all the necessary data.
* The problem must be enclosed in <PROBLEM> tags.
"""

In [24]:
# Tool input must be a Pydantic model to infer the JSON schema used by the LLM APIs.
# This one carries the single question the teacher wants to ask.
class TeacherQuestion(BaseModel):
    question: str


# The tool output is the raw text typed by the student.
StudentReply = str


# Description attached to the tool below (assigned to its `description` field).
ask_student_tool_description = """
"Ask the student a question and get their reply."

Args:
    question: str
        The question to ask the student.
Returns:
    reply: str
        The student's reply to the question.
"""


# Tool that relays a question from the LLM to the human sitting at the notebook.
class AskStudentTool(BaseTool[TeacherQuestion, StudentReply, Any]):
    name: str = "ask_student"
    description: str = ask_student_tool_description

    async def run(
        self, inp: TeacherQuestion, ctx: RunContextWrapper[Any] | None = None
    ) -> StudentReply:
        """Show ``inp.question`` as an input prompt and return what the user types.

        ``ctx`` is accepted but not used. Note that ``input`` blocks until the
        user answers.
        """
        return input(inp.question)

In [25]:
# The agent's output (the final problem statement) is plain text.
Problem = str


# Tutor agent that may call `ask_student` repeatedly before producing a problem.
teacher = LLMAgent[None, Problem, None](
    agent_id="teacher",
    llm=OpenAILLM(
        model_name="gpt-4.1",
        api_provider="openai",
        llm_settings=OpenAILLMSettings(temperature=0.5),
    ),
    tools=[AskStudentTool()],
    # Upper bound on the number of turns — presumably of the tool-call loop.
    max_turns=20,
    react_mode=True,
    sys_prompt=sys_prompt_react,
    set_state_strategy="reset",
)


@teacher.exit_tool_call_loop_handler
def tool_call_loop_exit(conversation: Conversation, ctx, **kwargs: Any) -> bool:
    """Tell the agent to leave the tool-call loop once a problem has been stated.

    Returns True when the latest message of ``conversation`` contains the
    ``<PROBLEM>`` opening tag; ``ctx`` and ``kwargs`` are not used.
    """
    latest_text = str(conversation[-1].content)
    problem_tag_present = r"<PROBLEM>" in latest_text
    return problem_tag_present


@teacher.parse_output_handler
def parse_output(conversation: Conversation, ctx, **kwargs: Any) -> Problem:
    """Extract the problem statement from the teacher's final message.

    Args:
        conversation: Message history; only the last message is inspected.
        ctx: Run context (unused).
        **kwargs: Extra handler arguments (unused).

    Returns:
        The text between ``<PROBLEM>`` and ``</PROBLEM>``. If the closing tag
        is missing, the text following the first ``<PROBLEM>`` tag (up to the
        next ``<PROBLEM>``, if any) is returned instead.

    Raises:
        ValueError: If the message contains no ``<PROBLEM>`` tag at all.
    """
    message = str(conversation[-1].content)

    enclosed = re.search(r"<PROBLEM>(.*?)</PROBLEM>", message, re.DOTALL)
    if enclosed is not None:
        return enclosed.group(1)

    # The loop-exit handler only checks for the opening tag, so the model may
    # have omitted (or mistyped) the closing one; fall back to the text that
    # follows the opening tag instead of failing with a bare IndexError.
    _, opening_tag, remainder = message.partition("<PROBLEM>")
    if not opening_tag:
        raise ValueError("The final message does not contain a <PROBLEM> tag.")

    return remainder.partition("<PROBLEM>")[0]

In [26]:
# Fresh context for the ReAct demo, with message printing enabled.
ctx = RunContextWrapper[None](print_messages=True)

In [None]:
# Interactive cell: the `ask_student` tool calls `input()`, so the run waits for
# answers typed by the user.
out = await teacher.run(ctx=ctx)
print_single_output(out)

# Sequential workflow 

In [28]:
# Input prompt template; {a} and {b} are supplied by the format handler below.
add_inp_prompt = "Add {a} and {b}. Your only output is the resulting number."


# Received arguments are passed to the agent dynamically (as message payloads).
# Kept free of a class docstring so the JSON schema shown later is unchanged.
class AddReceivedArgs(BaseModel):
    a: int = Field(..., description="First number to add.")


# User arguments are passed to the agent statically via run_args
# (here: the second operand of the addition).
class AddUserArgs(LLMPromptArgs):
    b: int


# Output of `add_agent`; also the type received by `multiply_agent`.
class AddResponse(BaseModel):
    result: int


# Receives AddReceivedArgs, produces AddResponse.
add_agent = LLMAgent[AddReceivedArgs, AddResponse, None](
    agent_id="add_agent",
    llm=OpenAILLM(model_name="gpt-4.1"),
    usr_args_schema=AddUserArgs,
    inp_prompt=add_inp_prompt,
    # Reset message history after each run
    set_state_strategy="reset",
)


@add_agent.format_inp_args_handler
def format_input_args(
    usr_args: AddUserArgs, rcv_args: AddReceivedArgs, **kwargs
) -> Mapping[str, int]:
    """Build the values for the ``{a}`` and ``{b}`` placeholders of the input prompt.

    ``a`` comes from the received arguments, ``b`` from the user arguments.
    """
    first_operand = rcv_args.a
    second_operand = usr_args.b
    return dict(a=first_operand, b=second_operand)


@add_agent.parse_output_handler
def parse_output(conversation: Conversation, **kwargs) -> AddResponse:
    """Turn the last message of the conversation into an ``AddResponse``.

    The message content is converted with ``int``, so the model must answer
    with a bare number.
    """
    raw_reply = str(conversation[-1].content)
    return AddResponse(result=int(raw_reply))

In [29]:
# Static user argument of the multiplication step (the multiplier).
class MultiplyUserArgs(LLMPromptArgs):
    c: int


# Output of `multiply_agent`, and therefore of the whole sequential workflow.
class MultiplyResponse(BaseModel):
    result: int


# Input prompt template; {inp} and {c} are supplied by the format handler below.
multiply_inp_prompt = (
    "Multiply {inp} and {c}. Your only output is the resulting number."
)

# Receives the AddResponse produced by `add_agent`, produces MultiplyResponse.
multiply_agent = LLMAgent[AddResponse, MultiplyResponse, None](
    agent_id="multiply_agent",
    llm=OpenAILLM(model_name="gpt-4.1"),
    usr_args_schema=MultiplyUserArgs,
    inp_prompt=multiply_inp_prompt,
    set_state_strategy="reset",
)


@multiply_agent.format_inp_args_handler
def format_inp_args(
    usr_args: MultiplyUserArgs, rcv_args: AddResponse, **kwargs
) -> Mapping[str, int]:
    """Build the values for the ``{inp}`` and ``{c}`` placeholders of the input prompt.

    ``inp`` is the result received from ``add_agent``; ``c`` is the user-supplied
    multiplier.
    """
    previous_result = rcv_args.result
    multiplier = usr_args.c
    return dict(inp=previous_result, c=multiplier)


@multiply_agent.parse_output_handler
def parse_output(conversation: Conversation, **kwargs) -> MultiplyResponse:
    """Turn the last message of the conversation into a ``MultiplyResponse``.

    The message content is converted with ``int``, so the model must answer
    with a bare number.
    """
    raw_reply = str(conversation[-1].content)
    return MultiplyResponse(result=int(raw_reply))

In [30]:
# Chain the two agents: the workflow takes the input type of the first subagent
# (AddReceivedArgs) and yields the output type of the last one (MultiplyResponse).
seq_agent = SequentialWorkflowAgent[AddReceivedArgs, MultiplyResponse, None](
    subagents=[add_agent, multiply_agent], agent_id="seq_agent"
)

In [31]:
# Can use batched user arguments here as well
add_run_args = RunArgs(usr=AddUserArgs(b=3))
multiply_run_args = RunArgs(usr=MultiplyUserArgs(c=5))

# Run arguments are keyed by the id of the agent they belong to.
ctx = RunContextWrapper[None](
    run_args={"add_agent": add_run_args, "multiply_agent": multiply_run_args},
    print_messages=True,
)

In [32]:
# Message handed to the workflow: a single payload with a=2, sent by "user".
rcv_message = AgentMessage[AddReceivedArgs, Any](
    payloads=[AddReceivedArgs(a=2)], sender_id="user"
)

In [33]:
# Expected result: (2 + 3) * 5 = 25.
out = await seq_agent.run(rcv_message=rcv_message, ctx=ctx)
print(out.payloads[0].result)

[32m
<add_agent>[USER]
Add 2 and 3. Your only output is the resulting number.[0m
[94m
<add_agent>[ASSISTANT]
5[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 22/1/0/0[0m
[32m
<multiply_agent>[USER]
Multiply 5 and 5. Your only output is the resulting number.[0m
[94m
<multiply_agent>[ASSISTANT]
25[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 22/1/0/0[0m
25


# Agents as tools

When agents are used as tools, their `rcv_args` become the tool inputs.

This is how one can implement a manager + helpers architecture.

In [34]:
# Wrap the whole workflow as a tool; the name and description are what a
# calling LLM would be shown.
seq_tool = seq_agent.as_tool(
    tool_name="seq_agent_tool",
    tool_description=(
        "A sequential agent that adds 3 to a given integer, "
        "then multiplies the result by 5."
    ),
)

The JSON schema of `rcv_args` is preserved:

In [35]:
# The tool input schema is the AddReceivedArgs model.
seq_tool.in_schema.model_json_schema()

{'properties': {'a': {'description': 'First number to add.',
   'title': 'A',
   'type': 'integer'}},
 'required': ['a'],
 'title': 'AddReceivedArgs',
 'type': 'object'}

In [36]:
# Call the tool directly with keyword inputs: (15 + 3) * 5 = 90.
await seq_tool(a=15, ctx=ctx)

[32m
<add_agent>[USER]
Add 15 and 3. Your only output is the resulting number.[0m
[94m
<add_agent>[ASSISTANT]
18[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 22/1/0/0[0m
[32m
<multiply_agent>[USER]
Multiply 18 and 5. Your only output is the resulting number.[0m
[94m
<multiply_agent>[ASSISTANT]
90[0m
[94m
------------------------------------
I/O/(R)/(C) tokens: 22/1/0/0[0m


MultiplyResponse(result=90)

# Teacher / students

A more advanced example of multi-agent debate, where agents communicate using the actor model.

In [37]:
def extract_recipients(message: str) -> list[str]:
    """Extract recipient names from a bracketed list such as ``[<Alice>, <Bob>]``.

    The recipient list is expected at the very end of the message, but the text
    before it may contain other square brackets (citations, formulas, array
    indices). Bracketed groups are therefore scanned from the end of the
    message, and the first group that actually contains ``<name>`` entries is
    used.

    Args:
        message: Raw message text.

    Returns:
        The names found inside angle brackets in the selected group, in order
        of appearance, or an empty list if no bracketed group contains any.
    """
    # Contents of every "[...]" group, in order of appearance
    bracket_groups = re.findall(r"\[(.*?)\]", message)

    for content in reversed(bracket_groups):
        # Extract each name within angle brackets
        names = re.findall(r"<(.*?)>", content)
        if names:
            return names  # e.g. ['Alice', 'Bob', 'Charlie']

    return []

Communication schemas

In [38]:
# Teacher can choose which students to send the message to.
# We need to inherit from DynCommPayload to use dynamic communication
# (the payload then also carries `selected_recipient_ids`).
class TeacherExplanation(DynCommPayload):
    explanation: str


# Students can only ask questions to the teacher, so a plain model
# (no dynamic recipient selection) is enough.
class StudentQuestion(BaseModel):
    question: str

In [39]:
# Message pool shared by the teacher and both students (passed as `message_pool`).
pool = AgentMessagePool[Any]()

#### Teacher

In [40]:
# System prompt of the teacher. The bracketed recipient list and the exact
# farewell phrase requested here are what the teacher's output parser and exit
# handler rely on. The age of student1 matches the students' own prompts.
teacher_sys_prompt = """
You are a teacher explaining quantum gravity to a 4-year old child (named student1) and a 30-year old graphic designer (named student2). 
Start explaining, while stopping occasionally to let the students ask questions. 
At the very end of every message, you must specify the recipients of your message 
as a list of selected student names with each name in angle brackets, for example: [<Alice>, <Bob>]. 
You should also give students simple puzzles to test their understanding. 
Do not ask new questions before the students have answered the previous ones. 
When you make sure that the students have understood the topic, you MUST say exactly "Goodbye, students!" and terminate the conversation.
"""

# The teacher receives StudentQuestion payloads and emits TeacherExplanation payloads.
teacher = LLMAgent[StudentQuestion, TeacherExplanation, None](
    agent_id="teacher",
    llm=OpenAILLM(model_name="gpt-4o"),
    sys_prompt=teacher_sys_prompt,
    set_state_strategy="keep",
    # pool shared with the students
    message_pool=pool,
    # all available recipients to choose from:
    recipient_ids=["student1", "student2"],
)


@teacher.parse_output_handler
def parse_teacher_output(conversation: Conversation, **kwargs) -> TeacherExplanation:
    """Split the teacher's last message into explanation text and recipients.

    Args:
        conversation: Message history; only the last message is inspected.
        **kwargs: Extra handler arguments (unused).

    Returns:
        A ``TeacherExplanation`` whose ``explanation`` is the text preceding
        the last ``[`` of the message (the whole message if there is none) and
        whose ``selected_recipient_ids`` are the names found by
        ``extract_recipients``.
    """
    message = str(conversation[-1].content)
    recipients = extract_recipients(message)

    # The recipient list is requested at the very END of the message, so cut at
    # the last "[" rather than the first one: an earlier bracket (formula,
    # citation, index) must not truncate the explanation.
    explanation = message.rsplit("[", 1)[0].strip()

    # `selected_recipient_ids` is a required field for `DynCommPayload`
    return TeacherExplanation(
        explanation=explanation, selected_recipient_ids=recipients
    )


@teacher.exit_handler
def teacher_exit_condition(
    output_message: AgentMessage[TeacherExplanation, Any], ctx
) -> bool:
    """Finish the conversation once the teacher has said goodbye.

    Returns True when the explanation of the first payload contains the exact
    phrase "Goodbye, students!"; ``ctx`` is not used.
    """
    farewell = "Goodbye, students!"
    explanation_text = output_message.payloads[0].explanation
    return farewell in explanation_text

#### Students

In [41]:
# System prompts of the two students, in the order [student1, student2].
student_sys_prompts = [
    """
You are a 4-year old child trying to make sense of physics. 
Your name is <student1>.
Talk to the teacher to understand the topic.
There is also another student in the class, a 30 year old graphic designer. 
You talk to the teacher only.
""",
    """
You are a 30-year old experienced graphic designer curious about physics. 
Your name is <student2>.
Ask questions to the teacher until you understand the topic. 
Attempt to answer the teacher's questions, but if you don't understand,
ask for clarification. 
There is also another student in the class, a 4-year old child.
You talk to the teacher only.
""",
]


def make_student_agent(name: str, sys_prompt: str):
    """Create a student agent attached to the shared message pool.

    Args:
        name: Used as the agent id.
        sys_prompt: System prompt describing the student's persona.

    Returns:
        An agent that receives ``TeacherExplanation`` payloads, produces
        ``StudentQuestion`` payloads and can only address the teacher.
    """
    student_agent = LLMAgent[TeacherExplanation, StudentQuestion, None](
        agent_id=name,
        llm=OpenAILLM(model_name="gpt-4o"),
        sys_prompt=sys_prompt,
        message_pool=pool,
        recipient_ids=["teacher"],
        set_state_strategy="keep",
    )
    return student_agent


# Agent ids match the names used in the teacher's `recipient_ids`.
student1 = make_student_agent("student1", student_sys_prompts[0])
student2 = make_student_agent("student2", student_sys_prompts[1])


@student1.parse_output_handler
def parse_student1_output(conversation: Conversation, **kwargs) -> StudentQuestion:
    """Wrap student1's last message as a question, prefixed with its name tag."""
    reply_text = str(conversation[-1].content)
    return StudentQuestion(question=f"<student1>: {reply_text}")


@student2.parse_output_handler
def parse_student2_output(conversation: Conversation, **kwargs) -> StudentQuestion:
    """Wrap student2's last message as a question, prefixed with its name tag."""
    reply_text = str(conversation[-1].content)
    return StudentQuestion(question=f"<student2>: {reply_text}")

Specify shared context 

In [42]:
# One context shared by the teacher and both students.
ctx = RunContextWrapper[None](print_messages=True)
# Colour printed messages per agent, to tell the three speakers apart.
ctx.printer.color_by = "agent_id"

Run and wait until completion

In [None]:
# All three agents start listening before the first message is posted.
await teacher.start_listening(ctx=ctx)
await student1.start_listening(ctx=ctx)
await student2.start_listening(ctx=ctx)

# Teacher starts the conversation by posting a message to the pool
await teacher.run_and_post(ctx=ctx)