### Load a math dataset


In [None]:
import typing as t

import datasets
import contextlib
import rigging as rg

def is_basic_question(sample: dict[str, t.Any]) -> bool:
    """Report whether the sample's ``answer`` field parses as a plain float.

    Used as the dataset filter predicate so that only questions with a
    simple numeric answer are kept. A ``ValueError`` from ``float()`` means
    the answer is not a basic number; any other exception propagates.
    """
    try:
        float(sample["answer"])
    except ValueError:
        return False
    return True
    
# Fetch the MATH-500 test split and keep only rows whose answer is a plain float.
math500_test = datasets.load_dataset("HuggingFaceH4/MATH-500", split="test")
basic_rows = math500_test.filter(is_basic_question)

# Wrap each problem as a user message and attach a copy of the whole row as
# metadata, so the row's fields (including "answer") stay with the message.
dataset = [
    rg.Message("user", row["problem"], metadata={**row})
    for row in basic_rows
]

print(f"Loaded {len(dataset)} basic questions from MATH-500.")

### Define tools


In [None]:
import rigging as rg
from io import StringIO
from contextlib import redirect_stdout

# Dedicated calculator tool

# Groups four basic arithmetic operations, each registered as a tool through
# ``rg.tool_method``.
# NOTE(review): the method docstrings are presumably surfaced to the model as
# the tool descriptions, and ``catch=True`` presumably reports raised
# exceptions back as tool output instead of aborting -- confirm against the
# rigging documentation before rewording any docstring.
class Calculator:
    @rg.tool_method(catch=True)
    def add(self, x: float, y: float) -> float:
        """Adds two numbers."""
        return x + y

    @rg.tool_method(catch=True)
    def subtract(self, x: float, y: float) -> float:
        """Subtracts the second number from the first."""
        return x - y

    @rg.tool_method(catch=True)
    def multiply(self, x: float, y: float) -> float:
        """Multiplies two numbers."""
        return x * y

    @rg.tool_method(catch=True)
    def divide(self, x: float, y: float) -> float:
        """Divides the first number by the second."""
        # Reject a zero divisor up front with an explicit ValueError message
        # rather than letting the division itself fail.
        if y == 0:
            raise ValueError("Cannot divide by zero.")
        return x / y

# Single instance that is handed to the calculator agent via ``.using(calculator, ...)``.
calculator = Calculator()

# Python execution tool

@rg.tool(catch=True)
def execute_python(code: str) -> str:
    """
    Executes Python code and returns stdout from the execution.
    
    - Use print() to output results.
    - Be thoughtful with indentation and syntax.
    """
    # The docstring above is deliberately left unchanged: it is presumably
    # what the model sees as the tool description.
    #
    # SECURITY: exec() runs model-written code with the full privileges of
    # this process. Only use this tool in a sandboxed / throwaway environment.
    #
    # Execute in ONE fresh dict that serves as both globals and locals. A bare
    # exec(code) inside a function uses the module globals plus this
    # function's locals as two separate namespaces, so the snippet behaves as
    # if it were a class body: functions, lambdas and comprehensions defined
    # in the snippet cannot see its own top-level imports or variables and
    # fail with NameError. A single namespace gives normal script semantics
    # and also keeps the snippet from reading the notebook's globals.
    namespace: dict[str, object] = {"__name__": "__main__"}
    output = StringIO()
    with redirect_stdout(output):
        exec(code, namespace)
    return output.getvalue()
    

### Define our agents


In [None]:
import contextlib

# Define a model for parsing the agent answer

# Structured form of the agent's final answer, parsed out of the last chat message.
# NOTE(review): presumably corresponds to the <answer></answer> tags requested in
# the pipeline prompt -- confirm how rigging derives the tag name from the class.
class Answer(rg.Model):
    value: str  # raw answer text; converted with float() when the chat is scored

# Callback to inspect results after the agent execution

async def inspect_results(chat: rg.Chat) -> rg.Chat:
    """Score a finished chat against its reference answer and tag it with the result.

    Args:
        chat: Completed chat whose message metadata holds the reference
            answer under the ``"answer"`` key.

    Returns:
        The value returned by ``chat.meta(...)``, with these fields attached:
        ``correct`` (parsed answer equals the reference as floats),
        ``gave_answer`` (an ``Answer`` could be parsed from the last message),
        ``agent_answer`` (the parsed text, or ``None``), ``used_tools`` and
        ``true_answer``.
    """
    used_tools = any(msg.tool_calls for msg in chat.messages if msg.role == "assistant")
    answer = chat.message_metadata["answer"]
    agent_answer = chat.last.try_parse(Answer)

    # Whether the *agent* produced a parseable answer. The previous code
    # tested the reference answer here, which made the flag always true.
    gave_answer = agent_answer is not None

    correct = False
    if gave_answer:
        # Text that does not parse as a number counts as incorrect, not as an error.
        with contextlib.suppress(ValueError):
            correct = float(agent_answer.value) == float(answer)

    return chat.meta(
        correct=correct,
        gave_answer=gave_answer,
        agent_answer=agent_answer.value if gave_answer else None,
        used_tools=used_tools,
        true_answer=answer,
    )


# Build our core pipeline

# Shared base pipeline that every agent variant is cloned from.
# NOTE(review): the per-step notes below describe what each call appears to do
# from its name and arguments -- confirm against the rigging documentation.
pipeline = (
    rg.get_generator("groq/meta-llama/llama-4-maverick-17b-128e-instruct")
    # Instruction message asking the model to wrap its answer in <answer> tags.
    .chat("Answer math questions and return basic floats between <answer></answer> tags.")
    # Presumably keeps generating until the reply parses as an Answer.
    .until_parsed_as(Answer)
    # Post-process each chat with inspect_results to attach the scoring metadata.
    .then(inspect_results)
    # Presumably keeps chats that raised in the output instead of failing the run.
    .catch(Exception, on_failed="include")
)

# Define 3 agents with different capabilities

# Each variant starts from a clone of the base pipeline and records its name
# under the ``variant`` metadata key. The two tool variants additionally
# register their tool with mode="xml".
agent_no_tools = pipeline.clone().meta(variant="no_tools")
agent_with_calculator = pipeline.clone().using(calculator, mode="xml").meta(variant="with_calculator")
agent_with_python = pipeline.clone().using(execute_python, mode="xml").meta(variant="with_python")


### Run 10 samples through our 3 agents


In [None]:
import random

# Draw 10 questions at random (without replacement); the same subset is given
# to every agent so their results are comparable.
samples = random.sample(dataset, 10)

# Run the three agent variants over the shared samples, one batch after another.
chats_no_tools = await agent_no_tools.run_batch(samples)
chats_with_calculator = await agent_with_calculator.run_batch(samples)
chats_with_python = await agent_with_python.run_batch(samples)

### Calculate success rates


In [None]:
def get_success_rate(chats: t.Sequence["rg.Chat"]) -> float:
    """Return the fraction of chats whose metadata marks them as correct.

    Args:
        chats: Chats to score. A chat with no ``"correct"`` metadata entry
            counts as incorrect.

    Returns:
        A value between 0.0 and 1.0, or 0.0 when ``chats`` is empty.
    """
    if not chats:
        # Nothing to score: report 0.0 instead of raising ZeroDivisionError.
        return 0.0
    correct_count = sum(1 for chat in chats if chat.metadata.get("correct", False))
    return correct_count / len(chats)

# Score each variant's batch of chats.
no_tools_success_rate = get_success_rate(chats_no_tools)
with_calculator_success_rate = get_success_rate(chats_with_calculator)
with_python_success_rate = get_success_rate(chats_with_python)

# Report each rate as a percentage with two decimal places (``.2%`` format);
# the labels are padded so the values line up in one column.
print(f"Success rate without tools:       {no_tools_success_rate:.2%}")
print(f"Success rate with calculator:     {with_calculator_success_rate:.2%}")
print(f"Success rate with Python:         {with_python_success_rate:.2%}")

### Tokenize the Python-agent chats


In [None]:
# Tokenize the chats produced by the Python-tool agent with the named Qwen tokenizer.
# NOTE(review): transform="json-with-tag" presumably selects how the chats are
# rendered before tokenizing -- confirm against the rigging documentation.
tokenized = await chats_with_python.to_tokens('Qwen/Qwen2.5-1.5B-Instruct', transform="json-with-tag")

# Inspect the first tokenized chat: its metadata and its slices.
print(tokenized[0].metadata)
print(tokenized[0].slices)