# Fine-grained tool calling

When you combine tool use with streaming, Claude sends real-time updates as it generates the tool arguments. This creates a more responsive user experience, but there are some important details to understand about how it works behind the scenes.

In [1]:
# Load env variables and create client
from dotenv import load_dotenv
from anthropic import Anthropic

# Load variables from a .env file before the client is constructed.
load_dotenv()

# Anthropic() is built with no explicit arguments.
# NOTE(review): presumably it picks up the API key from the environment
# populated by load_dotenv() above — confirm against the SDK docs.
client = Anthropic()
# Model id read by chat_stream for every request in this notebook.
model = "claude-haiku-4-5"

In [2]:
# Helper functions
from anthropic.types import Message

def add_messages(messages, message, role="user"):
    """Append one conversation turn to ``messages`` (mutated in place).

    Three input shapes are handled, checked in this order:

    * a ``list`` -- used verbatim as the turn's content under ``role``;
    * an object with a ``content`` attribute, when ``role == "assistant"`` --
      its ``text`` and ``tool_use`` blocks are copied into plain dicts
      (blocks of any other type are dropped);
    * anything else -- wrapped in a single text block under ``role``.

    Args:
        messages: List of message dicts to append to.
        message: The content to add (see shapes above).
        role: Role for the new turn; defaults to ``"user"``.

    Returns:
        None. The new turn is appended to ``messages``.
    """
    if isinstance(message, list):
        # Pre-built content blocks (e.g. tool results) pass straight through.
        messages.append({"role": role, "content": message})
        return

    if hasattr(message, "content") and role == "assistant":
        serialized_blocks = []
        for part in message.content:
            if part.type == "text":
                serialized_blocks.append({"type": "text", "text": part.text})
            elif part.type == "tool_use":
                serialized_blocks.append(
                    {
                        "type": "tool_use",
                        "id": part.id,
                        "name": part.name,
                        "input": part.input,
                    }
                )
        messages.append({"role": "assistant", "content": serialized_blocks})
        return

    # Fallback: wrap the value in a single text block.
    messages.append(
        {
            "role": role,
            "content": [{"type": "text", "text": message}],
        }
    )

def chat_stream(
        messages,
        system=None,
        temperature=1.0,
        stop_sequences=None,
        tools=None,
        tool_choice=None,
        betas=None
    ):
    """Open a streaming request through ``client.beta.messages.stream``.

    Uses the module-level ``client`` and ``model``; ``max_tokens`` is fixed
    at 1000.

    Args:
        messages: Conversation history to send.
        system: Optional system prompt; sent only when truthy.
        temperature: Sampling temperature.
        stop_sequences: Optional list of stop sequences. ``None`` (the
            default) sends an empty list.
        tools: Optional list of tool schemas; sent only when truthy.
        tool_choice: Optional tool-choice setting; sent only when truthy.
        betas: Optional list of beta flags; sent only when truthy.

    Returns:
        Whatever ``client.beta.messages.stream(**params)`` returns; callers
        in this notebook use it as a context manager.
    """
    # None sentinels replace the former mutable ``[]`` defaults so that no
    # list object is shared between calls. The request payload is unchanged:
    # an omitted stop_sequences is still sent as an empty list.
    if stop_sequences is None:
        stop_sequences = []

    params = {
        "model": model,
        "max_tokens": 1000,
        "messages": messages,
        "temperature": temperature,
        "stop_sequences": stop_sequences,
    }

    # Optional fields are only included when the caller supplied a value.
    if tool_choice:
        params["tool_choice"] = tool_choice

    if tools:
        params["tools"] = tools

    if system:
        params["system"] = system
    if betas:
        params["betas"] = betas

    return client.beta.messages.stream(**params)

def text_from_message(message):
    """Return the text of every ``text`` block in ``message.content``.

    Blocks are joined with newlines in their original order; blocks of any
    other type are ignored. A message with no text blocks yields ``""``.
    """
    text_parts = []
    for part in message.content:
        if part.type == "text":
            text_parts.append(part.text)
    return "\n".join(text_parts)

In [3]:
# Tool definition
from anthropic.types import ToolParam

def _build_save_article_schema(review_description):
    """Build the ``save_article`` tool schema.

    The two schemas in this notebook are identical except for the description
    of the ``meta.review`` field, so that string is the only parameter.

    Args:
        review_description: Description text for the ``meta.review`` property.

    Returns:
        A ``ToolParam`` describing the ``save_article`` tool.
    """
    return ToolParam(
        {
            "name": "save_article",
            "description": "Saves a scholarly journal article",
            "input_schema": {
                "type": "object",
                "properties": {
                    "abstract": {
                        "type": "string",
                        "description": "Abstract of the article. One short sentence max",
                    },
                    "meta": {
                        "type": "object",
                        "properties": {
                            "word_count": {
                                "type": "integer",
                                "description": "Word count",
                            },
                            "review": {
                                "type": "string",
                                "description": review_description,
                            },
                        },
                        "required": ["word_count", "review"],
                    },
                },
                "required": ["abstract", "meta"],
            },
        }
    )


# Variant that asks for a long (eight sentence) review.
save_article_schema = _build_save_article_schema(
    "Eight sentence review of the paper"
)
# Variant that asks for a one-sentence review; same tool name.
save_short_article_schema = _build_save_article_schema(
    "Review of paper. One short sentence max"
)


def save_article(**kwargs):
    """Stub implementation of the ``save_article`` tool.

    Accepts any keyword arguments, ignores them, and returns a fixed
    confirmation string. Nothing is stored.
    """
    confirmation = "Article saved!"
    return confirmation


In [4]:
# Tool Running
import json


def run_tool(tool_name, tool_input):
    """Dispatch a tool call to its Python implementation.

    Args:
        tool_name: Name of the requested tool.
        tool_input: Dict of keyword arguments passed to the tool.

    Returns:
        The return value of the tool implementation.

    Raises:
        ValueError: If ``tool_name`` does not match a known tool.
    """
    if tool_name == "save_article":
        return save_article(**tool_input)

    # Falling through used to return None, which run_tools serialises as a
    # successful "null" result. Raising instead lets run_tools' existing
    # ``except Exception`` report the call with is_error=True.
    raise ValueError(f"Unknown tool: {tool_name!r}")


def run_tools(message):
    """Run every ``tool_use`` block in ``message`` and collect the results.

    Each request is passed to ``run_tool``. The output is JSON-encoded into a
    ``tool_result`` block; any exception raised while running or encoding is
    captured as an error block instead of propagating.

    Args:
        message: Object whose ``content`` is an iterable of blocks with a
            ``type`` attribute; ``tool_use`` blocks also need ``id``,
            ``name`` and ``input``.

    Returns:
        A list of ``tool_result`` dicts, one per tool request, in order.
    """
    pending_calls = list(
        filter(lambda block: block.type == "tool_use", message.content)
    )

    results = []
    for call in pending_calls:
        try:
            # Encoding happens inside the try so unserialisable output is
            # reported as an error too.
            payload = json.dumps(run_tool(call.name, call.input))
            failed = False
        except Exception as err:
            payload = f"Error: {err}"
            failed = True

        results.append(
            {
                "type": "tool_result",
                "tool_use_id": call.id,
                "content": payload,
                "is_error": failed,
            }
        )

    return results

In [5]:
# Run conversation
def run_conversation(messages, tools=None, tool_choice=None, fine_grained=False):
    """Stream a conversation, printing output live and running requested tools.

    Each turn is streamed through ``chat_stream``. Text and partial tool-call
    JSON are printed as they arrive, the final assistant message is appended
    to ``messages``, and any requested tools are run with their results
    appended as a user turn. The loop ends when the response's stop reason is
    not ``"tool_use"``, or after one tool round when ``tool_choice`` is set.

    Args:
        messages: Conversation history; mutated in place.
        tools: Optional list of tool schemas. ``None`` (the default) sends
            no tools.
        tool_choice: Optional tool-choice setting forwarded on every turn.
        fine_grained: When true, requests the
            ``fine-grained-tool-streaming-2025-05-14`` beta.

    Returns:
        The same ``messages`` list, including all turns added here.
    """
    # ``tools`` previously defaulted to a shared mutable ``[]``. chat_stream
    # only sends tools when the value is truthy, so None behaves identically.
    while True:
        with chat_stream(
            messages,
            tools=tools,
            betas=["fine-grained-tool-streaming-2025-05-14"] if fine_grained else [],
            tool_choice=tool_choice,
        ) as stream:
            for chunk in stream:
                # Plain text output.
                if chunk.type == "text":
                    print(chunk.text, end="")

                # Announce the start of a tool call.
                if chunk.type == "content_block_start":
                    if chunk.content_block.type == "tool_use":
                        print(f'\n>>> Tool Call: "{chunk.content_block.name}"')

                # Partial tool-argument JSON, printed as it arrives.
                if chunk.type == "input_json" and chunk.partial_json:
                    print(chunk.partial_json, end="")

                if chunk.type == "content_block_stop":
                    print("\n")

            response = stream.get_final_message()

        add_messages(messages, response, role="assistant")

        if response.stop_reason != "tool_use":
            break

        tool_results = run_tools(response)
        add_messages(messages, tool_results, role="user")

        # tool_choice is re-sent on every turn.
        # NOTE(review): presumably this break stops a forced tool choice from
        # triggering another tool call each iteration — confirm intent.
        if tool_choice:
            break

    return messages

In [8]:
# Start from an empty history and seed it with a single user request.
messages = []
add_messages(messages, "Create and save a fake computer science article", role="user")

# Stream with the fine-grained beta enabled, using the long-review schema.
# The call stays the last expression so the returned history is displayed.
run_conversation(messages, tools=[save_article_schema], fine_grained=True)


>>> Tool Call: "save_article"
{"abstract": "A novel machine learning framework that uses quantum entanglement to achieve exponential speedups in natural language processing.", "meta": {
  "word_count": 4500,
  "review": "This paper presents an interesting theoretical framework combining quantum computing principles with neural network architectures, though the experimental validation remains limited to simulation environments. The authors make bold claims about exponential speedups but provide only modest empirical evidence from small-scale benchmarks. The mathematical formulation is rigorous and builds appropriately on prior quantum machine learning work, yet the practical applicability to real-world NLP tasks is questionable. The methodology for encoding linguistic features into quantum states is creative, though computationally resource-intensive. While the paper contributes to the theoretical landscape of quantum-classical hybrids, reproducibility concerns arise from the proprieta

[{'role': 'user',
  'content': [{'type': 'text',
    'text': 'Create and save a fake computer science article'}]},
 {'role': 'assistant',
  'content': [{'type': 'tool_use',
    'id': 'toolu_01GQ7Zu1tknrVxQQmw69uWDP',
    'name': 'save_article',
    'input': {'abstract': 'A novel machine learning framework that uses quantum entanglement to achieve exponential speedups in natural language processing.',
     'meta': {'word_count': 4500,
      'review': 'This paper presents an interesting theoretical framework combining quantum computing principles with neural network architectures, though the experimental validation remains limited to simulation environments. The authors make bold claims about exponential speedups but provide only modest empirical evidence from small-scale benchmarks. The mathematical formulation is rigorous and builds appropriately on prior quantum machine learning work, yet the practical applicability to real-world NLP tasks is questionable. The methodology for encoding