In [1]:
# Load environment variables from a local .env file before any model is created.
# NOTE(review): presumably this supplies the Google API key for the chat model below - confirm.
from dotenv import load_dotenv
load_dotenv() 

True

In [2]:
from langchain.chat_models import init_chat_model
# Chat model shared by every agent built in this notebook.
llm = init_chat_model(
    "gemini-2.0-flash",
    model_provider="google_genai",
)

In [3]:
from langchain_core.tools import tool

# Define the add tool.
# The @tool decorator turns the function into a LangChain tool: the docstring
# below is used verbatim as the tool description, so treat it as runtime text.
@tool
def add(x: float, y: float) -> float:
    """Add 'x' and 'y'."""
    return x + y

# Define the multiply tool.
# The docstring is used as the tool description, so keep it short and literal.
@tool
def multiply(x: float, y: float) -> float:
    """Multiply 'x' and 'y'."""
    return x * y

# Define the exponentiate tool.
# The docstring is used as the tool description, so keep it short and literal.
# NOTE(review): in Python a negative base with a fractional exponent yields a
# complex number, which does not match the declared float return type.
@tool
def exponentiate(x: float, y: float) -> float:
    """Raise 'x' to the power of 'y'."""
    return x ** y

# Define the subtract tool.
# Operand order matters here: the result is y - x (x is taken away from y),
# so subtract(x=3, y=10) gives 7. The docstring tells the LLM the same thing.
@tool
def subtract(x: float, y: float) -> float:
    """Subtract 'x' from 'y'."""
    return y - x

In [4]:
# Show the metadata the decorated tool exposes: its name and its description.
print(f"{add.name=}\n{add.description=}")

add.name='add'
add.description="Add 'x' and 'y'."


In [5]:
# JSON schema describing the arguments the tool accepts.
add.args_schema.model_json_schema()

{'description': "Add 'x' and 'y'.",
 'properties': {'x': {'title': 'X', 'type': 'number'},
  'y': {'title': 'Y', 'type': 'number'}},
 'required': ['x', 'y'],
 'title': 'add',
 'type': 'object'}

In [6]:
import json

# Hard-coded example of the JSON argument string an LLM emits for a tool call.
llm_output_string = '{"x": 5, "y": 2}'
# Decode it into a dict so it can be unpacked as keyword arguments for a tool.
llm_output_dict = json.loads(s=llm_output_string)
llm_output_dict

{'x': 5, 'y': 2}

In [7]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Prompt layout, in order: system instructions, earlier conversation turns,
# the new user input, then the scratchpad for the question being answered.
prompt = ChatPromptTemplate.from_messages([
    ("system", (
        "You're a helpful assistant. When answering a user's question "
        "you should first use one of the tools provided. After using a "
        "tool the tool output will be provided in the "
        "'scratchpad' below. If you have an answer in the "
        "scratchpad you should not use any more tools and "
        "instead answer directly to the user."
    )),
    # filled with a list of messages under the "chat_history" key
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
    # filled with the tool-call message and the tool-result message of each step
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

In [8]:
from langchain_core.runnables.base import RunnableSerializable

tools = [add, subtract, multiply, exponentiate]

# Agent pipeline: pick the prompt variables out of the input dict, render the
# prompt, then call the LLM with the tools bound to it.
agent: RunnableSerializable = (
    {
        "input": lambda inputs: inputs["input"],
        "chat_history": lambda inputs: inputs["chat_history"],
        # the scratchpad may be omitted on the first call, so default to empty
        "agent_scratchpad": lambda inputs: inputs.get("agent_scratchpad", []),
    }
    | prompt
    | llm.bind_tools(tools, tool_choice="any")  # "any" forces a tool call
)

In [9]:
# First agent step: no scratchpad is passed, so the LLM only sees the question.
tool_call = agent.invoke({"input": "What is 10 + 10", "chat_history": []})
tool_call

AIMessage(content='', additional_kwargs={'function_call': {'name': 'add', 'arguments': '{"x": 10.0, "y": 10.0}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run--e0f52b31-31e7-415e-8ca5-353f17930792-0', tool_calls=[{'name': 'add', 'args': {'x': 10.0, 'y': 10.0}, 'id': '7f63d10f-c179-4309-9738-998cfa21ed50', 'type': 'tool_call'}], usage_metadata={'input_tokens': 145, 'output_tokens': 5, 'total_tokens': 150, 'input_token_details': {'cache_read': 0}})

In [10]:
# Arguments the LLM generated for the first (and here only) tool call.
tool_call.tool_calls[0]["args"]

{'x': 10.0, 'y': 10.0}

In [11]:
# Map each tool's name to the plain Python function it wraps, so a tool call
# returned by the LLM can be dispatched by name.
name2tool = {t.name: t.func for t in tools}

name2tool

{'add': <function __main__.add(x: float, y: float) -> float>,
 'subtract': <function __main__.subtract(x: float, y: float) -> float>,
 'multiply': <function __main__.multiply(x: float, y: float) -> float>,
 'exponentiate': <function __main__.exponentiate(x: float, y: float) -> float>}

In [None]:
# Dispatch by tool name, then call the underlying function with the
# LLM-generated arguments unpacked as keyword arguments.
tool_exec_content = name2tool[tool_call.tool_calls[0]["name"]](
    **tool_call.tool_calls[0]["args"]
)
tool_exec_content   

20.0

In [13]:
from langchain_core.messages import ToolMessage

In [14]:
# Wrap the tool result in a ToolMessage; tool_call_id ties it back to the
# tool call the LLM made.
tool_exec = ToolMessage(
    content=f"The {tool_call.tool_calls[0]['name']} tool returned {tool_exec_content}",
    tool_call_id=tool_call.tool_calls[0]["id"]
)


In [15]:
# Second agent step: same question, but the scratchpad now carries the tool
# call and its result.
out = agent.invoke({
    "input": "What is 10 + 10",
    "chat_history": [],
    "agent_scratchpad": [tool_call, tool_exec]
})
out

AIMessage(content='', additional_kwargs={'function_call': {'name': 'add', 'arguments': '{"y": 10.0, "x": 10.0}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run--ba1eda45-0da6-453b-82cd-47cb3e7a932e-0', tool_calls=[{'name': 'add', 'args': {'y': 10.0, 'x': 10.0}, 'id': 'adce62d4-d4b0-4bbe-8d05-96aca32fa054', 'type': 'tool_call'}], usage_metadata={'input_tokens': 161, 'output_tokens': 5, 'total_tokens': 166, 'input_token_details': {'cache_read': 0}})

Despite having the answer in our `agent_scratchpad`, the LLM still tries to use the tool
_again_. This behaviour happens because we bound the tools to the LLM with
`tool_choice="any"`. When we set `tool_choice` to `"any"` or `"required"`, we tell the
LLM that it _MUST_ use a tool, i.e., it cannot provide a final answer.

There are two options to fix this:

1. Set `tool_choice="auto"` to tell the LLM that it can choose to use a tool or provide
a final answer.

2. Create a `final_answer` tool - we'll explain this shortly.

First, let's try option **1**:

In [16]:
# Same pipeline as before, except the LLM may now choose between calling a
# tool and answering directly.
agent: RunnableSerializable = (
    {
        "input": lambda inputs: inputs["input"],
        "chat_history": lambda inputs: inputs["chat_history"],
        # the scratchpad may be omitted on the first call, so default to empty
        "agent_scratchpad": lambda inputs: inputs.get("agent_scratchpad", []),
    }
    | prompt
    | llm.bind_tools(tools, tool_choice="auto")
)

In [17]:
# First step with the tool_choice="auto" agent: no scratchpad yet.
tool_call = agent.invoke({"input": "What is 10 + 10", "chat_history": []})
tool_call

AIMessage(content='', additional_kwargs={'function_call': {'name': 'add', 'arguments': '{"y": 10.0, "x": 10.0}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run--733ddf79-5f36-4682-a889-77f6d121818b-0', tool_calls=[{'name': 'add', 'args': {'y': 10.0, 'x': 10.0}, 'id': '79d61327-af2b-412a-ab02-9f70a6633023', 'type': 'tool_call'}], usage_metadata={'input_tokens': 145, 'output_tokens': 5, 'total_tokens': 150, 'input_token_details': {'cache_read': 0}})

In [18]:
# Execute the requested tool by name with the LLM-generated arguments.
tool_output = name2tool[tool_call.tool_calls[0]["name"]](
    **tool_call.tool_calls[0]["args"]
)

tool_output

20.0

In [19]:
# Package the tool result as a ToolMessage linked to the originating tool call.
tool_exec = ToolMessage(
    content=f"The {tool_call.tool_calls[0]['name']} tool returned {tool_output}",
    tool_call_id=tool_call.tool_calls[0]["id"]
)

# Re-invoke the agent with the call and its result in the scratchpad.
out = agent.invoke({
    "input": "What is 10 + 10",
    "chat_history": [],
    "agent_scratchpad": [tool_call, tool_exec]
})
out

AIMessage(content='10 + 10 is 20.0', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run--58aa998b-cd5d-4054-8957-6bc0210f7ed3-0', usage_metadata={'input_tokens': 161, 'output_tokens': 13, 'total_tokens': 174, 'input_token_details': {'cache_read': 0}})

We now have the final answer in the `content` field! This method is perfectly
functional; however, we recommend option **2** as it provides more control over the
agent's output.

Option **2** gives us more control for a few reasons:

* It removes the possibility of an agent using the direct `content` field when it is not
appropriate; for example, some LLMs (particularly smaller ones) may try to use the
`content` field when using a tool.

* We can enforce a specific structured output in our answers. Structured outputs are
handy when we require particular fields for downstream code or multi-part answers. For
example, a RAG agent may return a natural language answer and a list of sources used to
generate that answer.

To implement option **2**, we must create a `final_answer` tool. We will add a
`tools_used` field to give our output some structure—in a real-world use case, we
probably wouldn't want to generate this field, but it's useful for our example here.

In [20]:
# Terminal tool: the agent calls this instead of replying through `content`.
# It performs no work of its own; it only packages the LLM-provided fields
# into a dict. The docstring is sent to the LLM as the tool description, so it
# is kept word for word.
@tool
def final_answer(answer: str, tools_used: list[str]) -> dict:
    """Use this tool to provide a final answer to the user.
    The answer should be in natural language as this will be provided
    to the user directly. The tools_used must include a list of tool
    names that were used within the `scratchpad`.
    """
    # The return annotation is `dict` (not `str`) to match what is returned.
    return {"answer": answer, "tools_used": tools_used}

Our `final_answer` tool _doesn't_ necessarily need to do anything; in this example,
we're using it purely to structure our final response. We can now add this tool to our
agent:

In [21]:
# Rebuild the tool list with final_answer included alongside the math tools.
tools = [final_answer, add, subtract, multiply, exponentiate]

In [22]:
# Rebuild the name -> function lookup so it reflects the current `tools` list.
name2tool = {t.name: t.func for t in tools}

In [23]:
# Same pipeline again, bound to the current `tools` list.
agent: RunnableSerializable = (
    {
        "input": lambda inputs: inputs["input"],
        "chat_history": lambda inputs: inputs["chat_history"],
        # the scratchpad may be omitted on the first call, so default to empty
        "agent_scratchpad": lambda inputs: inputs.get("agent_scratchpad", []),
    }
    | prompt
    | llm.bind_tools(tools, tool_choice="any")  # we're forcing tool use again
)

In [24]:
# First step with the rebuilt agent: no scratchpad yet.
tool_call = agent.invoke({"input": "What is 10 + 10", "chat_history": []})
tool_call.tool_calls

[{'name': 'add',
  'args': {'x': 10.0, 'y': 10.0},
  'id': 'cbf89a47-10b7-4b3d-9dee-36f9dfa611eb',
  'type': 'tool_call'}]

In [25]:
# Execute the requested tool by name with the LLM-generated arguments.
tool_out = name2tool[tool_call.tool_calls[0]["name"]](
    **tool_call.tool_calls[0]["args"]
)


In [26]:
# Package the tool result as a ToolMessage linked to the originating tool call.
tool_exec = ToolMessage(
    content=f"The {tool_call.tool_calls[0]['name']} tool returned {tool_out}",
    tool_call_id=tool_call.tool_calls[0]["id"]
)

In [27]:
# Second step: the scratchpad now holds the tool call and its result.
out = agent.invoke({
    "input": "What is 10 + 10",
    "chat_history": [],
    "agent_scratchpad": [tool_call, tool_exec]
})
out

AIMessage(content='', additional_kwargs={'function_call': {'name': 'final_answer', 'arguments': '{"tools_used": ["add"], "answer": "The answer is 20.0"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run--646e0320-3235-4e14-9a01-f8bdd90f28b3-0', tool_calls=[{'name': 'final_answer', 'args': {'tools_used': ['add'], 'answer': 'The answer is 20.0'}, 'id': '00bbb45f-47ef-4052-97cd-234caa0197a6', 'type': 'tool_call'}], usage_metadata={'input_tokens': 228, 'output_tokens': 16, 'total_tokens': 244, 'input_token_details': {'cache_read': 0}})

In [28]:
# Inspect the tool calls on the second-step response.
out.tool_calls

[{'name': 'final_answer',
  'args': {'tools_used': ['add'], 'answer': 'The answer is 20.0'},
  'id': '00bbb45f-47ef-4052-97cd-234caa0197a6',
  'type': 'tool_call'}]

### Building a Custom Agent Execution Loop

We've worked through each step of our agent code, but nothing runs unless we execute
every step by hand. We'll now write a class that handles all of that logic for us.

In [29]:
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage

In [30]:
class CustomAgentExecutor:
    """Run the tool-calling agent in a loop until it produces a final answer.

    Each iteration asks the LLM for a tool call, runs the matching function
    from the notebook-level ``name2tool`` mapping, and appends both the call
    and its result to a per-request scratchpad. The loop ends when the LLM
    calls ``final_answer`` or after ``max_iterations`` steps, whichever comes
    first. Every completed exchange is appended to ``chat_history`` and is
    passed back to the LLM on later calls.

    Relies on the notebook-level names ``prompt``, ``llm`` and ``tools`` (read
    once in ``__init__``) and ``name2tool`` and ``json`` (read on each call).
    """

    chat_history: list[BaseMessage]

    def __init__(self, max_iterations: int = 3):
        """Build the agent pipeline.

        Args:
            max_iterations: Upper bound on LLM/tool round trips per ``invoke``.
        """
        self.chat_history = []
        self.max_iterations = max_iterations
        self.agent: RunnableSerializable = (
            {
                "input": lambda x: x["input"],
                "chat_history": lambda x: x["chat_history"],
                "agent_scratchpad": lambda x: x.get("agent_scratchpad", [])
            }
            | prompt
            | llm.bind_tools(tools, tool_choice="any")  # forcing tool use
        )

    def invoke(self, input: str) -> str:
        """Answer ``input`` by looping over tool calls until ``final_answer``.

        Args:
            input: The user's question.

        Returns:
            A JSON string with the keys ``"answer"`` and ``"tools_used"``. If
            the iteration budget runs out before the LLM calls
            ``final_answer``, the answer is ``"No answer found"`` and
            ``tools_used`` is an empty list.
        """
        # Fallback result, replaced only when the LLM actually calls
        # final_answer. Without it, hitting max_iterations would leave a
        # non-dict tool result (or nothing at all) to index into.
        result = {"answer": "No answer found", "tools_used": []}
        agent_scratchpad = []
        for count in range(self.max_iterations):
            # ask the LLM for the next tool call
            tool_call = self.agent.invoke({
                "input": input,
                "chat_history": self.chat_history,
                "agent_scratchpad": agent_scratchpad
            })
            # record the tool call itself in the scratchpad
            agent_scratchpad.append(tool_call)
            # NOTE(review): only the first tool call of a response is executed;
            # any additional calls in the same response are ignored.
            requested = tool_call.tool_calls[0]
            tool_name = requested["name"]
            tool_args = requested["args"]
            # execute the tool and record its output in the scratchpad
            tool_out = name2tool[tool_name](**tool_args)
            agent_scratchpad.append(
                ToolMessage(content=f"{tool_out}", tool_call_id=requested["id"])
            )
            # print each step so the intermediate tool calls are visible
            print(f"{count}: {tool_name}({tool_args})")
            # a final_answer call ends the loop; its output is the result
            if tool_name == "final_answer":
                result = tool_out
                break
        # remember the exchange so later calls see it as conversation history
        self.chat_history.extend([
            HumanMessage(content=input),
            AIMessage(content=result["answer"])
        ])
        # serialise the answer dict to a JSON string
        return json.dumps(result)

In [31]:
# Instantiate the executor with its default iteration limit.
agent_executor = CustomAgentExecutor()

In [32]:
# Run the full loop; each intermediate tool call is printed as it happens.
agent_executor.invoke(input="What is 10 + 10")

0: add({'y': 10.0, 'x': 10.0})
1: final_answer({'tools_used': ['add'], 'answer': 'The answer is 20.'})


'{"answer": "The answer is 20.", "tools_used": ["add"]}'

We then get our answer and the tools that were used — all through our custom agent
executor.