# Part 3: Tool calling agent

In this final part of the workshop, we will let our LLM roam free! We'll build a tool-calling agent that can search the web, read webpages, and answer any question with up-to-date information!

![tool-calling-agent](./assets/tool_calling.png)

In [1]:
import os
from openai import OpenAI
import httpx

import inspect
from diskcache import Cache
from pydantic import BaseModel

from typing import Any
import jupyter_black
import requests
from markdownify import markdownify as md
from dotenv import load_dotenv
from googlesearch import search

# Load environment variables from the local .envrc file (presumably where
# DEEPSEEK_API_KEY is defined) before the key is read below.
load_dotenv(dotenv_path=".envrc")

# Presumably enables Black formatting of notebook cells; see jupyter_black docs.
jupyter_black.load()

# Indexing os.environ raises KeyError right away if the key is not set.
DEEPSEEK_API_KEY = os.environ["DEEPSEEK_API_KEY"]
DEEPSEEK_BASE_URL = "https://api.deepseek.com"
MODEL = "deepseek-chat"
# diskcache store rooted at ./cache; used below to memoize llm_call.
cache = Cache("./cache")
# OpenAI SDK client pointed at the DeepSeek base URL.
client = OpenAI(api_key=DEEPSEEK_API_KEY, base_url=DEEPSEEK_BASE_URL)

In [2]:
def to_openai_tool(func: callable) -> dict[str, Any]:
    """Describe ``func`` as an OpenAI-style function tool definition.

    ``func`` is expected to take either no parameters or a single parameter
    annotated with a class exposing ``model_json_schema()`` (e.g. a pydantic
    ``BaseModel`` subclass). Only the first parameter is inspected.

    Args:
        func: The tool implementation to describe.

    Returns:
        A dict with ``type`` set to ``"function"`` and a ``function`` entry
        holding the tool's ``name``, ``description`` and JSON-schema
        ``parameters``.
    """
    params = inspect.signature(func).parameters
    if params:
        param_type = next(iter(params.values())).annotation
        schema = param_type.model_json_schema()
        # Prefer the argument model's docstring, but fall back to the
        # function's own docstring (and finally "") when it is missing or
        # empty, so the description is always a string and never None.
        description = param_type.__doc__ or func.__doc__ or ""
    else:
        # Parameterless tool: advertise an empty object schema.
        schema = {"type": "object", "properties": {}, "required": []}
        description = func.__doc__ or ""

    return {
        "type": "function",
        "function": {
            "name": func.__name__,
            "description": description,
            "parameters": schema,
        },
    }


@cache.memoize(expire=3600)
def llm_call(
    messages: list[dict[str, str]], tools: list[dict[str, Any]], model: str
) -> Any:
    """Send one chat-completion request and return the assistant message.

    Results are memoized in the disk cache with ``expire=3600``, so repeating
    a call with the same arguments is served from the cache until it expires.

    Args:
        messages: Conversation so far, in chat-completions message format.
        tools: Tool definitions to offer the model; may be empty.
        model: Name of the model to query.

    Returns:
        The message object of the first choice in the response (not a plain
        string); callers read ``content`` and ``tool_calls`` from it.
    """
    request: dict[str, Any] = {
        "model": model,
        "messages": messages,
        "temperature": 0.0,
    }
    # tool_choice is only valid together with a tool list, so both fields are
    # omitted entirely when no tools are offered.
    if tools:
        request["tools"] = tools
        request["tool_choice"] = "auto"

    response = client.chat.completions.create(**request)
    message = response.choices[0].message

    # Drop the reasoning_content attribute when present — presumably so it is
    # not replayed to the API in follow-up requests (TODO confirm).
    if hasattr(message, "reasoning_content"):
        del message.reasoning_content

    return message

In [3]:
class SearchWeb(BaseModel):
    """Search the web for a given query."""

    # Search terms forwarded to the search backend.
    query: str
    # Number of results requested from the search backend (10 by default).
    max_results: int = 10


# def search_web(data: SearchWeb) -> str:
#     base_url = "https://api.marginalia.nu/{key}/search/{query}"
#     url = base_url.format(key="public", query=data.query)
#     url += f"?count={data.max_results}"

#     rsp = httpx.get(url)
#     rsp.raise_for_status()

#     results = rsp.json()["results"]
#     return str(results)


def search_web(data: SearchWeb) -> str:
    """Run a web search and render the hits as plain text.

    Each hit becomes a ``Title`` / ``Description`` / ``URL`` triple followed
    by a blank line. An empty string is returned when there are no hits.
    """
    hits = search(data.query, num_results=data.max_results, advanced=True)
    return "".join(
        f"Title: {hit.title}\nDescription: {hit.description}\nURL: {hit.url}\n\n"
        for hit in hits
    )


class ReadWebsite(BaseModel):
    """Read the content of a website and return it as text. (useful for further research)"""

    # Address of the page to fetch.
    url: str


def read_website(data: ReadWebsite) -> str:
    """Fetch a web page and return its content converted to Markdown.

    The response body is converted regardless of the HTTP status code, so
    error pages are returned as text instead of raising.

    Args:
        data: Validated arguments holding the URL to fetch.

    Returns:
        The page body converted from HTML to Markdown.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host,
    # which would stall the whole agent loop.
    response = requests.get(data.url, timeout=30)
    return md(response.text)


# Tool definitions (name, description, JSON schema) offered to the model.
TOOLS = [
    to_openai_tool(search_web),
    to_openai_tool(read_website),
]
# Maps each tool function to the model class used to validate its arguments.
FUNC_TYPES = {
    search_web: SearchWeb,
    read_website: ReadWebsite,
}

In [4]:
TOOLS

[{'type': 'function',
  'function': {'name': 'search_web',
   'description': 'Search the web for a given query.',
   'parameters': {'description': 'Search the web for a given query.',
    'properties': {'query': {'title': 'Query', 'type': 'string'},
     'max_results': {'default': 10,
      'title': 'Max Results',
      'type': 'integer'}},
    'required': ['query'],
    'title': 'SearchWeb',
    'type': 'object'}}},
 {'type': 'function',
  'function': {'name': 'read_website',
   'description': 'Read the content of a website and return it as text. (useful for further research)',
   'parameters': {'description': 'Read the content of a website and return it as text. (useful for further research)',
    'properties': {'url': {'title': 'Url', 'type': 'string'}},
    'required': ['url'],
    'title': 'ReadWebsite',
    'type': 'object'}}}]

In [5]:
# Try the search tool directly with a small query.
print(search_web(SearchWeb(query="Paris", max_results=2)))

Title: Paris - Wikipedia
Description:  Paris is the fourth-most populous city in the European Union and the 30th most densely populated city in the world in 2022. 
URL: https://en.wikipedia.org/wiki/Paris

Title: Paris - Wikipedia, den frie encyklopædi
Description:  Paris er Frankrigs hovedstad og ligger ved Seinen i det nordlige Frankrig, i hjertet af regionen Île-de-France. Paris har en anslået befolkning på 2.2 mio. ( ... 
URL: https://da.wikipedia.org/wiki/Paris




In [6]:
# Try the reader tool directly; newlines are flattened to spaces and only the
# first 500 characters are shown.
print(
    read_website(data=ReadWebsite(url="https://www.theguardian.com/europe")).replace(
        "\n", " "
    )[:500]
    + "..."
)

Latest news, sport and opinion from the Guardian                                                ![](https://sb.scorecardresearch.com/p?c1=2&c2=6035250&cv=2.0&cj=1&cs_ucfr=0&comscorekw=europe)   [Skip to main content](#maincontent)[Skip to navigation](#navigation)  [Print subscriptions](https://support.theguardian.com/subscribe/weekly?REFPVID=undefined&INTCMP=undefined&acquisitionData=%7B%22source%22%3A%22GUARDIAN_WEB%22%2C%22componentId%22%3A%22PrintSubscriptionsHeaderLink%22%2C%22componentType%...


In [7]:
class Agent:
    """Minimal tool-calling chat agent.

    Keeps the running conversation in ``self.messages``. Each call alternates
    between asking the model for a reply and executing the tool calls it
    requests, until the model answers without requesting a tool.
    """

    def __init__(
        self,
        model: str,
        system: str,
        tools: list[dict[str, Any]] | None = None,
        max_iters: int = 10,
        func_types: dict[Any, type[BaseModel] | None] | None = None,
    ):
        """Set up the agent.

        Args:
            model: Name of the model passed to ``llm_call``.
            system: System prompt; skipped when empty.
            tools: Tool definitions offered to the model.
            max_iters: Maximum number of tool-calling rounds per question.
            func_types: Maps each tool function to the model class used to
                validate its arguments. Only functions registered here can be
                invoked by the model.
        """
        self.model = model
        self.tools = tools or []
        # Holds plain role/content dicts as well as the message objects
        # returned by the model.
        self.messages: list[Any] = []
        self.func_types = func_types or {}
        if system:
            self.messages.append({"role": "system", "content": system})

        self.max_iters = max_iters

    def __call__(self, content: str, verbose: bool = False) -> str:
        """Answer a user message, running tools as requested by the model.

        Args:
            content: The user's message.
            verbose: When True, also print a preview of each tool result.

        Returns:
            The content of the first model reply that requests no tool call.

        Raises:
            KeyError: If the model requests a tool that is not registered in
                ``func_types``.
            RuntimeError: If the model is still requesting tools after
                ``max_iters`` rounds.
        """
        self.messages.append({"role": "user", "content": content})

        iterations = 0
        while True:
            iterations += 1
            message = self._send()
            self.messages.append(message)

            tool_calls = getattr(message, "tool_calls", None)
            if not tool_calls:
                return message.content

            for call in tool_calls:
                self.messages.append(
                    {
                        "role": "tool",
                        "tool_call_id": call.id,
                        "content": self._run_tool(call, verbose),
                    }
                )

            if iterations >= self.max_iters:
                raise RuntimeError("Max iterations reached")

    def _resolve_tool(self, name: str) -> tuple[Any, Any]:
        """Return the registered ``(function, args_model)`` pair for ``name``."""
        # Dispatch only to registered tools: the name comes from the model, so
        # looking it up in globals() would let it reach any module-level
        # object and would tie the agent to the module defining the tools.
        for func, args_model in self.func_types.items():
            if getattr(func, "__name__", None) == name:
                return func, args_model
        raise KeyError(f"Tool {name!r} is not registered in func_types")

    def _run_tool(self, call: Any, verbose: bool) -> str:
        """Validate the arguments of one tool call, run it, return its output."""
        func, args_model = self._resolve_tool(call.function.name)

        # Decide from the signature whether the tool takes an argument, so a
        # one-parameter tool is still handed its (default-filled) model when
        # the model sends "{}" or an empty argument string.
        if inspect.signature(func).parameters:
            args = args_model.model_validate_json(call.function.arguments or "{}")
        else:
            args = None

        print(f"** CALLING FUNCTION {func.__name__} **")
        print(f"*** ARGS ***\n{args}\n")

        result = func() if args is None else func(args)
        # Tool message content is sent back as text.
        if not isinstance(result, str):
            result = str(result)

        if verbose is True:
            print(f"*** RESULT ***\n{result[:100]}...\n***")

        return result

    def _send(self):
        """Request the next assistant message for the current conversation."""
        return llm_call(self.messages, self.tools, self.model)

In [8]:
# System prompt asking the model to follow a thought / action / observation
# pattern and to reuse tool arguments verbatim.
system_prompt = """
You are a helpful assistant who can answer multistep questions by sequentially calling functions. 

Follow a pattern of:
- THOUGHT (reason step-by-step about which function to call next)
- ACTION (call a function as a next step towards the final answer) 
- OBSERVATION (output of the function)

Reason step by step which actions to take to get to the answer. 

Only call functions with arguments coming verbatim from the user or the output of other functions.
"""

# Example question for the web-search agent below.
question = "Who was the best performing portuguese cyclist in the 2025 Tour de France?"


# Build the web-search agent and run it on the question. The model name comes
# from the MODEL constant so it is configured in a single place.
bot = Agent(system=system_prompt, tools=TOOLS, func_types=FUNC_TYPES, model=MODEL)
response = bot(question, verbose=True)
print("Response:", response)

** CALLING FUNCTION search_web **
*** ARGS ***
query='best performing Portuguese cyclist 2025 Tour de France' max_results=5

*** RESULT ***
Title: 2025 Tour de France: Ten Pro Cyclists Who Could Make History
Description:  8 Jul 2025  ·  And...
***
** CALLING FUNCTION read_website **
*** ARGS ***
url='https://www.france24.com/en/sport/20250704-tour-de-france-2025-the-top-14-riders-cycling-vingegaard-pogacar-evenepoel'

*** RESULT ***
Access denied

You don't have permission to access the page you requested.

**What is ...
***
** CALLING FUNCTION read_website **
*** ARGS ***
url='https://cyclinguptodate.com/cycling/analysis-joao-almeidas-2025-season-a-year-of-transformation-and-triumph'

*** RESULT ***
ANALYSIS: João Almeida's 2025 Season - A year of transformation and triumph

[![](https://r.testifie...
***
** CALLING FUNCTION read_website **
*** ARGS ***
url='https://www.bicycling.com/tour-de-france/a65267789/2025-tour-de-france-riders-watch/'

*** RESULT ***
2025 Tour de France: Ten P

## **Exercise**


- Build an agent that can answer the question: `What fell from the strange captain’s hand into the sea when he first tried to respond to Ahab’s hail?`
- Use the [markitdown](https://github.com/microsoft/markitdown) library and the [rank-bm25](https://pypi.org/project/rank-bm25/) library to build a book-search tool for the agent

In [None]:
# Same system prompt text as in the web-search example above.
system_prompt = """
You are a helpful assistant who can answer multistep questions by sequentially calling functions. 

Follow a pattern of:
- THOUGHT (reason step-by-step about which function to call next)
- ACTION (call a function as a next step towards the final answer) 
- OBSERVATION (output of the function)

Reason step by step which actions to take to get to the answer. 

Only call functions with arguments coming verbatim from the user or the output of other functions.
"""

# Exercise question to be answered with the book-search tool.
question = "What fell from the strange captain’s hand into the sea when he first tried to respond to Ahab’s hail?"


class SearchBook(BaseModel):
    """Search the book for passages relevant to a given query."""

    # The docstring above is what to_openai_tool publishes as the tool's
    # description; without it the model sees the tool with no description.

    # Search terms to look for in the book.
    query: str
    # Presumably the maximum number of passages to return; the placeholder
    # search_book implementation does not use it yet.
    total_results: int = 10


# Exercise placeholder: ignores its input and always returns the same string.
# Replace the body with a real search over the book text.
def search_book(data: SearchBook) -> str:
    # Tips:
    # Load the book into a string, split it into chunks, and use the bm25 library to search the chunks...
    return "An apple."  # this is obviously wrong :)


# Rebind the tool registry for the exercise: only the book-search tool is
# offered to the model here.
TOOLS = [
    to_openai_tool(search_book),
]
FUNC_TYPES = {
    search_book: SearchBook,
}


# The model name comes from the MODEL constant so it is configured in a
# single place.
bot = Agent(system=system_prompt, tools=TOOLS, func_types=FUNC_TYPES, model=MODEL)
response = bot(question, verbose=True)
print("Response:", response)

** CALLING FUNCTION search_book **
*** ARGS ***
query="Moby-Dick strange captain's hand fell into the sea Ahab's hail" total_results=1

*** RESULT ***
An apple....
***
Response: The object that fell from the strange captain’s hand into the sea when he first tried to respond to Ahab’s hail was an apple.
