<a href="https://colab.research.google.com/github/addamit/py-howtos/blob/master/toolcall.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install transformers




In [2]:
import json
import re
from typing import Optional

from jinja2 import Template
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.utils import get_json_schema


system_prompt = Template("""You are an expert in composing functions. You are given a question and a set of possible functions.
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
If none of the functions can be used, point it out and refuse to answer.
If the given question lacks the parameters required by the function, also point it out.

You have access to the following tools:
<tools>{{ tools }}</tools>

The output MUST strictly adhere to the following format, and NO other text MUST be included.
The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make the tool calls an empty list '[]'.
<tool_call>[
{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},
... (more tool calls as required)
]</tool_call>""")


def prepare_messages(
    query: str,
    tools: Optional[dict[str, any]] = None,
    history: Optional[list[dict[str, str]]] = None
) -> list[dict[str, str]]:
    """Prepare the system and user messages for the given query and tools.

    Args:
        query: The query to be answered.
        tools: The tools available to the user. Defaults to None, in which case if a
            list without content will be passed to the model.
        history: Exchange of messages, including the system_prompt from
            the first query. Defaults to None, the first message in a conversation.
    """
    if tools is None:
        tools = []
    if history:
        messages = history.copy()
        messages.append({"role": "user", "content": query})
    else:
        messages = [
            {"role": "system", "content": system_prompt.render(tools=json.dumps(tools))},
            {"role": "user", "content": query}
        ]
    return messages


def parse_response(text: str) -> str | dict[str, any]:
    """Parses a response from the model, returning either the
    parsed list with the tool calls parsed, or the
    model thought or response if couldn't generate one.

    Args:
        text: Response from the model.
    """
    pattern = r"<tool_call>(.*?)</tool_call>"
    matches = re.findall(pattern, text, re.DOTALL)
    if matches:
        return json.loads(matches[0])
    return text


model_name_smollm = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_name_smollm, device_map="auto", torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name_smollm)

from datetime import datetime
import random

def get_current_time() -> str:
    """Returns the current time in 24-hour format.

    Returns:
        str: Current time in HH:MM:SS format.
    """
    return datetime.now().strftime("%H:%M:%S")


def get_random_number_between(min: int, max: int) -> int:
    """
    Gets a random number between min and max.

    Args:
        min: The minimum number.
        max: The maximum number.

    Returns:
        A random number between min and max.
    """
    return random.randint(min, max)


tools = [get_json_schema(get_random_number_between), get_json_schema(get_current_time)]

toolbox = {"get_random_number_between": get_random_number_between, "get_current_time": get_current_time}

query = "Give me a number between 1 and 300"



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/792 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.42G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.76k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/801k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.10M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/655 [00:00<?, ?B/s]

In [3]:
toolbox

{'get_random_number_between': <function __main__.get_random_number_between(min: int, max: int) -> int>,
 'get_current_time': <function __main__.get_current_time() -> str>}

In [4]:

messages = prepare_messages(query, tools=tools)



In [5]:
print(messages)

[{'role': 'system', 'content': 'You are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you will need to make one or more function/tool calls to achieve the purpose.\nIf none of the functions can be used, point it out and refuse to answer.\nIf the given question lacks the parameters required by the function, also point it out.\n\nYou have access to the following tools:\n<tools>[{"type": "function", "function": {"name": "get_random_number_between", "description": "Gets a random number between min and max.", "parameters": {"type": "object", "properties": {"min": {"type": "integer", "description": "The minimum number."}, "max": {"type": "integer", "description": "The maximum number."}}, "required": ["min", "max"]}, "return": {"type": "integer", "description": "A random number between min and max."}}}, {"type": "function", "function": {"name": "get_current_time", "description": "Returns the current time in 24-hour format."

In [6]:
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")##.to(model.device)


In [7]:
inputs.shape

torch.Size([1, 424])

In [8]:
# send model to gpu if available
model.device

device(type='cuda', index=0)

In [10]:
outputs = model.generate(inputs.to(model.device), max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id).to(model.device)
result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)

In [11]:


tool_calls = parse_response(result)
# [{'name': 'get_random_number_between', 'arguments': {'min': 1, 'max': 300}}

# Get tool responses
tool_responses = [toolbox.get(tc["name"])(*tc["arguments"].values()) for tc in tool_calls]
# [63]

# For the second turn, rebuild the history of messages:
history = messages.copy()
# Add the "parsed response"
history.append({"role": "assistant", "content": result})
query = "Can you give me the hour?"
history.append({"role": "user", "content": query})



In [12]:
result

'<tool_call>[{"name": "get_random_number_between", "arguments": {"min": 1, "max": 300}}]</tool_call>'

In [14]:
tool_calls

[{'name': 'get_random_number_between', 'arguments': {'min': 1, 'max': 300}}]

In [27]:
def replace_dob_with_mask(dob: str) -> str:
    """
    Gets a string with date of birth.

    Args:
        dob: The date of birth

    Returns:
        A masked string to replace the actual date of birth.
    """
    return "MASKED-DOB"

def replace_creditcard_with_mask(cc: str) -> str:
    """
    Gets a string with credit card number.

    Args:
        cc: The credit card number

    Returns:
        A masked string to replace the actual credit card.
    """
    return "MASKED-CC"


tools = [get_json_schema(get_random_number_between), get_json_schema(get_current_time), get_json_schema(replace_dob_with_mask), get_json_schema(replace_creditcard_with_mask)]

toolbox = {"get_random_number_between": get_random_number_between, "get_current_time": get_current_time, "replace_dob_with_mask" : replace_dob_with_mask, "replace_creditcard_with_mask" : replace_creditcard_with_mask}

query = "My Date of Birth is 1996-03-15. My Credit card number is 1234-5678-1234-5678"



In [28]:

messages = prepare_messages(query, tools=tools)



In [29]:
print(messages)

[{'role': 'system', 'content': 'You are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you will need to make one or more function/tool calls to achieve the purpose.\nIf none of the functions can be used, point it out and refuse to answer.\nIf the given question lacks the parameters required by the function, also point it out.\n\nYou have access to the following tools:\n<tools>[{"type": "function", "function": {"name": "get_random_number_between", "description": "Gets a random number between min and max.", "parameters": {"type": "object", "properties": {"min": {"type": "integer", "description": "The minimum number."}, "max": {"type": "integer", "description": "The maximum number."}}, "required": ["min", "max"]}, "return": {"type": "integer", "description": "A random number between min and max."}}}, {"type": "function", "function": {"name": "get_current_time", "description": "Returns the current time in 24-hour format."

In [30]:
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")##.to(model.device)
outputs = model.generate(inputs.to(model.device), max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id).to(model.device)
result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)

In [31]:
tool_calls = parse_response(result)
print(tool_calls)

[{'name': 'replace_dob_with_mask', 'arguments': {'dob': '1996-03-15'}}, {'name': 'replace_creditcard_with_mask', 'arguments': {'cc': '1234-5678-1234-5678'}}]


In [76]:
# garbage collect the model
model = None
torch.cuda.empty_cache()

In [32]:
# Now trying with Llama 3.2 1B Instruct

In [43]:

from huggingface_hub import login
# Log in to your Hugging Face account
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [33]:
import torch
from transformers import pipeline

model_id = "meta-llama/Llama-3.2-1B-Instruct"

In [34]:

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.utils import get_json_schema


In [44]:
model_name = model_id
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)


config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

In [77]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("minpeter/Llama-3.2-1B-chatml-tool-v2")
model = AutoModelForCausalLM.from_pretrained("minpeter/Llama-3.2-1B-chatml-tool-v2")

tokenizer_config.json:   0%|          | 0.00/50.8k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/909 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/180 [00:00<?, ?B/s]

In [78]:
import json
import re
from typing import Optional

from jinja2 import Template
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.utils import get_json_schema


system_prompt = Template("""You are an expert in composing functions. You are given a question and a set of possible functions.
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
If none of the functions can be used, point it out and refuse to answer.
If the given question lacks the parameters required by the function, also point it out.

You have access to the following tools:
<tools>{{ tools }}</tools>

The output MUST strictly adhere to the following format, and NO other text MUST be included.
The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make the tool calls an empty list '[]'.
<tool_call>[
{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},
... (more tool calls as required)
]</tool_call>""")

# system_prompt = Template("""You are an helpful assistant""")

def prepare_messages(
    query: str,
    tools: Optional[dict[str, any]] = None,
    history: Optional[list[dict[str, str]]] = None
) -> list[dict[str, str]]:
    """Prepare the system and user messages for the given query and tools.

    Args:
        query: The query to be answered.
        tools: The tools available to the user. Defaults to None, in which case if a
            list without content will be passed to the model.
        history: Exchange of messages, including the system_prompt from
            the first query. Defaults to None, the first message in a conversation.
    """
    if tools is None:
        tools = []
    if history:
        messages = history.copy()
        messages.append({"role": "user", "content": query})
    else:
        messages = [
            {"role": "system", "content": system_prompt.render(tools=json.dumps(tools))},
            {"role": "user", "content": query}
        ]
    return messages


def parse_response(text: str) -> str | dict[str, any]:
    """Parses a response from the model, returning either the
    parsed list with the tool calls parsed, or the
    model thought or response if couldn't generate one.

    Args:
        text: Response from the model.
    """
    pattern = r"<tool_call>(.*?)</tool_call>"
    matches = re.findall(pattern, text, re.DOTALL)
    if matches:
        return json.loads(matches[0])
    return text


from datetime import datetime
import random

def get_current_time() -> str:
    """Returns the current time in 24-hour format.

    Returns:
        str: Current time in HH:MM:SS format.
    """
    return datetime.now().strftime("%H:%M:%S")


def get_random_number_between(min: int, max: int) -> int:
    """
    Gets a random number between min and max.

    Args:
        min: The minimum number.
        max: The maximum number.

    Returns:
        A random number between min and max.
    """
    return random.randint(min, max)


tools = [get_json_schema(get_random_number_between), get_json_schema(get_current_time)]

toolbox = {"get_random_number_between": get_random_number_between, "get_current_time": get_current_time}

query = "Give me a number between 1 and 300"



In [88]:
def replace_dob_with_mask(dob: str) -> str:
    """
    Gets a string with date of birth.

    Args:
        dob: The date of birth

    Returns:
        A masked string to replace the actual date of birth.
    """
    return "MASKED-DOB"

def replace_creditcard_with_mask(cc: str) -> str:
    """
    Gets a string with credit card number.

    Args:
        cc: The credit card number

    Returns:
        A masked string to replace the actual credit card.
    """
    return "MASKED-CC"


tools = [get_json_schema(get_random_number_between), get_json_schema(get_current_time), get_json_schema(replace_dob_with_mask), get_json_schema(replace_creditcard_with_mask)]

toolbox = {"get_random_number_between": get_random_number_between, "get_current_time": get_current_time, "replace_dob_with_mask" : replace_dob_with_mask, "replace_creditcard_with_mask" : replace_creditcard_with_mask}

query = "My Date of Birth is 1996-03-15. My Credit card number is 1234-5678-1234-5678"



In [89]:

messages = prepare_messages(query, tools=tools)


In [90]:
messages

[{'role': 'system',
  'content': 'You are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you will need to make one or more function/tool calls to achieve the purpose.\nIf none of the functions can be used, point it out and refuse to answer.\nIf the given question lacks the parameters required by the function, also point it out.\n\nYou have access to the following tools:\n<tools>[{"type": "function", "function": {"name": "get_random_number_between", "description": "Gets a random number between min and max.", "parameters": {"type": "object", "properties": {"min": {"type": "integer", "description": "The minimum number."}, "max": {"type": "integer", "description": "The maximum number."}}, "required": ["min", "max"]}, "return": {"type": "integer", "description": "A random number between min and max."}}}, {"type": "function", "function": {"name": "get_current_time", "description": "Returns the current time in 24-hour format

In [91]:
# Trying to see the message after chat template
formatted_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)



In [92]:
formatted_text

'<|im_start|>system\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you will need to make one or more function/tool calls to achieve the purpose.\nIf none of the functions can be used, point it out and refuse to answer.\nIf the given question lacks the parameters required by the function, also point it out.\n\nYou have access to the following tools:\n<tools>[{"type": "function", "function": {"name": "get_random_number_between", "description": "Gets a random number between min and max.", "parameters": {"type": "object", "properties": {"min": {"type": "integer", "description": "The minimum number."}, "max": {"type": "integer", "description": "The maximum number."}}, "required": ["min", "max"]}, "return": {"type": "integer", "description": "A random number between min and max."}}}, {"type": "function", "function": {"name": "get_current_time", "description": "Returns the current time in 24-hour format.", "paramete

In [93]:
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")##.to(model.device)


In [94]:
outputs = model.generate(inputs.to(model.device), max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id).to(model.device)
result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [95]:
result

'<tool_call>[\n{"name": "get_random_number_between", "arguments": {"min": 1996, "max": 1996}}, {"name": "get_current_time", "arguments": {}}, {"name": "replace_dob_with_mask", "arguments": {"dob": "1996-03-15"}}, {"name": "replace_creditcard_with_mask", "arguments": {"cc": "1234-5678-1234-5678"}}]\n</tool_call>'