In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B-Base", trust_remote_code=True)

In [3]:
import json

with open("functions.json", "r") as f:
    functions = json.load(f)

In [4]:
messages = [
    {
        "role": "system",
        "content": "Your are an ai assistant that helps people plan travels.",
    },
    {"role": "user", "content": "Best places to visit in Paris and London?"},
 ]

In [5]:
prompt = tokenizer.apply_chat_template(messages, tools=functions, tokenize=False, add_generation_prompt=True)
print(prompt)

<|im_start|>system
Your are an ai assistant that helps people plan travels.

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "search_web", "description": "Search the web for the given query. Returns an array of result strings (title + URL).", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query text"}}, "required": ["query"]}}
{"name": "search_flight", "description": "Search for round-trip flights for the given dates and number of passengers. Returns an array of flight objects (airline, flightNumber, departureDateTime, arrivalDateTime, duration, stops, price {amount, currency}, bookingUrl). Dates should be ISO 8601 (YYYY-MM-DD).", "parameters": {"type": "object", "properties": {"departureDate": {"type": "string", "description": "Outbound departure date in YYYY-MM-DD (ISO 8601).", "format": "date"}, "returnDate": {"ty

## Try String Function Definitions

In [10]:
str_functions = [json.dumps(func) for func in functions ]
str_functions

['{"name": "search_web", "description": "Search the web for the given query. Returns an array of result strings (title + URL).", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query text"}}, "required": ["query"]}}',
 '{"name": "search_flight", "description": "Search for round-trip flights for the given dates and number of passengers. Returns an array of flight objects (airline, flightNumber, departureDateTime, arrivalDateTime, duration, stops, price {amount, currency}, bookingUrl). Dates should be ISO 8601 (YYYY-MM-DD).", "parameters": {"type": "object", "properties": {"departureDate": {"type": "string", "description": "Outbound departure date in YYYY-MM-DD (ISO 8601).", "format": "date"}, "returnDate": {"type": "string", "description": "Return date in YYYY-MM-DD (ISO 8601).", "format": "date"}, "passengers": {"type": "integer", "description": "Number of passengers (integer >= 1).", "minimum": 1}}, "required": ["departureDate", "retu

In [11]:
prompt = tokenizer.apply_chat_template(messages, tools=str_functions, tokenize=False, add_generation_prompt=True)
print(prompt)

ValueError: Tools should either be a JSON schema, or a callable function with type hints and a docstring suitable for auto-conversion to a schema.

## Try Function Call Dataset

In [12]:
from datasets import load_dataset

In [2]:
model = AutoModelForCausalLM.from_pretrained("unsloth/Qwen3-1.7B-unsloth-bnb-4bit")

The 8-bit optimizer is not available on your device, only available on CUDA for now.


model.safetensors:   0%|          | 0.00/1.41G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/237 [00:00<?, ?B/s]

Some parameters are on the meta device device because they were offloaded to the cpu.


In [None]:
travel_function_call_ds = load_dataset("habanoz/travel_function_call_dataset")['train']

In [17]:
travelf_functions_str = travel_function_call_ds[0]['available_functions']

In [18]:
type(travelf_functions_str)

str

In [19]:
prompt = tokenizer.apply_chat_template(messages, tools=travelf_functions_str, tokenize=False, add_generation_prompt=True)
print(prompt)

ValueError: Tools should either be a JSON schema, or a callable function with type hints and a docstring suitable for auto-conversion to a schema.

In [20]:
prompt = tokenizer.apply_chat_template(messages, tools=json.loads(travelf_functions_str), tokenize=False, add_generation_prompt=True)
print(prompt)

<|im_start|>system
Your are an ai assistant that helps people plan travels.

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "list_attractions", "description": "List top attractions for the given city. Returns an array of attraction objects (name, address, shortDescription, rating, category).", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "City name or query (e.g., 'Paris, France' or 'Seattle, WA')."}, "limit": {"type": "integer", "description": "Maximum number of attractions to return.", "minimum": 1, "default": 10}}, "required": ["city"]}}
{"name": "search_hotel", "description": "Search for hotels for the given check-in/check-out dates and number of guests. Returns an array of hotel objects (name, address, rating, availableRooms, price {amount, currency}, bookingUrl). Dates should be ISO 8601 (YYYY-MM-DD).", "parameters":

In [14]:
 text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

inputs = tokenizer(text, return_tensors="pt")
response_ids = model.generate(**inputs, max_new_tokens=512)[0][len(inputs.input_ids[0]):].tolist()
response = tokenizer.decode(response_ids, skip_special_tokens=True)

KeyboardInterrupt: 

In [None]:
 text = tokenizer.apply_chat_template(
            messages,
            tools=functions,
            tokenize=False,
            add_generation_prompt=True
        )

inputs = tokenizer(text, return_tensors="pt")
response_ids = model.generate(**inputs, max_new_tokens=512)[0][len(inputs.input_ids[0]):].tolist()
response = tokenizer.decode(response_ids, skip_special_tokens=True)

In [15]:
!pip install -U gguf

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting gguf
  Downloading gguf-0.17.1-py3-none-any.whl.metadata (4.3 kB)
Downloading gguf-0.17.1-py3-none-any.whl (96 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m665.4 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: gguf
Successfully installed gguf-0.17.1


In [1]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("unsloth/Qwen3-1.7B-Base")
model = AutoModelForCausalLM.from_pretrained("unsloth/Qwen3-1.7B-GGUF", gguf_file="Qwen3-1.7B-Q4_K_M.gguf")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

Qwen3-1.7B-Q4_K_M.gguf:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Converting and de-quantizing GGUF tensors...:   0%|          | 0/310 [00:00<?, ?it/s]

ValueError: Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating

In [2]:

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-1.7B-Base")

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [13]:
model = AutoModelForCausalLM.from_pretrained("unsloth/Qwen3-4B-GGUF", gguf_file="Qwen3-4B-Q4_K_M.gguf")

config.json:   0%|          | 0.00/752 [00:00<?, ?B/s]

Qwen3-4B-Q4_K_M.gguf:   0%|          | 0.00/2.50G [00:00<?, ?B/s]

Converting and de-quantizing GGUF tensors...:   0%|          | 0/398 [00:00<?, ?it/s]

In [3]:
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

<think>
Okay, the user is asking, "Who are you?" I need to respond in a friendly and helpful way. Let me think about how to approach this.

First, I should introduce myself


In [14]:
 text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False
        )

inputs = tokenizer(text, return_tensors="pt")
response_ids = model.generate(**inputs, max_new_tokens=120)[0][len(inputs.input_ids[0]):].tolist()
response = tokenizer.decode(response_ids, skip_special_tokens=True)
response


"Europe is a continent of incredible diversity, with a rich mix of history, culture, natural beauty, and modern attractions. Here's a curated list of the **best places to visit in Europe**, categorized by type of interest:\n\n---\n\n## 🌍 **Top Destinations in Europe**\n\n### 🏛️ **Historical & Cultural Cities**\n1. **Paris, France**  \n   - Iconic landmarks: Eiffel Tower, Louvre Museum, Notre-Dame Cathedral, Montmartre.  \n   - A blend of art, fashion, and history.\n\n2. **Rome,"

In [19]:
 text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
            tools=functions
        )
inputs = tokenizer(text, return_tensors="pt")
response_ids = model.generate(**inputs, max_new_tokens=120)[0][len(inputs.input_ids[0]):].tolist()
response = tokenizer.decode(response_ids, skip_special_tokens=True)
response

'<tool_call>\n{"name": "list_attractions", "arguments": {"city": "Paris", "limit": 10}}\n</tool_call>\n<tool_call>\n{"name": "list_attractions", "arguments": {"city": "London", "limit": 10}}\n</tool_call>'

In [20]:
print(response)

<tool_call>
{"name": "list_attractions", "arguments": {"city": "Paris", "limit": 10}}
</tool_call>
<tool_call>
{"name": "list_attractions", "arguments": {"city": "London", "limit": 10}}
</tool_call>


In [12]:
print(text)

<|im_start|>system
Your are an ai assistant that helps people plan travels.

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "search_web", "description": "Search the web for the given query. Returns an array of result strings (title + URL).", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query text"}}, "required": ["query"]}}
{"name": "search_flight", "description": "Search for round-trip flights for the given dates and number of passengers. Returns an array of flight objects (airline, flightNumber, departureDateTime, arrivalDateTime, duration, stops, price {amount, currency}, bookingUrl). Dates should be ISO 8601 (YYYY-MM-DD).", "parameters": {"type": "object", "properties": {"departureDate": {"type": "string", "description": "Outbound departure date in YYYY-MM-DD (ISO 8601).", "format": "date"}, "returnDate": {"ty