In [1]:
from mlx_lm import load, generate
import datetime
import random

model, tokenizer = load("mlx-community/Qwen2.5-14B-Instruct-1M-4bit") #mlx-community/Qwen2.5-7B-Instruct-1M-4bit")

Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

In [2]:
def temperature_tool(location: str):
    """
    The tool provides the current temperature for a given location (city name). The location needs to be an actual city name and not a [placeholder value]
    
    Args:
        location: The name of the city whose temperature to obtain
    Returns:
        The temperature of the requested city
    """
    return random.uniform(-5, 20)

def heating_tool(room: str, temperature: float):
    """
    The tool sets the temperature for a given roomname. The response is an indication whether the change of temperature was successful or not
    
    Args:
        room: The name of the room where to set the temperature
        temperature: The desired temperature in degree celsius
    Returns:
        a boolean indicating whether setting the temperature was succesfull or not
    """
    return { 'temperature': temperature, 'room': room, 'temperature_changed_successfully': True }

def current_time():
    """Get the current local time as a string."""
    return str(datetime.now())

tools = [temperature_tool, heating_tool, current_time]
tools_dict = {
    'temperature_tool': temperature_tool,
    'heating_tool': heating_tool,
    'current_time': current_time
}

In [3]:
messages = [
    {"role": "system", "content": """
    You are an AI assistant specialized in tool-based problem solving. 
    Your expertise lies in analyzing questions to determine whether to use specialized tools or provide direct knowledge-based responses, 
    ensuring optimal resource utilization. If you are lacking information from the user, ask for further information before making a decision. 
    Never make up any data for your responses but use the provided information only unless it is common knowledge.
    Never fill in missing data you need from the customer unless explicitly being asked for it.
    Format your response as XML with the following format:
    <answer>
     <response>[your answer or question or tool invocation]</response>
     <tool_call>[if applicable: what tool to call and with which parameters]</tool_call>
     <completed>[true is the conversation is done, or false if further customer responses are needed]</completed>
    </answer>
    """},
    {"role": "user", "content": "what is the current temperature?"}
]

In [4]:
from bs4 import BeautifulSoup
import json

def handle_model(messages):
    while True:
        messages.append({"role": "assistant", "content": "<answer>"})
        prompt = tokenizer.apply_chat_template(
            messages, add_generation_prompt=False, tools=tools, continue_final_message=True
        )
        response = generate(model, tokenizer, prompt=prompt, verbose=True)
        soup = BeautifulSoup(f'<answer>{response}', 'xml')
        soup = soup.find('answer')
        print(soup)
        is_done = (soup.find('completed').text == 'true')
        print(soup.find('response').text)
        messages[-1]['content'] += response

        if is_done:
            break
        else:
            if soup.find('tool_call'):
                tool_call = json.loads(soup.find('tool_call').text)
                tool_output = tools_dict[tool_call['name']](**tool_call['arguments'])

                messages.append({"role": "tool", "name": tool_call['name'], "content": str(tool_output)})
            else:
                answer = input()
            messages.append({"role": "user", "content": answer })

In [5]:
handle_model(messages)


 <response>To provide you with the current temperature, I need to know the name of the city you are interested in.</response> 
 <completed>false</completed> 
</answer>
Prompt: 526 tokens, 101.800 tokens-per-sec
Generation: 39 tokens, 10.030 tokens-per-sec
Peak memory: 8.694 GB
<answer>
<response>To provide you with the current temperature, I need to know the name of the city you are interested in.</response>
<completed>false</completed>
</answer>
To provide you with the current temperature, I need to know the name of the city you are interested in.


 Seattle



 <response>Retrieving the current temperature for Seattle.</response> 
 <tool_call>
{"name": "temperature_tool", "arguments": {"location": "Seattle"}}
</tool_call>
 <completed>false</completed> 
</answer>
Prompt: 577 tokens, 99.594 tokens-per-sec
Generation: 47 tokens, 10.644 tokens-per-sec
Peak memory: 8.694 GB
<answer>
<response>Retrieving the current temperature for Seattle.</response>
<tool_call>
{"name": "temperature_tool", "arguments": {"location": "Seattle"}}
</tool_call>
<completed>false</completed>
</answer>
Retrieving the current temperature for Seattle.

 <response>The current temperature in Seattle is approximately 4 degrees Celsius.</response> 
 <completed>true</completed> 
</answer>
Prompt: 668 tokens, 103.339 tokens-per-sec
Generation: 29 tokens, 11.001 tokens-per-sec
Peak memory: 8.694 GB
<answer>
<response>The current temperature in Seattle is approximately 4 degrees Celsius.</response>
<completed>true</completed>
</answer>
The current temperature in Seattle is approx