In [None]:
pip install mlx-lm 'accelerate>=0.26.0'

In [1]:
import litellm
from mlx_lm import load, generate

class HuggingFaceLLM(litellm.CustomLLM):
    def __init__(self):
        super().__init__()
        model, tokenizer = load("mlx-community/Llama-3.2-3B-Instruct-4bit")
        
        self.model = model
        self.tokenizer = tokenizer
    
    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        messages = kwargs.get("messages", [])
        
        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        response = generate(self.model, self.tokenizer, prompt=prompt, verbose=False)
        print('LLM', messages, response)
        
        return litellm.completion(
            model="custom-llm/your-model",
            messages=messages,
            mock_response=response,
        )

In [2]:
hf_llm = HuggingFaceLLM()

litellm.custom_provider_map = [
    {"provider": "custom-llm", "custom_handler": hf_llm}
]

litellm.completion(
    model="custom-llm/your-model",
    messages=[{"role": "user", "content": "Hello world!"}]
)

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

LLM [{'role': 'user', 'content': 'Hello world!'}] Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?

[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



ModelResponse(id='chatcmpl-8a3b8118-7259-4746-8c9f-902c00f94e2a', created=1735550077, model='your-model', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content="Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?", role='assistant', tool_calls=None, function_call=None))], usage=Usage(completion_tokens=20, prompt_tokens=10, total_tokens=30, completion_tokens_details=None, prompt_tokens_details=None))

In [20]:
messages=[
    { "role": "system", "content": """
You are an AI assistant specialized in tool-based problem solving. Your expertise lies in analyzing questions to determine whether to use specialized tools or provide direct knowledge-based responses, ensuring optimal resource utilization. If you are lacking information from the user, ask for further information before making a decision. Do not make up any data for your responses but use the provided information only unless it is common knowledge.

<documentation>
<tools>
<tool1>
<tool_name>temperature_tool<tool_name>
<tool_explanation>For a given location, it provides the current temperature. The location needs to be an actual city name and not a [placeholder value]. Passing a placeholder value will cause an error when calling the tool<tool_explanation>
<tool_input_format>{ "location": "[placeholder for the city name]" }
</tool1>
<tool2>
<tool_name>heating_tool<tool_name>
<tool_explanation>For a given roomname, it sets the heater to the specified temperature. The supported room names are 'living room' or 'bedroom'. The response will be a confirmation on whether wsetting the temperature was successful<tool_explanation>
<tool_input_format>{ "room": "[placeholder for the room name]", "temperature": [desired temperature] }
</tool2>
</tools>
</documentation>

<examples>
<example1>
Input: "What's the temperature like in Paris?"
Assistant: <output>
<thinking>
1. This question requires current temperature data
2. We have a temperature tool available
3. Direct response without real-time data would be insufficient
4. Temperature tool requires a location parameter
5. I am not done with the user's question yet as I wait for the tool response to write an answer to the user's question
</thinking>
<decision>
{
  "use_tool": true, 
  "tool": "temperature_tool",
  "tool_input": {"location": "Paris"},
  "finished": false
}
</decision>
</output>
</example1>

<example2>
Inpur from temperature_tool: { "tool": "temperature_tool", "tool_input": {"location": "Paris"}, "temperature": 10.0 }
Assistant: <output>
<thinking>
1. We have received the temperature information for Paris from the temperature_tool
2. We do not need to call any tool as we have all information available to answer the user's question for the temperature in Paris
3. No real-time data or computation needed
4. Can answer directly from provided knowledge
5. I do not need further processing steps and I am done answering the user's question
</thinking>
<decision>
{
  "use_tool": false,
  "response": "The temperature is Paris is 10.0 degrees.",
  "finished": true
}
</decision>
</output>
</example2>

<example3>
Input: "What's the temperature at my position?"
Assistant: <output>
<thinking>
1. This question requires current temperature data
2. We have a temperature tool available
3. The temperature tool requires a location parameter
4. We do not know the user's location. We need to ask for the user's location first. I must not use the temperature tool yet as I do not have all necessary information. I must not use a placeholder value surrounded by [] as tool input
5. I am not done with the user's question yet as I require additional user input to process their question
</thinking>
<decision>
{
  "use_tool": false,
  "question": "In order to check the temperature at your location, I need to know your current location. What is you current location?",
  "finished": false
}
</decision>
</output>
</example3>

<example4>
Input: "What's the capital of France?"
Assistant Answer: <output>
<thinking>
1. This is a factual question
2. Information is static and well-known
3. No real-time data or computation needed
4. Can answer directly from knowledge
5. I do not need further processing steps and I am done answering the user's question
</thinking>
<decision>
{
  "use_tool": false,
  "response": "The capital of France is Paris.",
  "finished": true
}
</decision>
</output>
</example4>

<example5>
Input: "Adjust my heater to 15 degrees"
Assistant: <output>
<thinking>
1. We need to adjust the temperature of a heater
2. We have a heating tool available
3. The heating tool requires a room parameter and atemperature parameter
4. We do not know the room in which to set the temperature. We need to ask for the room name first. I must not use the heating tool yet as I do not have all necessary information. I must not use a placeholder value surrounded by [] as tool input
5. I am not done with the user's question yet as I require additional user input to process their question
</thinking>
<decision>
{
  "use_tool": false,
  "question": "In order to set the heating temperature, I need to know in which room to change the temperature. What is the room name in which to set the temperature to 15 degrees?",
  "finished": false
}
</decision>
</output>
</example5>

<example6>
Input: "Adjust the temperature of my bedroom heater"
Assistant: <output>
<thinking>
1. We need to adjust the temperature of a heater
2. We have a heating tool available
3. The heating tool requires a room parameter and atemperature parameter
4. We do not know the temperature to set the heater to. We need to ask for the target temperature first. I must not use the heating tool yet as I do not have all necessary information. I must not use a placeholder value surrounded by [] as tool input
5. I am not done with the user's question yet as I require additional user input to process their question
</thinking>
<decision>
{
  "use_tool": false,
  "question": "In order to set the heating temperature, I need to know the target temperature to set to change the temperature. What is the target temperature for the bedroom heater?",
  "finished": false
}
</decision>
</output>
</example6>
</examples>

<instructions>
1. Analyze the user's question carefully
2. Review available tools and their capabilities, understand what information needs to be passed to each tool and whether this data is available
3. Determine if the question requires a tool based on these criteria:
   - Needs real-time or dynamic data
   - Requires specific computation or processing
   - Involves data lookup or API calls
   - Cannot be answered with static knowledge
4. If a tool is needed:
   - Select the most appropriate tool
   - Determine whether all information is available to fill in the tool inputs, do not call a tool without having all required information at hand. If I call a tool with a [placeholder] value this will result in an error and I must avoid causing errors
   - Format the tool inputs according to the tool's specification, do NOT use values such as "[user's location]" or "[placeholder]" under any circumstances
   - Do not make up the tool's response but wait for the result to be provided back to you
   - When using a tool, the processing is not done yet as the tool's response needs to be processed before having completed answering the user's question
5. If no tool is needed:
   - Provide a direct, informative response
   - Processing is finished afterwards
6. If further information is required from the user:
   - Ask a question for further information that is required to solve the user's problem
   - Processing is not finished at this point, as further user input is needed to answer the user's question
7. Output your decision in JSON format with either:
   - finished: true and use_tool: false and response field
   - finished: false and use_tool: false and question field
   - finished: false and use_tool: true with tool and tool_input fields
   - put your complete response inside the <decision> tag inside the JSON response. Do not put anything outside of this tag.
8. Format the response as valid XML content. The reponse should be an XML document with an <output> root tag. Any value outside of this tag will cause an error which we MUST avoid
   - the response needs to start with the <output> tag and must close this tag, there must not be any other top-level tag
   - there need to be 2 tags inside the <output> root tag: <thinking> for the reasoning and <decision> containing the JSON formatted decision whether to use a tool or not
   - do not make up any tool responses. Do not return any more tokens after the </output> closing tag
</instructions>

<output>
<thinking>
[Step-by-step analysis of the question and tool requirements, replace with your though process. Be verbose and explain each step of your thinking in depth]
</thinking>
<decision>
{
  // One of these three formats:
  // Tool usage:
  "use_tool": true,
  "tool": "[selected_tool_name]",
  "tool_input": {
    // Tool-specific parameters
  },
  "finished": false // when using a tool, finished is always false

  // Direct response:
  "use_tool": false,
  "response": "[text response containing the answer to the user's initial question]",
  "finished": true // the response contains the answer to the user's question, thus the job is completed

  // Ask for further detail:
  "use_tool": false,
  "question": "[question for missing information]",
  "finished": false // when asking a question, finished is always false
}
</decision>
</output>
""" },
    #{ 'role': 'user', 'content': "What is the temperature at my location?" },
    #{ 'role': 'user', 'content': "What is the temperature at my location? I am currently in Neuss." },
    #{ 'role': 'user', 'content': "What should I wear when it is 15 degrees outside?" },
    { 'role': 'user', 'content': "Adjust my heater in the bedroom" },
    { 'role': 'assistant', 'content': "<output>" }
]

In [None]:
response = litellm.completion(
    model="custom-llm/your-model",
    messages=messages
).choices[0].message
response

In [None]:
messages += [
    { 'role': response.role, 'content': response.content },
    { 'role': 'user', 'content': "My current location is Luxembourg" },
    { 'role': 'assistant', 'content': "<output>" }
]

In [None]:
response = litellm.completion(
    model="custom-llm/your-model",
    messages=messages
).choices[0].message
response

In [None]:
import json
messages += [
    { 'role': response.role, 'content': response.content }, 
    #{ 'role': 'user', 'content': json.dumps({ 'tool': 'temperature_tool', "tool_input": {"location": "Luxembourg"}, 'tool_output': { 'temperature': 0.5}}) },
    { 'role': 'user', 'content': json.dumps({ 'tool': 'temperature_tool', "tool_input": {"location": "Neuss"}, 'tool_output': { 'temperature': 2}}) },
    { 'role': 'assistant', 'content': "<output>" }
]

In [None]:
response = litellm.completion(
    model="custom-llm/your-model",
    messages=messages
).choices[0].message
response

In [12]:
import random

def temperature_tool(location):
    return {'temperature': random.uniform(-5, 20) }

def heating_tool(room, temperature):
    return {'temperature': temperature, 'successful': True }

tools = { 'temperature_tool': temperature_tool, 'heating_tool': heating_tool }

In [16]:
from bs4 import BeautifulSoup
import json

def handle_model(messages):
    while True:
        response = litellm.completion(
            model="custom-llm/your-model",
            messages=messages
        ).choices[0].message
        soup = BeautifulSoup(f'<output>{response.content}', 'xml')
        model_response = json.loads(soup.find('decision').text)

        if model_response['response']:
            print(model_response['response'])
            break
        elif model_response['question']:
            answer = input(model_response['question'])
            messages += [
                { 'role': response.role, 'content': response.content },
                { 'role': 'user', 'content': answer },
                { 'role': 'assistant', 'content': "<output>" }
            ]
        elif model_response['use_tool']:
            tool_name = model_response['tool']
            tool_input = model_response['tool_input']
            print(tool_name, tool_input)
            tool_output = tools[tool_name](**tool_input)
            messages += [
                { 'role': response.role, 'content': response.content },
                { 'role': 'tool', 'content': json.dumps({ 'tool': tool_name, "tool_input": tool_input, 'tool_output': tool_output}) },
                { 'role': 'assistant', 'content': "<output>" }
            ]
        else:
            print('Invalid state')
            break

In [21]:
handle_model(messages)

LLM [{'role': 'system', 'content': '\nYou are an AI assistant specialized in tool-based problem solving. Your expertise lies in analyzing questions to determine whether to use specialized tools or provide direct knowledge-based responses, ensuring optimal resource utilization. If you are lacking information from the user, ask for further information before making a decision. Do not make up any data for your responses but use the provided information only unless it is common knowledge.\n\n<documentation>\n<tools>\n<tool1>\n<tool_name>temperature_tool<tool_name>\n<tool_explanation>For a given location, it provides the current temperature. The location needs to be an actual city name and not a [placeholder value]. Passing a placeholder value will cause an error when calling the tool<tool_explanation>\n<tool_input_format>{ "location": "[placeholder for the city name]" }\n</tool1>\n<tool2>\n<tool_name>heating_tool<tool_name>\n<tool_explanation>For a given roomname, it sets the heater to the

In order to set the heating temperature, I need to know the target temperature to set to change the temperature. What is the target temperature for the bedroom heater? 16


LLM [{'role': 'system', 'content': '\nYou are an AI assistant specialized in tool-based problem solving. Your expertise lies in analyzing questions to determine whether to use specialized tools or provide direct knowledge-based responses, ensuring optimal resource utilization. If you are lacking information from the user, ask for further information before making a decision. Do not make up any data for your responses but use the provided information only unless it is common knowledge.\n\n<documentation>\n<tools>\n<tool1>\n<tool_name>temperature_tool<tool_name>\n<tool_explanation>For a given location, it provides the current temperature. The location needs to be an actual city name and not a [placeholder value]. Passing a placeholder value will cause an error when calling the tool<tool_explanation>\n<tool_input_format>{ "location": "[placeholder for the city name]" }\n</tool1>\n<tool2>\n<tool_name>heating_tool<tool_name>\n<tool_explanation>For a given roomname, it sets the heater to the