In [21]:
import requests

# Ask the local OpenAI-compatible chat endpoint for a completion, offering the
# model a single callable tool (`get_current_weather`).
response = requests.post(
    "http://127.0.0.1:8000/v1/chat/completions",
    json={
        "model": "lit",
        "stream": False,  # You can stream chunked response by setting this True
        "tools": [
            {
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "description": "Get the current weather in a given location",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": {
                                "type": "string",
                                "description": "The city and state, e.g. San Francisco, CA",
                            },
                            "unit": {
                                "type": "string",
                                "enum": ["celsius", "fahrenheit"],
                            },
                        },
                        "required": ["location"],
                    },
                },
            }
        ],
        "messages": [{"role": "user", "content": "What's the weather like in Paris?"}],
    },
)

# Fail loudly on an HTTP error status instead of continuing with an error body,
# which may not contain the "choices" key indexed below.
response.raise_for_status()

completion = response.json()
# Last expression of the cell: display the assistant message of the first choice.
completion["choices"][0]["message"]

{'role': 'assistant',
 'content': '',
 'name': None,
 'tool_calls': [{'id': '0yTgR5pLN',
   'type': 'function',
   'function': {'name': 'get_current_weather',
    'arguments': '{"location": "Paris", "unit": "celsius"}'}}],
 'tool_call_id': None}

In [None]:
import torch 
from pathlib import Path
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.request import ChatCompletionRequest

# Build model-ready tensors for one tool-calling chat request using the
# Mistral tokenizer file stored under ~/mistral_models/7B-Instruct-v0.3.
mistral_models_path = Path.home().joinpath("mistral_models", "7B-Instruct-v0.3")
mistral_tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tokenizer.model.v3")

messages = [
    {"role": "user", "content": "What's the weather like in Paris?"}
]

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]

completion_request = ChatCompletionRequest(messages=messages, tools=tools)
# Encode the request once and read both the text and the token ids from the
# same result, rather than running the tokenizer twice on identical input.
tokenized = mistral_tokenizer.encode_chat_completion(completion_request)
text = tokenized.text
tokens = tokenized.tokens

device = torch.device("cpu")
# Wrapping `tokens` in a list adds a leading dimension of size 1.
input_ids = torch.tensor([tokens]).to(device)

# Attention mask where 1s indicate real tokens and 0s indicate padding.
# Only one sequence is encoded here and nothing pads it, so the mask is all
# ones; `ones_like` already places the result on the same device as `input_ids`.
attention_mask = torch.ones_like(input_ids)

model_inputs = {
    "input_ids": input_ids,
    "attention_mask": attention_mask,
}

# Last expression of the cell: display the assembled inputs.
model_inputs

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Never paste the token into the notebook source: it would be saved with the
# file and shared with anyone who receives it. Prefer a token already exported
# as HF_API_TOKEN; otherwise prompt for it without echoing the input.
hf_token = os.environ.get("HF_API_TOKEN") or getpass("Hugging Face token: ")

# Keep the token available through the environment variable for the rest of
# this session, as the original cell did.
os.environ["HF_API_TOKEN"] = hf_token

# Use the token to login
login(token=hf_token)

In [26]:
from openai import OpenAI
# OpenAI SDK client aimed at the local server on 127.0.0.1:8000.
client = OpenAI(base_url="http://127.0.0.1:8000/v1", api_key="lit")

# JSON schema of the arguments accepted by the weather tool.
weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA",
        },
        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
    },
    "required": ["location"],
}

# Function description advertised to the model.
weather_function = {
    "name": "get_current_weather",
    "description": "Get the current weather in a given location",
    "parameters": weather_parameters,
}

tools = [{"type": "function", "function": weather_function}]

messages = [
    {"role": "user", "content": "What's the weather like in Boston today?"},
]

# Send the question together with the tool list; "auto" is passed explicitly
# as the tool_choice.
completion = client.chat.completions.create(
    model="Mistral-7B-Instruct-v0.3",
    messages=messages,
    tools=tools,
    tool_choice="auto",
)

first_choice = completion.choices[0]
print(first_choice.message)

ChatCompletionMessage(content='', role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='cLCLyZsEs', function=Function(arguments='{"location": "Boston, MA", "unit": "fahrenheit"}', name='get_current_weather'), type='function')], name=None, tool_call_id=None)


In [27]:

# Streaming variant of the chat request. `client`, `tools` and `messages` are
# not defined in this cell and must already exist in the kernel.
stream = client.chat.completions.create(
    model="Mistral-7B-Instruct-v0.3",  # same model name as the other SDK calls in this notebook
    tools=tools,
    messages=messages,
    stream=True,
)

for chunk in stream:
    # A chunk with an empty `choices` list has no delta to show; skip it
    # rather than raising IndexError.
    if not chunk.choices:
        continue
    print(chunk.choices[0].delta or "")

ChoiceDelta(content='', function_call=None, role='assistant', tool_calls=None, name=None, tool_call_id=None)
ChoiceDelta(content='', function_call=None, role='assistant', tool_calls=None, name=None, tool_call_id=None)
ChoiceDelta(content='', function_call=None, role='assistant', tool_calls=None, name=None, tool_call_id=None)
ChoiceDelta(content='', function_call=None, role='assistant', tool_calls=None, name=None, tool_call_id=None)
ChoiceDelta(content='', function_call=None, role='assistant', tool_calls=None, name=None, tool_call_id=None)
ChoiceDelta(content='', function_call=None, role='assistant', tool_calls=None, name=None, tool_call_id=None)
ChoiceDelta(content='', function_call=None, role='assistant', tool_calls=None, name=None, tool_call_id=None)
ChoiceDelta(content='', function_call=None, role='assistant', tool_calls=None, name=None, tool_call_id=None)
ChoiceDelta(content='', function_call=None, role='assistant', tool_calls=None, name=None, tool_call_id=None)
ChoiceDelta(content

In [28]:
from openai import OpenAI
import json

# OpenAI SDK client pointed at the local server on 127.0.0.1:8000.
# NOTE(review): "lit" looks like a placeholder API key — confirm the server
# does not validate it.
client = OpenAI(base_url="http://127.0.0.1:8000/v1", api_key="lit")


# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="fahrenheit"):
    """Return a canned weather report for ``location`` as a JSON string.

    The lowercased location is searched for a known city keyword, in the order
    Tokyo, San Francisco, Paris; the first hit yields that city's fixed
    temperature together with ``unit``. When no keyword matches, the report
    echoes ``location`` with temperature ``"unknown"`` and carries no unit.
    """
    # (keyword to look for, city name to report, hard-coded temperature)
    canned_reports = (
        ("tokyo", "Tokyo", "10"),
        ("san francisco", "San Francisco", "72"),
        ("paris", "Paris", "22"),
    )
    lowered = location.lower()
    for keyword, city, temperature in canned_reports:
        if keyword in lowered:
            report = {"location": city, "temperature": temperature, "unit": unit}
            return json.dumps(report)
    return json.dumps({"location": location, "temperature": "unknown"})


# System-prompt text describing how the model should decide when to use tools.
# NOTE(review): nothing in the visible code references SYS_PROMPT — confirm
# whether it is meant to be sent as a "system" message or can be removed.
SYS_PROMPT = "You are an advanced language model with the capability to utilize various tools to enhance your responses. Your objective is to provide accurate, relevant, and concise answers to user queries. Use available tools only when necessary to answer the query. If tools are required, return results specifically related to the tools. Otherwise, respond directly without additional information. Before responding, assess whether the query can be answered using your internal knowledge, and only utilize external tools if the query cannot be sufficiently addressed otherwise. When a tool is required, use it effectively and ensure the output is relevant to the query, presenting the tool-generated results directly without additional commentary. The way you use the tools is by specifying a json blob.\nProvide clear, concise, and direct answers to user queries, ensuring relevance and avoiding unnecessary elaboration. Prioritize user safety and ethical guidelines in all responses, avoiding results that could be harmful, unsafe, or unethical. Carefully consider the potential impact of your response before generating it, and if uncertain about the safety or appropriateness of an answer, err on the side of caution.\n\n"


def _run_tool_call(tool_call, available_functions):
    """Execute one tool call requested by the model and return the tool message content.

    Problems with the request itself (unknown function name, arguments that are
    not a valid JSON object, arguments the function does not accept) are
    returned as a JSON ``{"error": ...}`` string instead of being raised, so
    the conversation can continue and the model can see what went wrong.
    """
    function_name = tool_call.function.name
    function_to_call = available_functions.get(function_name)
    if function_to_call is None:
        return json.dumps({"error": f"unknown function: {function_name}"})

    # The arguments arrive as model-generated text and may not be valid JSON.
    try:
        function_args = json.loads(tool_call.function.arguments)
    except (json.JSONDecodeError, TypeError) as exc:
        return json.dumps({"error": f"invalid JSON arguments: {exc}"})
    if not isinstance(function_args, dict):
        return json.dumps({"error": "arguments must be a JSON object"})

    try:
        return function_to_call(**function_args)
    except TypeError as exc:
        # Typically a missing required argument or an unexpected keyword
        # invented by the model.
        return json.dumps({"error": f"bad arguments for {function_name}: {exc}"})


def run_conversation():
    """Ask the model about the weather in Paris, executing any tool calls it requests.

    Returns:
        The completion from the follow-up request when the model asked for one
        or more tool calls; otherwise the first completion, which already holds
        the model's direct answer.
    """
    # Step 1: send the conversation and available functions to the model
    messages = [
        {
            "role": "user",
            "content": "What's the weather like in Paris?",
        }
    ]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    response = client.chat.completions.create(
        model="Mistral-7B-Instruct-v0.3",
        messages=messages,
        tools=tools,
        tool_choice="auto",  # auto is default, but we'll be explicit
    )
    response_message = response.choices[0].message
    tool_calls = response_message.tool_calls

    # Step 2: check if the model wanted to call a function. If it did not, the
    # first response is the answer; return it instead of falling through to None.
    if not tool_calls:
        return response

    # Step 3: call the function(s)
    available_functions = {
        "get_current_weather": get_current_weather,
    }  # only one function in this example, but you can have multiple
    messages.append(response_message)  # extend conversation with assistant's reply

    # Step 4: send the info for each function call and function response to the model
    for tool_call in tool_calls:
        messages.append(
            {
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": tool_call.function.name,
                "content": _run_tool_call(tool_call, available_functions),
            }
        )  # extend conversation with function response

    # Get a new response from the model where it can see the function response
    second_response = client.chat.completions.create(
        model="Mistral-7B-Instruct-v0.3",
        messages=messages,
    )
    return second_response


# Run the tool-calling demo and print its return value.
print(run_conversation())

Res message ChatCompletionMessage(content='', role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='BA0Ri8daj', function=Function(arguments='{"location": "Paris", "unit": "celsius"}', name='get_current_weather'), type='function')], name=None, tool_call_id=None)
ChatCompletion(id='chatcmpl-fd784c', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The current weather in Paris is 22 degrees Celsius.</s>', role='assistant', function_call=None, tool_calls=None, name=None, tool_call_id=None))], created=1717953900, model='Mistral-7B-Instruct-v0.3', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0))
