In [3]:
from mistral_inference.transformer import Transformer

import os

# Folder containing the Mistral-Nemo-Instruct-2407 checkpoint.
# The default is the shared location this notebook was originally run against;
# set the MISTRAL_NEMO_DIR environment variable to load from another folder
# without editing the notebook.
MODEL_DIR = os.environ.get(
    "MISTRAL_NEMO_DIR",
    "/home/shared_models/huggingface/mistralai/Mistral-Nemo-Instruct-2407",
)

# Load the model from that folder; `model` is reused by the generation cell.
model = Transformer.from_folder(MODEL_DIR)

  @torch.library.impl_abstract("xformers_flash::flash_fwd")
  @torch.library.impl_abstract("xformers_flash::flash_bwd")


In [1]:
# Import needed packages:
from mistral_common.protocol.instruct.messages import (
    UserMessage,
    ToolMessage,
    FinetuningAssistantMessage,
    SystemMessage
)
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.protocol.instruct.tool_calls import (
    Function,
    Tool,
    ToolCall,
    FunctionCall
)
from mistral_common.protocol.instruct.validator import (
    ValidationMode,
)
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

# Load the Mistral tokenizer.

# Name passed as `model=` in the chat completion request built further down.
model_name = "mistral-nemo"

# Resolve the Tekken vocabulary file inside the tokenizer's data directory.
# NOTE(review): `_data_path()` is underscore-prefixed (private by convention);
# confirm it is stable across the mistral_common versions this notebook targets.
tekken_file = MistralTokenizer._data_path() / "tekken_240718.json"

# Finetuning validation mode is requested explicitly.
tokenizer = MistralTokenizer.from_file(
    str(tekken_file),
    mode=ValidationMode.finetuning,
)

# Tokenize a list of messages.

# The single tool made available in the request: a weather lookup that
# requires both a location and a temperature unit.
weather_tool = Tool(
    function=Function(
        name="get_current_weather",
        description="Get the current weather",
        parameters={
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "format": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "The temperature unit to use. Infer this from the users location.",
                },
            },
            "required": ["location", "format"],
        },
    )
)

# Id shared by the assistant's tool call and the tool message answering it.
weather_call_id = "3XhQnxLsT"

# Conversation: system prompt, a user question, an assistant turn carrying a
# tool call, the tool result, then two more assistant/user exchanges.
conversation = [
    SystemMessage(content="this is a system message"),
    UserMessage(content="What's the weather like today in Paris"),
    FinetuningAssistantMessage(
        content="Let me search that up for you",
        tool_calls=[
            ToolCall(
                id=weather_call_id,
                function=FunctionCall(
                    name="get_current_weather",
                    arguments='{"location": "Paris, FR", "format": "celsius"}',
                ),
            )
        ],
    ),
    ToolMessage(tool_call_id=weather_call_id, content="20"),
    FinetuningAssistantMessage(content="The weather is 20 degrees Celsius"),
    UserMessage(content="Describe what that temperature feels like"),
    FinetuningAssistantMessage(content="It feels warm"),
]

# Bundle tools and messages into one request and encode it.
request = ChatCompletionRequest(
    tools=[weather_tool],
    messages=conversation,
    model=model_name,
)
tokenized = tokenizer.encode_chat_completion(request)
# Keep the token ids and the rendered prompt text under their own names;
# later cells print both and feed `tokens` to the model.
tokens = tokenized.tokens
text = tokenized.text

# Count the number of tokens
print(len(tokens))

229


In [2]:
# Build a second tokenizer through the `v3` constructor with Tekken disabled,
# and keep only its instruct tokenizer. This is a different object from
# `tokenizer`, which was loaded from the Tekken JSON file.
v3_tokenizer = MistralTokenizer.v3(is_tekken=False)
tokeniz = v3_tokenizer.instruct_tokenizer

In [5]:
# Inspect the BEGIN_SYS attribute of the v3 instruct tokenizer. Left as a bare
# expression so the notebook echoes the value (0 in the recorded run).
# NOTE(review): presumably the id of the control token that opens a system
# prompt - confirm against mistral_common.
tokeniz.BEGIN_SYS

0

In [3]:
# Show the raw token ids taken from the encoded chat completion request.
print(tokens)

[1, 5, 1091, 19227, 4994, 2811, 1429, 5165, 1897, 1429, 5165, 2811, 16753, 2391, 2811, 1429, 1689, 45971, 1095, 45629, 1897, 1429, 14653, 2811, 1429, 4147, 1278, 3519, 17253, 1897, 1429, 26204, 2811, 16753, 4994, 2811, 1429, 6371, 1897, 1429, 48649, 2811, 16753, 17611, 2811, 16753, 4994, 2811, 1429, 3607, 1897, 1429, 14653, 2811, 1429, 1784, 5970, 1321, 3468, 1044, 1324, 3596, 1046, 5151, 12717, 1044, 13461, 50666, 1429, 8092, 2811, 16753, 4994, 2811, 1429, 3607, 1897, 1429, 31222, 2811, 12161, 1099, 79092, 1897, 1429, 38600, 10432, 31597, 1429, 14653, 2811, 1429, 1784, 6138, 5476, 1317, 2210, 1046, 90463, 1593, 1562, 1278, 8616, 7285, 2613, 47579, 1429, 15760, 2811, 12161, 17611, 1897, 1429, 8092, 4964, 2821, 27028, 6, 14, 2496, 1395, 1261, 2663, 5117, 15, 3, 7493, 1681, 1278, 17253, 2479, 9406, 1294, 6993, 4, 12598, 1639, 6123, 1455, 2015, 1394, 1636, 9, 1091, 19227, 2391, 2811, 1429, 1689, 45971, 1095, 45629, 1897, 1429, 61906, 2811, 16753, 17611, 2811, 1429, 42572, 1044, 46822, 189

In [2]:
# Show the rendered prompt string for the same request, including the
# bracketed control markers (e.g. [INST], [TOOL_CALLS]) and </s> terminators.
print(text)

<s>[AVAILABLE_TOOLS][{"type": "function", "function": {"name": "get_current_weather", "description": "Get the current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "format": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit to use. Infer this from the users location."}}, "required": ["location", "format"]}}}][/AVAILABLE_TOOLS][SYS_INST]this is a system message[/SYS_INST][INST]What's the weather like today in Paris[/INST]Let me search that up for you[TOOL_CALLS][{"name": "get_current_weather", "arguments": {"location": "Paris, FR", "format": "celsius"}, "id": "3XhQnxLsT"}]</s>[TOOL_RESULTS]{"content": 20, "call_id": "3XhQnxLsT"}[/TOOL_RESULTS]The weather is 20 degrees Celsius</s>[INST]Describe what that temperature feels like[/INST]It feels warm</s>


In [16]:
from mistral_inference.generate import generate

# The underlying tokenizer supplies both the end-of-sequence id and decoding.
raw_tokenizer = tokenizer.instruct_tokenizer.tokenizer

# Generate a continuation for a batch holding the single encoded prompt.
# The second return value of `generate` is discarded.
# NOTE(review): `tokens` comes from the finetuning-mode encoding, whose text
# ends with the last assistant turn followed by </s>; the model is therefore
# continuing past an end-of-sequence marker rather than answering a pending
# user message - confirm this is intended.
out_tokens, _ = generate(
    [tokens],
    model,
    max_tokens=1024,
    temperature=0.35,
    eos_id=raw_tokenizer.eos_id,
)

# Decode the first (and only) sequence of the batch back to text.
result = raw_tokenizer.decode(out_tokens[0])

print(result)

Would you like to know the forecast for the next few days?
