In [None]:
!pip install mistral_inference
# Install the huggingface_hub package, which provides tools for working with models and datasets from Hugging Face Hub
!pip install huggingface_hub



In [None]:
import pathlib
import textwrap
from IPython.display import display
from IPython.display import Markdown


def to_markdown(text):
  """Wrap ``text`` in a Markdown block quote for rich display.

  Every bullet character ('•') is replaced with '  *', then every line
  (blank lines included) is prefixed with '> '. The quoted string is
  returned wrapped in an IPython ``Markdown`` object.
  """
  bulleted = text.replace('•', '  *')
  # Prefix each line unconditionally, keeping the original line endings.
  quoted = ''.join('> ' + line for line in bulleted.splitlines(True))
  return Markdown(quoted)

In [None]:
# Authenticate this session with the Hugging Face Hub before downloading anything.
# NOTE(review): presumably needed because the model repository fetched later is
# access-restricted — confirm.
from huggingface_hub import login
# In a notebook this renders an interactive login widget rather than returning a value.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Hub helper that copies files of a repository into a local directory
from huggingface_hub import snapshot_download

# Object-oriented filesystem paths
from pathlib import Path

# Target directory: <home>/mistral_models/7B-Instruct-v0.3
mistral_models_path = Path.home() / 'mistral_models' / '7B-Instruct-v0.3'

# Create the directory together with any missing parents; an existing directory is fine
mistral_models_path.mkdir(parents=True, exist_ok=True)

# Download from the Mistral-7B-Instruct-v0.3 repository, restricted to the three
# files matched by allow_patterns, into the directory prepared above.
# The call is the last expression of the cell, so its return value is displayed.
snapshot_download(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    allow_patterns=[
        "params.json",
        "consolidated.safetensors",
        "tokenizer.model.v3",
    ],
    local_dir=mistral_models_path,
)

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

'/root/mistral_models/7B-Instruct-v0.3'

## Chat

In [None]:
# Model class and generation loop from mistral_inference
from mistral_inference.model import Transformer
from mistral_inference.generate import generate

# Tokenizer plus the message/request types from mistral_common
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest

# Load the tokenizer file and the model from the download directory
tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tokenizer.model.v3")
model = Transformer.from_folder(mistral_models_path)

# A request holding a single user message
completion_request = ChatCompletionRequest(
    messages=[
        UserMessage(content="Explain Machine Learning to me in a nutshell."),
    ]
)

# Token ids for the encoded request
tokens = tokenizer.encode_chat_completion(completion_request).tokens

# Generate for a batch of one prompt: up to 256 new tokens, temperature 0.0,
# stopping at the tokenizer's end-of-sequence id. The second return value is unused.
out_tokens, _ = generate(
    [tokens],
    model,
    max_tokens=256,
    temperature=0.0,
    eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id,
)

# Decode the generated ids of the first (only) prompt back into text
result = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])

# Last expression of the cell: the answer is displayed as a Markdown block quote
to_markdown(result)

> Machine Learning (ML) is a subset of artificial intelligence (AI) that provides systems the ability to automatically learn and improve from experience without being explicitly programmed.
> 
> In simpler terms, Machine Learning is a way of teaching computers to learn by themselves, allowing them to make decisions or predictions based on data. This is done by feeding large amounts of data into a model, which then identifies patterns and relationships within the data. The model uses these patterns to make predictions or decisions about new, unseen data.
> 
> There are three main types of Machine Learning: Supervised Learning (where the model is trained on labeled data), Unsupervised Learning (where the model finds patterns in unlabeled data), and Reinforcement Learning (where the model learns by trial and error, receiving rewards or penalties for its actions).
> 
> Machine Learning is used in a wide range of applications, including image recognition, speech recognition, natural language processing, recommendation systems, and predictive analytics.

## Function Calling

In [None]:
# Tool-calling schema classes from mistral_common
from mistral_common.protocol.instruct.tool_calls import Function, Tool
from mistral_inference.model import Transformer
from mistral_inference.generate import generate
# Tokenizer used to encode chat requests and decode generated tokens
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
# Message and request types used to build the chat completion request
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest

# Reuse the tokenizer and model when an earlier cell has already loaded them.
# Loading them unconditionally repeats the expensive weight load, and until the
# name is rebound both copies of the model are alive at once. The fallback
# branches keep this cell runnable on its own in a fresh kernel.
if "tokenizer" not in globals():
    tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tokenizer.model.v3")
if "model" not in globals():
    model = Transformer.from_folder(mistral_models_path)

# Build a request that offers the model one tool alongside the user's question
completion_request = ChatCompletionRequest(
    tools=[
        # Tool description: name, purpose, and a JSON-schema-style parameter spec
        Tool(
            function=Function(
                name="get_current_weather",
                description="Get the current weather",
                parameters={
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. Albany, NY",
                        },
                        "format": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "description": "The temperature unit to use. Infer this from the user's location.",
                        },
                    },
                    # Both arguments must be supplied in a call
                    "required": ["location", "format"],
                },
            )
        )
    ],
    # The conversation so far: a single user message
    messages=[
        UserMessage(content="What's the weather like today in Florida?"),
    ],
)
# Encode the chat completion request into tokens using the tokenizer
tokens = tokenizer.encode_chat_completion(completion_request).tokens

# Generate for a batch of one prompt: up to 256 new tokens, temperature 0.0,
# stopping at the tokenizer's end-of-sequence id
out_tokens, _ = generate([tokens], model, max_tokens=256, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)

# Decode the generated ids of the first (only) prompt back into text
result = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])

# Last expression of the cell: the result is displayed as a Markdown block quote
to_markdown(result)

> [{"name": "get_current_weather", "arguments": {"location": "Florida", "format": "fahrenheit"}}]

## Advanced Function Calling

In [None]:
# Build a request that offers the model two tools alongside the user's question.
# This cell defines no imports, tokenizer or model of its own: it relies on
# ChatCompletionRequest, Tool, Function, UserMessage, generate, tokenizer, model
# and to_markdown already existing from the cells above.
completion_request = ChatCompletionRequest(
    tools=[
        # First tool: look up the current temperature (in celsius) for a location
        Tool(
            function=Function(
                name="get_current_temperature",
                description="Get the current temperature in celsius",
                parameters={
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. Albany, New York",
                        }
                    },
                    "required": ["location"],
                },
            )
        ),
        # Second tool: convert a temperature between celsius and fahrenheit
        Tool(
            function=Function(
                name="convert_temperature",
                description="Convert temperature",
                parameters={
                    "type": "object",
                    "properties": {
                        "temperature": {
                            "type": "number", # Intended to be the value returned by get_current_temperature
                            "description": "The temperature to convert",
                        },
                        "from_unit": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "description": "The temperature unit to use. Infer this from the user's location.",
                        },
                        "to_unit": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "description": "The temperature unit to use. Infer this from the user's location.",
                        },
                    },
                    "required": ["temperature", "from_unit", "to_unit"],
                },
            )
        )
    ],
    # A single user message asking for both the weather and a unit conversion
    messages=[
        UserMessage(content="What's the weather like today in Chicago? Also convert it to fahrenheit?"),
    ],
)

# Encode the chat completion request into token ids using the tokenizer
tokens = tokenizer.encode_chat_completion(completion_request).tokens

# Generate for a batch of one prompt: up to 256 new tokens, temperature 0.0,
# stopping at the tokenizer's end-of-sequence id. The second return value is unused.
out_tokens, _ = generate([tokens], model, max_tokens=256, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)

# Decode the generated ids of the first (only) prompt back into text
result = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])

# Last expression of the cell: the result is displayed as a Markdown block quote
to_markdown(result)

> [{"name": "get_current_temperature", "arguments": {"location": "Chicago, Illinois"}}], [{"name": "convert_temperature", "arguments": {"temperature": (response from get_current_temperature), "from_unit": "celsius", "to_unit": "fahrenheit"}}]
> 
> This code will first get the current temperature in Chicago, Illinois, and then convert it to Fahrenheit.