In [1]:
#| default_exp core

# Mistinguette's source 

## Setup

In [138]:
#| export
import os
from collections import abc
try: from IPython import display
except: display=None
from fastcore.utils import *
from fastcore.meta import delegates
import ast

from rich import print
from msglm import mk_msg_openai as mk_msg, mk_msgs_openai as mk_msgs
from toolslm.funccall import *

import mistralai
from mistralai import Mistral
from mistralai.models import ChatCompletionChoice, ChatCompletionResponse, UsageInfo, CompletionEvent
from mistralai.models.functioncall import FunctionCall
from mistralai.types import BaseModel

In [None]:
# models/functioncall.py

In [3]:
#| hide
from IPython.display import Markdown

In [4]:
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")

In [5]:
#| exports
model_types = {
    # Premier models
    'codestral-2501': 'codestral-latest', # code generation model
    'mistral-large-2411': 'mistral-large-latest', # top-tier reasoning model for high-complexity tasks
    'pixtral-large-2411': 'pixtral-large-latest', # frontier-class multimodal model
    'mistral-saba-2502': 'mistral-saba-latest', # model for languages from the Middle East and South Asia
    'ministral-3b-2410': 'ministral-3b-latest', # edge model
    'ministral-8b-2410': 'ministral-8b-latest', # edge model with high performance/price ratio
    'mistral-embed-2312': 'mistral-embed', # embedding model
    'mistral-moderation-2411': 'mistral-moderation-latest', # moderation service to detect harmful text content
    'mistral-ocr-2503': 'mistral-ocr-latest', # OCR model to extract interleaved text and images
    
    # Free models (with weight availability)
    'mistral-small-2503': 'mistral-small-latest', # small model with image understanding capabilities
    
    # Research models
    'open-mistral-nemo-2407': 'open-mistral-nemo', # multilingual open source model
}

all_models = list(model_types)

In [6]:
#| exports
vision_models = ['pixtral-large-2411', 'mistral-small-2503', 'mistral-ocr-2503']

In [7]:
#| exports
embed_models = ['mistral-embed-2312']

In [8]:
#| exports
ocr_models = ['mistral-ocr-2503']

In [9]:
#| exports
text_only_models = set(all_models) - set(vision_models) - set(embed_models) - set(ocr_models)

In [10]:
#| exports
has_streaming_models = set(all_models) - set(embed_models) - set(ocr_models)
has_system_prompt_models = set(all_models) - set(embed_models) - set(ocr_models)
has_temperature_models = set(all_models) - set(embed_models) - set(ocr_models)

In [11]:
# all models except codestral-mamba support custom structured outputs

In [12]:
#| export
models = all_models

In [13]:
model = models[1]; model

'mistral-large-2411'

## Mistral SDK

In [14]:
cli = Mistral(api_key=MISTRAL_API_KEY)

This is what Mistral's SDK provides for interacting with Python. To use it, pass it a list of *messages*, with *content* and a *role*. The roles should alternate between *user* and *assistant*.

In [15]:
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What's in this image?"
            },
            {
                "type": "image_url",
                "image_url": "https://tripfixers.com/wp-content/uploads/2019/11/eiffel-tower-with-snow.jpeg"
            }
        ]
    }
]

# Get the chat response
chat_response = cli.chat.complete(
    model='pixtral-large-2411',
    messages=messages
)

In [16]:
chat_response

ChatCompletionResponse(id='5d3914d4bdce4995994badfe84a12fed', object='chat.completion', model='pixtral-large-2411', usage=UsageInfo(prompt_tokens=1360, completion_tokens=136, total_tokens=1496), created=1743256987, choices=[ChatCompletionChoice(index=0, message=AssistantMessage(content='The image depicts a snowy winter scene in Paris, featuring the iconic Eiffel Tower. The tower stands tall and central in the image, with its intricate iron latticework clearly visible against the gray, overcast sky. Snow blankets the surroundings, including the trees, ground, and fences, creating a serene and picturesque winter landscape.\n\nIn the foreground, there is a path covered with snow, flanked by bare trees whose branches are laden with snow. There are also fences and a traditional Parisian street lamp. The overall atmosphere is tranquil, capturing the charm and beauty of Paris in the winter season.', tool_calls=None, prefix=False, role='assistant'), finish_reason='stop')])

In [17]:
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What's in this image?"
            },
            {
                "type": "image_url",
                "image_url": "https://tripfixers.com/wp-content/uploads/2019/11/eiffel-tower-with-snow.jpeg"
            }
        ]
    }
]

# Get the chat response
stream_response = cli.chat.stream(
    model='pixtral-large-2411',
    messages=messages
)

In [18]:
for chunk in stream_response:
    print(chunk.data.choices[0].delta.content)

In [19]:
# Here are the list of different client methods:
# - chat.complete (completion)
# - chat.stream (completion streaming)
# - chat.parse (structured output for instance)
# - chat.fim.complete (fim: fill in middle / code generation)
# - chat.ocr.process (ocr)
# - chat.embeddings.create (embedding creation)

In [20]:
m = {'role': 'user', 'content': "I'm Franck"}
r = cli.chat.complete(messages = [m], model = model)
r

ChatCompletionResponse(id='51e2fe73da7d4df9b00ad23f58850658', object='chat.completion', model='mistral-large-2411', usage=UsageInfo(prompt_tokens=8, completion_tokens=30, total_tokens=38), created=1743257000, choices=[ChatCompletionChoice(index=0, message=AssistantMessage(content="Hello Franck! Nice to meet you. How are you doing today? Is there something specific you'd like to talk about or do?", tool_calls=None, prefix=False, role='assistant'), finish_reason='stop')])

In [21]:
print(r)

### Formatting output

In [22]:
#| exports
def find_block(r:abc.Mapping, # The message to look in
              ):
    "Find the message in `r`"
    if isinstance(r, CompletionEvent): r = r.data # if async
    m = nested_idx(r, 'choices', 0)
    if not m: return m
    if hasattr(m, 'message'): return m.message
    return m.delta

In [23]:
find_block(r)

AssistantMessage(content="Hello Franck! Nice to meet you. How are you doing today? Is there something specific you'd like to talk about or do?", tool_calls=None, prefix=False, role='assistant')

In [24]:
#| exports
def contents(r):
    "Helper to get the contents from response `r`."
    blk = find_block(r)
    if not blk: return r
    if hasattr(blk, 'content'): return getattr(blk,'content')
    return blk

In [25]:
contents(r)

"Hello Franck! Nice to meet you. How are you doing today? Is there something specific you'd like to talk about or do?"

In [26]:
#| exports
@patch
def _repr_markdown_(self:ChatCompletionResponse):
    det = '\n- '.join(f'{k}: {v}' for k,v in dict(self).items())
    res = contents(self)
    if not res: return f"- {det}"
    return f"""{contents(self)}

<details>

- {det}

</details>"""

In [27]:
r

Hello Franck! Nice to meet you. How are you doing today? Is there something specific you'd like to talk about or do?

<details>

- id: 51e2fe73da7d4df9b00ad23f58850658
- object: chat.completion
- model: mistral-large-2411
- usage: prompt_tokens=8 completion_tokens=30 total_tokens=38
- created: 1743257000
- choices: [ChatCompletionChoice(index=0, message=AssistantMessage(content="Hello Franck! Nice to meet you. How are you doing today? Is there something specific you'd like to talk about or do?", tool_calls=None, prefix=False, role='assistant'), finish_reason='stop')]

</details>

In [28]:
r.usage

UsageInfo(prompt_tokens=8, completion_tokens=30, total_tokens=38)

In [29]:
#| exports
def usage(inp=0, # input tokens
          out=0,  # Output tokens
         ):
    "Slightly more concise version of `UsageInfo`."
    return UsageInfo(prompt_tokens=inp, completion_tokens=out, total_tokens=inp+out)

In [30]:
usage(5)

UsageInfo(prompt_tokens=5, completion_tokens=0, total_tokens=5)

In [31]:
#| exports
@patch
def __repr__(self:UsageInfo): return f'In: {self.prompt_tokens}; Out: {self.completion_tokens}; Total: {self.total_tokens}'

In [32]:
r.usage

In: 8; Out: 30; Total: 38

In [33]:
#| exports
@patch
def __add__(self:UsageInfo, b):
    "Add together each of `input_tokens` and `output_tokens`"
    return usage(self.prompt_tokens+b.prompt_tokens, self.completion_tokens+b.completion_tokens)

In [34]:
r.usage+r.usage

In: 16; Out: 60; Total: 76

In [35]:
# Is it relevant to Mistral AI: TBD
def wrap_latex(text, md=True):
    "Replace MistralAI LaTeX codes with markdown-compatible ones"
    text = re.sub(r"\\\((.*?)\\\)", lambda o: f"${o.group(1)}$", text)
    res = re.sub(r"\\\[(.*?)\\\]", lambda o: f"$${o.group(1)}$$", text, flags=re.DOTALL)
    if md: res = display.Markdown(res)
    return res

In [36]:
#| exports
@patch(as_prop=True)
def total(self:UsageInfo): return self.total_tokens

In [37]:
usage(5,1).total

6

### Creating messages

Creating message dictionaries manually can be tedious, so we'll use helper functions from the `msglm` library.
 
We'll use `mk_msg` to easily create messages like `{'role': 'user', 'content': "I'm Franck"}`. Since Mistral AI's message format is compatible with OpenAI's structure, we imported : `from msglm import mk_msg_openai as mk_msg, mk_msgs_openai as mk_msgs`

In [38]:
prompt = "I'm Franck"
m = mk_msg(prompt)
r = cli.chat.complete(messages=[m], model=model, max_tokens=100)
r

Hello Franck! Nice to meet you. How are you today? Is there something you would like to talk about or ask me?

<details>

- id: 5ae014d5bb414e029fd359d9128e84e7
- object: chat.completion
- model: mistral-large-2411
- usage: prompt_tokens=8 completion_tokens=28 total_tokens=36
- created: 1743257001
- choices: [ChatCompletionChoice(index=0, message=AssistantMessage(content='Hello Franck! Nice to meet you. How are you today? Is there something you would like to talk about or ask me?', tool_calls=None, prefix=False, role='assistant'), finish_reason='stop')]

</details>

We can pass more than just text messages to Mistral AI. As we'll see later we can also pass images, SDK objects, etc. To handle these different data types we need to pass the type along with our content to OpenAI. 

Here's an example of a multimodal message containing text and images. 

```json
{
    'role': 'user', 
    'content': [
        {'type': 'text', 'text': 'What is in the image?'},
        {'type': 'image_url', 'image_url': {'url': f'data:{MEDIA_TYPE};base64,{IMG}'}}
    ]
}
```

`mk_msg` infers the type automatically and creates the appropriate data structure. 

LLMs, don't actually have state, but instead dialogs are created by passing back all previous prompts and responses every time. With Mistral AI, they always alternate *user* and *assistant*. We'll use `mk_msgs` from `msglm` to make it easier to build up these dialog lists.

In [39]:
msgs = mk_msgs([prompt, r, "I forgot my name. Can you remind me please?"]) 
msgs

[{'role': 'user', 'content': "I'm Franck"},
 AssistantMessage(content='Hello Franck! Nice to meet you. How are you today? Is there something you would like to talk about or ask me?', tool_calls=None, prefix=False, role='assistant'),
 {'role': 'user', 'content': 'I forgot my name. Can you remind me please?'}]

In [41]:
r = cli.chat.complete(messages=msgs, model=model, max_tokens=100)
r

Of course! You just told me your name is Franck.

<details>

- id: 414c22a5b9294ed197b37c48657f68de
- object: chat.completion
- model: mistral-large-2411
- usage: prompt_tokens=49 completion_tokens=14 total_tokens=63
- created: 1743257021
- choices: [ChatCompletionChoice(index=0, message=AssistantMessage(content='Of course! You just told me your name is Franck.', tool_calls=None, prefix=False, role='assistant'), finish_reason='stop')]

</details>

In addition to the standard 'user' and 'assistant' roles found in the OpenAI API for instance, Mistral AI's API also supports 'system' roles for providing instructions to the model and 'tool' roles for tool-based interactions. 

Let's see it in action as demonstrated in [Mistral AI's guide](https://docs.mistral.ai/guides/prefix/) on prefix use cases.

In [42]:
instruction = """
Let's roleplay.
Always give a single reply.
Roleplay only, using dialogue only.
Do not send any comments.
Do not send any notes.
Do not send any disclaimers.
"""

question = """
Hi there!
"""

prefix = """
Shakespeare: 
"""

r = cli.chat.complete(
    model="mistral-small-latest",
    messages=[
        mk_msg(instruction, role="system"),
        mk_msg(question, role="user"),
        mk_msg(prefix, role="assistant", prefix=True),
    ],
    max_tokens=128,
)
r


Shakespeare: 
Good morrow! Who art thou that dost greet me so?

<details>

- id: ca499df0a538406e9a08ac2697edef65
- object: chat.completion
- model: mistral-small-latest
- usage: prompt_tokens=55 completion_tokens=19 total_tokens=74
- created: 1743257029
- choices: [ChatCompletionChoice(index=0, message=AssistantMessage(content='\nShakespeare: \nGood morrow! Who art thou that dost greet me so?', tool_calls=None, prefix=False, role='assistant'), finish_reason='stop')]

</details>

## Client

In [43]:
# Note also the .fim (fill in middle) mistral method

In [76]:
#| exports
class Client:
    def __init__(self, model, cli=None):
        "Basic LLM messages client."
        self.model,self.use = model,usage(0,0)
        self.text_only = model in text_only_models
        self.c = (cli or Mistral(api_key=os.environ.get("MISTRAL_API_KEY")))

In [77]:
c = Client("mistral-small-latest")

In [78]:
c.use

In: 0; Out: 0; Total: 0

In [98]:
#| exports
@patch
def _r(self:Client, r:ChatCompletionResponse):
    "Store the result of the message and accrue total usage."
    self.result = r
    if getattr(r,'usage',None): self.use += r.usage
    return r

In [83]:
c._r(r)
c.use

In: 110; Out: 38; Total: 148

In [84]:
#| export
def get_stream(r):
    for o in r:
        o = contents(o)
        if o and isinstance(o, str): yield(o)

Note that `mistralai.Chat.complete` and `mistralai.Chat.stream` have the same signature, we **delegate** to `mistralai.Chat.complete` below to avoid obfuscating `**kwargs` parameters as explained in [fastcore documentation](https://fastcore.fast.ai/meta.html#delegates).

In [111]:
#| exports
@patch
@delegates(mistralai.Chat.complete)
def __call__(self:Client,
             msgs:list, # List of messages in the dialog
             sp:str='', # System prompt
             maxtok=4096, # Maximum tokens
             stream:bool=False, # Stream response?
             **kwargs):
    "Make a call to LLM."
    if 'tools' in kwargs: assert not self.text_only, "Tool use is not supported by the current model type."
    if any(c['type'] == 'image_url' for msg in msgs if isinstance(msg, dict) and isinstance(msg.get('content'), list) for c in msg['content']): assert not self.text_only, "Images are not supported by the current model type."
    if sp and self.model in has_system_prompt_models: msgs = [mk_msg(sp, 'system')] + list(msgs)
    chat_args = dict(model=self.model, messages=msgs, max_tokens=maxtok, **kwargs)
    r = self.c.chat.stream(**chat_args) if stream else self.c.chat.complete(**chat_args)
    return self._r(r) if not stream else get_stream(map(self._r, r))

In [112]:
msgs = [mk_msg('Hi')]

In [113]:
c(msgs)

Hello! How can I assist you today? Let's chat about anything you'd like. 😊

<details>

- id: 32605e6882aa469b99318d32ba126fce
- object: chat.completion
- model: mistral-small-latest
- usage: prompt_tokens=4 completion_tokens=21 total_tokens=25
- created: 1743350710
- choices: [ChatCompletionChoice(index=0, message=AssistantMessage(content="Hello! How can I assist you today? Let's chat about anything you'd like. 😊", tool_calls=None, prefix=False, role='assistant'), finish_reason='stop')]

</details>

In [114]:
c.use

In: 134; Out: 231; Total: 365

In [115]:
for o in c(msgs, stream=True): print(o, end='')

In [116]:

c.use

In: 134; Out: 231; Total: 365

## Tool use

In [122]:
def sums(
    a:int,  # First thing to sum
    b:int # Second thing to sum
) -> int: # The sum of the inputs
    "Adds a + b."
    print(f"Finding the sum of {a} and {b}")
    return a + b

In [126]:
#| export
def mk_mistralai_func(f): 
    sc = get_schema(f, 'parameters')
    sc['parameters'].pop('title', None)
    return dict(type='function', function=sc)

In [127]:
#| export
def mk_tool_choice(f): return dict(type='function', function={'name':f})

In [128]:
sysp = "You are a helpful assistant. When using tools, be sure to pass all required parameters, at minimum."

In [134]:
mk_mistralai_func(sums)

{'type': 'function',
 'function': {'name': 'sums',
  'description': 'Adds a + b.\n\nReturns:\n- type: integer',
  'parameters': {'type': 'object',
   'properties': {'a': {'type': 'integer',
     'description': 'First thing to sum'},
    'b': {'type': 'integer', 'description': 'Second thing to sum'}},
   'required': ['a', 'b']}}}

In [129]:
a,b = 604542,6458932
pr = f"What is {a}+{b}?"
tools = [mk_mistralai_func(sums)]
tool_choice = mk_tool_choice("sums")

In [130]:
tool_choice

{'type': 'function', 'function': {'name': 'sums'}}

In [131]:
msgs = [mk_msg(pr)]
r = c(msgs, sp=sysp, tools=tools)
r

- id: a6324b8b426b4d4b942f8b6f66d5dbda
- object: chat.completion
- model: mistral-small-latest
- usage: prompt_tokens=120 completion_tokens=36 total_tokens=156
- created: 1743351695
- choices: [ChatCompletionChoice(index=0, message=AssistantMessage(content='', tool_calls=[ToolCall(function=FunctionCall(name='sums', arguments='{"a": 604542, "b": 6458932}'), id='ronLQPe5l', type=None, index=0)], prefix=False, role='assistant'), finish_reason='tool_calls')]

In [132]:
m = find_block(r)
m

AssistantMessage(content='', tool_calls=[ToolCall(function=FunctionCall(name='sums', arguments='{"a": 604542, "b": 6458932}'), id='ronLQPe5l', type=None, index=0)], prefix=False, role='assistant')

In [139]:
tc = m.tool_calls
tc

[ToolCall(function=FunctionCall(name='sums', arguments='{"a": 604542, "b": 6458932}'), id='ronLQPe5l', type=None, index=0)]

In [140]:
# OpenAI returns "Function(arguments='{"a":604542,"b":6458932}', name='sums')"
func = tc[0].function
func

FunctionCall(name='sums', arguments='{"a": 604542, "b": 6458932}')

In [141]:
#| exports
def call_func_mistralai(func:FunctionCall, ns:Optional[abc.Mapping]=None): 
    return call_func(func.name, ast.literal_eval(func.arguments), ns)

In [143]:
mk_ns(sums)

{'sums': <function __main__.sums(a: int, b: int) -> int>}

In [144]:
ns = mk_ns(sums)
res = call_func_mistralai(func, ns=ns)
res

7063474

In [145]:
#| exports
def mk_toolres(
    r:abc.Mapping, # Tool use request response
    ns:Optional[abc.Mapping]=None, # Namespace to search for tools
    obj:Optional=None # Class to search for tools
    ):
    "Create a `tool_result` message from response `r`."
    r = mk_msg(r)
    tcs = getattr(r, 'tool_calls', [])
    res = [r]
    if ns is None: ns = globals()
    if obj is not None: ns = mk_ns(obj)
    for tc in (tcs or []):
        func = tc.function
        cts = str(call_func_mistralai(func, ns=ns))
        res.append(mk_msg(str(cts), 'tool', tool_call_id=tc.id, name=func.name))
    return res

In [146]:
tr = mk_toolres(r, ns=ns)
tr

[AssistantMessage(content='', tool_calls=[ToolCall(function=FunctionCall(name='sums', arguments='{"a": 604542, "b": 6458932}'), id='ronLQPe5l', type=None, index=0)], prefix=False, role='assistant'),
 {'role': 'tool',
  'content': '7063474',
  'tool_call_id': 'ronLQPe5l',
  'name': 'sums'}]

In [147]:
msgs += tr

In [148]:
msgs

[{'role': 'user', 'content': 'What is 604542+6458932?'},
 AssistantMessage(content='', tool_calls=[ToolCall(function=FunctionCall(name='sums', arguments='{"a": 604542, "b": 6458932}'), id='ronLQPe5l', type=None, index=0)], prefix=False, role='assistant'),
 {'role': 'tool',
  'content': '7063474',
  'tool_call_id': 'ronLQPe5l',
  'name': 'sums'}]

In [149]:
res = c(msgs, sp=sysp, tools=tools)
res

The sum of 604542 and 6458932 is 7063474.

<details>

- id: 2993261e53e8437893ed9a7cdcfe04ea
- object: chat.completion
- model: mistral-small-latest
- usage: prompt_tokens=184 completion_tokens=29 total_tokens=213
- created: 1743352566
- choices: [ChatCompletionChoice(index=0, message=AssistantMessage(content='The sum of 604542 and 6458932 is 7063474.', tool_calls=None, prefix=False, role='assistant'), finish_reason='stop')]

</details>

In [150]:
class Dummy:
    def sums(
        self,
        a:int,  # First thing to sum
        b:int=1 # Second thing to sum
    ) -> int: # The sum of the inputs
        "Adds a + b."
        print(f"Finding the sum of {a} and {b}")
        return a + b

In [151]:
tools = [mk_mistralai_func(Dummy.sums)]

o = Dummy()
msgs = mk_toolres("I'm Franck")
r = c(msgs, sp=sysp, tools=tools)
msgs += mk_toolres(r, obj=o)
res = c(msgs, sp=sysp, tools=tools)
res

SDKError: API error occurred: Status 400
{"object":"error","message":"Expected last role User or Tool (or Assistant with prefix True) for serving but got assistant","type":"invalid_request_error","param":null,"code":null}

## Legacy

In [None]:
# Notes:
#  - assistant message with prefix true, should be last message
#  - assistant message with prefix false cannot be last.

In [None]:
# Type of messages:
#  - system: instructions for the assistant (system prompt I guess - sp)  (content, role='system')
#  - user: user message (content, role='user')  
#  - assistant: assistant message (content, tool_calls, prefix, role='assistant')
#  - tool: tool call (content, tool_call_id, name, role='tool')

# Check also:
# - prefix
# - safe_prompt (for guardrailing)

In [34]:
m = [
    {'role': 'system', 'content': "You are a helpful assistant full of irony"},
    {'role': 'user', 'content': "I'm Franck"},
    {'role': 'assistant', 'content': "Well, Franck, it's a pleasure to meet you. I must say, I've always been a fan of the name. It's strong, it's classic, it's... frankly, it's fantastic. You've set a high bar for yourself, Franck. Let's hope you can live up to the grandeur of your name. So, how can I help you today, oh Franck the Magnificent?"
},
    {'role': 'user', 'content': "Hum I don't like your irony"}
    ]
r = cli.chat.complete(messages = m, model = model)

In [35]:
r

I apologize if my previous response came across as too ironic, Franck. Let me try again, with irony set to a minimum. How can I assist you today? I'm here to help, so let me know what you need. Simple and straightforward, just like... a well-made sandwich. No irony, no sarcasm, just a helpful assistant. So, what's on your mind today, Franck?

<details>

- id: 63d313eeb19e464c9c34e02614a39de4
- object: chat.completion
- model: mistral-large-2411
- usage: prompt_tokens=128 completion_tokens=92 total_tokens=220
- created: 1743064154
- choices: [ChatCompletionChoice(index=0, message=AssistantMessage(content="I apologize if my previous response came across as too ironic, Franck. Let me try again, with irony set to a minimum. How can I assist you today? I'm here to help, so let me know what you need. Simple and straightforward, just like... a well-made sandwich. No irony, no sarcasm, just a helpful assistant. So, what's on your mind today, Franck?", tool_calls=None, prefix=False, role='assistant'), finish_reason='stop')]

</details>