In [1]:
from IPython.display import Image

- https://github.com/deepseek-ai/DeepSeek-V3/issues/15
- deepseek v3 (0324): “Increased accuracy in Function Calling, fixing issues from previous V3 versions”
    - https://huggingface.co/deepseek-ai/DeepSeek-V3-0324
    - repetitive function call
- 从 token 或者 chat_template 的角度理解 tool use / function calling，使用（inference）以及 training
    - System prompt: 有哪些工具，参数是什么 。。
    - User prompt: `What's the weather like today in New York?`
    - `<tool>get_current_template(location='New York, NY', format='F')</tool><output>73 degrees Fahrenheit</output>`

In [13]:
Image(url='./imgs/function_calling_lee.jpeg', width=500)

In [3]:
from transformers import AutoTokenizer
import re
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
model_id = 'deepseek-ai/DeepSeek-V3'
model_id_0324 = 'deepseek-ai/DeepSeek-V3-0324'

In [5]:
T1 = AutoTokenizer.from_pretrained(model_id)
T2 = AutoTokenizer.from_pretrained(model_id_0324)

### 'deepseek-ai/DeepSeek-V3'

In [6]:
print(T1.chat_template)

{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '

' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '
' + '```json' + '
' + 

```jinja
{# 设置默认变量 #}
{% if add_generation_prompt is not defined %}
  {% set add_generation_prompt = false %}
{% endif %}

{# 定义命名空间变量 #}
{% set ns = namespace(
  is_first=false,
  is_tool=false,
  is_output_first=true,
  system_prompt='',
  is_first_sp=true
) %}

{# 拼接 system prompt #}
{% for message in messages %}
  {% if message['role'] == 'system' %}
    {% if ns.is_first_sp %}
      {% set ns.system_prompt = ns.system_prompt + message['content'] %}
      {% set ns.is_first_sp = false %}
    {% else %}
      {% set ns.system_prompt = ns.system_prompt + '\n' + message['content'] %}
    {% endif %}
  {% endif %}
{% endfor %}

{{ bos_token }}{{ ns.system_prompt }}

{# 遍历消息内容 #}
{% for message in messages %}

  {# 用户消息处理 #}
  {% if message['role'] == 'user' %}
    {% set ns.is_tool = false %}
    {{ '<｜User｜>' + message['content'] }}

  {# 助手消息（带工具调用） #}
  {% elif message['role'] == 'assistant' and message['content'] is none %}
    {% set ns.is_tool = false %}
    {% for tool in message['tool_calls'] %}
      {% if not ns.is_first %}
        {{ '<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n```json\n' + tool['function']['arguments'] + '\n```<｜tool▁call▁end｜>' }}
        {% set ns.is_first = true %}
      {% else %}
        {{ '\n<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n```json\n' + tool['function']['arguments'] + '\n```<｜tool▁call▁end｜>' }}
        {{ '<｜tool▁calls▁end｜><｜end▁of▁sentence｜>' }}
      {% endif %}
    {% endfor %}

  {# 助手正常回复内容 #}
  {% elif message['role'] == 'assistant' and message['content'] is not none %}
    {% if ns.is_tool %}
      {{ '<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>' }}
      {% set ns.is_tool = false %}
    {% else %}
      {{ '<｜Assistant｜>' + message['content'] + '<｜end▁of▁sentence｜>' }}
    {% endif %}

  {# 工具输出处理 #}
  {% elif message['role'] == 'tool' %}
    {% set ns.is_tool = true %}
    {% if ns.is_output_first %}
      {{ '<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>' }}
      {% set ns.is_output_first = false %}
    {% else %}
      {{ '\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>' }}
    {% endif %}
  {% endif %}

{% endfor %}

{# 工具输出结尾处理 #}
{% if ns.is_tool %}
  {{ '<｜tool▁outputs▁end｜>' }}
{% endif %}

{# 生成助手响应起始标记 #}
{% if add_generation_prompt and not ns.is_tool %}
  {{ '<｜Assistant｜>' }}
{% endif %}

```

```
初始化变量
│
├── 收集 system prompt
│
├── 遍历 messages:
│   ├── system → 拼接 prompt
│   ├── user → 加 <|User|>
│   ├── assistant:
│   │   ├── 若调用 tool → 生成 tool_call 块
│   │   └── 否则 → 加 <|Assistant|>
│   └── tool → 输出 tool_output 块
│
└── 最后判断是否需要加 <|Assistant|> 结束
```

### 'deepseek-ai/DeepSeek-V3-0324'

```
初始化变量（增加 is_last_user 等）
│
├── 收集 system prompt
│
├── 遍历 messages:
│   ├── system → 拼接 prompt
│   ├── user → 加 <|User|>，标记 is_last_user=True
│   ├── assistant:
│   │   ├── 若调用 tool_call：
│   │   │   └── 判断是否有 content（处理更细）
│   │   └── 若普通内容 → 加 <|Assistant|>
│   └── tool:
│       └── 多个 tool_output 串联，闭合处理
│
└── 若最后是 user 且无 tool 调用 → 加 <|Assistant|> 提示生成回复

```

In [7]:
print(T2.chat_template)

{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '

' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<｜User｜>' + message['content'] + '<｜Assistant｜>'}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{%- 

```jinja
{# 设置默认值 #}
{% if add_generation_prompt is not defined %}
  {% set add_generation_prompt = false %}
{% endif %}

{# 初始化状态变量 #}
{% set ns = namespace(
  is_first=false,
  is_tool=false,
  is_output_first=true,
  system_prompt='',
  is_first_sp=true,
  is_last_user=false
) %}

{# 拼接所有 system prompt #}
{% for message in messages %}
  {% if message['role'] == 'system' %}
    {% if ns.is_first_sp %}
      {% set ns.system_prompt = ns.system_prompt + message['content'] %}
      {% set ns.is_first_sp = false %}
    {% else %}
      {% set ns.system_prompt = ns.system_prompt + '\n' + message['content'] %}
    {% endif %}
  {% endif %}
{% endfor %}

{{ bos_token }}{{ ns.system_prompt }}

{# 遍历所有消息 #}
{% for message in messages %}

  {# 处理用户消息 #}
  {% if message['role'] == 'user' %}
    {% set ns.is_tool = false %}
    {% set ns.is_first = false %}
    {% set ns.is_last_user = true %}
    {{ '<｜User｜>' + message['content'] + '<｜Assistant｜>' }}

  {# 处理 Assistant 调用工具的情况 #}
  {% elif message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
    {% set ns.is_last_user = false %}
    {% if ns.is_tool %}
      {{ '<｜tool▁outputs▁end｜>' }}
    {% endif %}
    {% set ns.is_first = false %}
    {% set ns.is_tool = false %}
    {% set ns.is_output_first = true %}

    {% for tool in message['tool_calls'] %}
      {% set tool_call_str = '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n```json\n' + tool['function']['arguments'] + '\n```<｜tool▁call▁end｜>' %}

      {% if not ns.is_first %}
        {% if message['content'] is none %}
          {{ '<｜tool▁calls▁begin｜>' + tool_call_str }}
        {% else %}
          {{ message['content'] + '<｜tool▁calls▁begin｜>' + tool_call_str }}
        {% endif %}
        {% set ns.is_first = true %}
      {% else %}
        {{ '\n' + tool_call_str }}
      {% endif %}
    {% endfor %}
    {{ '<｜tool▁calls▁end｜><｜end▁of▁sentence｜>' }}

  {# Assistant 正常回复内容（无工具调用） #}
  {% elif message['role'] == 'assistant' %}
    {% set ns.is_last_user = false %}
    {% if ns.is_tool %}
      {{ '<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>' }}
      {% set ns.is_tool = false %}
    {% else %}
      {{ message['content'] + '<｜end▁of▁sentence｜>' }}
    {% endif %}

  {# 工具的输出内容 #}
  {% elif message['role'] == 'tool' %}
    {% set ns.is_last_user = false %}
    {% set ns.is_tool = true %}
    {% if ns.is_output_first %}
      {{ '<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>' }}
      {% set ns.is_output_first = false %}
    {% else %}
      {{ '\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>' }}
    {% endif %}
  {% endif %}

{% endfor %}

{# 如果有残留的 tool 输出状态，则收尾结束 #}
{% if ns.is_tool %}
  {{ '<｜tool▁outputs▁end｜>' }}
{% endif %}

{# 最终是否生成 Assistant 提示起始符 #}
{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}
  {{ '<｜Assistant｜>' }}
{% endif %}

```

### apply_chat_template

In [14]:
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What's the weather in Paris?"},
    {
        "role": "assistant",
        # "content": "Let me check the weather for you.",
        "content": "",
        "tool_calls": [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "arguments": '{ "location": "Paris" }'
                }
            }
        ]
    },
    {
        "role": "tool",
        "content": '{ "temperature": "15C", "condition": "Sunny" }',
        "tool_call_id": "call_1"
    },
    {
        "role": "assistant",
        "content": "It's 15°C and sunny in Paris right now."
    }
]

In [15]:
T1.apply_chat_template(messages, tokenize=False)

'<｜begin▁of▁sentence｜>You are a helpful assistant.<｜User｜>What\'s the weather in Paris?<｜Assistant｜><｜end▁of▁sentence｜><｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>{ "temperature": "15C", "condition": "Sunny" }<｜tool▁output▁end｜><｜tool▁outputs▁end｜>It\'s 15°C and sunny in Paris right now.<｜end▁of▁sentence｜>'

```
<｜begin▁of▁sentence｜>You are a helpful assistant.
<｜User｜>What\'s the weather in Paris?
<｜Assistant｜>Let me check the weather for you.<｜end▁of▁sentence｜>
<｜tool▁outputs▁begin｜>
    <｜tool▁output▁begin｜>{ "temperature": "15C", "condition": "Sunny" }<｜tool▁output▁end｜>
<｜tool▁outputs▁end｜>It\'s 15°C and sunny in Paris right now.<｜end▁of▁sentence｜>
```

In [11]:
T2.apply_chat_template(messages, tokenize=False)

'<｜begin▁of▁sentence｜>You are a helpful assistant.<｜User｜>What\'s the weather in Paris?<｜Assistant｜>Let me check the weather for you.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather\n```json\n{ "location": "Paris" }\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜><｜end▁of▁sentence｜><｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>{ "temperature": "15C", "condition": "Sunny" }<｜tool▁output▁end｜><｜tool▁outputs▁end｜>It\'s 15°C and sunny in Paris right now.<｜end▁of▁sentence｜>'

```
<｜begin▁of▁sentence｜>You are a helpful assistant.
<｜User｜>What\'s the weather in Paris?
<｜Assistant｜>Let me check the weather for you.
<｜tool▁calls▁begin｜>
    <｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather\n```json\n{ "location": "Paris" }\n```<｜tool▁call▁end｜>
<｜tool▁calls▁end｜><｜end▁of▁sentence｜>
<｜tool▁outputs▁begin｜>
    <｜tool▁output▁begin｜>{ "temperature": "15C", "condition": "Sunny" }<｜tool▁output▁end｜>
<｜tool▁outputs▁end｜>It\'s 15°C and sunny in Paris right now.<｜end▁of▁sentence｜>
```

- 两个 highlights
    - v3 chat tempalte 解析 messages 时 丢了 tool_call 的部分
    - tool_call 和 tool_output 是一体的，统一作为 <｜Assistant｜> 的输出

In [12]:
Image(url='https://cdn.openai.com/API/docs/images/function-calling-diagram-steps.png', width=400)