In [2]:
from pprint import pprint

from transformers import AutoTokenizer
from transformers import pipeline

## HuggingFace Pipeline abstraction

In [3]:
# Build a text-generation pipeline around the small Qwen3 chat checkpoint;
# device_map="auto" leaves device placement to the library.
pipe = pipeline(
    task="text-generation",
    model="Qwen/Qwen3-0.6B",
    device_map="auto",
)
pipe

Device set to use mps


<transformers.pipelines.text_generation.TextGenerationPipeline at 0x1348bad50>

In [4]:
# Chat-style input: a system prompt followed by a single user turn.
messages = [
    dict(role="system", content="You are a helpful assistant."),
    dict(role="user", content="What is 2+2?"),
]

# The pipeline returns the input messages with the assistant's reply appended.
response = pipe(
    messages,
    max_new_tokens=512,
    temperature=0.7,
)
response

[{'generated_text': [{'role': 'system',
    'content': 'You are a helpful assistant.'},
   {'role': 'user', 'content': 'What is 2+2?'},
   {'role': 'assistant',
    'content': "<think>\nOkay, the user is asking what 2 plus 2 is. Let me think. Well, in basic arithmetic, adding 2 and 2 would result in 4. But maybe they want more details or context? Let me check if there's any trick here. No, this is straightforward. I should just confirm the answer clearly.\n</think>\n\n2 + 2 equals 4."}]}]

In [5]:
# Pretty-print only the assistant's reply: the last message of the first result.
pprint(response[0]['generated_text'][-1]['content'])

('<think>\n'
 'Okay, the user is asking what 2 plus 2 is. Let me think. Well, in basic '
 'arithmetic, adding 2 and 2 would result in 4. But maybe they want more '
 "details or context? Let me check if there's any trick here. No, this is "
 'straightforward. I should just confirm the answer clearly.\n'
 '</think>\n'
 '\n'
 '2 + 2 equals 4.')


## Advanced pipeline usage

In [6]:
# Reuse the pipeline built in the "HuggingFace Pipeline abstraction" section
# instead of loading "Qwen/Qwen3-0.6B" a second time: the arguments were
# identical, so a reload only costs extra time and memory.
pipe

Device set to use mps


<transformers.pipelines.text_generation.TextGenerationPipeline at 0x1261c7fe0>

In [7]:
# Sampling settings shared by the generation calls that follow.
generation_config = dict(
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
    max_new_tokens=512,
    repetition_penalty=1.1,
)

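**Open question:** what about `top_k`? It is not set in `generation_config` above, so whatever default the model's own generation settings provide is used for it.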

In [8]:
# Multi-turn conversation
# Opening turns: the tutor persona plus the first user question.
messages = [
    dict(role="system", content="You are a helpful tutor."),
    dict(role="user", content="What is algebra?"),
]

In [9]:
# Generate the assistant's first reply using the shared sampling settings.
response = pipe(messages, **generation_config)

In [10]:
# The first result holds the full chat: the input messages plus the new assistant turn.
conversation = response[0]['generated_text']
conversation

[{'role': 'system', 'content': 'You are a helpful tutor.'},
 {'role': 'user', 'content': 'What is algebra?'},
 {'role': 'assistant',
  'content': "<think>\nOkay, so I need to explain what algebra is. Let me start by recalling what I know. Algebra is like using letters and numbers to represent unknown values. For example, if I have an equation where some variables are missing, I can use letters to stand for those numbers instead of writing them out.\n\nWait, but how exactly does that work? Maybe it's about solving equations or finding patterns in numbers. Like, if you see something with two numbers and an operation between them, you assign variables to each number and then solve the problem step by step.\n\nLet me think of an example. Suppose I have 2x + 5 = 10. Here, x is the variable we're trying to find. So, I subtract 5 from both sides: 2x = 5, and then divide by 2: x = 2.5. That makes sense. In algebra, you don't just write down the answer; you also show the process of solving the 

In [11]:
# Continue the conversation
# Build a new message list instead of appending to `conversation` in place:
# `conversation` aliases the previous pipeline output, so an in-place append
# would add another copy of this user turn every time the cell is re-run.
follow_up = {"role": "user", "content": "How can we add x+y?"}
response = pipe([*conversation, follow_up], **generation_config)

In [12]:
# Show the whole exchange returned by the follow-up call.
print("Final conversation::")
final_messages = response[0]["generated_text"]
pprint(final_messages)

Final conversation::
[{'content': 'You are a helpful tutor.', 'role': 'system'},
 {'content': 'What is algebra?', 'role': 'user'},
 {'content': '<think>\n'
             'Okay, so I need to explain what algebra is. Let me start by '
             'recalling what I know. Algebra is like using letters and numbers '
             'to represent unknown values. For example, if I have an equation '
             'where some variables are missing, I can use letters to stand for '
             'those numbers instead of writing them out.\n'
             '\n'
             "Wait, but how exactly does that work? Maybe it's about solving "
             'equations or finding patterns in numbers. Like, if you see '
             'something with two numbers and an operation between them, you '
             'assign variables to each number and then solve the problem step '
             'by step.\n'
             '\n'
             'Let me think of an example. Suppose I have 2x + 5 = 10. Here, x '
            

## Formatted Chat

In [13]:
# Load the tokenizer for the same checkpoint; it carries the model's chat template.
# (AutoTokenizer is imported in the notebook's top import cell.)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")

messages = [
    {"role": "system", "content": "You are a helpful tutor."},
    {"role": "user", "content": "What is algebra?"}
]

# tokenize=False returns the rendered prompt as a string rather than token ids;
# add_generation_prompt=True appends the opening assistant header so the model
# is cued to write the next reply.
formatted_chat = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

pprint(formatted_chat)

('<|im_start|>system\n'
 'You are a helpful tutor.<|im_end|>\n'
 '<|im_start|>user\n'
 'What is algebra?<|im_end|>\n'
 '<|im_start|>assistant\n')


In [14]:
# Raw repr of the same prompt string, with the "\n" separators shown as escapes.
formatted_chat

'<|im_start|>system\nYou are a helpful tutor.<|im_end|>\n<|im_start|>user\nWhat is algebra?<|im_end|>\n<|im_start|>assistant\n'

## System Messages

In [15]:
# System prompt that assigns the model a software-engineer persona.
system_message = dict(
    role="system",
    content="You are a professional software engineer. Talk in terms of coding.",
)

# Same user question as before, now preceded by the persona prompt.
conversation = [
    system_message,
    dict(role="user", content="What is algebra?"),
]

response = pipe(conversation, **generation_config)
response

[{'generated_text': [{'role': 'system',
    'content': 'You are a professional software engineer. Talk in terms of coding.'},
   {'role': 'user', 'content': 'What is algebra?'},
   {'role': 'assistant',
    'content': '<think>\nOkay, the user asked, "What is algebra?" Let me start by explaining what algebra is. I should mention its origins, like ancient Greece, and then define it. Maybe give some basic examples.\n\nAlgebra is a branch of mathematics that deals with symbols and variables. These symbols represent numbers or quantities that can be changed. For example, in algebra, you have equations like x + 2 = 5. \n\nI need to explain how algebra helps solve problems. It\'s used in various fields like science, engineering, and everyday life. Also, mention that it\'s more than just solving math problems—it\'s about patterns and relationships.\n\nWait, should I include something about its applications? Yes, maybe. And perhaps clarify that while algebra is related to arithmetic, it\'s a br

In [16]:
# Pretty-printed view of the same response, wrapped for easier reading.
pprint(response)

[{'generated_text': [{'content': 'You are a professional software engineer. '
                                 'Talk in terms of coding.',
                      'role': 'system'},
                     {'content': 'What is algebra?', 'role': 'user'},
                     {'content': '<think>\n'
                                 'Okay, the user asked, "What is algebra?" Let '
                                 'me start by explaining what algebra is. I '
                                 'should mention its origins, like ancient '
                                 'Greece, and then define it. Maybe give some '
                                 'basic examples.\n'
                                 '\n'
                                 'Algebra is a branch of mathematics that '
                                 'deals with symbols and variables. These '
                                 'symbols represent numbers or quantities that '
                                 'can be changed. For example, in

In [19]:
# System prompt that assigns the model a school-teacher persona.
# NOTE(review): "simplies" is presumably a typo for "simplifies"; it is kept
# as-is because the recorded outputs were generated with this exact prompt.
system_message = dict(
    role="system",
    content="You are a school teacher who simplies things for students.",
)

# Same user question, so the two personas can be compared directly.
conversation = [
    system_message,
    dict(role="user", content="What is algebra?"),
]

response = pipe(conversation, **generation_config)
pprint(response)

[{'generated_text': [{'content': 'You are a school teacher who simplies things '
                                 'for students.',
                      'role': 'system'},
                     {'content': 'What is algebra?', 'role': 'user'},
                     {'content': '<think>\n'
                                 'Okay, the user asked, "What is algebra?" I '
                                 'need to explain it clearly. Let me start by '
                                 'breaking down the basics. Algebra is about '
                                 'solving equations and working with '
                                 'variables.\n'
                                 '\n'
                                 'I should mention that variables are symbols '
                                 'representing unknown numbers. For example, '
                                 'in x + 5 = 12, x stands for an unknown '
                                 "number we don't know yet. That's important "
     

In [28]:
# Assistant reply only (last message of the first result), shown as a raw string.
response[0]['generated_text'][-1]['content']

'<think>\nOkay, the user asked, "What is algebra?" I need to explain it clearly. Let me start by breaking down the basics. Algebra is about solving equations and working with variables.\n\nI should mention that variables are symbols representing unknown numbers. For example, in x + 5 = 12, x stands for an unknown number we don\'t know yet. That\'s important because it shows algebra\'s goal of finding solutions.\n\nAlso, teaching students how to solve problems using algebraic methods is key. Maybe include examples like solving for x or y in equations. It\'s helpful to keep explanations simple and avoid jargon. \n\nWait, should I include real-world applications too? Like how algebra helps in planning or calculating something? But maybe that\'s overcomplicating if the explanation is meant for general understanding. Keep it straightforward but engaging.\n\nLet me check if there are any common misconceptions. Students might confuse algebra with arithmetic. Clarifying that algebra is more ab

## Generation Prompts

In [29]:
# Reuses the `tokenizer` loaded in the "Formatted Chat" section; re-importing
# AutoTokenizer and reloading the same checkpoint here was redundant.
messages = [
    {'role': 'system', 'content': 'You are a school teacher who simplies things for students.'},
    {'role': 'user', 'content': 'What is algebra?'},
    {'role': 'assistant', 'content': 'the user asked, "What is algebra?" I need to explain it clearly. Let me start by breaking down the basics. Algebra is about solving equations and working with variables'},
    {'role': 'user', 'content': 'Use an example'}
]

# Without the generation prompt, the rendered text stops after the last message.
formatted_without = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=False
)

print("formatted without")
print(formatted_without)
print("===================")


# With the generation prompt, the template appends an opening assistant header,
# cueing the model to write the next reply.
formatted_with = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

print("formatted with")
print(formatted_with)
print("===================")

formatted without
<|im_start|>system
You are a school teacher who simplies things for students.<|im_end|>
<|im_start|>user
What is algebra?<|im_end|>
<|im_start|>assistant
the user asked, "What is algebra?" I need to explain it clearly. Let me start by breaking down the basics. Algebra is about solving equations and working with variables<|im_end|>
<|im_start|>user
Use an example<|im_end|>

formatted with
<|im_start|>system
You are a school teacher who simplies things for students.<|im_end|>
<|im_start|>user
What is algebra?<|im_end|>
<|im_start|>assistant
the user asked, "What is algebra?" I need to explain it clearly. Let me start by breaking down the basics. Algebra is about solving equations and working with variables<|im_end|>
<|im_start|>user
Use an example<|im_end|>
<|im_start|>assistant



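When to set `add_generation_prompt`:
1. `True` when the prompt goes to a model that should respond next (e.g. a chatbot): the template appends the opening assistant header (`<|im_start|>assistant`).
2. `True` when building prompts for an evaluation pipeline, where the model must generate the answer.
3. `False` when formatting complete conversations as training data, because the assistant turns are already present.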

## Continue final message

In [35]:
# Reuses the `tokenizer` loaded in the "Formatted Chat" section; re-importing
# AutoTokenizer and reloading the same checkpoint here was redundant.
messages = [
    {'role': 'system', 'content': 'You are a school teacher who simplies things for students.'},
    {'role': 'user', 'content': 'What is algebra?'},
    {'role': 'assistant', 'content': 'So, the way it works is...'}
]

# continue_final_message=True leaves the last assistant message open (no
# end-of-turn token after it), so the model keeps writing that same message.
formatted_continued = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    continue_final_message=True,
)

print("continuing final message")
print(formatted_continued)
print("===================")


# For contrast: add_generation_prompt=True closes the existing assistant message
# and opens a new, empty assistant turn instead of extending it.
formatted_with = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

print("formatted with")
print(formatted_with)
print("===================")

continuing final message
<|im_start|>system
You are a school teacher who simplies things for students.<|im_end|>
<|im_start|>user
What is algebra?<|im_end|>
<|im_start|>assistant
<think>

</think>

So, the way it works is...
formatted with
<|im_start|>system
You are a school teacher who simplies things for students.<|im_end|>
<|im_start|>user
What is algebra?<|im_end|>
<|im_start|>assistant
<think>

</think>

So, the way it works is...<|im_end|>
<|im_start|>assistant



## Working with reasoning mode

In [None]:
# NOTE(review): this cell has no execution count (`In [None]`), and the recorded
# "Thinking Mode" output shows `<|thinking|>` tags although the input below uses
# `<think>` tags - re-run the cell to confirm what the template really renders.

# Plain exchange: the assistant turn contains only the final answer.
standard_messages = [
    dict(role='user', content='What is 2 * 3?'),
    dict(role='assistant', content='Answer is 6.'),
]

# Same exchange, but the assistant turn carries its reasoning inside <think> tags.
thinking_messages = [
    dict(role='user', content='What is 2 * 3?'),
    dict(role='assistant', content='<think>Alright, I have to multiple 2 with 3.</think>Answer is 6.'),
]

# NOTE(review): enable_thinking presumably only influences the generation
# prompt, which is not requested here - confirm against the chat template.
standard_formatted = tokenizer.apply_chat_template(
    standard_messages,
    tokenize=False,
    enable_thinking=False,
)
thinking_formatted = tokenizer.apply_chat_template(
    thinking_messages,
    tokenize=False,
)

for heading, rendered in (
    ("Standard Mode", standard_formatted),
    ("Thinking Mode", thinking_formatted),
):
    print(heading)
    print(rendered)

Standard Mode
<|im_start|>user
What is 2 * 3?<|im_end|>
<|im_start|>assistant
<think>

</think>

Answer is 6.<|im_end|>

Thinking Mode
<|im_start|>user
What is 2 * 3?<|im_end|>
<|im_start|>assistant
<think>

</think>

<|thinking|>Alright, I have to multiple 2 with 3.</|thinking|>Answer is 6.<|im_end|>



## Tool Usage

In [45]:
# Define available tools
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "The temperature unit"
                    }
                },
                "required": ["location"]
            }
        }
    },
    {
        "type": "function", 
        "function": {
            "name": "calculate",
            "description": "Perform mathematical calculations",
            "parameters": {
                "type": "object",
                "properties": {
                    "expression": {
                        "type": "string",
                        "description": "Mathematical expression to evaluate"
                    }
                },
                "required": ["expression"]
            }
        }
    }
]

In [47]:
# The assistant's single tool call in this exchange.
# NOTE(review): "arguments" is supplied as a JSON string; the transformers
# chat-template docs show a dict - confirm this template accepts both forms.
weather_call = {
    "id": "call_1",
    "type": "function",
    "function": {
        "name": "get_weather",
        "arguments": '{"location": "Paris, France", "unit": "celsius"}',
    },
}

# A complete tool-use exchange:
# user question -> assistant tool call -> tool result -> final assistant answer.
messages = [
    {"role": "system", "content": "You are a helpful assistant with access to tools."},
    {"role": "user", "content": "What's the weather like in Paris?"},
    {
        "role": "assistant",
        "content": "I'll check the weather in Paris for you.",
        "tool_calls": [weather_call],
    },
    {
        "role": "tool",
        "tool_call_id": "call_1",
        "content": '{"temperature": 22, "condition": "sunny", "humidity": 60}',
    },
    {
        "role": "assistant",
        "content": "The weather in Paris is currently sunny with a temperature of 22°C and 60% humidity. It's a beautiful day!",
    },
]

# Render the exchange together with the tool definitions; no generation prompt
# is requested because the conversation already ends with an assistant answer.
formatted_with_tools = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=False,
    tools=tools,
)

print("formatted with tools")
print(formatted_with_tools)

formatted with tools
<|im_start|>system
You are a helpful assistant with access to tools.

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "get_weather", "description": "Get the current weather for a location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit"}}, "required": ["location"]}}}
{"type": "function", "function": {"name": "calculate", "description": "Perform mathematical calculations", "parameters": {"type": "object", "properties": {"expression": {"type": "string", "description": "Mathematical expression to evaluate"}}, "required": ["expression"]}}}
</tools>

For each function call, return a json object with function name and arguments wi

## Template Customization

In [48]:
# Print the template source the tokenizer uses to render chats into prompt text.
print(tokenizer.chat_template)

{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\n\n' }}
    {%- endif %}
    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
    {%- set index = (messages|length - 

In [49]:
# See what special tokens are used
print("\nSpecial tokens:")
for label, attribute in (
    ("BOS", "bos_token"),
    ("EOS", "eos_token"),
    ("UNK", "unk_token"),
    ("PAD", "pad_token"),
):
    # Look up each attribute right before printing it, one at a time.
    print(f"{label}: {getattr(tokenizer, attribute)}")

# Check for custom tokens
special_tokens = tokenizer.special_tokens_map
for token_name, token_value in special_tokens.items():
    print(f"{token_name}: {token_value}")


Special tokens:
BOS: None
EOS: <|im_end|>
UNK: None
PAD: <|endoftext|>
eos_token: <|im_end|>
pad_token: <|endoftext|>
additional_special_tokens: ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']


Chat templates are to LLM conversations what grammar is to language: they define the structure every message must follow.