In [1]:
# Import necessary libraries
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from datasets import load_dataset
import json
from typing import Optional, Dict, Any

if torch.cuda.is_available():
    device = "cuda"
    print(f"Using CUDA GPU: {torch.cuda.get_device_name()}")
    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = "mps"
    print("Using Apple MPS")
else:
    device = "cpu"
    print("Using CPU - you will need to use a GPU to train models")

# Authenticate with Hugging Face (optional, for private models)
from huggingface_hub import login
# login()  # Uncomment if you need to access private models

  from .autonotebook import tqdm as notebook_tqdm


Using CUDA GPU: NVIDIA GeForce RTX 3050 Ti Laptop GPU
GPU memory: 4.3GB


In [2]:
instruct_model_name = "HuggingFaceTB/SmolLM3-3B"

# Load tokenizers
instruct_tokenizer = AutoTokenizer.from_pretrained(instruct_model_name)

# Load models (use smaller precision for memory efficiency)

instruct_model = AutoModelForCausalLM.from_pretrained(
    instruct_model_name,
    dtype=torch.bfloat16,
    device_map="auto"
)

print("Models loaded successfully!")

Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.67s/it]
Some parameters are on the meta device because they were offloaded to the cpu and disk.


Models loaded successfully!


In [23]:
# Create different types of conversations to test
conversations = {
    "simple_qa": [
        {"role": "user", "content": "What is machine learning?"},
    ],
    
    "with_system": [
        {"role": "system", "content": "You are a helpful AI assistant specialized in explaining technical concepts clearly."},
        {"role": "user", "content": "What is machine learning?"},
    ],
    
    "multi_turn": [
        {"role": "system", "content": "You are a math tutor."},
        {"role": "user", "content": "What is calculus?"},
        {"role": "assistant", "content": "Calculus is a branch of mathematics that deals with rates of change and accumulation of quantities."},
        {"role": "user", "content": "Can you give me a simple example?"},
    ],
    
    "reasoning_task": [
        {"role": "user", "content": "Solve step by step: If a train travels 120 miles in 2 hours, what is its average speed?"},
    ]
}

for conv_type, messages in conversations.items():
    print(f"--- {conv_type.upper()} ---")
    
    # Format without generation prompt (for completed conversations)
    formatted_complete = instruct_tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=False,
        enable_thinking=False
    )
    
    # Format with generation prompt (for inference)
    formatted_prompt = instruct_tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True,
        enable_thinking=False
    )
    
    print("Complete conversation format:")
    print(formatted_complete)
    print("\nWith generation prompt:")
    print(formatted_prompt)
    print("\n" + "="*50 + "\n")

--- SIMPLE_QA ---
Complete conversation format:
<|im_start|>system
## Metadata

Knowledge Cutoff Date: June 2025
Today Date: 08 November 2025
Reasoning Mode: /no_think

## Custom Instructions

You are a helpful AI assistant named SmolLM, trained by Hugging Face.

<|im_start|>user
What is machine learning?<|im_end|>


With generation prompt:
<|im_start|>system
## Metadata

Knowledge Cutoff Date: June 2025
Today Date: 08 November 2025
Reasoning Mode: /no_think

## Custom Instructions

You are a helpful AI assistant named SmolLM, trained by Hugging Face.

<|im_start|>user
What is machine learning?<|im_end|>
<|im_start|>assistant
<think>

</think>



--- WITH_SYSTEM ---
Complete conversation format:
<|im_start|>system
## Metadata

Knowledge Cutoff Date: June 2025
Today Date: 08 November 2025
Reasoning Mode: /no_think

## Custom Instructions

You are a helpful AI assistant specialized in explaining technical concepts clearly.

<|im_start|>user
What is machine learning?<|im_end|>


With gene

In [25]:
# Test the same prompt on both models
test_prompt = "Explain quantum computing in simple terms. Don't think"

# Prepare the prompt for base model (no chat template)

# Prepare the prompt for instruct model (with chat template)
instruct_messages = [{"role": "user", "content": test_prompt}]
instruct_formatted = instruct_tokenizer.apply_chat_template(
    instruct_messages, 
    tokenize=False, 
    add_generation_prompt=True,
    enable_thinking=False
)
instruct_inputs = instruct_tokenizer(instruct_formatted, return_tensors="pt").to(device)

# Generate responses
print("=== Model comparison ===\n")

print("\n" + "="*50)
print("Instruct model response:")
with torch.no_grad():
    instruct_outputs = instruct_model.generate(
        **instruct_inputs,
        max_new_tokens=10,
        temperature=0.7,
        do_sample=True,
        pad_token_id=instruct_tokenizer.eos_token_id
    )
    instruct_response = instruct_tokenizer.decode(instruct_outputs[0], skip_special_tokens=True)
    # Extract only the assistant's response
    assistant_start = instruct_response.find("<|im_start|>assistant\n") + len("<|im_start|>assistant\n")
    assistant_response = instruct_response[assistant_start:].split("<|im_end|>")[0]
    print(assistant_response)

=== Model comparison ===


Instruct model response:
nowledge Cutoff Date: June 2025
Today Date: 08 November 2025
Reasoning Mode: /no_think

## Custom Instructions

You are a helpful AI assistant named SmolLM, trained by Hugging Face.

user
Explain quantum computing in simple terms. Don't think
assistant
<think>

</think>
Quantum computing is a way of doing calculations that


In [11]:
instruct_tokenizer.apply_chat_template??

[0;31mSignature:[0m
[0minstruct_tokenizer[0m[0;34m.[0m[0mapply_chat_template[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mconversation[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mlist[0m[0;34m[[0m[0mdict[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mstr[0m[0;34m][0m[0;34m][0m[0;34m,[0m [0mlist[0m[0;34m[[0m[0mlist[0m[0;34m[[0m[0mdict[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mstr[0m[0;34m][0m[0;34m][0m[0;34m][0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtools[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mlist[0m[0;34m[[0m[0mUnion[0m[0;34m[[0m[0mdict[0m[0;34m,[0m [0mCallable[0m[0;34m][0m[0;34m][0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdocuments[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mlist[0m[0;34m[[0m[0mdict[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mstr[0m[0;34m][0m[0;34m][0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mchat_

In [12]:
# Test SmolLM3's reasoning capabilities
reasoning_prompts = [
    "What is 15 × 24? Show your work.",
    "A recipe calls for 2 cups of flour for 12 cookies. How much flour is needed for 30 cookies?",
    "If I have $50 and spend $18.75 on lunch and $12.30 on a book, how much money do I have left?"
]

print("=== TESTING REASONING CAPABILITIES ===\n")

for i, prompt in enumerate(reasoning_prompts, 1):
    print(f"Problem {i}: {prompt}")
    
    messages = [{"role": "user", "content": prompt}]
    formatted_prompt = instruct_tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = instruct_tokenizer(formatted_prompt, return_tensors="pt").to(device)
    
    with torch.no_grad():
        outputs = instruct_model.generate(
            **inputs,
            max_new_tokens=10,
            temperature=0.3,  # Lower temperature for more consistent reasoning
            do_sample=True,
            pad_token_id=instruct_tokenizer.eos_token_id
        )
        response = instruct_tokenizer.decode(outputs[0], skip_special_tokens=True)
        assistant_start = response.find("<|im_start|>assistant\n") + len("<|im_start|>assistant\n")
        assistant_response = response[assistant_start:].split("<|im_end|>")[0]
        print(f"Answer: {assistant_response}")
    
    print("\n" + "-"*50 + "\n")

=== TESTING REASONING CAPABILITIES ===

Problem 1: What is 15 × 24? Show your work.
Answer: nowledge Cutoff Date: June 2025
Today Date: 08 November 2025
Reasoning Mode: /think

## Custom Instructions

You are a helpful AI assistant named SmolLM, trained by Hugging Face. Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracking, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: <think> Thought section </think> Solution section. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of t

In [22]:
instruct_tokenizer.get_chat_template()

'{# ───── defaults ───── #}\n{%- if enable_thinking is not defined -%}\n{%- set enable_thinking = true -%}\n{%- endif -%}\n\n{# ───── reasoning mode ───── #}\n{%- if enable_thinking -%}\n  {%- set reasoning_mode = "/think" -%}\n{%- else -%}\n  {%- set reasoning_mode = "/no_think" -%}\n{%- endif -%}\n\n{# ───── header (system message) ───── #}\n{{- "<|im_start|>system\\n" -}}\n\n{%- if messages[0].role == "system" -%}\n  {%- set system_message = messages[0].content -%}\n  {%- if "/no_think" in system_message -%}\n    {%- set reasoning_mode = "/no_think" -%}\n  {%- elif "/think" in system_message -%}\n    {%- set reasoning_mode = "/think" -%}\n  {%- endif -%}\n  {%- set custom_instructions = system_message.replace("/no_think", "").replace("/think", "").rstrip() -%}\n{%- endif -%}\n\n{%- if "/system_override" in system_message -%}\n  {{- custom_instructions.replace("/system_override", "").rstrip() -}}\n  {{- "<|im_end|>\\n" -}}\n{%- else -%}\n  {{- "## Metadata\\n\\n" -}}\n  {{- "Knowledg