## Setup

In [None]:
# Import required libraries for fine-tuning
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset, load_from_disk
import torch

USE_GPU = True

# Choose the checkpoint pair by hardware budget: the SmolLM3-3B models when a
# GPU is available, otherwise the SmolLM2-135M models.
if USE_GPU:
    model_name = "/mimer/NOBACKUP/Datasets/LLM/huggingface/hub/models--HuggingFaceTB--SmolLM3-3B-Base/snapshots/d78a42f79198603e614095753484a04c10c2b940/"
    instruct_model_name = "/mimer/NOBACKUP/Datasets/LLM/huggingface/hub/models--HuggingFaceTB--SmolLM3-3B/snapshots/a07cc9a04f16550a088caea529712d1d335b0ac1/"
    new_model_name = "SmolLM3-llmworkshop-SFT"
else:
    model_name = "/mimer/NOBACKUP/Datasets/LLM/huggingface/hub/models--HuggingFaceTB--SmolLM2-135M/snapshots/93efa2f097d58c2a74874c7e644dbc9b0cee75a2/"
    instruct_model_name = "/mimer/NOBACKUP/Datasets/LLM/huggingface/hub/models--HuggingFaceTB--SmolLM2-135M-Instruct/snapshots/12fd25f77366fa6b3b4b768ec3050bf629380bac/"
    new_model_name = "SmolLM2-llmworkshop-SFT"

# Load the base (non-instruct) checkpoint that will be fine-tuned.
print(f"Loading {model_name}...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    attn_implementation="sdpa",
    trust_remote_code=True,
    device_map="auto",
    dtype=torch.bfloat16,
)

# The instruct tokenizer is loaded only to borrow its chat template.
instruct_tokenizer = AutoTokenizer.from_pretrained(instruct_model_name)

# Tokenizer that matches the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "right"  # Pad at the end of each sequence
tokenizer.pad_token = tokenizer.eos_token  # Reuse EOS as the padding token

print(f"Model loaded! Parameters: {model.num_parameters():,}")

## Dataset prep

In [None]:
# Load the training data and print a quick summary of it
print("=== PREPARING DATASET ===", end="\n\n")

# Option 1: SmolTalk2 subset read from shared storage
train_dataset = load_from_disk(
    "/mimer/NOBACKUP/Datasets/LLM/huggingface/datasets/HuggingFaceTB___smoltalk2_SFT"
)

# Recipe to download a subset and save it to that location:
# dataset = load_dataset("HuggingFaceTB/smoltalk2", "SFT")
# train_dataset = dataset["smoltalk_everyday_convs_reasoning_Qwen3_32B_think"].select(range(1000))  # Use subset for faster training
# train_dataset.save_to_disk("/mimer/NOBACKUP/Datasets/LLM/huggingface/datasets/HuggingFaceTB___smoltalk2_SFT")

# Option 2: your own processed dataset from the Data pipelines lesson
# train_dataset = load_dataset("jayant-yadav/gsm8k_sft_llmworkshop",split="train")

# Row count plus the first raw row, to confirm which columns are present
print("Training examples:", len(train_dataset))
print("Example:", train_dataset[0])

# Prepare the dataset for SFT
def format_chat_template(example, tokenizer=None):
    """Render one dataset row as a single chat-formatted training string.

    Args:
        example: Mapping for one dataset row. It must contain either a
            "messages" entry (a list of chat turns, SmolTalk2 style) or both
            an "instruction" and a "response" entry.
        tokenizer: Object exposing ``apply_chat_template``. When omitted, the
            notebook-level ``instruct_tokenizer`` is used.

    Returns:
        A dict with a single "text" key holding the rendered conversation.

    Raises:
        KeyError: If the row matches neither supported schema.
    """
    if tokenizer is None:
        # Resolved at call time so the function can be defined before the
        # tokenizer cell has run.
        tokenizer = instruct_tokenizer

    if "messages" in example:
        # SmolTalk2 format
        messages = example["messages"]
    elif "instruction" in example and "response" in example:
        # Custom format - adapt as needed
        messages = [
            {"role": "user", "content": example["instruction"]},
            {"role": "assistant", "content": example["response"]},
        ]
    else:
        raise KeyError(
            "Unsupported example schema: expected a 'messages' column or both "
            f"'instruction' and 'response' columns, got {sorted(example.keys())}"
        )

    # Render as plain text (no tokenization). No generation prompt is appended
    # because the conversation is rendered as-is for training.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": text}

# Run the chat-template formatter over every row, then keep only its "text" column
formatted_dataset = train_dataset.map(format_chat_template)
columns_to_drop = [
    name for name in formatted_dataset.column_names if name != "text"
]
formatted_dataset = formatted_dataset.remove_columns(columns_to_drop)

# Preview the first 200 characters of the first formatted row
preview = formatted_dataset[0]["text"][:200]
print(f"Formatted example: {preview}...")

## Training Configuration

In [None]:
# Configure training parameters
training_config = SFTConfig(
    # Model and data
    output_dir=f"./{new_model_name}",
    dataset_text_field="text",
    max_length=2048,

    # Training hyperparameters
    per_device_train_batch_size=2,  # Adjust based on your GPU memory
    gradient_accumulation_steps=2,
    learning_rate=5e-5,
    num_train_epochs=1,  # Not used while max_steps is positive (max_steps takes precedence)
    # Limit steps for the demo: 250 for the 3B GPU model, 500 for the smaller
    # CPU model. The "Test your model" cell loads checkpoint-250 / checkpoint-500
    # accordingly, so keep these numbers in sync with it.
    max_steps=250 if USE_GPU else 500,

    # Optimization
    warmup_steps=50,
    weight_decay=0.01,
    optim="adamw_torch",

    # Logging and saving
    logging_steps=10,
    save_steps=100,
    eval_steps=100,  # Only relevant if evaluation is enabled
    save_total_limit=2,

    # Data loading
    dataloader_num_workers=8,
    group_by_length=True,  # Group similar length sequences

    # Hugging Face Hub integration
    push_to_hub=False,
)

print("Training configuration set!")
# Per-device effective batch size (does not account for multiple devices)
print(f"Effective batch size: {training_config.per_device_train_batch_size * training_config.gradient_accumulation_steps}")

## Train 

In [None]:
# Full fine-tuning run on the formatted dataset (3B model takes 8 mins)
trainer = SFTTrainer(
    args=training_config,
    train_dataset=formatted_dataset,
    model=model,
)

trainer.train()

## Test your model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset
import torch

# Compare an untouched base checkpoint against the fine-tuned (SFT) checkpoint.
# USE_GPU, model_name, new_model_name, tokenizer and instruct_tokenizer all come
# from the Setup cell; USE_GPU is deliberately not redefined here so the two
# cells cannot disagree.

# Directory of the checkpoint to evaluate. The step number must match the step
# at which training stopped (max_steps in the training configuration).
if not USE_GPU:
    sft_model_name = f"./{new_model_name}/checkpoint-500/"
else:
    sft_model_name = f"./{new_model_name}/checkpoint-250/"

reasoning_prompts = [
    "What is 15 × 24? Show your work.",
    "A recipe calls for 2 cups of flour for 12 cookies. How much flour is needed for 30 cookies?",
    "If I have $50 and spend $18.75 on lunch and $12.30 on a book, how much money do I have left?"
]


def generate_answer(lm, tok, prompt_text, max_new_tokens):
    """Sample a completion for ``prompt_text`` and return only the new text.

    Args:
        lm: Causal language model exposing ``generate`` and ``device``.
        tok: Tokenizer used to encode the prompt and decode the output.
        prompt_text: Exact string fed to the model.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded continuation, without the prompt.
    """
    # Put the inputs on whatever device the model is on instead of assuming CUDA.
    inputs = tok(prompt_text, return_tensors="pt").to(lm.device)

    with torch.no_grad():
        outputs = lm.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tok.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # token count; slicing the decoded string by len(prompt) breaks as soon as
    # a chat template adds text around the user prompt.
    prompt_length = inputs["input_ids"].shape[1]
    return tok.decode(outputs[0][prompt_length:], skip_special_tokens=True)


def chat_prompt(question):
    """Wrap ``question`` as a single user turn using the instruct chat template."""
    messages = [{"role": "user", "content": question}]
    return instruct_tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )


def report_answers(lm, tok, max_new_tokens, build_prompt=None):
    """Print the model's answer for every entry in ``reasoning_prompts``.

    ``build_prompt`` optionally maps the raw question to the string actually
    fed to the model (e.g. a chat-template wrapper); by default the question
    is used unchanged.
    """
    for i, prompt in enumerate(reasoning_prompts, 1):
        print(f"Problem {i}: {prompt}")
        model_input = build_prompt(prompt) if build_prompt is not None else prompt
        answer = generate_answer(lm, tok, model_input, max_new_tokens)
        print(f"Answer {i}:\n")
        print(answer)  # Show only the generated part
        print("\n" + "-"*50 + "\n")


print("=== TESTING REASONING CAPABILITIES ===\n")

# `model` from the Setup cell has been updated in place by trainer.train(), so
# it is no longer a base model. Load a fresh copy of the base checkpoint to get
# a real baseline.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)

print("🤖 BASE MODEL RESPONSE:")

# Base model: plain prompt, no chat template
report_answers(base_model, tokenizer, max_new_tokens=300)

# Drop the baseline before loading the next model to limit peak memory.
del base_model

print("\n" + "="*50)

sft_model = AutoModelForCausalLM.from_pretrained(
    sft_model_name,
    dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)

sft_tokenizer = AutoTokenizer.from_pretrained(sft_model_name)
sft_tokenizer.pad_token = sft_tokenizer.eos_token  # Set padding token
sft_tokenizer.padding_side = "right"  # Prompts are encoded one at a time below

print("🤖 INSTRUCT MODEL RESPONSE:")

# Fine-tuned model: prompt wrapped with the instruct chat template
report_answers(sft_model, sft_tokenizer, max_new_tokens=500, build_prompt=chat_prompt)




## Optional: Train with LoRA/PEFT

In [None]:
# LoRA configuration with PEFT
from peft import LoraConfig

# Adapter settings passed to the trainer below
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Create SFTTrainer with LoRA enabled
from trl import SFTTrainer

# NOTE(review): this reuses `model` and `training_config` from the cells above.
# If the full fine-tuning cell was already run in this kernel, `model` carries
# those updated weights and checkpoints go to the same output_dir -- confirm
# this cell is meant as an alternative to, not a follow-up of, that run.
lora_trainer = SFTTrainer(
    model=model,
    train_dataset=formatted_dataset,  # dataset with a "text" field or messages + dataset_text_field in config
    args=training_config,
    peft_config=peft_config,  # << enable LoRA
)

print("Starting LoRA training…")
lora_trainer.train()