In [1]:
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset, Dataset
from unsloth.chat_templates import standardize_sharegpt
from trl import SFTTrainer, SFTConfig
import pandas as pd
from typing import *
from transformers import DataCollatorForLanguageModeling

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 06-01 05:22:38 [__init__.py:243] Automatically detected platform cuda.


In [None]:
from typing import Any, Dict, List

import torch
from datasets import load_dataset
from loguru import logger
from unsloth import FastLanguageModel
from unsloth.chat_templates import standardize_sharegpt

RANDOM_SEED = 3407
# Context length handed to the model loader. tokenize_on_the_fly truncates to
# the same 2048-token limit, so keep the two values in sync.
MAX_SEQ_LENGTH = 2048

# -------------------------------------------------------------------
# 0. Model / tokenizer (needed inside the converters)
# -------------------------------------------------------------------
# Re-running this cell in a live kernel skips the load below when both names
# already exist, so the base model is not reloaded and the LoRA adapters are
# not re-created. On a fresh kernel neither name exists and the load runs.
if "model" not in globals() or "tokenizer" not in globals():
    # Load the 4-bit base model together with its tokenizer.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="model_store/unsloth/Qwen3-0.6B-bnb-4bit",
        max_seq_length=MAX_SEQ_LENGTH,
        load_in_4bit=True,
        load_in_8bit=False,
        full_finetuning=False,
    )

    # Add LoRA adapters (rank 32, alpha 32, no dropout) on the listed
    # projection modules.
    model = FastLanguageModel.get_peft_model(
        model,
        r=32,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_alpha=32,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=RANDOM_SEED,  # same seed the trainer is configured with
        use_rslora=False,
        loftq_config=None,
    )


# -------------------------------------------------------------------
# 1. Load datasets
# -------------------------------------------------------------------
logger.info("Loading raw splits")
raw_chat = load_dataset("mlabonne/FineTome-100k", split="train")
# NOTE(review): standardize_sharegpt presumably rewrites ShareGPT-style records
# into the role/content messages the chat template expects -- confirm against
# the Unsloth docs. The "source" and "score" columns are dropped afterwards.
chat_std = standardize_sharegpt(raw_chat).remove_columns(["source", "score"])


def tokenize_on_the_fly(
    sample: Dict[str, Any], max_length: int = 2048
) -> Dict[str, Any]:
    """Render chat conversations with the chat template and tokenize them.

    Intended as an on-the-fly dataset transform, so tokenization happens when
    rows are read instead of up front.

    NOTE(review): per the ``datasets`` documentation, a transform registered
    with ``Dataset.set_transform`` is handed a batch (a dict mapping column
    names to lists) even when a single row is requested, so despite the
    parameter name the ``"conversations"`` entry is expected to be a list of
    conversations -- confirm against the installed ``datasets`` version.

    Args:
        sample: Mapping with a ``"conversations"`` entry that is passed to
            ``tokenizer.apply_chat_template``.
        max_length: Maximum number of tokens kept per sequence; longer
            sequences are truncated. Defaults to 2048.

    Returns:
        The tokenizer's encoding, including the attention mask. No labels are
        added here; that is left to the data collator.
    """
    # Render to text first (tokenize=False) so truncation is applied in a
    # single, explicit tokenizer call below.
    rendered = tokenizer.apply_chat_template(
        sample["conversations"], tokenize=False
    )
    return tokenizer(
        rendered,
        truncation=True,
        max_length=max_length,
        padding=False,  # padding happens in the collator
        return_attention_mask=True,
    )

# Attach the tokenizer as a transform on the dataset; rows are tokenized when
# they are read rather than ahead of time.
chat_std.set_transform(tokenize_on_the_fly)

# Language-modeling collator with masked-LM disabled (mlm=False); padding is
# left to it because the transform above does not pad.
collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

# -------------------------------------------------------------------
# 2. Trainer
# -------------------------------------------------------------------
# Short run: 30 optimizer steps, per-device batch of 1 with 4 accumulation
# steps.
sft_args = SFTConfig(
    # batching / schedule length
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    max_steps=30,
    # optimizer and learning-rate schedule
    optim="adamw_8bit",
    learning_rate=2e-4,
    weight_decay=0.01,
    lr_scheduler_type="linear",
    warmup_steps=5,
    # reproducibility / logging
    seed=RANDOM_SEED,
    logging_steps=1,
    report_to="none",
    # The original flags this region as critical.
    # NOTE(review): presumably skip_prepare_dataset stops SFTTrainer from
    # pre-processing the dataset itself, and remove_unused_columns=False keeps
    # the raw "conversations" column available to the transform -- confirm
    # against the TRL / transformers docs.
    dataset_kwargs={"skip_prepare_dataset": True},
    remove_unused_columns=False,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=chat_std,
    args=sft_args,
    data_collator=collator,
)

trainer.train()

[32m2025-05-31 19:23:31.816[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m10[0m - [1mLoading raw splits[0m
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 100,000 | Num Epochs = 1 | Total steps = 30
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 20,185,088/6,000,000,000 (0.34% trained)


Step,Training Loss
1,0.9411
2,1.136
3,1.0721
4,1.0644
5,0.9281
6,1.2498
7,0.9812
8,1.0533
9,1.0504
10,1.1456


TrainOutput(global_step=30, training_loss=1.1043422877788545, metrics={'train_runtime': 57.7936, 'train_samples_per_second': 8.305, 'train_steps_per_second': 0.519, 'total_flos': 1286580444069888.0, 'train_loss': 1.1043422877788545})

: 

In [None]:
# Scratch cell: IPython's `??` suffix displays the source of the object.
# It is interactive introspection only and has no effect on training;
# consider deleting this cell before sharing the notebook.
DataCollatorForLanguageModeling??