In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
from datasets import Dataset

from trl import SFTTrainer, SFTConfig
from peft import LoraConfig
from transformers.utils import is_torch_bf16_gpu_available
from transformers import AutoModelForCausalLM

from jigsaw.settings import DATA_DIR, MODEL_DIR


train_path = DATA_DIR / "ds_no_examples_train.ds"
eval_path = DATA_DIR / "ds_no_examples_eval.ds"

train_dataset = Dataset.load_from_disk(str(train_path))
eval_dataset = Dataset.load_from_disk(str(eval_path))

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    task_type="CAUSAL_LM",
)

training_args = SFTConfig(
    report_to="wandb",
    num_train_epochs=3,
    adam_epsilon=1e-6,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    optim="paged_adamw_8bit",
    learning_rate=1e-4,  # keep high, lora usually likes high.
    weight_decay=0.01,
    max_grad_norm=1.0,
    lr_scheduler_type="cosine",
    warmup_steps=40,
    bf16=is_torch_bf16_gpu_available(),
    fp16=not is_torch_bf16_gpu_available(),
    dataloader_pin_memory=True,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    save_strategy="no",
    packing=False,
    remove_unused_columns=False,
    completion_only_loss=True,
)



In [3]:
train_dataset[0]

{'prompt': [{'content': "You are an unbiased moderator that classifies comments as violating a rule or not. You are moderating for the subreddit <subreddit> r/Futurology </subreddit>. The rule is <rule> No Advertising: Spam, referral links, unsolicited advertising, and promotional content are not allowed. </rule>. Does the given comment violate the rule? Respond with 'yes' or 'no'.Comment: <comment>Banks don't want you to know this! Click here to know more!</comment>",
   'role': 'user'}],
 'completion': [{'content': 'no', 'role': 'assistant'}]}

In [4]:
BASE_MODEL_PATH = "Qwen/Qwen3-0.6B"  # Model to fine-tune
LORA_PATH = MODEL_DIR / "lora_checkpoint.pt"  # Where to save LoRA weights

In [5]:
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH, dtype="auto", device_map="auto"
)

In [6]:
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    peft_config=lora_config,
)

trainer.train()
trainer.save_model(LORA_PATH)

The model is already on multiple devices. Skipping the move to device specified in `args`.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.
[34m[1mwandb[0m: Currently logged in as: [33mfelix-newman[0m ([33mproject-vesuvius[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,1.4475
20,0.1152
30,0.092
40,0.117
50,0.0895
60,0.0891
70,0.0885
80,0.0881
90,0.0745
100,0.0587
