In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

cache_path = r"D:\TrainedModel"

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B", cache_dir=cache_path)
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", cache_dir=cache_path)

In [2]:
from datasets import load_dataset
ds = load_dataset("samhog/psychology-10k")

Psychology-10K.json:   0%|          | 0.00/5.21M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Generating train split:   0%|          | 0/9846 [00:00<?, ? examples/s]

In [20]:
ds = ds.map(
    lambda x: {"prompt": x["input"], "completion": x["output"]},
    remove_columns=["instruction", "input", "output"]
)

Map:   0%|          | 0/9846 [00:00<?, ? examples/s]

In [21]:
len(ds["train"])

9846

In [23]:
ds["train"][9845]

{'prompt': "I'm having trouble making decisions and sticking to them.",
 'completion': "Indecisiveness can be a frustrating issue to deal with, but it's important to identify any underlying factors that may be contributing to this behavior. Let's work on developing strategies for improving decision-making skills and maintaining accountability for sticking to those decisions."}

In [24]:
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

In [25]:
from peft import LoraConfig, get_peft_model, TaskType

config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False, 
    r=16, 
    lora_alpha=16, 
    lora_dropout=0.1
)
model = get_peft_model(model, config)
model.train()  

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): PeftModelForCausalLM(
      (base_model): LoraModel(
        (model): PeftModelForCausalLM(
          (base_model): LoraModel(
            (model): PeftModelForCausalLM(
              (base_model): LoraModel(
                (model): Qwen3ForCausalLM(
                  (model): Qwen3Model(
                    (embed_tokens): Embedding(151936, 1024)
                    (layers): ModuleList(
                      (0-27): 28 x Qwen3DecoderLayer(
                        (self_attn): Qwen3Attention(
                          (q_proj): lora.Linear(
                            (base_layer): Linear(in_features=1024, out_features=2048, bias=False)
                            (lora_dropout): ModuleDict(
                              (default): Dropout(p=0.1, inplace=False)
                            )
                            (lora_A): ModuleDict(
                              (default): Linear(in_features=1024, out_features=16, b

In [26]:
model.print_trainable_parameters()

trainable params: 10,092,544 || all params: 606,142,464 || trainable%: 1.6650


In [32]:
from transformers import TrainingArguments
from trl import SFTTrainer, SFTConfig

training_args = SFTConfig(
    output_dir="./SFTcheckpoints",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    num_train_epochs=3,       
    logging_steps=5,
    save_steps=5,
    save_total_limit=2,
    fp16=True,
    # completion_only_loss=True,  # 只对回答部分算 loss（默认也如此）
)


In [33]:
trainer = SFTTrainer(
    model=model,
    train_dataset=ds["train"],
    peft_config=config,
    args=training_args,
    processing_class=tokenizer,    # trl>=0.16.0 时用它替代旧的 tokenizer 参数
)

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [34]:
trainer.train()
model.save_pretrained("./SFTuned")

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Step,Training Loss
5,2.229
10,2.3029
15,2.2964
20,2.1521
25,2.0145
30,1.8668
35,1.7177
40,1.588
45,1.5636
50,1.4753


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into 

KeyboardInterrupt: 

In [35]:
CHECKPOINT = "./SFTcheckpoints/checkpoint-925"
FINAL_ADAPTER = "./SFTuned"

In [37]:
model = model.from_pretrained(model, CHECKPOINT)

In [38]:
model.save_pretrained(FINAL_ADAPTER)

In [40]:
tokenizer.save_pretrained(FINAL_ADAPTER)

('./SFTuned\\tokenizer_config.json',
 './SFTuned\\special_tokens_map.json',
 './SFTuned\\vocab.json',
 './SFTuned\\merges.txt',
 './SFTuned\\added_tokens.json',
 './SFTuned\\tokenizer.json')