In [22]:
!pip install hf_xet
!pip install trl
!pip install --upgrade transformers
!pip install tensorboard

Collecting tensorboard
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting absl-py>=0.4 (from tensorboard)
  Downloading absl_py-2.3.0-py3-none-any.whl.metadata (2.4 kB)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard)
  Downloading tensorboard_data_server-0.7.2-py3-none-any.whl.metadata (1.1 kB)
Downloading tensorboard-2.19.0-py3-none-any.whl (5.5 MB)
   ---------------------------------------- 0.0/5.5 MB ? eta -:--:--
   ----------- ---------------------------- 1.6/5.5 MB 9.3 MB/s eta 0:00:01
   -------------------------- ------------- 3.7/5.5 MB 9.9 MB/s eta 0:00:01
   ---------------------------------------- 5.5/5.5 MB 10.2 MB/s eta 0:00:00
Downloading tensorboard_data_server-0.7.2-py3-none-any.whl (2.4 kB)
Downloading absl_py-2.3.0-py3-none-any.whl (135 kB)
Installing collected packages: tensorboard-data-server, absl-py, tensorboard

   ------------- -------------------------- 1/3 [absl-py]
   ------------- -------------------------- 1

### Import Packages

In [1]:
import os

import torch
from datasets import load_dataset, concatenate_datasets, Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer


### Load Datasets

In [2]:
# Load the train / validation / test tuning sets for <Sunrise on the Reaping>.
# Paths are assembled with os.path.join instead of hard-coded backslashes so the
# notebook also runs on Linux/macOS (where '\' is an ordinary filename character);
# on Windows the resulting strings are identical to the previous literals.
SUNRISE_DATA_DIR = 'fine_tuning'
sunrise_train_path = os.path.join(SUNRISE_DATA_DIR, 'sunrise_train_data.json')
sunrise_val_path = os.path.join(SUNRISE_DATA_DIR, 'sunrise_val_data.json')
sunrise_test_path = os.path.join(SUNRISE_DATA_DIR, 'sunrise_test_data.json')

sunrise_train_datasets = Dataset.from_json(sunrise_train_path)
sunrise_val_datasets = Dataset.from_json(sunrise_val_path)
sunrise_test_datasets = Dataset.from_json(sunrise_test_path)

In [3]:
# Load the train / validation / test tuning sets for <All Fours>.
# Paths are assembled with os.path.join instead of hard-coded backslashes so the
# notebook also runs on Linux/macOS (where '\' is an ordinary filename character);
# on Windows the resulting strings are identical to the previous literals.
ALLFOURS_DATA_DIR = os.path.join('fine_tuning2', 'reasoned_qa_output')
allfours_train_path = os.path.join(ALLFOURS_DATA_DIR, 'allfours_train_data.json')
allfours_val_path = os.path.join(ALLFOURS_DATA_DIR, 'allfours_val_data.json')
allfours_test_path = os.path.join(ALLFOURS_DATA_DIR, 'allfours_test_data.json')

allfours_train_datasets = Dataset.from_json(allfours_train_path)
allfours_val_datasets = Dataset.from_json(allfours_val_path)
allfours_test_datasets = Dataset.from_json(allfours_test_path)

In [4]:
# Merge the two books split by split: every combined split holds the
# <Sunrise on the Reaping> rows followed by the <All Fours> rows.
train_set, val_set, test_set = (
    concatenate_datasets([sunrise_part, allfours_part])
    for sunrise_part, allfours_part in (
        (sunrise_train_datasets, allfours_train_datasets),
        (sunrise_val_datasets, allfours_val_datasets),
        (sunrise_test_datasets, allfours_test_datasets),
    )
)

In [5]:
# Inspect the combined training split (column names and row count).
train_set

Dataset({
    features: ['Question', 'Answer', 'Reasoning'],
    num_rows: 958
})

### Model Tuning: Qwen3-4B

In [6]:
# Identifier of the base checkpoint; reused below when loading the model weights.
qwen_model = 'Qwen/Qwen3-4B'
# Tokenizer that belongs to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(qwen_model)

In [7]:
# Quantization settings passed to from_pretrained when the base model is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # load the weights in 4-bit precision
    bnb_4bit_quant_type="nf4",  # use the NF4 4-bit data type
    bnb_4bit_compute_dtype=torch.bfloat16,  # run computation in bfloat16
    bnb_4bit_use_double_quant=False,  # nested (double) quantization is disabled
)

In [8]:
# Load the base causal LM with the 4-bit quantization config defined above;
# device_map="auto" leaves device placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    qwen_model,
    quantization_config=bnb_config,
    device_map="auto" 
)
# Turn off `use_cache` on the model config for training.
model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [9]:
# Run PEFT's k-bit training preparation on the quantized model before adapters
# are attached.
# NOTE: `model` is rebound in place, so this cell is not idempotent; re-run the
# model-loading cell first if this cell needs to be executed again.
model = prepare_model_for_kbit_training(model)

In [10]:
# LoRA adapter configuration applied to the model via get_peft_model.
lora_config = LoraConfig(
    r=16,  # adapter rank
    lora_alpha=32,  # LoRA scaling parameter
    # Names of the modules that receive adapters.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], 
    bias="none",  # no bias parameters are trained
    lora_dropout=0.05,  # dropout applied inside the adapter layers
    task_type="CAUSAL_LM",
)

In [11]:
# Wrap the prepared model with the LoRA adapters described by `lora_config`.
# NOTE: `model` is rebound in place, so re-running this cell would wrap an
# already-wrapped model; restart from the model-loading cell instead.
model = get_peft_model(model, lora_config)

In [12]:
# Report how many parameters are trainable versus the total parameter count.
model.print_trainable_parameters() 

trainable params: 33,030,144 || all params: 4,055,498,240 || trainable%: 0.8145


In [13]:
def formatting_prompts_func(examples):
    """Render a batch of QA rows into single training strings.

    Args:
        examples: Batched mapping (the shape ``Dataset.map(batched=True)`` passes)
            with parallel "Question", "Answer" and "Reasoning" columns. Each
            "Reasoning" entry is normally a sequence of steps; a bare string is
            treated as one step and ``None`` as no steps.

    Returns:
        dict: ``{"text": [...]}`` holding one formatted prompt per input row, laid
        out as a question section, a numbered thought-process section and an
        answer section.
    """

    def _as_steps(reasoning):
        # Normalise one "Reasoning" cell into a list of steps.
        if reasoning is None:
            return []
        # A str is itself iterable: enumerating it directly would emit one
        # "Step N" line per character instead of one line for the whole text.
        if isinstance(reasoning, str):
            return [reasoning]
        return reasoning

    texts = []
    rows = zip(examples["Question"], examples["Answer"], examples["Reasoning"])
    for question, answer, reasoning in rows:
        reasoning_str = "\n".join(
            f"Step {number}: {step}"
            for number, step in enumerate(_as_steps(reasoning), start=1)
        )
        texts.append(
            f"### Question:\n{question}\n\n"
            f"### Thought Process:\n"
            f"{reasoning_str}\n\n"
            f"### Answer:\n{answer}"
        )
    return {"text": texts}

In [14]:
# Turn each split into a single-column "text" dataset: the prompt formatter is
# applied in batches and the split's original columns are dropped.
train_dataset_formatted, validation_dataset_formatted = (
    split.map(
        formatting_prompts_func,
        batched=True,
        remove_columns=split.column_names,
    )
    for split in (train_set, val_set)
)

In [24]:
# TrainingArguments and SFTTrainer are imported in the notebook's import cell.

# Knobs that determine the length of the run.
NUM_TRAIN_EPOCHS = 3
PER_DEVICE_TRAIN_BATCH_SIZE = 4
WARMUP_FRACTION = 0.1  # share of the optimizer steps spent warming up the LR

# Optimizer steps for the whole run (ceil division). This assumes a single
# training process and no gradient accumulation, matching the arguments below.
steps_per_epoch = -(-len(train_dataset_formatted) // PER_DEVICE_TRAIN_BATCH_SIZE)
total_train_steps = steps_per_epoch * NUM_TRAIN_EPOCHS
# A fixed warmup of 500 steps covered roughly 70% of this ~720-step run
# (958 rows / batch 4 * 3 epochs), so the learning rate only peaked near the
# end of training. Derive the warmup length from the actual step budget instead.
warmup_steps = max(1, int(WARMUP_FRACTION * total_train_steps))

# Training configuration.
training_arguments = TrainingArguments(
    output_dir="./results",          # where checkpoints are written
    num_train_epochs=NUM_TRAIN_EPOCHS,
    per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE,
    per_device_eval_batch_size=4,    # evaluation batch size
    warmup_steps=warmup_steps,       # learning-rate warmup steps (computed above)
    weight_decay=0.01,               # weight decay
    logging_dir="./logs",            # log directory
    logging_steps=10,                # log every N steps
    save_steps=500,                  # checkpoint every N steps
    eval_strategy="steps",           # evaluate on a step schedule
    # Set explicitly: when eval_steps is omitted it falls back to logging_steps
    # (10), i.e. a full validation pass every 10 training steps.
    eval_steps=50,
    report_to="tensorboard",         # send logs to TensorBoard for visualisation
    dataloader_num_workers=0,        # data-loader worker processes
    max_steps=-1,                    # -1: derive the step count from the epochs
)

# Build the SFT trainer. `model` has already been wrapped by get_peft_model, so
# no peft_config is passed here: handing an existing PeftModel together with a
# peft_config is redundant and is rejected by newer TRL releases.
trainer = SFTTrainer(
    model=model,  # QLoRA model (Qwen) loaded and configured earlier
    train_dataset=train_dataset_formatted,  # formatted training split
    eval_dataset=validation_dataset_formatted,  # formatted validation split, used to watch for overfitting
    args=training_arguments,  # training configuration
)

print("\nTrainer 配置完成。")
print("现在你可以开始训练了： trainer.train()")

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.



Trainer 配置完成。
现在你可以开始训练了： trainer.train()


In [26]:


# Run the fine-tuning loop; the prints mark the start and end in the cell output.
print("Model Tuning Start...")
trainer.train()
print("Model Tuning Completed")

# After training, save the fine-tuned LoRA adapter.
# This only saves the LoRA weights, not the whole base model.
output_model_path = "./QLoRA_Qwen" # a descriptive name is recommended
trainer.save_model(output_model_path)
print(f"微调后的 LoRA 适配器已保存到: {output_model_path}")

Model Tuning Start...


Step,Training Loss,Validation Loss


KeyboardInterrupt: 