In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, DataCollatorForSeq2Seq
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
import pandas as pd
from datasets import Dataset


In [2]:
# Release cached CUDA memory held by PyTorch before the model is loaded.
torch.cuda.empty_cache()
# Model location
model_name = "/root/autodl-tmp/deepseek-r1-distill-llama-8b"  # path to the local model directory

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True)
# Reuse the EOS token (string and id) as the padding token.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = 'right'  # make sure padding_side is 'right'

# Load the base model
# NOTE(review): the checkpoint config printed in the training log reports
# "torch_dtype": "bfloat16", while the weights are loaded as float16 here —
# confirm float16 is intended.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='auto',  # let the loader place/shard the weights across devices automatically
    trust_remote_code=True, # allow custom code shipped with the checkpoint to be loaded
    torch_dtype=torch.float16  # load the weights as float16
)

# LoRA configuration
lora_config = LoraConfig(
    task_type="CAUSAL_LM",  # fine-tune as an autoregressive (causal) language model
    r=16,  # rank of the LoRA low-rank decomposition
    lora_alpha=32,  # LoRA scaling factor
    target_modules=["q_proj", "v_proj"],  # modules to adapt, chosen for the LLaMA 3 architecture
    lora_dropout=0.05,  # dropout probability
    bias="none",  # do not train bias terms
    init_lora_weights=True,  # initialize the LoRA layer weights
    inference_mode=False  # keep the adapter trainable
)

# Apply the LoRA configuration to the model
model = get_peft_model(model, lora_config)

# Training arguments
training_arguments = TrainingArguments(
    output_dir="./Llama3_8b_LoRA_2025",
    logging_dir="./logs",  # directory where log files are written
    eval_strategy="no",
    optim="paged_adamw_8bit",
    per_device_train_batch_size=1,  # small per-device batch size
    gradient_accumulation_steps=4,  # accumulate gradients over 4 steps (effective batch size 4 per device)
    per_device_eval_batch_size=4,
    log_level="debug",
    save_strategy="epoch",
    logging_steps=80,
    learning_rate=1e-4,
    fp16=False,  # FP16 mixed precision is disabled
    bf16=False,
    num_train_epochs=6,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
)

# Data preprocessing
def process_func(example, max_length=384):
    r"""Tokenize one instruction-tuning record into causal-LM training features.

    The prompt is rendered as ``"User: {instruction} {input}\n\n"`` and the
    answer as ``"Assistant: {output}{eos_token}"``. Both parts are tokenized
    separately with the module-level ``tokenizer`` and then concatenated.
    Prompt positions are masked with ``-100`` in ``labels`` so that only the
    answer tokens contribute to the loss.

    Args:
        example: Mapping with the keys ``"instruction"``, ``"input"`` and
            ``"output"``. An ``"input"`` of ``None`` is treated as an empty
            string.
        max_length: Maximum number of tokens kept per sample; longer samples
            are cut from the right. ``None`` disables truncation.

    Returns:
        dict with ``"input_ids"``, ``"attention_mask"`` and ``"labels"``
        lists of equal length.
    """
    # A missing `input` field would otherwise be rendered as the literal text "None".
    extra_input = example["input"] if example["input"] is not None else ""

    # NOTE(review): add_special_tokens=False means no special tokens are added
    # by the tokenizer here — confirm this matches how prompts are tokenized
    # at inference time.
    prompt = tokenizer(f"User: {example['instruction']} {extra_input}\n\n", add_special_tokens=False)
    answer = tokenizer(f"Assistant: {example['output']}{tokenizer.eos_token}", add_special_tokens=False)

    prompt_ids = prompt["input_ids"]
    input_ids = prompt_ids + answer["input_ids"]
    attention_mask = prompt["attention_mask"] + answer["attention_mask"]
    labels = [-100] * len(prompt_ids) + answer["input_ids"]

    # Slicing is a no-op for samples that already fit, so no length check is needed.
    return {
        "input_ids": input_ids[:max_length],
        "attention_mask": attention_mask[:max_length],
        "labels": labels[:max_length],
    }

# Load the data
df = pd.read_json('./huanhuan.json')
ds = Dataset.from_pandas(df)
# Tokenize every record with process_func and drop the original columns.
tokenized_id = ds.map(process_func, remove_columns=ds.column_names)

# padding_side is set to 'right' again before the trainer is built.
tokenizer.padding_side = 'right'
# Create the trainer
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=tokenized_id,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Map:   0%|          | 0/3729 [00:00<?, ? examples/s]



In [3]:
# Answer a question with the model before training
def generate_response(model, tokenizer, prompt, max_length=128):
    """Generate a completion for ``prompt`` and return the decoded text.

    Generation runs with the model in eval mode so that dropout layers are
    inactive; the model's previous train/eval mode is restored afterwards.

    Args:
        model: Causal language model exposing ``generate``, ``device``,
            ``training``, ``eval()`` and ``train()``.
        tokenizer: Tokenizer used to encode ``prompt`` and decode the output.
        prompt: Text handed to the model as-is.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The first generated sequence decoded with special tokens skipped.
    """
    # Pass an explicit pad id; fall back to EOS when the tokenizer defines none.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # A model left in train mode keeps dropout active, which perturbs generation.
    was_training = model.training
    model.eval()
    try:
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_token_id,
        )
    finally:
        if was_training:
            model.train()
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example question
# question = "What is the capital of France?"
question = "大理寺少卿，也不是什么高官。"
# NOTE(review): the bare question is used as the prompt, whereas process_func
# builds training text as "User: ...\n\nAssistant: ..." — confirm whether the
# prompt should follow that template.
print("Answer before training:")
print(generate_response(model, tokenizer, question))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer before training:
大理寺少卿，也不是什么高官。是不是有些误解？

大理寺少卿，听起来像是小说里的角色，或者是某个地方的别称。可是用户说他不是什么高官，这让我有点困惑。可能是因为“少卿”这个词让我联想到“少卿”在古代指的是高官之子的意思，所以误以为大理寺少卿是高官。但实际上，可能在现代或者某些特定语境下，“少卿”指的是普通人，或者是一个虚构的角色名


In [5]:
try:
    # Start training
    train_result = trainer.train()
    # Record the training metrics once training has finished
    metrics = train_result.metrics
    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()
except Exception as e:
    print(f"训练过程中出现异常: {e}")
    # Logic that saves the current model state for a later resume could be added here.
    # Re-raise so the full traceback is shown and later cells do not run
    # against a model whose training failed.
    raise
else:
    # Save the model
    trainer.save_model("./Llama3_8b_LoRA")

Currently training with a batch size of: 1
***** Running training *****
  Num examples = 3,729
  Num Epochs = 6
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 4
  Total optimization steps = 5,592
  Number of trainable parameters = 6,815,744


Step,Training Loss
80,4.4787
160,3.4671
240,3.1545
320,2.9969
400,2.9762
480,2.9497
560,2.878
640,2.9446
720,2.8126
800,2.8027


Saving model checkpoint to ./Llama3_8b_LoRA_2025/checkpoint-932
loading configuration file /root/autodl-tmp/deepseek-r1-distill-llama-8b/config.json
Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 8.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.46.0",
  "use_cache": true,
  "vocab_si

***** train metrics *****
  epoch                    =     5.9984
  total_flos               = 56425811GF
  train_loss               =     2.3528
  train_runtime            = 0:30:59.54
  train_samples_per_second =     12.032
  train_steps_per_second   =      3.007


In [6]:
# Answer a question with the model after training
def generate_response(model, tokenizer, prompt, max_length=128):
    """Generate a completion for ``prompt`` and return the decoded text.

    Generation runs with the model in eval mode so that dropout layers are
    inactive; the model's previous train/eval mode is restored afterwards.

    Args:
        model: Causal language model exposing ``generate``, ``device``,
            ``training``, ``eval()`` and ``train()``.
        tokenizer: Tokenizer used to encode ``prompt`` and decode the output.
        prompt: Text handed to the model as-is.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The first generated sequence decoded with special tokens skipped.
    """
    # Pass an explicit pad id; fall back to EOS when the tokenizer defines none.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # A model left in train mode keeps dropout active, which perturbs generation.
    was_training = model.training
    model.eval()
    try:
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_token_id,
        )
    finally:
        if was_training:
            model.train()
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

question = "大理寺少卿，也不是什么高官。"
# NOTE(review): the bare question is used as the prompt, whereas process_func
# builds training text as "User: ...\n\nAssistant: ..." — confirm whether the
# prompt should follow that template.
print("Answer after training:")
print(generate_response(model, tokenizer, question))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer after training:
大理寺少卿，也不是什么高官。王爷是大理寺少卿的上司，王爷对他却有这样的看法，是不是说他没有什么出息？王爷是何等的眼光。
