In [None]:
%%bash
# Launch the fine-tuning script with torchrun: 3 worker processes on a single
# node (rank 0), rendezvous at localhost:6601.
#
# The `%%bash` cell magic above is required: without it the notebook kernel
# would try to parse this shell command as Python and fail with a SyntaxError.
#
# Flags worth knowing when tuning this run:
#   * evaluation runs every 10 steps and checkpoints are written every 100
#     steps; only one checkpoint is kept (--save_total_limit 1) and the one
#     with the lowest eval_loss is reloaded at the end.
#   * --use_lora / --q_lora are passed through to finetune.py together with a
#     DeepSpeed ZeRO-2 config file.
#   * NOTE(review): all paths are absolute and machine-specific (/root/autodl-*);
#     adjust them before running elsewhere.
torchrun --nproc_per_node 3 --nnodes 1 --node_rank 0 --master_addr localhost --master_port 6601 /root/autodl-tmp/finetune.py \
    --model_name_or_path "/root/autodl-fs/trained_models/deepseek_ri_32b_merged" \
    --data_path "/root/autodl-tmp/data/datasets/output.json" \
    --eval_data_path "/root/autodl-tmp/data/datasets/dev.json" \
    --bf16 True \
    --output_dir "/root/autodl-fs/trained_models/deepseek_ri_32b_merged_4int" \
    --num_train_epochs 1 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "steps" \
    --eval_steps  10 \
    --metric_for_best_model "eval_loss" \
    --greater_is_better False \
    --save_strategy "steps" \
    --save_steps 100 \
    --load_best_model_at_end True \
    --save_total_limit 1 \
    --learning_rate 1e-5 \
    --weight_decay 0.1 \
    --adam_beta2 0.95 \
    --warmup_ratio 0.01 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --report_to "none" \
    --model_max_length 512 \
    --gradient_checkpointing True \
    --lazy_preprocess True \
    --deepspeed "/root/autodl-tmp/deepspeed/ds_config_zero2.json" \
    --use_lora \
    --q_lora

## 权重融合


In [None]:


from transformers import AutoModelForCausalLM
from peft import PeftModel
import torch

# Merge a trained LoRA adapter into its base model and write the result to
# disk as a standalone checkpoint (safetensors, shards capped at 2048MB).
#
# NOTE(review): the base/adapter paths below do not match the model and
# output directory used by the training command earlier in this notebook --
# confirm that these are the intended inputs before running.
BASE_MODEL_DIR = "Qwen/Qwen-1_8B-Chat/"
ADAPTER_DIR = "output_qwen/"
MERGED_DIR = "output_qwen_merged"

# Options shared by the base-model load: half precision, automatic device
# placement, and permission to run the checkpoint's custom modelling code.
base_load_kwargs = dict(
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_DIR, **base_load_kwargs)
# Wrap the base model with the adapter weights stored in ADAPTER_DIR.
model = PeftModel.from_pretrained(model, ADAPTER_DIR)

# Fold the adapter into the base weights and drop the PEFT wrapper.
merged_model = model.merge_and_unload()
merged_model.save_pretrained(
    MERGED_DIR,
    max_shard_size="2048MB",
    safe_serialization=True,
)

## 分词器

In [None]:
from transformers import AutoTokenizer

# Copy the tokenizer files next to the merged weights so that
# "output_qwen_merged" can be loaded on its own.
#
# NOTE(review): the tokenizer is read from the "-Int4" directory while the
# merge step loads the non-Int4 base model -- presumably both ship the same
# tokenizer; verify.
TOKENIZER_SOURCE_DIR = "Qwen/Qwen-1_8B-Chat-Int4/"

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_SOURCE_DIR, trust_remote_code=True)

# Left as the cell's final expression so the value returned by
# save_pretrained is shown as the cell output.
tokenizer.save_pretrained("output_qwen_merged")

## 模型测试


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

# Smoke test: reload the merged checkpoint from disk and run one chat turn.
merged_dir = "output_qwen_merged"

tokenizer = AutoTokenizer.from_pretrained(merged_dir, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    merged_dir,
    device_map="auto",
    trust_remote_code=True,
)
# Switch to inference mode; eval() returns the same module, so `model` is
# bound to the same object as with a chained call.
model = model.eval()

# NOTE(review): `.chat` is not part of the generic transformers API; it is
# presumably provided by the checkpoint's remote code -- confirm that the
# merged model exposes it.
response, history = model.chat(tokenizer, "你好", history=None)
print(response)