In [None]:
# AutoDL: speed up the Hugging Face connection  [for use in JupyterLab]
import subprocess
import os

# Source /etc/network_turbo inside a bash child process and list the environment
# lines that contain "proxy"; a child process cannot modify this kernel's
# environment directly, so its output is parsed below.
turbo_cmd = 'bash -c "source /etc/network_turbo && env | grep proxy"'
completed = subprocess.run(turbo_cmd, shell=True, capture_output=True, text=True)

# Copy every NAME=VALUE line into os.environ, splitting on the first '=' only
# so that values which themselves contain '=' stay intact.
for entry in completed.stdout.splitlines():
    name, sep, val = entry.partition('=')
    if sep:
        os.environ[name] = val

In [None]:
#1. 安装微调库
# %%capture
import torch

major_version, minor_version = torch.cuda.get_device_capability()
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes

In [None]:
# 2. Load the model
from unsloth import FastLanguageModel
import torch

# Loader settings, kept as module-level names so later cells can reuse them.
max_seq_length = 2048
dtype = None
load_in_4bit = True

# Collect the loader arguments first, then load the model and tokenizer together.
load_kwargs = dict(
    model_name="unsloth/llama-3-8b-Instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)

In [None]:
# 3. Test before fine-tuning
from transformers import TextStreamer

# Template with six positional slots: judge prompt, instruction, input,
# response 1, response 2, and the choice & reason.
alpaca_prompt = """
{}
### Instruction:
{}
### Input:
{}
### Response 1:
{}
### Response 2:
{}
### Choice & Reason:
{}
"""
FastLanguageModel.for_inference(model)

# Values for the six template slots, in template order. The last slot is left
# empty so the model generates the choice and reason itself.
demo_fields = (
    # prompt
    "Below are two responses for a given task. The task is defined by the Instruction. As a judge, evaluate the responses and provide the following:\n1. Which response do you think is better (Response 1 or Response 2)?\n2. Explain why you think this response is better.\nPlease ensure that both the choice and reason remain consistent upon repeated. inquiries.",
    # instruction
    "Give three tips for staying healthy.",
    # input
    "",
    # response 1
    "1. Eat a balanced and nutritious diet.\n2. Get regular exercise.\n3. Get enough sleep.",
    # response 2
    "1. Eat a balanced diet with plenty of fruits, vegetables, and whole grains.\n2. Get regular physical activity, such as walking, jogging, or swimming.\n3. Get enough sleep and practice healthy sleeping habits.",
    # output (choice & reason)
    "",
)
demo_text = alpaca_prompt.format(*demo_fields)
inputs = tokenizer([demo_text], return_tensors="pt").to("cuda")

# Stream up to 128 newly generated tokens to the cell output.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)


In [None]:
# 4. Prepare the fine-tuning dataset
# NOTE(review): set_trace is not referenced in the cells visible here.
from IPython.core.debugger import set_trace

EOS_TOKEN = tokenizer.eos_token  # EOS_TOKEN must be added to each training text


def formatting_prompts_func(examples):
    """Render a batch of dataset columns into training texts.

    Args:
        examples: Mapping with the keys "prompt", "instruction", "input",
            "response_1", "response_2" and "output", each holding a sequence
            with one entry per example.

    Returns:
        dict: ``{"text": [...]}`` with one string per example, produced by
        filling ``alpaca_prompt`` and appending ``EOS_TOKEN``.

    Raises:
        KeyError: If one of the expected columns is missing from ``examples``.
    """
    columns = zip(
        examples["prompt"],
        examples["instruction"],
        examples["input"],
        examples["response_1"],
        examples["response_2"],
        examples["output"],
    )
    texts = []
    # Six columns are zipped, so exactly six names are unpacked per row.
    for prompt, instruction, input_text, response_1, response_2, output in columns:
        # EOS_TOKEN must be appended, otherwise generation never terminates.
        text = alpaca_prompt.format(
            prompt, instruction, input_text, response_1, response_2, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}


pass

from datasets import load_dataset

# Read the preprocessed JSON file, then add a "text" column by running the
# prompt formatter over the data in batches.
raw_dataset = load_dataset('json', data_files='autodl-tmp/pandalm_after_preprocess_v3.json')
dataset = raw_dataset.map(
    formatting_prompts_func,
    batched=True,
)

In [None]:
# 5. Configure training
from trl import SFTTrainer
from transformers import TrainingArguments

# NOTE(review): an earlier cell calls FastLanguageModel.for_inference(model)
# before this point - confirm the model is back in training mode when
# trainer.train() runs.

# Names of the modules that receive LoRA adapters.
lora_targets = ["q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj"]

model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # suggested values: 8, 16, 32, 64, 128
    target_modules=lora_targets,
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",  # gradient checkpointing, for long contexts
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# Choose the mixed-precision mode once: bf16 when supported, fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    per_device_train_batch_size=32,
    gradient_accumulation_steps=4,
    warmup_steps=100,
    max_steps=2100,  # number of fine-tuning steps
    learning_rate=3e-4,  # learning rate
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset['train'],
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,  # packing can make training on short sequences 5x faster
    args=training_args,
)

In [None]:
# 6. Start training
# Runs the trainer and keeps the value returned by train() in trainer_stats.
trainer_stats = trainer.train()

# Resume training from a checkpoint
# checkpoint_path = "outputs/checkpoint-2000"
# trainer_stats = trainer.train(resume_from_checkpoint=checkpoint_path)

In [None]:
# 7. Test the fine-tuned model
from transformers import TextStreamer

FastLanguageModel.for_inference(model)

# Same example as the pre-fine-tuning test, so the two outputs can be compared.
demo_judge_prompt = "Below are two responses for a given task. The task is defined by the Instruction. As a judge, evaluate the responses and provide the following:\n1. Which response do you think is better (Response 1 or Response 2)?\n2. Explain why you think this response is better.\nPlease ensure that both the choice and reason remain consistent upon repeated. inquiries."
demo_instruction = "Give three tips for staying healthy."
demo_input = ""
demo_response_1 = "1. Eat a balanced and nutritious diet.\n2. Get regular exercise.\n3. Get enough sleep."
demo_response_2 = "1. Eat a balanced diet with plenty of fruits, vegetables, and whole grains.\n2. Get regular physical activity, such as walking, jogging, or swimming.\n3. Get enough sleep and practice healthy sleeping habits."
demo_output = ""  # choice & reason: left empty for the model to generate

filled_prompt = alpaca_prompt.format(
    demo_judge_prompt,
    demo_instruction,
    demo_input,
    demo_response_1,
    demo_response_2,
    demo_output,
)
inputs = tokenizer([filled_prompt], return_tensors="pt").to("cuda")

# Stream up to 128 newly generated tokens to the cell output.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)

In [None]:
# 8. Save the model
# Output locations for the three artifacts written below.
lora_dir = "autodl-tmp/lora_model"
tokenizer_dir = "autodl-tmp/tokenizer"
gguf_dir = "autodl-tmp/model"

# Save the LoRA model (local saving)
model.save_pretrained(lora_dir)
# Save the tokenizer
tokenizer.save_pretrained(tokenizer_dir)
# Merge the model, quantize it to 4-bit GGUF, and save it
model.save_pretrained_gguf(gguf_dir, tokenizer, quantization_method="q4_k_m")