# Llama2微调实战-LoRA技术微调

## 步骤1 导入相关包

In [None]:
import os

# Configure the mirror endpoint and cache locations BEFORE importing any
# Hugging Face / ModelScope library: huggingface_hub reads HF_ENDPOINT and
# HF_HOME once, at import time, so setting them after the imports has no effect.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
os.environ['HF_HOME'] = '/root/autodl-tmp/cache/'
os.environ['MODELSCOPE_CACHE']='/root/autodl-tmp/cache/'

from datasets import load_dataset  # noqa: E402
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer  # noqa: E402

# Download the base model; `model_dir` is the local directory of the snapshot.
from modelscope import snapshot_download  # noqa: E402
model_dir = snapshot_download('modelscope/Llama-2-7b-ms')

## 步骤2 加载数据集

In [None]:
# Request the "train" split explicitly. Without `split=...`, load_dataset
# returns a DatasetDict, which supports neither the integer/slice indexing
# nor the flat `column_names` list that the following cells rely on.
ds = load_dataset("llm-wizard/alpaca-gpt4-data-zh", split="train")
ds

In [None]:
# Preview the first record of the dataset.
# NOTE(review): slice indexing assumes `ds` is a single-split Dataset rather
# than a DatasetDict — confirm against how `ds` was loaded.
ds[:1]

## 步骤3 数据集预处理

In [None]:
# Load the tokenizer from the directory returned by snapshot_download
# (`model_dir`) instead of a hand-written copy of the cache path, so the path
# always matches where the model was actually downloaded.
tokenizer = AutoTokenizer.from_pretrained(model_dir)
tokenizer

In [None]:
# Pad on the right-hand side (the original note says the loaded tokenizer
# pads on the left by default, which has to be changed for training).
tokenizer.padding_side = "right"
# Reuse the end-of-sequence token as the padding token. The id is read from
# the tokenizer instead of being hard-coded as 2.
tokenizer.pad_token_id = tokenizer.eos_token_id

In [None]:
def process_func(example, max_length=400):
    """Turn one instruction record into causal-LM training features.

    The prompt is built from ``example["instruction"]`` (prefixed with
    "Human: ") and ``example["input"]`` joined by a newline, stripped, and
    followed by the "Assistant: " marker. The answer is ``example["output"]``
    followed by the tokenizer's EOS token.

    Args:
        example: Mapping with string fields "instruction", "input", "output".
        max_length: Maximum number of tokens kept per sample (default 400,
            the value previously hard-coded). Longer samples are cut at the
            end, which may drop the trailing EOS token.

    Returns:
        A dict with equally long lists "input_ids", "attention_mask" and
        "labels". Prompt positions in "labels" are set to -100 so that only
        the answer tokens (and EOS) act as training targets.
    """
    prompt_text = "\n".join(["Human: " + example["instruction"], example["input"]]).strip() + "\n\nAssistant: "
    # Special tokens are disabled so that EOS is appended exactly once, below.
    instruction = tokenizer(prompt_text, add_special_tokens=False)
    response = tokenizer(example["output"], add_special_tokens=False)
    eos = [tokenizer.eos_token_id]

    input_ids = instruction["input_ids"] + response["input_ids"] + eos
    attention_mask = instruction["attention_mask"] + response["attention_mask"] + [1]
    labels = [-100] * len(instruction["input_ids"]) + response["input_ids"] + eos

    # Slicing is a no-op for samples that are already short enough.
    return {
        "input_ids": input_ids[:max_length],
        "attention_mask": attention_mask[:max_length],
        "labels": labels[:max_length],
    }

In [None]:
# Tokenize every record with process_func and drop the original columns so
# that only the features it returns remain.
tokenized_ds = ds.map(
    process_func,
    remove_columns=ds.column_names,
)
tokenized_ds

In [None]:
# Show the token ids of the first tokenized sample.
first_sample = tokenized_ds[0]
print(first_sample["input_ids"])

In [None]:
# Sanity-check the data: decode the labels of the second sample, skipping the
# -100 placeholders, to see whether the end-of-sequence token is present.
tokenizer.decode([token_id for token_id in tokenized_ds[1]["labels"] if token_id != -100])

## 步骤4 创建模型

In [None]:
import torch

# Load the base model in half precision from the directory returned by
# snapshot_download (`model_dir`) rather than a hand-written copy of the cache
# path, so the path always matches where the model was actually downloaded.
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    low_cpu_mem_usage=True,
    torch_dtype=torch.half,
    device_map="auto",
)
model.dtype

### 1、PEFT 步骤1 配置文件

In [None]:
from peft import LoraConfig, TaskType, get_peft_model

# LoRA configuration for a causal language model; only the task type is set
# here, every other option is left at the library default.
config = LoraConfig(task_type=TaskType.CAUSAL_LM,)
config

### 2、PEFT 步骤2 创建模型

In [None]:
# Wrap the base model with the LoRA configuration; `model` now refers to the
# PEFT-wrapped model and is displayed below for inspection.
model = get_peft_model(model, config)
model

In [None]:
# Print the number of trainable parameters in the model.
model.print_trainable_parameters()

In [None]:
# List every parameter with its shape and dtype to see where the LoRA layers
# were inserted.
for param_name, tensor in model.named_parameters():
    print(param_name, tensor.shape, tensor.dtype)

## 步骤5 配置训练参数

In [None]:
# Training hyper-parameters for the LoRA fine-tuning run.
args = TrainingArguments(
    # Directory that receives the training output.
    output_dir='/root/autodl-tmp/cache/finetuning/llama-2-7b-ms-lora',
    # Train for a single epoch.
    num_train_epochs=1,
    # Batch size per device (e.g. per GPU); larger values need more resources.
    per_device_train_batch_size=2,
    # Accumulate gradients over 8 batches before each parameter update.
    gradient_accumulation_steps=8,
    # Write a log entry every 10 steps.
    logging_steps=10,
    # Larger Adam epsilon; per the original note, this avoids numeric
    # precision problems.
    adam_epsilon=1e-4,
)

## 步骤6 创建训练器

In [None]:
# Collator that pads each batch with the tokenizer.
collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)

# Trainer tying together the PEFT model, the arguments and the tokenized data.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_ds,
    data_collator=collator,
)

## 步骤7 模型训练

In [None]:
# Start the fine-tuning run with the trainer configured above.
trainer.train()

## 步骤8 模型推理

In [None]:
from transformers import pipeline

# Text-generation pipeline built on the fine-tuned model and its tokenizer.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Build the prompt in the same "Human: ... Assistant: " layout used for the
# training samples; the optional input part is left empty here.
question, extra_input = "如何写简历？", ""
ipt = "Human: {}\n{}".format(question, extra_input).strip() + "\n\nAssistant: "

# Sample a completion (prompt plus continuation capped at 256 tokens).
generated = pipe(ipt, max_length=256, do_sample=True)
print(generated)