In [1]:
import os

# Expose only the device with index 5 to CUDA for this process.
# Environment values are strings, hence "5" rather than 5.
os.environ.update(CUDA_VISIBLE_DEVICES="5")

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer

from peft import PeftModel

  from .autonotebook import tqdm as notebook_tqdm


## 加载基础模型

In [3]:
# The base weights and the tokenizer are read from the same local directory,
# so the path is named once and reused for both loads.
base_model_path = "/data/PLM/bloom-1b4-zh"

tokenizer = AutoTokenizer.from_pretrained(base_model_path)
model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    low_cpu_mem_usage=True,
)

## 加载LoRA模型

In [4]:
# Load the trained LoRA checkpoint on top of the base model. The result wraps
# the base model with the adapter; this is the model as it exists right after
# LoRA training.
lora_checkpoint = "./chatbot/checkpoint-500/"
p_model = PeftModel.from_pretrained(model, model_id=lora_checkpoint)

# Show the module tree: the outermost layers are still PeftModelForCausalLM
# and LoraModel, i.e. nothing has been merged yet.
p_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): BloomForCausalLM(
      (transformer): BloomModel(
        (word_embeddings): ModulesToSaveWrapper(
          (original_module): Embedding(46145, 2048)
          (modules_to_save): ModuleDict(
            (default): Embedding(46145, 2048)
          )
        )
        (word_embeddings_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (h): ModuleList(
          (0-23): 24 x BloomBlock(
            (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
            (self_attention): BloomAttention(
              (query_key_value): Linear(
                in_features=2048, out_features=6144, bias=True
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
           

In [5]:
# Build the prompt in the "Human: ...\n\nAssistant: " layout. The second
# format slot is left empty and the trailing newline it leaves is stripped.
question = "考试有哪些技巧？"
prompt = "Human: {}\n{}".format(question, "").strip() + "\n\nAssistant: "
ipt = tokenizer(prompt, return_tensors="pt")

# Greedy decoding (do_sample=False) with the adapter-wrapped model; the
# decoded text is the cell's displayed value.
peft_output_ids = p_model.generate(**ipt, do_sample=False)
tokenizer.decode(peft_output_ids[0], skip_special_tokens=True)



'Human: 考试有哪些技巧？\n\nAssistant: 考试技巧有很多，比如：\n'

## 模型合并

In [6]:
merge_model = p_model.merge_and_unload() # Once LoRA training is done, calling merge_and_unload() on the PEFT model is all that is needed to merge.
merge_model # After merging only BloomForCausalLM remains; the PeftModelForCausalLM and LoraModel wrappers are gone.

BloomForCausalLM(
  (transformer): BloomModel(
    (word_embeddings): Embedding(46145, 2048)
    (word_embeddings_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
    (h): ModuleList(
      (0-23): 24 x BloomBlock(
        (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (self_attention): BloomAttention(
          (query_key_value): Linear(in_features=2048, out_features=6144, bias=True)
          (dense): Linear(in_features=2048, out_features=2048, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (mlp): BloomMLP(
          (dense_h_to_4h): Linear(in_features=2048, out_features=8192, bias=True)
          (gelu_impl): BloomGelu()
          (dense_4h_to_h): Linear(in_features=8192, out_features=2048, bias=True)
        )
      )
    )
    (ln_f): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
  )
  (l

In [7]:
# Same prompt as used for the adapter-wrapped model, so the two generations
# can be compared directly.
question = "考试有哪些技巧？"
prompt = "Human: {}\n{}".format(question, "").strip() + "\n\nAssistant: "
ipt = tokenizer(prompt, return_tensors="pt")

# Greedy decoding (do_sample=False) with the merged model; the decoded text
# is the cell's displayed value.
merged_output_ids = merge_model.generate(**ipt, do_sample=False)
tokenizer.decode(merged_output_ids[0], skip_special_tokens=True)

'Human: 考试有哪些技巧？\n\nAssistant: 考试技巧有很多，比如：\n'

## 完整模型保存

In [9]:
# Persist the merged model. The tokenizer is written to the same directory so
# that the saved folder can be loaded on its own (model + tokenizer) without
# needing the original base-model path.
merged_model_dir = "./chatbot/merge_model"
merge_model.save_pretrained(merged_model_dir)
# Trailing semicolon suppresses the returned value from being echoed as cell output.
tokenizer.save_pretrained(merged_model_dir);