# PEFT 进阶操作

In [1]:
import os

# Restrict this process to GPU index 5. This runs in the first cell, ahead of the
# torch import in the next cell. The index is machine-specific; adjust as needed.
os.environ.update(CUDA_VISIBLE_DEVICES="5")

## 1. 自定义模型适配

In [2]:
import torch
from torch import nn
from peft import LoraConfig, get_peft_model, PeftModel 
# LoraConfig: holds the LoRA settings (target modules, rank, alpha, ...).
# get_peft_model: wraps a base model and injects freshly initialised (untrained) LoRA layers.
# PeftModel: used below to load saved LoRA adapters onto a base model; merging them into
#   the base weights would be a separate model.merge_and_unload() call.

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Toy base network: Linear(10 -> 10), ReLU, Linear(10 -> 2).
# The sub-modules are registered under the names "0", "1", "2" -- the same names
# nn.Sequential assigns automatically and that LoraConfig(target_modules=...) refers to.
net1 = nn.Sequential()
net1.add_module("0", nn.Linear(10, 10))
net1.add_module("1", nn.ReLU())
net1.add_module("2", nn.Linear(10, 2))
net1

Sequential(
  (0): Linear(in_features=10, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=2, bias=True)
)

In [4]:
# List each parameter of the base network together with its dtype.
for param_name, tensor in net1.named_parameters():
    print(f"{param_name} {tensor.dtype}")

0.weight torch.float32
0.bias torch.float32
2.weight torch.float32
2.bias torch.float32


In [5]:
# Target the first layer, whose module name inside the nn.Sequential is "0".
# task_type is left unset because this is a custom model rather than a predefined task.
config = LoraConfig(target_modules=["0"])
config

LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type=None, inference_mode=False, r=8, target_modules={'0'}, lora_alpha=8, lora_dropout=0.0, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={})

In [6]:
# Wrap net1 according to the LoRA config; the result is a PeftModel around the same network.
model1 = get_peft_model(net1, config)

In [7]:
# Inspect the wrapped model: layer "0" now carries lora_A / lora_B sub-modules,
# while layers "1" and "2" are unchanged.
model1

PeftModel(
  (base_model): LoraModel(
    (model): Sequential(
      (0): Linear(
        in_features=10, out_features=10, bias=True
        (lora_dropout): ModuleDict(
          (default): Identity()
        )
        (lora_A): ModuleDict(
          (default): Linear(in_features=10, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (default): Linear(in_features=8, out_features=10, bias=False)
        )
        (lora_embedding_A): ParameterDict()
        (lora_embedding_B): ParameterDict()
      )
      (1): ReLU()
      (2): Linear(in_features=10, out_features=2, bias=True)
    )
  )
)

## 2. 多适配器加载与切换

可以为每个任务分别训练一个 LoRA 适配器，再根据具体任务按需加载与切换，非常方便！

In [8]:
# Second toy base network with the same architecture as net1; it serves as the
# base model for the multi-adapter experiments in this section.
net2 = nn.Sequential()
net2.add_module("0", nn.Linear(10, 10))
net2.add_module("1", nn.ReLU())
net2.add_module("2", nn.Linear(10, 2))
net2

Sequential(
  (0): Linear(in_features=10, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=2, bias=True)
)

In [9]:
# First adapter: LoRA on layer "0", saved to ./loraA.
# Naming note: "loraA" and "loraB" are two independent adapters, not the A/B matrix
# pair of a single LoRA.
config1 = LoraConfig(target_modules=["0"])
model2 = get_peft_model(net2, config1)
model2.save_pretrained("./loraA")

In [10]:
# Second adapter: LoRA on layer "2", saved to ./loraB as a separate adapter.
# net2 was already modified in place by the get_peft_model call in the previous cell,
# so the model built here ends up with LoRA layers on both "0" and "2".
# NOTE(review): the ./loraB checkpoint may therefore also include the layer-"0" LoRA
# weights; wrapping a fresh base network would avoid that -- confirm.
config2 = LoraConfig(target_modules=["2"])
model2 = get_peft_model(net2, config2)
model2.save_pretrained("./loraB")

In [11]:
# get_peft_model also modifies net2 in place; net2 simply lacks the PeftModel / LoraModel wrappers.
net2

Sequential(
  (0): Linear(
    in_features=10, out_features=10, bias=True
    (lora_dropout): ModuleDict(
      (default): Identity()
    )
    (lora_A): ModuleDict(
      (default): Linear(in_features=10, out_features=8, bias=False)
    )
    (lora_B): ModuleDict(
      (default): Linear(in_features=8, out_features=10, bias=False)
    )
    (lora_embedding_A): ParameterDict()
    (lora_embedding_B): ParameterDict()
  )
  (1): ReLU()
  (2): Linear(
    in_features=10, out_features=2, bias=True
    (lora_dropout): ModuleDict(
      (default): Identity()
    )
    (lora_A): ModuleDict(
      (default): Linear(in_features=10, out_features=8, bias=False)
    )
    (lora_B): ModuleDict(
      (default): Linear(in_features=8, out_features=2, bias=False)
    )
    (lora_embedding_A): ParameterDict()
    (lora_embedding_B): ParameterDict()
  )
)

In [12]:
# Compare with net2: the same layers, but wrapped in PeftModel -> LoraModel.
model2

PeftModel(
  (base_model): LoraModel(
    (model): Sequential(
      (0): Linear(
        in_features=10, out_features=10, bias=True
        (lora_dropout): ModuleDict(
          (default): Identity()
        )
        (lora_A): ModuleDict(
          (default): Linear(in_features=10, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (default): Linear(in_features=8, out_features=10, bias=False)
        )
        (lora_embedding_A): ParameterDict()
        (lora_embedding_B): ParameterDict()
      )
      (1): ReLU()
      (2): Linear(
        in_features=10, out_features=2, bias=True
        (lora_dropout): ModuleDict(
          (default): Identity()
        )
        (lora_A): ModuleDict(
          (default): Linear(in_features=10, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (default): Linear(in_features=8, out_features=2, bias=False)
        )
        (lora_embedding_A): ParameterDict()
        (lora_embedding_B): Parameter

In [13]:
# Rebuild net2 from scratch so the saved adapters are loaded onto a plain base
# network without any LoRA layers (the weights are freshly initialised).
net2 = nn.Sequential()
net2.add_module("0", nn.Linear(10, 10))
net2.add_module("1", nn.ReLU())
net2.add_module("2", nn.Linear(10, 2))
net2

Sequential(
  (0): Linear(in_features=10, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=2, bias=True)
)

In [14]:
# Load the saved ./loraA adapter onto the base network.
# adapter_name sets the name the adapter is registered under; if omitted it is "default".
# Do not confuse the adapter name "loraA" with the lora_A matrix inside each LoRA layer.
model2 = PeftModel.from_pretrained(net2, model_id="./loraA/", adapter_name="loraA")
model2

PeftModel(
  (base_model): LoraModel(
    (model): Sequential(
      (0): Linear(
        in_features=10, out_features=10, bias=True
        (lora_dropout): ModuleDict(
          (loraA): Identity()
        )
        (lora_A): ModuleDict(
          (loraA): Linear(in_features=10, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (loraA): Linear(in_features=8, out_features=10, bias=False)
        )
        (lora_embedding_A): ParameterDict()
        (lora_embedding_B): ParameterDict()
      )
      (1): ReLU()
      (2): Linear(in_features=10, out_features=2, bias=True)
    )
  )
)

In [15]:
# model2 is already a PeftModel, so a further adapter is added with load_adapter
# rather than another PeftModel.from_pretrained call.
# adapter_name is given explicitly because an adapter ("loraA") is already attached to the model.
model2.load_adapter("./loraB/", adapter_name="loraB")
model2

PeftModel(
  (base_model): LoraModel(
    (model): Sequential(
      (0): Linear(
        in_features=10, out_features=10, bias=True
        (lora_dropout): ModuleDict(
          (loraA): Identity()
        )
        (lora_A): ModuleDict(
          (loraA): Linear(in_features=10, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (loraA): Linear(in_features=8, out_features=10, bias=False)
        )
        (lora_embedding_A): ParameterDict()
        (lora_embedding_B): ParameterDict()
      )
      (1): ReLU()
      (2): Linear(
        in_features=10, out_features=2, bias=True
        (lora_dropout): ModuleDict(
          (loraB): Identity()
        )
        (lora_A): ModuleDict(
          (loraB): Linear(in_features=10, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (loraB): Linear(in_features=8, out_features=2, bias=False)
        )
        (lora_embedding_A): ParameterDict()
        (lora_embedding_B): ParameterDict()
     

In [16]:
# The active adapter defaults to "loraA", the one that was loaded first.
model2.active_adapter

'loraA'

In [17]:
# Forward pass on the fixed probe input [[0., 1., ..., 9.]].
# lora_B of "loraA" is still all zeros at this point, so the adapter contributes
# nothing to the output. The loaded model is in eval state by default.
sample_input = torch.arange(10, dtype=torch.float32).reshape(1, 10)
model2(sample_input)

tensor([[-0.2133, -0.2173]])

In [18]:
# Dump every parameter (base weights and LoRA matrices) to inspect the raw values.
for entry in model2.named_parameters():
    print(*entry)

base_model.model.0.weight Parameter containing:
tensor([[ 0.1104, -0.1041, -0.0983, -0.1713, -0.1536,  0.2697, -0.1695,  0.2809,
          0.2345,  0.1055],
        [ 0.0013,  0.0923,  0.0483, -0.3077,  0.2787, -0.0782,  0.2527,  0.2126,
          0.2008, -0.3018],
        [ 0.0544, -0.2970, -0.0767, -0.0035, -0.2246,  0.2156, -0.0449,  0.1619,
         -0.1122, -0.2874],
        [-0.1588,  0.2359,  0.2355,  0.2573,  0.0462,  0.2305, -0.1351, -0.1660,
          0.2975, -0.1043],
        [-0.2665, -0.2501,  0.1441,  0.0415,  0.1242, -0.1810, -0.0929, -0.2947,
         -0.1736, -0.1115],
        [-0.2274,  0.0383,  0.0497,  0.2918, -0.1197, -0.0032,  0.0781, -0.1618,
         -0.2034,  0.1531],
        [ 0.0616, -0.0254, -0.1494, -0.1608, -0.1331, -0.0720,  0.0710, -0.2306,
          0.0889, -0.1325],
        [-0.1672,  0.0955,  0.1939, -0.1365, -0.1152,  0.0280, -0.2409,  0.2064,
          0.0816, -0.2088],
        [-0.2774,  0.2275, -0.0322,  0.2042,  0.0296,  0.2570, -0.0759,  0.2561,

In [19]:
# Fill both low-rank matrices of adapter "loraA" on layer "0" with ones, so the
# adapter produces a non-zero update and visibly changes the forward pass.
# nn.init.ones_ writes in place under no_grad, which avoids rebinding `param.data`
# (a legacy pattern that bypasses autograd's bookkeeping).
for name, param in model2.named_parameters():
    if name in ("base_model.model.0.lora_A.loraA.weight", "base_model.model.0.lora_B.loraA.weight"):
        nn.init.ones_(param)

In [20]:
# With both "loraA" matrices set to ones, the adapter now actually changes the output.
sample_input = torch.arange(10, dtype=torch.float32).reshape(1, 10)
model2(sample_input)

tensor([[57.0814, 11.6204]])

In [21]:
# Make "loraB" the active adapter, which in effect deactivates "loraA".
model2.set_adapter("loraB")

In [22]:
# Confirm which adapter is active after the switch.
model2.active_adapter

'loraB'

In [23]:
# lora_B of "loraB" is also still all zeros, so this adapter has no effect on the
# output either.
sample_input = torch.arange(10, dtype=torch.float32).reshape(1, 10)
model2(sample_input)

tensor([[-0.2133, -0.2173]], grad_fn=<AddBackward0>)

## 3. 禁用适配器

In [24]:
# Switch back to "loraA" (whose matrices were set to ones earlier) before
# demonstrating disable_adapter().
model2.set_adapter("loraA")

In [25]:
# "loraA" is active again, so the output matches the earlier run with the
# ones-filled adapter.
sample_input = torch.arange(10, dtype=torch.float32).reshape(1, 10)
model2(sample_input)

tensor([[57.0814, 11.6204]], grad_fn=<AddmmBackward0>)

In [26]:
# disable_adapter() is what actually switches the adapters off; set_adapter always
# leaves one adapter active. Inside the block the output matches the adapter-free result.
with model2.disable_adapter():
    sample_input = torch.arange(10, dtype=torch.float32).reshape(1, 10)
    print(model2(sample_input))

tensor([[-0.2133, -0.2173]])
