- references
    - https://huggingface.co/docs/peft/developer_guides/low_level_api

In [3]:
import inspect
import torch
from torch import nn
from peft import inject_adapter_in_model, LoraConfig

[2024-04-04 14:13:19,645] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [4]:
class DummyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding = torch.nn.Embedding(100, 1000)
        self.linear = torch.nn.Linear(1000, 1000)
        self.lm_head = torch.nn.Linear(1000, 100)

    def forward(self, input_ids):
        x = self.embedding(input_ids)
        x = self.linear(x)
        x = self.lm_head(x)
        return x

In [5]:
lora_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    target_modules=["linear"],
)

In [6]:
lora_config

LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type=None, inference_mode=False, r=64, target_modules={'linear'}, lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None)

## dummy model

In [7]:
model = DummyModel()
model

DummyModel(
  (embedding): Embedding(100, 1000)
  (linear): Linear(in_features=1000, out_features=1000, bias=True)
  (lm_head): Linear(in_features=1000, out_features=100, bias=True)
)

In [8]:
type(model.linear)

torch.nn.modules.linear.Linear

In [9]:
dummy_inputs = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]])
model(dummy_inputs).shape

torch.Size([1, 8, 100])

## lora model

In [10]:
model = inject_adapter_in_model(lora_config, model)
model

DummyModel(
  (embedding): Embedding(100, 1000)
  (linear): lora.Linear(
    (base_layer): Linear(in_features=1000, out_features=1000, bias=True)
    (lora_dropout): ModuleDict(
      (default): Dropout(p=0.1, inplace=False)
    )
    (lora_A): ModuleDict(
      (default): Linear(in_features=1000, out_features=64, bias=False)
    )
    (lora_B): ModuleDict(
      (default): Linear(in_features=64, out_features=1000, bias=False)
    )
    (lora_embedding_A): ParameterDict()
    (lora_embedding_B): ParameterDict()
  )
  (lm_head): Linear(in_features=1000, out_features=100, bias=True)
)

In [11]:
type(model.embedding), type(model.linear)

(torch.nn.modules.sparse.Embedding, peft.tuners.lora.layer.Linear)

In [12]:
isinstance(model.linear, nn.Module)

True

In [13]:
inspect.getmro(type(model.linear))

(peft.tuners.lora.layer.Linear,
 torch.nn.modules.module.Module,
 peft.tuners.lora.layer.LoraLayer,
 peft.tuners.tuners_utils.BaseTunerLayer,
 abc.ABC,
 object)

In [14]:
# result = result + lora_B(lora_A(dropout(x))) * scaling
model.linear.forward??

[0;31mSignature:[0m [0mmodel[0m[0;34m.[0m[0mlinear[0m[0;34m.[0m[0mforward[0m[0;34m([0m[0mx[0m[0;34m:[0m [0;34m'torch.Tensor'[0m[0;34m,[0m [0;34m*[0m[0margs[0m[0;34m:[0m [0;34m'Any'[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m:[0m [0;34m'Any'[0m[0;34m)[0m [0;34m->[0m [0;34m'torch.Tensor'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Defines the computation performed at every call.

Should be overridden by all subclasses.

.. note::
    Although the recipe for forward pass needs to be defined within
    this function, one should call the :class:`Module` instance afterwards
    instead of this since the former takes care of running the
    registered hooks while the latter silently ignores them.
[0;31mSource:[0m   
    [0;32mdef[0m [0mforward[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mx[0m[0;34m:[0m [0mtorch[0m[0;34m.[0m[0mTensor[0m[0;34m,[0m [0;34m*[0m[0margs[0m[0;34m:[0m [0mAny[0m[0;34m,[0m [0;34m**[0m[0mkwar

## save

In [45]:
from peft import get_peft_model_state_dict

In [48]:
get_peft_model_state_dict(model)

{'linear.lora_A.weight': tensor([[-0.0066, -0.0240,  0.0164,  ..., -0.0070,  0.0205,  0.0312],
         [-0.0244, -0.0162, -0.0250,  ...,  0.0038, -0.0161, -0.0171],
         [ 0.0122, -0.0050, -0.0068,  ..., -0.0132,  0.0286,  0.0253],
         ...,
         [-0.0158, -0.0284, -0.0248,  ...,  0.0025,  0.0007,  0.0157],
         [-0.0013, -0.0223, -0.0096,  ..., -0.0131, -0.0036,  0.0033],
         [-0.0202,  0.0143,  0.0211,  ...,  0.0237, -0.0268, -0.0189]]),
 'linear.lora_B.weight': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]])}