- references
    - https://huggingface.co/blog/peft_merging
    - https://huggingface.co/docs/peft/developer_guides/model_merging

In [1]:
import os

# Pin the process to the first GPU; this must be set before any CUDA-using
# library is imported for it to take effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Route both HTTP and HTTPS traffic through the same local proxy endpoint.
for proxy_var in ("http_proxy", "https_proxy"):
    os.environ[proxy_var] = "http://127.0.0.1:7890"

In [2]:
# !pip install --upgrade peft

In [3]:
from peft import PeftConfig, PeftModel
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
import torch
import random

[2024-03-31 20:18:22,363] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)


## lora models

In [4]:
# Hub id of the first LoRA adapter; its config names the base model to load.
peft_model_id = "smangrul/tinyllama_lora_norobots"
device = "cuda"
# Fetch only the adapter's configuration (no weights are loaded here).
config = PeftConfig.from_pretrained(peft_model_id)

In [5]:
# Inspect the adapter config: base model path, rank r, lora_alpha and target modules.
config

LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T', revision=None, task_type='CAUSAL_LM', inference_mode=True, r=8, target_modules={'down_proj', 'v_proj', 'embed_tokens', 'q_proj', 'k_proj', 'gate_proj', 'lm_head', 'o_proj', 'up_proj'}, lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None)

In [6]:
# Load the base model named by the adapter config, quantized to 4 bit.
# NOTE(review): newer transformers releases appear to prefer
# `quantization_config=BitsAndBytesConfig(load_in_4bit=True)` over the bare
# `load_in_4bit` flag; confirm against the installed version.
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, load_in_4bit=True, device_map="auto")
# The tokenizer comes from the adapter repo, not the base model, so it carries
# the extra special tokens the adapter was trained with.
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
# Tokenizer size including the added special tokens.
len(tokenizer)

32005

In [8]:
# Vocabulary size the base model was configured with; smaller than the tokenizer.
model.config.vocab_size

32000

In [9]:
# Grow the embedding table to the tokenizer size so the rows for the added
# special tokens exist before the adapter weights are attached.
model.resize_token_embeddings(len(tokenizer))

Embedding(32005, 2048)

In [10]:
# Tokens present in the adapter's tokenizer but absent from the base model's.
adapter_vocab = AutoTokenizer.from_pretrained("smangrul/tinyllama_lora_norobots").vocab
base_vocab = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T").vocab
adapter_vocab.keys() - base_vocab.keys()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


{'<pad>',
 '<|im_end|>',
 '<|im_start|>assistant',
 '<|im_start|>system',
 '<|im_start|>user'}

In [11]:
# Wrap the base model with the first adapter, registered under the name "norobots".
# NOTE(review): this rebinds `model` to the wrapped model, so re-running the cell
# would presumably wrap an already-wrapped model; re-run from the load cell instead.
model = PeftModel.from_pretrained(model, peft_model_id, adapter_name="norobots")

In [12]:
# Attach the two remaining adapters next to "norobots" (adapter name -> Hub repo).
extra_adapter_repos = {
    "sql": "smangrul/tinyllama_lora_sql",
    "adcopy": "smangrul/tinyllama_lora_adcopy",
}
for extra_name, extra_repo in extra_adapter_repos.items():
    model.load_adapter(extra_repo, adapter_name=extra_name)

In [13]:
# Names of the adapters currently registered on the model.
model.peft_config.keys()

dict_keys(['norobots', 'sql', 'adcopy'])

In [18]:
# List the adapter names in this cell so it runs on a fresh kernel; otherwise
# `adapters` is only defined further down, in the merge cell.
adapters = ["norobots", "adcopy", "sql"]

# Show which modules each adapter targets. Only some adapters list
# embed_tokens / lm_head, which matters when merging per-module weights.
for adapter in adapters:
    print(adapter, model.peft_config[adapter].target_modules)

norobots {'down_proj', 'v_proj', 'embed_tokens', 'q_proj', 'k_proj', 'gate_proj', 'lm_head', 'o_proj', 'up_proj'}
adcopy {'v_proj', 'k_proj', 'embed_tokens', 'q_proj', 'down_proj', 'gate_proj', 'lm_head', 'o_proj', 'up_proj'}
sql {'v_proj', 'down_proj', 'q_proj', 'k_proj', 'gate_proj', 'o_proj', 'up_proj'}


In [38]:
# model

## merge 3 adapters

- combination_type
    - [`svd`, `linear`, `cat`, `ties`, `ties_svd`, `dare_ties`, `dare_linear`, `dare_ties_svd`, `dare_linear_svd`, `magnitude_prune`, `magnitude_prune_svd`]
    - `combination_type = "linear" if len(adapters) == 1 else combination_type`
    - 两种主要的类型
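        - TIES：TrIm, Elect, and Merge (TIES) is a three-step method for merging models. First, redundant parameters are trimmed; then conflicting signs are resolved into an aggregated sign vector; finally, the parameters whose signs agree with the aggregate are averaged.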
        - DARE：Drop And REscale is a method that can be used to prepare for other model merging methods like TIES.
    - 实现上
        -  `cat`
        -  `[svd, ties_svd, dare_linear_svd, dare_ties_svd, magnitude_prune_svd]`
        -  `[linear, ties, dare_linear, dare_ties, magnitude_prune]`
- target
    - lora_A/lora_B
    - lora_embedding_A/lora_embedding_B 

In [15]:
# Merge recipe: which adapters to combine, with what weight, under which name.
adapters = ["norobots", "adcopy", "sql"]
weights = [2.0, 0.3, 0.7]
adapter_name = "merge"
density = 0.2
# combination_type = "ties"
combination_type = "svd"

# Remove a previously merged adapter of the same name so the cell can be re-run.
if adapter_name in model.peft_config:
    model.delete_adapter(adapter_name)

model.add_weighted_adapter(
    adapters=adapters,
    weights=weights,
    adapter_name=adapter_name,
    combination_type=combination_type,
    density=density,
)

In [50]:
# model

### from scratch

In [24]:
# Per-adapter LoRA A factors on the token embedding, one entry per adapter that has them.
model.base_model.model.model.embed_tokens.lora_embedding_A

ParameterDict(
    (norobots): Parameter containing: [torch.cuda.HalfTensor of size 8x32005 (GPU 0)]
    (adcopy): Parameter containing: [torch.cuda.HalfTensor of size 8x32005 (GPU 0)]
    (merge): Parameter containing: [torch.cuda.FloatTensor of size 8x32005 (GPU 0)]
)

In [33]:
# Pull the A/B LoRA factors of the two adapters that modify the token embedding.
embed_tokens = model.base_model.model.model.embed_tokens
embedding_a, embedding_b = embed_tokens.lora_embedding_A, embed_tokens.lora_embedding_B

lora_a_1, lora_b_1 = embedding_a['norobots'], embedding_b['norobots']
lora_a_2, lora_b_2 = embedding_a['adcopy'], embedding_b['adcopy']

In [34]:
# Shape check: each A is (r, vocab) and each B is (hidden, r).
lora_a_1.shape, lora_b_1.shape, lora_a_2.shape, lora_b_2.shape

(torch.Size([8, 32005]),
 torch.Size([2048, 8]),
 torch.Size([8, 32005]),
 torch.Size([2048, 8]))

In [58]:
# Per-adapter LoRA scaling; lora_alpha / r = 16 / 8 = 2 for the norobots config
# (assumed to be the same for adcopy; TODO confirm).
scaling = [2, 2]
# Only norobots and adcopy carry embed_tokens weights, so only their two
# merge weights apply to this module.
weights = [2, 0.3]
# Effective weight of each adapter: its scaling times its merge weight.
valid_weights = [scale * weight for scale, weight in zip(scaling, weights)]

In [43]:
def transpose(weight, fan_in_fan_out):
    """Return ``weight`` transposed when ``fan_in_fan_out`` is truthy.

    Args:
        weight: A tensor or ``torch.nn.Parameter``.
        fan_in_fan_out: When falsy, ``weight`` is returned unchanged (same object).

    Returns:
        ``weight`` itself, or ``weight.T``. If the input is a
        ``torch.nn.Parameter`` the transposed result is wrapped in a new
        ``torch.nn.Parameter``.
    """
    if fan_in_fan_out:
        flipped = weight.T
        # Keep the Parameter type for Parameter inputs.
        return torch.nn.Parameter(flipped) if isinstance(weight, torch.nn.Parameter) else flipped
    return weight

In [54]:
# Dense weight update of each adapter: B @ A is (hidden, vocab); passing True
# forces the transpose so the result is (vocab, hidden), then the adapter's
# scaling is applied.
delta_lora_1 = transpose(lora_b_1 @ lora_a_1, True) * scaling[0]
delta_lora_2 = transpose(lora_b_2 @ lora_a_2, True) * scaling[1]

In [55]:
# Both deltas should have the embedding's (vocab, hidden) shape.
delta_lora_1.shape, delta_lora_2.shape

(torch.Size([32005, 2048]), torch.Size([32005, 2048]))

In [60]:
# Weighted sum of the two dense updates.
# NOTE(review): scaling enters twice here (once in delta_lora_i, once in
# valid_weights). The factors recovered further down match the "merge" adapter
# up to sign, which suggests this mirrors add_weighted_adapter; confirm against
# the PEFT source.
delta_lora = valid_weights[0] * delta_lora_1 + valid_weights[1] * delta_lora_2
delta_lora

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0147,  0.0087,  0.0105,  ...,  0.0161,  0.0129, -0.0181],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [-0.0150,  0.0119, -0.0298,  ..., -0.0230, -0.0261, -0.0239],
        [-0.0233, -0.0274, -0.0006,  ..., -0.0169, -0.0194,  0.0371],
        [ 0.0084,  0.0575, -0.0138,  ..., -0.0258,  0.0178,  0.0437]],
       device='cuda:0', dtype=torch.float16, grad_fn=<AddBackward0>)

In [63]:
# Rank of the re-factorized adapter; same as the r=8 of the source adapters.
new_rank = 8

# Transpose into a new name rather than rebinding `delta_lora`, so re-running
# this cell does not flip the matrix back and change the result.
delta_lora_t = delta_lora.T  # (hidden, vocab)

# SVD runs in float32 because the merged delta is float16.
# NOTE(review): only the first `new_rank` rows of Vh are kept, so
# full_matrices=False would presumably give the same factors with far less
# memory; confirm before changing.
U, S, Vh = torch.linalg.svd(delta_lora_t.float(), full_matrices=True)

# Truncate to the top `new_rank` singular directions and fold the singular
# values into U, giving U: (hidden, new_rank) and Vh: (new_rank, vocab).
U = U[:, :new_rank]
S = S[:new_rank]
U = U @ torch.diag(S)
Vh = Vh[:new_rank, :]

In [70]:
# Vh has the shape of a LoRA A factor (r, vocab) and U of a LoRA B factor (hidden, r).
Vh.shape, U.shape

(torch.Size([8, 32005]), torch.Size([2048, 8]))

In [74]:
# Hand-computed A factor; compare with the merged adapter's lora_embedding_A shown further down.
Vh

tensor([[-5.9250e-08,  3.6710e-03,  5.8551e-09,  ..., -5.5535e-03,
         -1.1281e-02, -5.8577e-03],
        [-5.3737e-08,  7.6944e-03, -9.9491e-09,  ..., -1.8266e-02,
         -2.5229e-02,  5.2019e-03],
        [-2.8191e-08, -3.8739e-03, -2.3445e-09,  ..., -2.6179e-03,
         -1.5592e-03,  2.8683e-02],
        ...,
        [ 1.6487e-08, -3.0573e-03, -1.4432e-10,  ...,  9.0157e-03,
         -2.2049e-02,  4.6260e-03],
        [-2.2342e-08,  1.2938e-02, -5.7639e-09,  ...,  1.6897e-02,
          8.5601e-03, -1.1252e-02],
        [-1.0293e-08, -4.6255e-03, -2.4660e-08,  ..., -1.4191e-02,
          1.2497e-02,  6.5705e-03]], device='cuda:0', grad_fn=<SliceBackward0>)

In [35]:
# Factors that add_weighted_adapter stored for the "merge" adapter on the token embedding.
merged_embed_tokens = model.base_model.model.model.embed_tokens
lora_a_new = merged_embed_tokens.lora_embedding_A['merge']
lora_b_new = merged_embed_tokens.lora_embedding_B['merge']

In [36]:
# Merged factors keep the (r, vocab) / (hidden, r) shapes of the source adapters.
lora_a_new.shape, lora_b_new.shape

(torch.Size([8, 32005]), torch.Size([2048, 8]))

In [75]:
# Library-computed A factor. Rows agree with the hand-computed Vh up to a
# per-row sign flip and small numerical differences.
lora_a_new

Parameter containing:
tensor([[ 6.6927e-08,  3.6697e-03, -4.0709e-08,  ..., -5.5574e-03,
         -1.1279e-02, -5.8365e-03],
        [ 5.2394e-08,  7.7513e-03,  3.8410e-08,  ..., -1.8233e-02,
         -2.5212e-02,  4.8264e-03],
        [-3.8023e-08,  3.7841e-03,  8.6544e-09,  ...,  2.8255e-03,
          1.8587e-03, -2.8766e-02],
        ...,
        [-2.4451e-09, -3.0670e-03, -6.7911e-09,  ...,  8.9840e-03,
         -2.2061e-02,  4.7337e-03],
        [-3.0357e-08, -1.2936e-02, -3.8171e-09,  ..., -1.6890e-02,
         -8.5719e-03,  1.1253e-02],
        [ 1.4960e-08, -4.6251e-03,  7.7989e-08,  ..., -1.4207e-02,
          1.2501e-02,  6.5824e-03]], device='cuda:0')