- references
    - https://huggingface.co/docs/peft/developer_guides/model_merging

In [1]:
import os
# Expose only GPU index 0 to this process and route HTTP(S) traffic through
# the proxy listening on the loopback address.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "http_proxy": "http://127.0.0.1:7890",
        "https_proxy": "http://127.0.0.1:7890",
    }
)

In [2]:
# Optional setup step, left disabled: uncomment to upgrade peft in this environment.
# !pip install --upgrade peft

In [3]:
import random

import torch
from datasets import load_dataset
from peft import PeftConfig, PeftModel
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig

[2024-03-31 11:24:19,352] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)


## lora models

In [4]:
# Hub id of the LoRA adapter this notebook starts from.
peft_model_id = "smangrul/tinyllama_lora_norobots"
# NOTE(review): `device` is not referenced in the cells visible here —
# presumably used further down in the notebook; confirm.
device = "cuda"
# Read the adapter's configuration; among other things it names the base
# checkpoint (`base_model_name_or_path`) that the adapter was trained on.
config = PeftConfig.from_pretrained(peft_model_id)

In [5]:
# Display the adapter configuration loaded above (rank, alpha, target modules, ...).
config

LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T', revision=None, task_type='CAUSAL_LM', inference_mode=True, r=8, target_modules={'down_proj', 'q_proj', 'embed_tokens', 'gate_proj', 'o_proj', 'lm_head', 'k_proj', 'up_proj', 'v_proj'}, lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None)

In [6]:
# Load the base checkpoint named in the adapter config, quantized to 4 bit.
# The quantization settings are passed through `quantization_config`, because
# handing `load_in_4bit=True` straight to `from_pretrained` is deprecated in
# transformers.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)
# The tokenizer is taken from the adapter repo rather than the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
# Size of the adapter tokenizer's vocabulary, to compare with the model's vocab_size.
len(tokenizer)

32005

In [8]:
# Vocabulary size recorded in the base model's config, to compare with len(tokenizer).
model.config.vocab_size

32000

In [9]:
# The tokenizer holds more entries than the base model's vocab_size, so resize
# the model's token embeddings to len(tokenizer).
# NOTE(review): presumably required before attaching the adapters, since their
# target_modules include embed_tokens and lm_head — confirm.
model.resize_token_embeddings(len(tokenizer))

Embedding(32005, 2048)

In [10]:
# Tokens present in the adapter's tokenizer but not in the base checkpoint's.
# The adapter tokenizer is already loaded as `tokenizer`, and the base
# checkpoint id is held in `config`, so neither repo id is typed again and
# only the base tokenizer has to be loaded here.
base_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.vocab.keys() - base_tokenizer.vocab.keys()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


{'<pad>',
 '<|im_end|>',
 '<|im_start|>assistant',
 '<|im_start|>system',
 '<|im_start|>user'}

In [11]:
# Wrap the base model with the first LoRA adapter, registered as "norobots".
# NOTE(review): `model` is rebound to the wrapped model, so re-running this
# cell on its own would pass the already-wrapped model back in — re-run the
# base-model loading cell first.
model = PeftModel.from_pretrained(model, peft_model_id, adapter_name="norobots")

In [12]:
# Attach the two remaining LoRA adapters to the same model, each under its own name.
for adapter_repo, adapter_label in (
    ("smangrul/tinyllama_lora_sql", "sql"),
    ("smangrul/tinyllama_lora_adcopy", "adcopy"),
):
    # the return value of load_adapter is not needed here
    _ = model.load_adapter(adapter_repo, adapter_name=adapter_label)

In [13]:
# Names of the adapters currently registered on the model.
model.peft_config.keys()

dict_keys(['norobots', 'sql', 'adcopy'])

## merge 3 adapters

- combination_type
    - [`svd`, `linear`, `cat`, `ties`, `ties_svd`, `dare_ties`, `dare_linear`, `dare_ties_svd`, `dare_linear_svd`, `magnitude_prune`, `magnitude_prune_svd`]
    - `combination_type = "linear" if len(adapters) == 1 else combination_type`
    - 两种主要的类型
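        - TIES：TrIm, Elect, and Merge (TIES) is a three-step method for merging models. First, redundant parameters are trimmed; then conflicting signs are resolved into an aggregated sign vector; finally, the parameters whose signs agree with the aggregated sign are averaged.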
        - DARE：Drop And REscale is a method that can be used to prepare for other model merging methods like TIES.
- target
    - lora_A/lora_B
    - lora_embedding_A/lora_embedding_B 

In [14]:
# Recipe for the merged adapter: its name, the adapters to blend, and the
# per-adapter weights (same order as `adapters`).
adapter_name = "merge"
adapters = ["norobots", "adcopy", "sql"]
weights = [2.0, 0.3, 0.7]

combination_type = "svd"  # alternative tried in this notebook: "ties"
# NOTE(review): per the PEFT docs `density` applies to the pruning-based
# combination types (ties / dare / magnitude_prune); presumably it has no
# effect with plain "svd" — confirm.
density = 0.2

# Remove any adapter already registered under this name before creating it again.
if adapter_name in model.peft_config:
    model.delete_adapter(adapter_name)

model.add_weighted_adapter(
    adapters,
    weights,
    adapter_name,
    combination_type=combination_type,
    density=density,
)

In [18]:
# Show, for each source adapter, which modules its LoRA weights target.
for adapter in adapters:
    adapter_cfg = model.peft_config[adapter]
    print(adapter, adapter_cfg.target_modules)

norobots {'down_proj', 'q_proj', 'embed_tokens', 'gate_proj', 'o_proj', 'lm_head', 'k_proj', 'up_proj', 'v_proj'}
adcopy {'down_proj', 'gate_proj', 'embed_tokens', 'q_proj', 'o_proj', 'lm_head', 'k_proj', 'up_proj', 'v_proj'}
sql {'down_proj', 'q_proj', 'gate_proj', 'o_proj', 'k_proj', 'up_proj', 'v_proj'}
