**Reference**: https://huggingface.co/docs/peft/en/developer_guides/model_merging?merge-method=DARE&ties=instruct

# Setup

In [None]:
!pip install -U bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.0


In [None]:
import gc
import os

import torch
from huggingface_hub import HfApi
from huggingface_hub import upload_folder, login, create_repo
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig

In [None]:
# Authenticate with the Hugging Face Hub through the CLI; the resulting token is
# cached locally (see the output below) and reused by later Hub calls.
!huggingface-cli login

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
The token `Test` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `Test`


# Load Model

In [None]:
# Load the base model quantized to 4 bits. Passing `load_in_4bit=True` directly to
# `from_pretrained` is deprecated; the supported form is a BitsAndBytesConfig
# handed over through `quantization_config`.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "bigcode/starcoder2-7b",
    quantization_config=quantization_config,
    device_map="auto",
).eval()
tokenizer = AutoTokenizer.from_pretrained("bigcode/starcoder2-7b")

# Attach one LoRA adapter per programming language. The first call wraps the base
# model in a PeftModel; the remaining adapters are loaded into that same wrapper.
model = PeftModel.from_pretrained(model, "ahmedashrafay/staradapters-python", adapter_name="python")
model.load_adapter("ahmedashrafay/staradapters-cpp", adapter_name="cpp")
model.load_adapter("ahmedashrafay/staradapters-java", adapter_name="java")
model.load_adapter("ahmedashrafay/staradapters-javascript", adapter_name="javascript")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/893 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/41.6k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.51G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.88k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/777k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/442k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.06M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/958 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/721 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/29.4M [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/721 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/29.4M [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/721 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/29.4M [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/721 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/29.4M [00:00<?, ?B/s]

_IncompatibleKeys(missing_keys=['base_model.model.model.embed_tokens.weight', 'base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight', 'base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias', 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.python.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.cpp.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.java.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.python.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.cpp.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.java.weight', 'base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight', 'base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias', 'base_model.model.model.layers.0.self_attn.k_proj.lora_A.python.weight', 'base_model.model.model.layers.0.self_attn.k_proj.lora_A.cpp.weight', 'base_model.model.model.layers.0.self_attn.k_proj.lora_A.java.weight', 'bas

In [None]:
# Names under which the per-language adapters were registered on the model.
adapters = [
    "python",
    "cpp",
    "java",
    "javascript",
]

# Merging/Testing

In [None]:
def equal_merge_model(combination_name, adapter_name, adapters, density=0.2):
  """Merge the given adapters with equal weights into a new adapter on the global model.

  Args:
    combination_name: Merge method, forwarded to ``add_weighted_adapter`` as
      ``combination_type`` (e.g. ``"dare_ties"``).
    adapter_name: Name under which the merged adapter is registered.
    adapters: Names of the already-loaded adapters to merge.
    density: Forwarded unchanged to ``add_weighted_adapter``. Defaults to 0.2,
      the value this notebook previously hard-coded.

  Raises:
    ValueError: If ``adapters`` is empty.
  """
  if not adapters:
    raise ValueError("`adapters` must contain at least one adapter name")

  # Drop a previous merge of the same name so the cell can be re-run. Checking
  # for existence explicitly (instead of swallowing every exception) lets real
  # failures from delete_adapter surface.
  if adapter_name in model.peft_config:
    model.delete_adapter(adapter_name)

  # Every source adapter contributes with the same weight.
  weights = [1.0] * len(adapters)
  model.add_weighted_adapter(
      adapters,
      weights,
      adapter_name=adapter_name,
      combination_type=combination_name,
      density=density,
  )

In [None]:
def generate(prompt, max_length=512):
    """Sample one completion for ``prompt`` from the global model (temperature 0.7).

    Args:
        prompt: Text the model is conditioned on.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The decoded text of the newly generated tokens, without the prompt.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[1]

    # Fall back to EOS when the tokenizer defines no pad token, so generate()
    # is always given a concrete pad id.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            temperature=0.7,
            top_p=0.95,
            top_k=50,
            pad_token_id=pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            do_sample=True,
            num_return_sequences=1
        )

    # Strip the prompt by token count rather than by character count: decoding
    # with skip_special_tokens is not guaranteed to reproduce the prompt text
    # character-for-character, which would make a string slice cut at the
    # wrong position.
    completion_ids = outputs[0][prompt_token_count:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)

In [None]:
def test_model(combination_name, prompt="int fibonacci(int n) {"):
  """Activate an adapter and return a sampled completion as a quick viability check.

  Args:
    combination_name: Name of the adapter to activate via ``model.set_adapter``.
    prompt: Text to complete. Defaults to the C-style Fibonacci stub this
      notebook has always used.

  Returns:
    Whatever ``generate`` returns for ``prompt``.
  """
  model.set_adapter(combination_name)
  return generate(prompt)

In [None]:
def save_model(adapter_name, repo_name):
  """Save one adapter locally and upload it to ``ahmedashrafay/<repo_name>`` on the Hub.

  Args:
    adapter_name: Name of the adapter on the global model to save and upload.
    repo_name: Repository name (without owner) that receives the files.
  """
  model.set_adapter(adapter_name)

  # The folder uploaded below is the `<adapter_name>` subfolder of the save
  # directory, so the config and tokenizer are written into that same subfolder.
  # NOTE(review): this relies on PEFT placing a named adapter in a subfolder of
  # that name — confirm if the adapter is ever called "default".
  save_dir = f"./{adapter_name}"
  adapter_dir = f"./{adapter_name}/{adapter_name}"

  # Only write the adapter that is being published instead of every adapter
  # currently loaded on the model.
  model.save_pretrained(save_dir, selected_adapters=[adapter_name])
  model.config.save_pretrained(adapter_dir)
  tokenizer.save_pretrained(adapter_dir)

  # Log in explicitly only when a token is provided through the environment;
  # otherwise rely on credentials cached by an earlier login instead of
  # triggering an interactive prompt with `token=None`.
  token = os.environ.get("HF_TOKEN")
  if token:
    login(token=token)

  repo_id = f"ahmedashrafay/{repo_name}"

  # Make sure the target repository exists; a no-op when it already does.
  create_repo(repo_id, exist_ok=True)

  # Push to Hub
  upload_folder(
      folder_path=adapter_dir,
      repo_id=repo_id,
      commit_message=f"Upload {adapter_name} files",
  )



# Merging models with different adapters masked

In [None]:
# Collect Python garbage, then release cached CUDA memory, before merging.
gc.collect()
torch.cuda.empty_cache()

# Leave-one-out merge: every adapter except "python".
method = "dare_ties"
try:
    equal_merge_model(
        method,
        "dare_ties_minus_python",
        adapters=["cpp", "java", "javascript"],
    )
except Exception as err:
    # Report the failure without aborting the notebook run.
    print("Exception: ", err, sep="\n")


In [None]:
# Collect Python garbage, then release cached CUDA memory, before merging.
gc.collect()
torch.cuda.empty_cache()

# Leave-one-out merge: every adapter except "java".
method = "dare_ties"
try:
    equal_merge_model(
        method,
        "dare_ties_minus_java",
        adapters=["cpp", "python", "javascript"],
    )
except Exception as err:
    # Report the failure without aborting the notebook run.
    print("Exception: ", err, sep="\n")


In [None]:
# Collect Python garbage, then release cached CUDA memory, before merging.
gc.collect()
torch.cuda.empty_cache()

# Leave-one-out merge: every adapter except "cpp".
method = "dare_ties"
try:
    equal_merge_model(
        method,
        "dare_ties_minus_cpp",
        adapters=["java", "python", "javascript"],
    )
except Exception as err:
    # Report the failure without aborting the notebook run.
    print("Exception: ", err, sep="\n")


In [None]:
# Collect Python garbage, then release cached CUDA memory, before merging.
gc.collect()
torch.cuda.empty_cache()

# Leave-one-out merge: every adapter except "javascript".
method = "dare_ties"
try:
    equal_merge_model(
        method,
        "dare_ties_minus_javascript",
        adapters=["java", "python", "cpp"],
    )
except Exception as err:
    # Report the failure without aborting the notebook run.
    print("Exception: ", err, sep="\n")


In [None]:
# Publish each leave-one-out merge to its own Hub repository, in the same order
# as before: (adapter name on the model, target repository name).
merged_adapter_repos = (
    ("dare_ties_minus_javascript", "staradapters-dare-ties-minus-js"),
    ("dare_ties_minus_cpp", "staradapters-dare-ties-minus-cpp"),
    ("dare_ties_minus_java", "staradapters-dare-ties-minus-java"),
    ("dare_ties_minus_python", "staradapters-dare-ties-minus-python"),
)
for merged_adapter, target_repo in merged_adapter_repos:
    save_model(merged_adapter, target_repo)

adapter_model.safetensors:   0%|          | 0.00/29.4M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/29.4M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/58.8M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/29.4M [00:00<?, ?B/s]