<a href="https://colab.research.google.com/github/cubiwan/colabs_IA/blob/main/Mergekit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Ejemplo de uso de [mergekit](https://github.com/cg123/mergekit)**

In [None]:
# Fetch the mergekit sources into ./mergekit
!git clone https://github.com/cg123/mergekit.git
# Work from inside the clone: the relative paths used later in the notebook
# (./config.yml, ./merged) resolve against this directory
%cd mergekit
# Editable install of the package found in the current directory
%pip install -e .


## Ficheros de configuración:


In [2]:
import yaml

# passthrough - add layers: each slice contributes the listed layer range of
# one model. Top-level keys appear exactly once (duplicate mapping keys are
# not valid YAML) and follow the same layout as the linear config below.
yaml_config_passthrough = """
slices:
  - sources:
    - model: rhysjones/phi-2-orange
      layer_range: [0, 32]
  - sources:
    - model: cognitivecomputations/dolphin-2_6-phi-2
      layer_range: [0, 32]
merge_method: passthrough
dtype: bfloat16
"""

# linear - blend layers: each model carries its own weight parameter.
yaml_config_linear = """
models:
  - model: rhysjones/phi-2-orange
    parameters:
      weight: 1.0
  - model: cognitivecomputations/dolphin-2_6-phi-2
    parameters:
      weight: 0.8
merge_method: linear
dtype: float16
"""



# Configuración

In [3]:
# Locations on disk
CONFIG_YML = "./config.yml"  # merge configuration file read by the merge step
#CONFIG_YML = "./examples/gradient-slerp.yml"  # alternative merge configuration file
OUTPUT_PATH = "./merged"  # folder that receives the merge result
LORA_MERGE_CACHE = "/tmp"  # point elsewhere if these files should be kept

# Options handed to the merge step
COPY_TOKENIZER = True  # also copy a tokenizer into the output
LAZY_UNPICKLE = True  # experimental low-memory model loader
LOW_CPU_MEMORY = True  # intended for machines with more VRAM than RAM+swap
OUT_SHARD_SIZE = "1B"  # size string, converted with parse_kmb in the merge step

# Recipe to run: keep exactly one of these two lines active
yaml_config = yaml_config_linear  # linear blend
#yaml_config = yaml_config_passthrough  # frankenmerging

# Persist the selected recipe as a YAML file
with open("./config.yml", mode="w", encoding="utf-8") as config_file:
    config_file.write(yaml_config)

# Mezcla:

In [None]:
# actually do merge
import torch


from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge
from mergekit.common import parse_kmb


# Read the YAML recipe from disk, then validate it into a merge configuration
with open(CONFIG_YML, mode="r", encoding="utf-8") as config_file:
    raw_config = yaml.safe_load(config_file)
merge_config = MergeConfiguration.model_validate(raw_config)

# Gather every merge option from the constants set in the configuration cell
merge_options = MergeOptions(
    lora_merge_cache=LORA_MERGE_CACHE,
    cuda=torch.cuda.is_available(),  # use CUDA only when torch reports it available
    copy_tokenizer=COPY_TOKENIZER,
    lazy_unpickle=LAZY_UNPICKLE,
    low_cpu_memory=LOW_CPU_MEMORY,
    out_shard_size=parse_kmb(OUT_SHARD_SIZE),  # size string -> number
    trust_remote_code=True,
)

# Run the merge and write the result under OUTPUT_PATH
run_merge(merge_config, out_path=OUTPUT_PATH, options=merge_options)
print("Done!")

# Probar el modelo

In [None]:
!pip install transformers bitsandbytes>=0.39.0 -q
!pip install einops

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Folder holding the merged model
model_path = "./merged"

# Load the merged weights; device_map="auto" leaves device placement to the library
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
# Load the tokenizer from the same folder
tokenizer = AutoTokenizer.from_pretrained(model_path)


In [None]:
# Smoke test: generate a continuation for a short prompt.
# The inputs go to the device the model was loaded on rather than a
# hard-coded "cuda", so the cell also runs on a runtime without a GPU.
model_inputs = tokenizer(["A list of colors: red, blue"], return_tensors="pt").to(model.device)
generated_ids = model.generate(**model_inputs)
# Decode the first (only) sequence; as the last expression it is shown as the cell output
tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]