## PEFT Model Merging - Multi Tasking from the same Base Model with specific adapters

In [1]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
import torch
import random

from peft import PeftConfig, PeftModel
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset



In [3]:
peft_model_id = "smangrul/tinyllama_lora_norobots"
device = "cuda"

In [4]:
config = PeftConfig.from_pretrained(peft_model_id)

In [5]:
config.base_model_name_or_path

'TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T'

In [6]:
base_model_path = 'TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T'

In [7]:
base_model = AutoModelForCausalLM.from_pretrained(base_model_path, load_in_4bit=True, device_map="auto")

In [8]:
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

In [9]:
base_model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear4bit(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear4bit(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMS

In [10]:
messages = [
    {"role": "user", "content": "Write an essay about Generative AI."},
]
text = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
inputs = tokenizer(text, return_tensors="pt")  # , add_special_tokens=False)
inputs = {k: v.to("cuda") for k, v in inputs.items()}
outputs = base_model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=True,
    top_p=0.95,
    temperature=0.2,
    repetition_penalty=1.2,
    eos_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0]))


No chat template is defined for this tokenizer - using the default template for the LlamaTokenizerFast class. If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.



<s><s> [INST] Write an essay about Generative AI. [/INST]
Write a paper on the following topic: "The Future of Artificial Intelligence" [FUTURE] Write a paper on the following topic: "The Future of Artificial Intelligence". [/FUTURE]
[INTELLECTUAL PROPERTY] Write a paper on the following topic: "Intellectual Property and its Impact on Society" [IP] Write a paper on the following topic: "Intellectual Property and its Impact on Society". [/IP]
[ECONOMICS] Write a paper on the following topic: "How to Make Money in Economics?" [ECON] Write a paper on the following topic: "How to Make Money in Economics?". [/ECON]
[MATHEMATICS] Write a paper on the following topic: "Mathematical Problems for Students" [MATH] Write a paper on the following topic: "Mathematical Problems for Students". [/MATH]
[PHYSICS] Write a paper on the following topic: "Physics Problems" [PHY] Write a paper on the following topic: "Physics Problems".


In [11]:
messages = [
    {"role": "system", "content": "Create a text ad given the following product and description."},
    {
        "role": "user",
        "content": "Product: Sony PS5 PlayStation Console\nDescription: The PS5™ console unleashes new gaming possibilities that you never anticipated.",
    },
]
text = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
inputs = tokenizer(text, return_tensors="pt")  # , add_special_tokens=False)
inputs = {k: v.to("cuda") for k, v in inputs.items()}
outputs = base_model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    top_p=0.95,
    temperature=0.2,
    repetition_penalty=1.2,
    eos_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0]))

<s><s> [INST] <<SYS>>
Create a text ad given the following product and description.
<</SYS>>

Product: Sony PS5 PlayStation Console
Description: The PS5™ console unleashes new gaming possibilities that you never anticipated. [/INST]

The above example is from an actual email I received from Microsoft, but it's pretty much what I was looking for in terms of formatting. 
I tried to use the same format as this one (https://www.microsoft.com/en-us/p/sonyps5console/9nblggh14062) but it didn't work out so well.
Any help would be appreciated!
</s>


In [12]:
text = """Table: 2-11365528-2
Columns: ['Team', 'Head Coach', 'President', 'Home Ground', 'Location']
Natural Query: Who is the Head Coach of the team whose President is Mario Volarevic?
SQL Query:"""

inputs = tokenizer(text, return_tensors="pt")  # , add_special_tokens=False)
inputs = {k: v.to("cuda") for k, v in inputs.items()}
outputs = base_model.generate(
    **inputs, max_new_tokens=64, repetition_penalty=1.1, eos_token_id=tokenizer("</s>").input_ids[-1]
)
print(tokenizer.decode(outputs[0]))

<s> Table: 2-11365528-2
Columns: ['Team', 'Head Coach', 'President', 'Home Ground', 'Location']
Natural Query: Who is the Head Coach of the team whose President is Mario Volarevic?
SQL Query: SELECT Team, Head Coach, President, Home Ground, Location FROM Teams WHERE Head Coach = Mario Volarevic

A: You can use a join to get all the columns you need.
SELECT t.Team, t.HeadCoach, t.President, t.HomeGround


### Merge Models

In [13]:
#config = PeftConfig.from_pretrained(robots_peft_model_id)

In [14]:
base_model_path = 'TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T'

In [15]:
base_model = AutoModelForCausalLM.from_pretrained(base_model_path, load_in_4bit=True, device_map="auto")

In [16]:
robots_peft_model_id = "smangrul/tinyllama_lora_norobots"

In [17]:
tokenizer = AutoTokenizer.from_pretrained(robots_peft_model_id)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [18]:
base_model.resize_token_embeddings(len(tokenizer))

Embedding(32005, 2048)

In [19]:
combined_peft_model = PeftModel.from_pretrained(base_model, robots_peft_model_id, adapter_name="norobots")

In [20]:
sql_peft_model_id = "smangrul/tinyllama_lora_sql"

In [21]:
adcopy_peft_model_id = "smangrul/tinyllama_lora_adcopy"

In [22]:
_ = combined_peft_model.load_adapter(sql_peft_model_id, adapter_name="sql")

In [23]:
_ = combined_peft_model.load_adapter(adcopy_peft_model_id, adapter_name="adcopy")

In [24]:
adapters = ["norobots", "adcopy", "sql"]
weights = [2.0, 0.3, 0.7]
adapter_name = "merge"
density = 0.2
combination_type = "ties"

In [25]:
if adapter_name in combined_peft_model.peft_config:
    print(f"deleting adapter {adapter_name}")
    model.delete_adapter(adapter_name)

In [27]:
combined_peft_model.add_weighted_adapter(adapters, weights, adapter_name, combination_type=combination_type, density=density)

In [28]:
combined_peft_model.eval()
combined_peft_model.set_adapter("merge")

In [29]:
messages = [
    {"role": "user", "content": "Write an essay about Generative AI."},
]
text = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
inputs = tokenizer(text, return_tensors="pt")  # , add_special_tokens=False)
inputs = {k: v.to("cuda") for k, v in inputs.items()}
outputs = combined_peft_model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=True,
    top_p=0.95,
    temperature=0.2,
    repetition_penalty=1.2,
    eos_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0]))

<s><|im_start|>user 
Write an essay about Generative AI.<|im_end|> 
<|im_start|>assistant 
Write a paper on the topic of your choice, but make sure it is relevant to the theme of this conference. 

### Submission Instructions
1. Please submit your submission through [this form](https://docs.google.com/forms/d/e/1FAIpiYXF76wv5Z2Q_K4qJ9G0jD38zR-MHVpNuEOgLmWkUyPxTtSsBcAiCfhbQnQa5l1rYoJO1i0YJY) by Monday, September 2nd at 11:59pm EST. If you are unable to complete the form in time for the deadline, please email <EMAIL> with the subject "Submission Extension". You will be required to provide proof that you have submitted the extension request before we can accept your submission.

2. The following instructions apply only if you are submitting a poster presentation. For oral presentations, see below.

#### Poster Presentation Guidelines
Posters must be double sided and should not exceed 


In [30]:
text = """Table: 2-11365528-2
Columns: ['Team', 'Head Coach', 'President', 'Home Ground', 'Location']
Natural Query: Who is the Head Coach of the team whose President is Mario Volarevic?
SQL Query:"""

inputs = tokenizer(text, return_tensors="pt")  # , add_special_tokens=False)
inputs = {k: v.to("cuda") for k, v in inputs.items()}
outputs = combined_peft_model.generate(
    **inputs, max_new_tokens=64, repetition_penalty=1.1, eos_token_id=tokenizer("</s>").input_ids[-1]
)
print(tokenizer.decode(outputs[0]))

<s> Table: 2-11365528-2
Columns: ['Team', 'Head Coach', 'President', 'Home Ground', 'Location']
Natural Query: Who is the Head Coach of the team whose President is Mario Volarevic?
SQL Query: SELECT Team, Head Coach, President, Home Ground, Location FROM Teams WHERE Head Coach = Mario Volarevic

A: You can use a join to get all the columns you need.
SELECT t.Team, t.HeadCoach, t.President, t.HomeGround
