In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_path = "facebook/opt-350m"
device = "cuda" if torch.cuda.is_available() else 'cpu'

In [7]:
# Loading existing model. Start small with llama2
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [10]:
text_input = "The movie was great and fun to watch"
inputs = tokenizer.encode(text_input, return_tensors='pt').to(device)
output = model.generate(inputs, max_new_tokens=25)

In [11]:
output

tensor([[    2,   133,  1569,    21,   372,     8,  1531,     7,  1183,     4,
            38,   437,  7785,    51,   222,    24,     4, 50118,   100,   437,
          7785,    51,   222,    24,     4,    38,   437,  7785,    51,   222,
            24,     4,    38,   437]], device='cuda:0')

In [13]:
decode_text = tokenizer.decode(output[0])

In [14]:
print(decode_text)

</s>The movie was great and fun to watch. I'm glad they did it.
I'm glad they did it. I'm glad they did it. I'm


In [None]:
lora_config = LoraConfig(
    target_modules=["q_proj", "k_proj"],
    init_lora_weights=False
)

model.add_adapter(lora_config, adapter_name="adapter_1")

In [22]:
# getting the adapters set
from peft import PeftConfig, PeftModel

adapter_model = "ybelkada/opt-350m-lora"

model = AutoModelForCausalLM.from_pretrained(model_path)
model = PeftModel.from_pretrained(model, adapter_model)

In [23]:
# merging and saving the models / adapters

model = model.merge_and_unload()
save_path = "/home/kamal/.cache/huggingface/merged_model"
model.save_pretrained(save_path) # will create the folder and save the model there