# Low-Rank Adaptation (LoRA)
This notebook applies low-rank adaptation (LoRA) to a model of your choice using the [Parameter-Efficient Fine-Tuning (PEFT) library developed by Hugging Face](https://huggingface.co/docs/peft/index).



1. Apply LoRA to a model
1. Fine-tune on the provided dataset
1. Save the model
1. Conduct inference using the fine-tuned model

In [None]:
%pip install peft==0.4.0

[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m
Collecting peft==0.4.0
  Using cached peft-0.4.0-py3-none-any.whl (72 kB)
Collecting safetensors
  Using cached safetensors-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Installing collected packages: safetensors, peft
Successfully installed peft-0.4.0 safetensors-0.3.2
[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m


In [None]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

# One checkpoint name is used for both the tokenizer and the base causal-LM weights.
model_name = "bigscience/bloomz-560m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
foundation_model = AutoModelForCausalLM.from_pretrained(model_name)


def tokenize_quotes(samples):
    """Tokenize the ``quote`` column of a batch of dataset rows."""
    return tokenizer(samples["quote"])


# Load the quotes dataset and tokenize it batch-wise in one chained expression.
data = load_dataset(
    "Abirate/english_quotes",
    cache_dir=DA.paths.datasets + "/datasets",
).map(tokenize_quotes, batched=True)

# Keep only the first 50 training rows for fine-tuning.
train_sample = data["train"].select(range(50))
display(train_sample)

Found cached dataset json (/dbfs/mnt/dbacademy-datasets/llm-foundation-models/v01-raw/datasets/Abirate___json/Abirate--english_quotes-6e72855d06356857/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached processed dataset at /dbfs/mnt/dbacademy-datasets/llm-foundation-models/v01-raw/datasets/Abirate___json/Abirate--english_quotes-6e72855d06356857/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-768c685e1cb83483.arrow


Dataset({
    features: ['quote', 'author', 'tags', 'input_ids', 'attention_mask'],
    num_rows: 50
})

In [None]:

import peft
from peft import LoraConfig, get_peft_model

# LoRA hyperparameters for the adapter that is applied to the base model.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["query_key_value"],  # names of the modules that receive LoRA weights
    r=1,  # rank of the low-rank update matrices
    lora_alpha=1,  # scaling factor that adjusts the magnitude of the LoRA update
    lora_dropout=0.05,
    bias="none",  # controls whether bias parameters are trained; "none" trains no biases
)

In [None]:

# Wrap the base model with LoRA adapters as described by `lora_config`.
peft_model = get_peft_model(foundation_model, lora_config)

# `print_trainable_parameters()` prints its own summary and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
peft_model.print_trainable_parameters()

trainable params: 98,304 || all params: 559,312,896 || trainable%: 0.01757585078102687
None


## Configure the `Trainer` for fine-tuning

In [None]:

import transformers
from transformers import TrainingArguments, Trainer
import os

# Directory handed to the Trainer as its output location.
output_directory = os.path.join(DA.paths.working_dir, "peft_lab_outputs")

training_args = TrainingArguments(
    output_dir=output_directory,
    num_train_epochs=1,
    learning_rate=3e-2,  # Higher learning rate than full fine-tuning.
    auto_find_batch_size=True,
    no_cuda=True,  # do not use CUDA for this run
)





# Language-modeling collator with masked-LM disabled (mlm=False), i.e. causal LM batches.
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

# Fine-tune the LoRA-wrapped model on the 50-row training sample.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_sample,
    data_collator=causal_lm_collator,
)
trainer.train()

Step,Training Loss


TrainOutput(global_step=7, training_loss=4.56964956011091, metrics={'train_runtime': 140.7924, 'train_samples_per_second': 0.355, 'train_steps_per_second': 0.05, 'total_flos': 12037198331904.0, 'train_loss': 4.56964956011091, 'epoch': 1.0})

## Save and load the model

In [None]:
import time

# The current timestamp is embedded in the save directory name.
time_now = time.time()
peft_model_path = os.path.join(output_directory, f"peft_model_{time_now}")

# NOTE(review): `username` is not referenced by any cell visible here; confirm whether it is still needed.
username = spark.sql("SELECT CURRENT_USER").first()[0]

# Persist the fine-tuned adapter held by the trainer.
trainer.model.save_pretrained(peft_model_path)

In [None]:

from peft import PeftModel, PeftConfig

# `get_peft_model` injected the LoRA layers into `foundation_model` in place, so
# reload a clean copy of the base checkpoint and attach the saved adapter to
# that. This way the load exercises the files written to `peft_model_path`
# rather than the weights already held in memory.
base_model = AutoModelForCausalLM.from_pretrained(model_name)
loaded_model = PeftModel.from_pretrained(
    base_model,
    peft_model_path,
    is_trainable=False,  # adapter is loaded for inference only
)

## Inference

In [1]:

# Generate with the adapter that was reloaded from disk (`loaded_model`), so this
# cell checks the saved fine-tuned model instead of the in-memory training object.
inputs = tokenizer("Two things are infinite: ", return_tensors="pt")
outputs = loaded_model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=5,  # keep the continuation short
)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))

NameError: ignored