<a href="https://colab.research.google.com/github/camillan/llm/blob/main/LoRa_and_PEFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import get_peft_model, LoraConfig, TaskType

# Load a pretrained causal (next-token prediction) language model and its
# matching tokenizer. GPT-Neo 125M is small enough for a quick experiment.
# The checkpoint id is defined once so the model and tokenizer cannot drift apart.
MODEL_NAME = "EleutherAI/gpt-neo-125M"
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# LoRA config
config = LoraConfig(
    r=8,  # Rank of the low-rank update matrices. Smaller means fewer trainable parameters.
    lora_alpha=16,  # Scaling factor controlling how strongly the LoRA update affects the output
    target_modules=["q_proj", "v_proj"],  # Names of the submodules (query/value projections) LoRA is applied to
    lora_dropout=0.1,  # Dropout on the LoRA path, as regularization against overfitting
    bias="none",  # Leave the base model's bias terms untouched (not trained)
    task_type=TaskType.CAUSAL_LM,  # Tells PEFT which kind of model/task is being wrapped
)

# Inject LoRA into model
lora_model = get_peft_model(model, config)

# Only LoRA parameters will require gradients.
# print_trainable_parameters() writes the summary to stdout itself and returns
# None, so it must not be wrapped in print() (that would emit a stray "None").
lora_model.print_trainable_parameters()

trainable params: 294,912 || all params: 125,493,504 || trainable%: 0.2350
None
