# implement LoRA for a linear layer in PyTorch

In [None]:
import torch
import torch.nn as nn
import math

class LoRALinear(nn.Module):
    def __init__(self, orig_linear: nn.Linear, r: int = 8, alpha: float = 1.0):
        """
        Wraps a given linear layer with LoRA adaptation.
        orig_linear: an existing nn.Linear layer from a pre-trained model (weights frozen).
        r: rank of the LoRA adapters.
        alpha: scaling factor for LoRA (often set such that alpha/r is 1).
        """
        super().__init__()
        self.in_features = orig_linear.in_features
        self.out_features = orig_linear.out_features
        # Freeze original weight and bias
        self.weight = nn.Parameter(orig_linear.weight.data, requires_grad=False)
        if orig_linear.bias is not None:
            self.bias = nn.Parameter(orig_linear.bias.data, requires_grad=False)
        else:
            self.bias = None
        # LoRA low-rank matrices
        self.r = r
        self.alpha = alpha
        # "Down" projection: reduces dimension from in_features to r
        self.lora_down = nn.Parameter(torch.zeros((r, self.in_features)))
        # "Up" projection: increases dimension from r to out_features
        self.lora_up   = nn.Parameter(torch.zeros((self.out_features, r)))
        # Initialize LoRA weights: usually lora_down random, lora_up zero
        nn.init.kaiming_uniform_(self.lora_down, a=math.sqrt(5))  # He init for down-proj
        nn.init.zeros_(self.lora_up)  # start with no effect
        # Note: starting with lora_up = 0 means initially the LoRA doesn't change the output
        # (since lora_down * 0 = 0), so the model starts exactly like the pre-trained one.
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Compute base linear output (no grad, since weight is frozen)
        # x shape: [batch, in_features]
        # weight shape: [out_features, in_features]
        result = torch.matmul(x, self.weight.T)
        # Compute LoRA adaptation: (x * lora_down^T) * lora_up^T scaled by alpha/r
        # lora_down^T shape: [in_features, r], lora_up^T: [r, out_features]
        lora_out = x @ self.lora_down.T    # shape: [batch, r]
        lora_out = lora_out @ self.lora_up.T  # shape: [batch, out_features]
        # Scale the LoRA output
        result += lora_out * (self.alpha / self.r)
        # Add bias if present
        if self.bias is not None:
            result += self.bias
        return result

# Example usage:
# Suppose we have a GPT-2 model and want to apply LoRA to its first fully-connected layer
from transformers import GPT2Model
model = GPT2Model.from_pretrained('gpt2')
# Pick a linear layer from the model, e.g., the feed-forward layer in the first Transformer block
orig_linear = model.h[0].mlp.c_fc  # (Assume c_fc is a nn.Linear in GPT2 block 0)
# Replace it with a LoRA-wrapped layer
model.h[0].mlp.c_fc = LoRALinear(orig_linear, r=8, alpha=8)
# Now model.h[0].mlp.c_fc will only train the LoRA params. Freeze others as needed.


AttributeError: 'Conv1D' object has no attribute 'in_features'

In [10]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model

# Load pretrained GPT-2
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define LoRA configuration
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["c_fc", "c_proj"]  # These are Conv1D layers in GPT-2
)

# Apply LoRA using PEFT
model = get_peft_model(model, lora_config)

# Optional: See trainable parameters
model.print_trainable_parameters()


'NoneType' object has no attribute 'cadam32bit_grad_fp32'
trainable params: 884,736 || all params: 125,324,544 || trainable%: 0.7060


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [11]:
input_text = "Why is LoRA useful in large language models?"
inputs = tokenizer(input_text, return_tensors="pt")
outputs = model(**inputs, labels=inputs["input_ids"])

print(f"Loss: {outputs.loss.item():.4f}")


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Loss: 6.5681


In [13]:

# Generate output
model.eval()
with torch.no_grad():
    generation = model.generate(
        input_ids=inputs["input_ids"],
        max_new_tokens=50,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )

# Decode and print
output_text = tokenizer.decode(generation[0], skip_special_tokens=True)
print("🧠 GPT-2 with LoRA says:\n")
print(output_text)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


🧠 GPT-2 with LoRA says:

Why is LoRA useful in large language models?

LOORA is used to represent an abstract class with a structure that is not bound by a reference to another class, such as a dictionary. LoRA is typically used in a type system like the Python language, and it is possible to
