# Implement LoRA layer

In [1]:
import math
import torch
from torch import nn

class LoRALayer(nn.Module):
    """Low-rank adapter that computes ``alpha * (x @ A @ B)``.

    ``A`` has shape ``(in_dim, rank)`` and ``B`` has shape ``(rank, out_dim)``.
    ``B`` starts at zero, so a freshly created layer outputs zeros and does not
    change the output of whatever it is added to until training updates it.

    Args:
        in_dim: Size of the last dimension of the input ``x``.
        out_dim: Size of the last dimension of the output.
        rank: Inner dimension of the low-rank factorisation.
        alpha: Scalar multiplied onto the low-rank product. It is applied
            as-is; it is NOT divided by ``rank`` (the LoRA paper scales by
            ``alpha / r``), so choose ``alpha`` with that in mind.
    """

    def __init__(self, in_dim, out_dim, rank, alpha):
        super().__init__()
        self.A = torch.nn.Parameter(torch.empty(in_dim, rank))
        # Follow https://github.com/microsoft/LoRA/blob/main/loralib/layers.py#L124
        # The reference stores A as (rank, in_features), so its fan is
        # in_features. Our A is transposed to (in_dim, rank); `mode='fan_out'`
        # selects size(0) == in_dim and gives the same bound, 1 / sqrt(in_dim),
        # instead of the much larger 1 / sqrt(rank).
        torch.nn.init.kaiming_uniform_(self.A, a=math.sqrt(5), mode='fan_out')
        self.B = torch.nn.Parameter(torch.zeros(rank, out_dim))
        self.alpha = alpha

    def forward(self, x):
        """Return the scaled low-rank update ``alpha * (x @ A @ B)``."""
        return self.alpha * (x @ self.A @ self.B)


class LinearWithLoRA(torch.nn.Module):
    """Wrap a linear layer and add a LoRA update to its output.

    ``forward(x)`` returns ``linear(x) + lora(x)``. The wrapped ``linear`` is
    stored as-is; its ``requires_grad`` flags are not touched here.

    Args:
        linear: Layer to wrap. Must expose ``in_features``, ``out_features``
            and ``weight`` (as ``torch.nn.Linear`` does).
        rank: Rank passed to the LoRA adapter.
        alpha: Scaling factor passed to the LoRA adapter.
    """

    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        self.lora = LoRALayer(
            linear.in_features, linear.out_features, rank, alpha
        )
        # The adapter is created on CPU in float32. Align it with the wrapped
        # layer so the sum in `forward` also works when the model is already
        # on an accelerator or in another floating-point precision.
        weight = linear.weight
        if weight.is_floating_point():
            self.lora.to(device=weight.device, dtype=weight.dtype)
        else:
            # Module.to() rejects non-floating dtypes; only move the device.
            self.lora.to(device=weight.device)

    def forward(self, x):
        """Return the wrapped layer's output plus the LoRA update."""
        return self.linear(x) + self.lora(x)

# Function to replace `Linear` layers with `LinearWithLoRA` layers

In [2]:
import torch

def replace_linear_with_lora(model, rank, alpha):
    """Recursively replace every ``torch.nn.Linear`` child with ``LinearWithLoRA``.

    The module tree is modified in place and nothing is returned. Only
    children are replaced: if ``model`` itself is a ``torch.nn.Linear`` it is
    left untouched.

    The function is idempotent. Layers that are already wrapped are skipped,
    so re-running the cell does not stack a second adapter around the inner
    ``linear`` of an existing ``LinearWithLoRA``.

    Args:
        model: Root module whose descendants are scanned.
        rank: Rank passed to each ``LinearWithLoRA``.
        alpha: Scaling factor passed to each ``LinearWithLoRA``.
    """
    if isinstance(model, LinearWithLoRA):
        # Already adapted; descending would wrap its inner `linear` again.
        return

    # Snapshot the children because attributes on `model` are reassigned below.
    for name, module in list(model.named_children()):
        if isinstance(module, torch.nn.Linear):
            setattr(model, name, LinearWithLoRA(module, rank, alpha))
        else:
            # Not a Linear layer: look for Linear layers further down the tree.
            replace_linear_with_lora(module, rank, alpha)

# Apply LoRA Layers

In [3]:
from transformers import AutoModel

In [4]:
# Load the pretrained `microsoft/deberta-v3-large` checkpoint through transformers' AutoModel.
model = AutoModel.from_pretrained('microsoft/deberta-v3-large')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


## Calculate total number of trainable parameters of the original model

In [5]:
# Baseline: number of parameter elements that currently require gradients
# (measured before anything is frozen).
total_params_no_lora = sum(
    param.numel()
    for param in model.parameters()
    if param.requires_grad
)
total_params_no_lora

434012160

## Freeze All Layers Before Applying LoRA Layers

In [6]:
# Turn off gradient tracking for every existing parameter of the model.
for frozen_param in model.parameters():
    frozen_param.requires_grad = False

In [7]:
# Sanity check: count the parameter elements that still require gradients.
total_trainable_params = sum(
    param.numel()
    for param in model.parameters()
    if param.requires_grad
)
total_trainable_params

0

## Replace Linear Layers

In [8]:
# Wrap the Linear layers of the frozen model in place, using rank 8 and alpha 16.
# The A/B matrices created here are new parameters and therefore trainable.
replace_linear_with_lora(model=model, rank=8, alpha=16)

In [9]:
# Number of parameter elements that require gradients after the LoRA replacement.
total_params_with_lora = sum(
    param.numel()
    for param in model.parameters()
    if param.requires_grad
)
total_params_with_lora

3538944

In [10]:
# Percentage reduction in trainable parameters relative to the unfrozen baseline.
(total_params_no_lora - total_params_with_lora) / total_params_no_lora * 100

99.1845979614949

Thus, applying LoRA reduces the number of trainable parameters by approximately 99.18%.

By applying LoRA, we train only about 0.8% of the original number of parameters (3,538,944 instead of 434,012,160). This reduction of approximately 99.18% in trainable parameters means that gradients and optimizer states are needed only for the small LoRA matrices, so fine-tuning requires significantly less memory and computation than updating the full model.