# A Basic LoRA Implementation

This notebook contains code adapted from [LLMs-from-scratch](https://github.com/rasbt/LLMs-from-scratch)

Copyright 2023-2025 Sebastian Raschka
Licensed under the Apache License, Version 2.0

For the full license text, see: https://github.com/rasbt/LLMs-from-scratch?tab=License-1-ov-file#readme

In [None]:
# install dependencies
%pip install torch -q


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
import math

import torch
from torch.nn import Module


class LoRALayer(Module):
    """A basic LoRA adapter that computes ``alpha * (x @ A @ B)``.

    ``A`` has shape ``(d_in, rank)`` and is initialized with Kaiming-uniform
    (``a=sqrt(5)``). ``B`` has shape ``(rank, d_out)`` and starts as all
    zeros, so a freshly constructed layer maps any finite input to zeros and
    leaves the output of the layer it is added to unchanged.

    Args:
        d_in: Size of the last dimension of the input.
        d_out: Size of the last dimension of the output.
        rank: Inner dimension shared by ``A`` and ``B``.
        alpha: Multiplier applied to the low-rank product. The default of
            ``1.0`` reproduces the unscaled ``x @ A @ B``. It is stored as a
            plain attribute (not a parameter or buffer), so ``state_dict()``
            still contains only ``A`` and ``B``.
    """

    def __init__(self, d_in: int, d_out: int, rank: int, alpha: float = 1.0):
        super().__init__()
        self.A = torch.nn.Parameter(torch.empty(d_in, rank))
        # Random init for A; together with the zero init of B below, the
        # product A @ B is exactly zero until training updates B.
        torch.nn.init.kaiming_uniform_(self.A, a=math.sqrt(5))
        self.B = torch.nn.Parameter(torch.zeros(rank, d_out))
        self.alpha = alpha

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the scaled low-rank update for ``x`` (last dim ``d_in``)."""
        return self.alpha * (x @ self.A @ self.B)

  cpu = _conversion_method_template(device=torch.device("cpu"))


In [None]:
class LinearWithLoRA(torch.nn.Module):
    """Wrap a ``torch.nn.Linear`` layer and add a LoRA update to its output.

    The wrapped layer is stored by reference (it is not copied), and a
    ``LoRALayer`` with matching input/output sizes is created next to it.
    The forward pass returns ``linear(x) + lora(x)``.

    Args:
        linear: The linear layer to wrap.
        rank: Rank passed to the ``LoRALayer``.
    """

    def __init__(self, linear: torch.nn.Linear, rank: int):
        super().__init__()
        self.linear = linear
        self.lora = LoRALayer(
            d_in=linear.in_features, d_out=linear.out_features, rank=rank
        )
        # LoRALayer allocates A and B without an explicit device or dtype, so
        # they land on the defaults. Move them next to the wrapped weight so
        # that wrapping a layer that already lives on another device or in a
        # different floating-point precision does not fail in forward().
        weight = linear.weight
        self.lora.to(
            device=weight.device,
            # Module.to only accepts floating/complex dtypes; keep the
            # adapter's dtype if the wrapped weight is not floating point.
            dtype=weight.dtype if weight.is_floating_point() else None,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the wrapped layer's output plus the LoRA update for ``x``."""
        return self.linear(x) + self.lora(x)

Let's do a demo forward pass for both `Linear` and `LinearWithLoRA`.

In [None]:
# Fix the global RNG seed so the randomly initialized weights of `linear` and
# of the LoRA matrix A (and therefore the tensors displayed in the cells below)
# are reproducible under Restart Kernel -> Run All.
torch.manual_seed(123)

# Demo input: a single batch holding two 5-dimensional vectors.
x = torch.tensor([[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]], dtype=torch.float32)
print(x.shape)

# The plain linear layer and the same layer wrapped with a rank-2 LoRA adapter.
# The wrapper keeps a reference to `linear` (no copy), so both share weights.
linear = torch.nn.Linear(in_features=5, out_features=5)
linear_with_lora = LinearWithLoRA(linear=linear, rank=2)

torch.Size([1, 2, 5])


In [None]:
# Forward pass through the plain `torch.nn.Linear` layer. This is the baseline
# output that the LoRA-wrapped layer is compared against.
linear(x)

tensor([[[ 1.0671,  2.3147, -0.5288,  2.4322, -1.4214],
         [ 3.9911,  4.9642,  1.0640,  5.9899, -3.5283]]],
       grad_fn=<ViewBackward0>)

In [None]:
# Forward pass through the LoRA-wrapped layer.
# NOTE: this should be the same as `linear(x)`, because the LinearWithLoRA
# layer has only just been initialized and B starts as a zero matrix, so the
# low-rank term x @ A @ B contributes nothing. Once fine-tuning updates the
# LoRA weights, the two outputs will no longer be the same.
linear_with_lora(x)

tensor([[[ 1.0671,  2.3147, -0.5288,  2.4322, -1.4214],
         [ 3.9911,  4.9642,  1.0640,  5.9899, -3.5283]]],
       grad_fn=<AddBackward0>)

In [None]:
# A's weights: shape (d_in, rank) = (5, 2), randomly initialized with
# Kaiming-uniform, so the entries are non-zero.
linear_with_lora.lora.A

Parameter containing:
tensor([[-0.0655,  0.2380],
        [-0.2090,  0.6242],
        [-0.2263,  0.4789],
        [ 0.4980,  0.2259],
        [ 0.2017, -0.4462]], requires_grad=True)

In [None]:
# B's weights: shape (rank, d_out) = (2, 5), initialized to all zeros, which is
# why the LoRA branch adds nothing to the output before any training.
linear_with_lora.lora.B

Parameter containing:
tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]], requires_grad=True)