In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Synthetic regression problem: 100 samples with 10 features each.
torch.manual_seed(42)  # fixed seed so every tensor drawn below is reproducible
X = torch.randn(100, 10)

# Targets are a noisy linear function of X. The draw order (X, then the weights,
# then the noise) is kept so the seeded random stream yields the same tensors.
true_W = torch.randn(10, 1)
noise = torch.randn(100, 1)
y = (X @ true_W) * 2 + noise * 0.5

In [3]:
class SimpleModel(nn.Module):
    """Bias-free linear map ``x -> x @ W`` with one trainable weight matrix.

    Args:
        input_dim: Number of rows of ``W`` (features per sample).
        output_dim: Number of columns of ``W`` (outputs per sample).
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # W has shape (input_dim, output_dim) and starts from standard-normal values.
        initial_weight = torch.randn(input_dim, output_dim)
        self.W = nn.Parameter(initial_weight)

    def forward(self, x):
        """Return the matrix product of ``x`` and ``W``."""
        return torch.matmul(x, self.W)

In [4]:
# Baseline model: a single 10 x 1 weight matrix that is trained directly.
model = SimpleModel(
    input_dim=10,
    output_dim=1,
)

In [5]:
# Mean squared error, averaged over all elements (the default reduction, spelled out).
loss_fn = nn.MSELoss(reduction="mean")

In [6]:
def train_full_model(model, X, y, optimizer, loss_fn, num_epochs=100):
    """Run ``num_epochs`` full-batch gradient steps on ``model``.

    Args:
        model: Module called as ``model(X)`` to produce predictions.
        X: Input batch passed to the model on every step.
        y: Targets compared against the model output by ``loss_fn``.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the update.
        loss_fn: Callable ``loss_fn(output, y)`` returning a scalar loss.
        num_epochs: Number of optimisation steps to run.

    Returns:
        None. The model is updated in place through the optimizer.
    """
    model.train()
    for _ in range(num_epochs):
        # Clear gradients left over from the previous step before backpropagating.
        optimizer.zero_grad()
        loss_fn(model(X), y).backward()
        optimizer.step()

In [7]:
# Full fine-tuning baseline: plain SGD updates every entry of W directly.
optimizer = optim.SGD(params=model.parameters(), lr=1e-2)
train_full_model(model=model, X=X, y=y, optimizer=optimizer, loss_fn=loss_fn)

In [8]:
# Snapshot the fully fine-tuned weight W for the comparison further down.
# detach() comes first so the copy is a plain tensor: a bare clone() stays attached
# to model.W in the autograd graph and carries a grad_fn the snapshot never needs.
W_fine_tuned = model.W.detach().clone()

In [9]:
# LoRA setup: the weight update is factored into two low-rank matrices, A and B.
rank = 2  # inner dimension r shared by A (d_in x r) and B (r x d_out)

In [11]:
# Trainable LoRA factors; their product A @ B has the same 10 x 1 shape as the base weight.
A = nn.Parameter(torch.randn(size=(10, rank)), requires_grad=True)  # A: (d_in x r)
B = nn.Parameter(torch.randn(size=(rank, 1)), requires_grad=True)   # B: (r x d_out)

In [12]:
# Linear model whose effective weight is a frozen base matrix plus a low-rank update.
class LoRAModel(nn.Module):
    """Linear map ``x -> x @ (W_base + A @ B)`` with a frozen base weight.

    Only the low-rank factors ``A`` and ``B`` are meant to be trained. The base
    weight is stored as a parameter with ``requires_grad=False`` so that it
    receives no gradient and is skipped by any optimizer, even one built from
    ``model.parameters()``.

    Args:
        input_dim: Number of rows of the weight matrix (d_in).
        output_dim: Number of columns of the weight matrix (d_out).
        A: Low-rank factor of shape (d_in, r). Stored as given, so an
            ``nn.Parameter`` passed here is registered on the module.
        B: Low-rank factor of shape (r, d_out). Stored as given.
        W_base: Optional base weight of shape (d_in, d_out). A detached copy is
            stored. When omitted, the base weight is drawn from a standard
            normal distribution.

    Raises:
        ValueError: If ``W_base`` is given and its shape is not
            ``(input_dim, output_dim)``.
    """

    def __init__(self, input_dim, output_dim, A, B, W_base=None):
        super().__init__()
        self.A = A  # Low-rank matrix A (d_in x r)
        self.B = B  # Low-rank matrix B (r x d_out)

        if W_base is None:
            base = torch.randn(input_dim, output_dim)
        else:
            if tuple(W_base.shape) != (input_dim, output_dim):
                raise ValueError(
                    f"W_base must have shape ({input_dim}, {output_dim}), "
                    f"got {tuple(W_base.shape)}"
                )
            # Copy so later changes to the caller's tensor do not leak into this model.
            base = W_base.detach().clone()

        # Frozen base weight W (d_in x d_out): kept in the state dict but never trained.
        self.W_base = nn.Parameter(base, requires_grad=False)

    def forward(self, x):
        """Return ``x @ W'`` where ``W' = W_base + A @ B``."""
        # A is (d_in, r) and B is (r, d_out), so A @ B matches W_base without a transpose.
        W_prime = self.W_base + torch.matmul(self.A, self.B)
        return x @ W_prime

In [13]:
# Build the LoRA model around the factors A and B created above.
lora_model = LoRAModel(
    input_dim=10,
    output_dim=1,
    A=A,
    B=B,
)

In [14]:
# Training loop for the LoRA model; which tensors get updated is decided by the optimizer.
def train_lora_model(model, X, y, optimizer, loss_fn, num_epochs=100):
    """Optimise ``model`` on the full batch ``(X, y)`` for ``num_epochs`` steps.

    Args:
        model: Module called as ``model(X)``.
        X: Inputs fed to the model at every step.
        y: Targets passed to ``loss_fn`` together with the model output.
        optimizer: Optimizer holding the tensors to update.
        loss_fn: Callable ``loss_fn(output, y)`` returning a scalar loss.
        num_epochs: Number of optimisation steps.

    Returns:
        None. Updates happen in place via ``optimizer.step()``.
    """
    model.train()
    for _step in range(num_epochs):
        optimizer.zero_grad()  # drop gradients from the previous step
        predictions = model(X)
        step_loss = loss_fn(predictions, y)
        step_loss.backward()
        optimizer.step()

In [15]:
# Optimise only the two low-rank factors; W_base is not handed to the optimizer,
# so SGD never updates it.
optimizer = optim.SGD(params=[lora_model.A, lora_model.B], lr=1e-2)
train_lora_model(model=lora_model, X=X, y=y, optimizer=optimizer, loss_fn=loss_fn)


In [16]:
# Effective LoRA weight after training: W' = W_base + A @ B.
# Computed under no_grad so the stored result is a plain tensor; the bare
# expression would record a new autograd node and leave a grad_fn on the result.
with torch.no_grad():
    W_prime_fine_tuned = lora_model.W_base + torch.matmul(lora_model.A, lora_model.B)

In [17]:
# Show both learned weight matrices side by side.
# The LoRA label reads "A @ B" because B is already (r x d_out); no transpose is applied.
print("Fine-tuned W from full model:\n", W_fine_tuned)
print("Fine-tuned W' from LoRA model (W_base + A @ B):\n", W_prime_fine_tuned)

Fine-tuned W from full model:
 tensor([[ 0.7774],
        [ 1.6432],
        [ 0.0985],
        [ 1.9516],
        [ 0.3730],
        [ 2.6920],
        [-5.2525],
        [-4.1507],
        [ 0.0789],
        [-0.0585]], grad_fn=<CloneBackward0>)
Fine-tuned W' from LoRA model (W_base + AB^T):
 tensor([[ 0.5844],
        [ 1.4381],
        [-0.4161],
        [ 2.4815],
        [ 0.5286],
        [ 3.0505],
        [-6.1518],
        [-4.9549],
        [ 0.3103],
        [-0.0373]], grad_fn=<AddBackward0>)


In [18]:
# Check how close the two learned weights are: Frobenius norm of their difference.
# torch.norm is deprecated, so the maintained torch.linalg.matrix_norm is used with an
# explicit ord="fro". no_grad keeps this evaluation-only metric out of the autograd graph.
with torch.no_grad():
    difference = torch.linalg.matrix_norm(W_fine_tuned - W_prime_fine_tuned, ord="fro")
print(f"Difference between fine-tuned W and W' (Frobenius norm): {difference.item()}")

Difference between fine-tuned W and W' (Frobenius norm): 1.5123080015182495
