# Problem 13: Large Model Size

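Demonstrates LoRA (Low-Rank Adaptation) for efficient fine-tuning of large models: the original weights stay frozen and only a pair of small low-rank adapter matrices is trained.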

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/gaurav-redhat/transformer_problems/blob/main/13_model_size/demo.ipynb)


In [None]:
!pip install torch -q
import torch
import torch.nn as nn

# Original large layer
class OriginalLinear(nn.Module):
    """Bias-free dense layer whose entire weight matrix is trainable.

    The weight is stored with shape ``(out_features, in_features)`` and is
    initialised from a standard normal distribution scaled by 0.01.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        initial_weight = torch.randn(out_features, in_features) * 0.01
        self.weight = nn.Parameter(initial_weight)

    def forward(self, x):
        """Multiply ``x`` by the transposed weight matrix."""
        return torch.matmul(x, self.weight.t())

# LoRA: Low-Rank Adaptation
class LoRALinear(nn.Module):
    """Linear layer with a frozen base weight plus a trainable low-rank update.

    The forward pass computes ``x @ W.T + scaling * (x @ A.T @ B.T)``. ``W``
    has ``requires_grad=False``; only ``A`` (``rank x in_features``) and ``B``
    (``out_features x rank``) are trainable. ``B`` starts at zero, so a freshly
    built layer returns exactly the frozen projection.

    Args:
        in_features: Size of the last dimension of the input.
        out_features: Size of the last dimension of the output.
        rank: Inner dimension of the low-rank factors ``A`` and ``B``.
        alpha: Optional LoRA scaling numerator. When given, the low-rank
            update is multiplied by ``alpha / rank``. The default ``None``
            leaves the update unscaled (factor 1.0).

    Raises:
        ValueError: If ``alpha`` is given while ``rank`` is not positive,
            because ``alpha / rank`` would be undefined or meaningless.
    """

    def __init__(self, in_features, out_features, rank=8, alpha=None):
        super().__init__()
        if alpha is not None and rank <= 0:
            raise ValueError(
                f"alpha scaling requires a positive rank, got rank={rank}"
            )
        # Frozen original weights
        self.weight = nn.Parameter(
            torch.randn(out_features, in_features) * 0.01, requires_grad=False
        )
        # Trainable low-rank adapters
        self.lora_A = nn.Parameter(torch.randn(rank, in_features) * 0.01)
        self.lora_B = nn.Parameter(torch.zeros(out_features, rank))
        # Plain float attribute, not a parameter: it must not show up in
        # parameters() or change the trainable-parameter count.
        self.scaling = 1.0 if alpha is None else alpha / rank

    def forward(self, x):
        """Return the frozen projection of ``x`` plus the scaled low-rank update."""
        original = x @ self.weight.T
        # Left-to-right evaluation applies A first, so the intermediate has
        # width ``rank`` rather than forming the full out x in product B @ A.
        lora = x @ self.lora_A.T @ self.lora_B.T
        if self.scaling != 1.0:
            lora = lora * self.scaling
        return original + lora

# Compare trainable-parameter counts: a fully trainable dense layer versus
# the same-sized layer adapted with LoRA.
d_model = 4096
rank = 8  # single source of truth so the printed labels cannot drift from the model
original = OriginalLinear(d_model, d_model)
lora = LoRALinear(d_model, d_model, rank=rank)


def count_trainable(module):
    """Return the total element count of the module's parameters that require grad."""
    return sum(p.numel() for p in module.parameters() if p.requires_grad)


orig_params = count_trainable(original)
lora_params = count_trainable(lora)

# Labels are derived from d_model and rank instead of being hard-coded.
print(f"Parameter Comparison ({d_model}×{d_model} layer):")
print(f"  Original: {orig_params:,} trainable parameters")
print(f"  LoRA (r={rank}): {lora_params:,} trainable parameters")
print(f"\n✓ LoRA reduces trainable params by {orig_params/lora_params:.0f}x!")
