# Without LoRA

In [1]:
from fastai.vision.all import *

path = untar_data(URLs.PETS)/'images'
dls = ImageDataLoaders.from_name_func(
    path, get_image_files(path), valid_pct=0.2,
    label_func=lambda x: x[0].isupper(), item_tfms=Resize(224))

learn = vision_learner(dls, "resnet50", metrics=[error_rate, accuracy])

def count_parameters(model):
    trainable_params = 0
    non_trainable_params = 0
    
    for param in model.parameters():
        if param.requires_grad:
            trainable_params += param.numel()
        else:
            non_trainable_params += param.numel()
    
    print(f"Trainable parameters: {trainable_params:,}")
    print(f"Non-trainable parameters: {non_trainable_params:,}")
    print(f"Total parameters: {trainable_params + non_trainable_params:,}")
    
    return trainable_params, non_trainable_params

count_parameters(learn.model)

# learn.unfreeze()

for name, param in learn.model.named_parameters():
    print(f"{name}: requires_grad = {param.requires_grad}")

# learn.summary()

Trainable parameters: 2,160,512
Non-trainable parameters: 23,454,912
Total parameters: 25,615,424
0.model.conv1.weight: requires_grad = False
0.model.bn1.weight: requires_grad = True
0.model.bn1.bias: requires_grad = True
0.model.layer1.0.conv1.weight: requires_grad = False
0.model.layer1.0.bn1.weight: requires_grad = True
0.model.layer1.0.bn1.bias: requires_grad = True
0.model.layer1.0.conv2.weight: requires_grad = False
0.model.layer1.0.bn2.weight: requires_grad = True
0.model.layer1.0.bn2.bias: requires_grad = True
0.model.layer1.0.conv3.weight: requires_grad = False
0.model.layer1.0.bn3.weight: requires_grad = True
0.model.layer1.0.bn3.bias: requires_grad = True
0.model.layer1.0.downsample.0.weight: requires_grad = False
0.model.layer1.0.downsample.1.weight: requires_grad = True
0.model.layer1.0.downsample.1.bias: requires_grad = True
0.model.layer1.1.conv1.weight: requires_grad = False
0.model.layer1.1.bn1.weight: requires_grad = True
0.model.layer1.1.bn1.bias: requires_grad = Tru

In [2]:
learn.fit_one_cycle(3)

epoch,train_loss,valid_loss,error_rate,accuracy,time
0,0.172675,0.072987,0.017591,0.982409,00:18
1,0.064944,0.025864,0.006089,0.993911,00:17
2,0.036647,0.027223,0.007442,0.992558,00:17


  return F.conv2d(input, weight, bias, self.stride,


## With LoRA

In [3]:
from fastai.vision.all import *
from lora_adapters import LoraConv2d, apply_adapter, mark_only_lora_as_trainable

path = untar_data(URLs.PETS)/'images'
dls = ImageDataLoaders.from_name_func(
    path, get_image_files(path), valid_pct=0.2,
    label_func=lambda x: x[0].isupper(), item_tfms=Resize(224))

learn = vision_learner(dls, "resnet50", metrics=[error_rate, accuracy])
# learn.freeze_to(2)

learn.model[0].model = apply_adapter(learn.model[0].model, LoraConv2d, rank=16)
learn.model[0].model = mark_only_lora_as_trainable(learn.model[0].model)


def count_parameters(model):
    trainable_params = 0
    non_trainable_params = 0
    
    for param in model.parameters():
        if param.requires_grad:
            trainable_params += param.numel()
        else:
            non_trainable_params += param.numel()
    
    print(f"Trainable parameters: {trainable_params:,}")
    print(f"Non-trainable parameters: {non_trainable_params:,}")
    print(f"Total parameters: {trainable_params + non_trainable_params:,}")
    
    return trainable_params, non_trainable_params

count_parameters(learn.model)

for name, param in learn.model.named_parameters():
    print(f"{name}: requires_grad = {param.requires_grad}")

learn.summary()

Trainable parameters: 3,910,960
Non-trainable parameters: 23,508,032
Total parameters: 27,418,992
0.model.conv1.weight: requires_grad = False
0.model.conv1.lora_A: requires_grad = True
0.model.conv1.lora_B: requires_grad = True
0.model.bn1.weight: requires_grad = False
0.model.bn1.bias: requires_grad = False
0.model.layer1.0.conv1.weight: requires_grad = False
0.model.layer1.0.conv1.lora_A: requires_grad = True
0.model.layer1.0.conv1.lora_B: requires_grad = True
0.model.layer1.0.bn1.weight: requires_grad = False
0.model.layer1.0.bn1.bias: requires_grad = False
0.model.layer1.0.conv2.weight: requires_grad = False
0.model.layer1.0.conv2.lora_A: requires_grad = True
0.model.layer1.0.conv2.lora_B: requires_grad = True
0.model.layer1.0.bn2.weight: requires_grad = False
0.model.layer1.0.bn2.bias: requires_grad = False
0.model.layer1.0.conv3.weight: requires_grad = False
0.model.layer1.0.conv3.lora_A: requires_grad = True
0.model.layer1.0.conv3.lora_B: requires_grad = True
0.model.layer1.0.bn

Sequential (Input shape: 64 x 3 x 224 x 224)
Layer (type)         Output Shape         Param #    Trainable 
                     64 x 64 x 112 x 112 
LoraConv2d                                61936      False     
BatchNorm2d                               128        False     
ReLU                                                           
____________________________________________________________________________
                     64 x 64 x 56 x 56   
MaxPool2d                                                      
LoraConv2d                                6144       False     
BatchNorm2d                               128        False     
ReLU                                                           
LoraConv2d                                55296      False     
BatchNorm2d                               128        False     
Identity                                                       
ReLU                                                           
Identity                  

In [4]:
learn.fit_one_cycle(3)

epoch,train_loss,valid_loss,error_rate,accuracy,time
0,0.195193,0.120581,0.015562,0.984438,00:20
1,0.081211,0.031215,0.008119,0.991881,00:21
2,0.045041,0.023882,0.003383,0.996617,00:22


In [5]:
import timm
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = timm.create_model('resnet50', pretrained=True).to(device)
apply_adapter(model, LoraConv2d, rank=16)

trainable_layers = []
for name, param in model.named_parameters():

    trainable_layers.append((name, param.numel(), param.requires_grad))

    print(f"Layer: {name}")
    print(f"  Trainable: {param.requires_grad}")
    print(f"  Shape: {param.shape}")
    print("---")

Layer: conv1.weight
  Trainable: False
  Shape: torch.Size([64, 3, 7, 7])
---
Layer: conv1.lora_A
  Trainable: True
  Shape: torch.Size([112, 21])
---
Layer: conv1.lora_B
  Trainable: True
  Shape: torch.Size([448, 112])
---
Layer: bn1.weight
  Trainable: True
  Shape: torch.Size([64])
---
Layer: bn1.bias
  Trainable: True
  Shape: torch.Size([64])
---
Layer: layer1.0.conv1.weight
  Trainable: False
  Shape: torch.Size([64, 64, 1, 1])
---
Layer: layer1.0.conv1.lora_A
  Trainable: True
  Shape: torch.Size([16, 64])
---
Layer: layer1.0.conv1.lora_B
  Trainable: True
  Shape: torch.Size([64, 16])
---
Layer: layer1.0.bn1.weight
  Trainable: True
  Shape: torch.Size([64])
---
Layer: layer1.0.bn1.bias
  Trainable: True
  Shape: torch.Size([64])
---
Layer: layer1.0.conv2.weight
  Trainable: False
  Shape: torch.Size([64, 64, 3, 3])
---
Layer: layer1.0.conv2.lora_A
  Trainable: True
  Shape: torch.Size([48, 192])
---
Layer: layer1.0.conv2.lora_B
  Trainable: True
  Shape: torch.Size([192, 48])

In [6]:
trainable_layers

[('conv1.weight', 9408, False),
 ('conv1.lora_A', 2352, True),
 ('conv1.lora_B', 50176, True),
 ('bn1.weight', 64, True),
 ('bn1.bias', 64, True),
 ('layer1.0.conv1.weight', 4096, False),
 ('layer1.0.conv1.lora_A', 1024, True),
 ('layer1.0.conv1.lora_B', 1024, True),
 ('layer1.0.bn1.weight', 64, True),
 ('layer1.0.bn1.bias', 64, True),
 ('layer1.0.conv2.weight', 36864, False),
 ('layer1.0.conv2.lora_A', 9216, True),
 ('layer1.0.conv2.lora_B', 9216, True),
 ('layer1.0.bn2.weight', 64, True),
 ('layer1.0.bn2.bias', 64, True),
 ('layer1.0.conv3.weight', 16384, False),
 ('layer1.0.conv3.lora_A', 1024, True),
 ('layer1.0.conv3.lora_B', 4096, True),
 ('layer1.0.bn3.weight', 256, True),
 ('layer1.0.bn3.bias', 256, True),
 ('layer1.0.downsample.0.weight', 16384, False),
 ('layer1.0.downsample.0.lora_A', 1024, True),
 ('layer1.0.downsample.0.lora_B', 4096, True),
 ('layer1.0.downsample.1.weight', 256, True),
 ('layer1.0.downsample.1.bias', 256, True),
 ('layer1.1.conv1.weight', 16384, False),
 