## Basic Setup

In [None]:
# Source of the utility functions used throughout this notebook:
# - get_device
# - get_cifar10_loaders
# - get_resnet50_for_cifar10
# - train
# - evaluate
# - estimate_latency
# - get_size
# Running utils.ipynb makes the helpers listed above available to the cells below.
%run utils.ipynb


In [None]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models

from copy import deepcopy


Using device: cuda


In [None]:
# Pick the compute device with the get_device helper (from utils.ipynb).
# `device` is passed to the model constructor and to train() in later cells.
device = get_device()

In [None]:
# Get the CIFAR-10 data loaders from the utils.ipynb helper; the result is
# unpacked here as the train, validation and test loaders.
train_loader, val_loader, test_loader = get_cifar10_loaders()

In [None]:
# Build the ResNet-50 architecture for CIFAR-10 training (helper from utils.ipynb).
# The selected `device` is handed to the helper.
model = get_resnet50_for_cifar10(device)

## Train and Evaluate full model

In [None]:
# Optimisation setup for the full model: cross-entropy loss, Adam (lr=1e-3) and
# a plateau scheduler (mode='min', patience=3, factor=0.5).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=3,
    factor=0.5,
)

# Keyword options forwarded to the train() helper from utils.ipynb.
full_train_options = dict(
    epochs=50,
    scheduler=scheduler,
    grad_clip=1.0,
    save_path="full_model_resnet50_best_model.pt",
    early_stopping_patience=5,
    resume=True,
)
train(model, train_loader, val_loader, optimizer, criterion, device, **full_train_options)

# Keep an independent copy of the trained model as the baseline for compression.
original_model = deepcopy(model)

Model already trained. Loading from full_model_resnet50.pth


In [None]:
# Function to compress layers
def _energy_rank(singular_values, epsilon):
    """Smallest rank whose leading singular values keep >= (1 - epsilon) of the energy.

    Energy is the sum of squared singular values. Returns ``None`` when the
    spectrum is entirely zero (the energy ratio would be NaN).
    """
    squared = singular_values ** 2
    total = torch.sum(squared)
    if total.item() == 0:
        return None
    energy = torch.cumsum(squared, dim=0) / total
    # searchsorted gives the first index whose cumulative energy reaches the target.
    rank = torch.searchsorted(energy, 1 - epsilon).item() + 1
    # Guard against rounding pushing the index past the end of the spectrum.
    return max(1, min(rank, singular_values.numel()))


def _low_rank_factors(weight_2d, epsilon):
    """Truncated-SVD factorisation ``weight_2d ≈ left @ right``.

    Returns ``(left, right)`` with shapes ``(rows, rank)`` and ``(rank, cols)``,
    or ``None`` when the matrix is empty, all zero, or when the two factors
    would not hold fewer values than the original matrix.
    """
    if weight_2d.numel() == 0:
        return None
    U, S, Vh = torch.linalg.svd(weight_2d, full_matrices=False)
    rank = _energy_rank(S, epsilon)
    if rank is None:
        return None
    rows, cols = weight_2d.shape
    if rank * (rows + cols) >= weight_2d.numel():
        return None
    # Fold the singular values into the left factor (same as U_r @ diag(S_r)).
    left = U[:, :rank] * S[:rank]
    right = Vh[:rank, :]
    return left, right


def _compress_linear(layer, epsilon):
    """Replace an nn.Linear by two stacked nn.Linear layers, if that is smaller."""
    W = layer.weight.detach().cpu()
    factors = _low_rank_factors(W, epsilon)
    if factors is None:
        return layer
    left, right = factors
    rank = right.shape[0]
    out_features, in_features = W.shape
    old_size = W.numel()
    new_size = rank * (W.shape[0] + W.shape[1])
    print(f"Compressing Linear layer: old size = {old_size}, new size = {new_size}")

    has_bias = layer.bias is not None
    reduce = nn.Linear(in_features, rank, bias=False)
    expand = nn.Linear(rank, out_features, bias=has_bias)
    # Place the new layers on the same device / dtype as the layer they replace.
    compressed_layer = nn.Sequential(reduce, expand).to(
        device=layer.weight.device, dtype=layer.weight.dtype
    )
    with torch.no_grad():
        reduce.weight.copy_(right)
        expand.weight.copy_(left)
        if has_bias:
            expand.bias.copy_(layer.bias)
    return compressed_layer


def _compress_conv2d(layer, epsilon):
    """Replace an nn.Conv2d by a (kH x kW conv -> 1x1 conv) pair, if that is smaller."""
    if layer.groups != 1:
        # The flattened-weight factorisation below is only valid for ungrouped convs.
        return layer
    W = layer.weight.detach().cpu()  # shape: [out_channels, in_channels, kH, kW]
    OC, IC, kH, kW = W.shape
    factors = _low_rank_factors(W.reshape(OC, -1), epsilon)  # [OC, IC*kH*kW]
    if factors is None:
        return layer
    left, right = factors
    rank = right.shape[0]
    old_size = W.numel()
    new_size = rank * (IC * kH * kW + OC)
    print(f"Compressing Conv2d layer: old size = {old_size}, new size = {new_size}")

    has_bias = layer.bias is not None
    # The first conv carries the spatial kernel, so it must also carry the
    # original stride / padding / dilation; the second is a pure 1x1 channel mix.
    spatial = nn.Conv2d(
        in_channels=IC,
        out_channels=rank,
        kernel_size=(kH, kW),
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        padding_mode=layer.padding_mode,
        bias=False,
    )
    pointwise = nn.Conv2d(
        in_channels=rank,
        out_channels=OC,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=has_bias,
    )
    compressed_layer = nn.Sequential(spatial, pointwise).to(
        device=layer.weight.device, dtype=layer.weight.dtype
    )
    with torch.no_grad():
        spatial.weight.copy_(right.reshape(rank, IC, kH, kW))
        pointwise.weight.copy_(left.reshape(OC, rank, 1, 1))
        if has_bias:
            pointwise.bias.copy_(layer.bias)
    return compressed_layer


def compress_layer(layer, epsilon=0.10):
    """
    Compresses a layer using SVD if the compression is beneficial.

    The weight (flattened to 2-D for Conv2d) is factorised with a truncated SVD
    that keeps the smallest rank retaining at least ``1 - epsilon`` of the
    squared-singular-value energy. The layer is replaced by an ``nn.Sequential``
    of two layers only when the two factors hold fewer values than the original
    weight. The replacement lives on the same device and has the same dtype as
    ``layer``, and has a bias only if ``layer`` had one.

    Args:
        layer (nn.Module): The layer to compress.
        epsilon (float): The energy threshold for compression (fraction of
            energy that may be discarded).
    Returns:
        nn.Module: The compressed layer, or the original ``layer`` object if it
        is not an ungrouped nn.Linear / nn.Conv2d or compression is not beneficial.
    """
    if isinstance(layer, nn.Linear):
        return _compress_linear(layer, epsilon)
    if isinstance(layer, nn.Conv2d):
        return _compress_conv2d(layer, epsilon)
    return layer  # return the original layer if compression is not beneficial

In [None]:
def compress_model(model, epsilon=0.50):
    """
    Compresses the given model by applying SVD-based compression to Linear and Conv2d layers.

    The input model is deep-copied and left untouched. Every nn.Linear / nn.Conv2d
    found in the copy is passed to ``compress_layer`` and replaced on its parent
    by whatever that call returns.

    Args:
        model (nn.Module): The model to compress.
        epsilon (float): The energy threshold for compression.

    Returns:
        nn.Module: The compressed model. If ``model`` itself is a Linear/Conv2d,
        the result of compressing that single layer is returned.
    """
    compressed_model = deepcopy(model)  # Create a copy of the input model
    target_types = (nn.Linear, nn.Conv2d)

    # A bare layer has no parent to re-attach to; compress it directly.
    if isinstance(compressed_model, target_types):
        return compress_layer(compressed_model, epsilon)

    # Snapshot the targets first so the module tree is not mutated while
    # named_modules() is still iterating over it.
    targets = [
        (name, module)
        for name, module in compressed_model.named_modules()
        if isinstance(module, target_types)
    ]
    for name, module in targets:
        parent_path, _, attr = name.rpartition('.')
        parent_module = compressed_model
        # Walk down the dotted path to the direct parent (empty path -> root).
        for part in (parent_path.split('.') if parent_path else []):
            parent_module = getattr(parent_module, part)
        setattr(parent_module, attr, compress_layer(module, epsilon))

    return compressed_model

Compressing Conv2d layer: old size = 1728, new size = 455
Compressing Conv2d layer: old size = 4096, new size = 1536
Compressing Conv2d layer: old size = 36864, new size = 14080
Compressing Conv2d layer: old size = 16384, new size = 6400
Compressing Conv2d layer: old size = 16384, new size = 6080
Compressing Conv2d layer: old size = 16384, new size = 6400
Compressing Conv2d layer: old size = 36864, new size = 14720
Compressing Conv2d layer: old size = 16384, new size = 6400
Compressing Conv2d layer: old size = 16384, new size = 6400
Compressing Conv2d layer: old size = 36864, new size = 14720
Compressing Conv2d layer: old size = 16384, new size = 6400
Compressing Conv2d layer: old size = 32768, new size = 12288
Compressing Conv2d layer: old size = 147456, new size = 58880
Compressing Conv2d layer: old size = 65536, new size = 24960
Compressing Conv2d layer: old size = 131072, new size = 47616
Compressing Conv2d layer: old size = 65536, new size = 25600
Compressing Conv2d layer: old siz

In [None]:
# Baseline: accuracy, latency and size of the uncompressed model
acc_orig = evaluate(original_model)
example_input = torch.rand(128, 3, 32, 32).to(device)
orig_latency_mu, orig_latency_std = estimate_latency(original_model, example_input)
size_orig = get_size(original_model)
print(f"Original -> acc: {100*acc_orig:.2f}%, latency: {orig_latency_mu:.2f} ± {orig_latency_std:.2f} ms, size: {size_orig:.2f}MB")

# Sweep the energy threshold over 0.1, 0.2, ..., 0.9
for step in range(1, 10):
    epsilon = round(step * 0.1, 2)
    print(f"\nCompressing model with epsilon = {epsilon}...")

    # Build a compressed copy of the baseline and score it as-is
    compressed_model = compress_model(original_model, epsilon=epsilon)
    acc_comp = evaluate(compressed_model)
    print(f"Compressed -> acc before tuning: {100*acc_comp:.2f}%")

    # Fresh optimiser / loss / scheduler for every fine-tuning run
    optimizer = torch.optim.Adam(compressed_model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        patience=3,
        factor=0.5,
    )

    # Keyword options forwarded to the train() helper for this run
    finetune_options = dict(
        epochs=5,
        scheduler=scheduler,
        grad_clip=1.0,
        save_path=f"compressed_model_epsilon_{epsilon}_best_model.pt",
        early_stopping_patience=3,
        resume=False,
    )
    train(compressed_model, train_loader, val_loader, optimizer, criterion, device, **finetune_options)

    # Score the fine-tuned compressed model and report all three metrics
    acc_tuned_comp = evaluate(compressed_model)
    comp_latency_mu, comp_latency_std = estimate_latency(compressed_model, example_input)
    size_comp = get_size(compressed_model)
    print(f"Compressed -> acc after tuning: {100*acc_tuned_comp:.2f}%, latency: {comp_latency_mu:.2f} ± {comp_latency_std:.2f} ms, size: {size_comp:.2f}MB")

Epoch 1/5, Loss: 1.2146
Epoch 2/5, Loss: 0.8451
Epoch 3/5, Loss: 0.6818
Epoch 4/5, Loss: 0.5658
Epoch 5/5, Loss: 0.4714
Training complete. Model saved to compressed_model_final_tuning.pth
