## Basic Setup

In [12]:
import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

import time
import os
from copy import deepcopy
import numpy as np

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [13]:
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_loader = DataLoader(
    datasets.CIFAR10(root="./data", train=True, download=True, transform=transform),
    batch_size=128, shuffle=True
)

test_loader = DataLoader(
    datasets.CIFAR10(root="./data", train=False, download=True, transform=transform),
    batch_size=256
)

In [14]:
def get_resnet50_for_cifar10():
    model = models.resnet50(weights=None, num_classes=10)
    model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    model.maxpool = nn.Identity()
    return model.to(device)

model = get_resnet50_for_cifar10()


In [15]:
def train(model, loader, epochs=10, lr=0.01, save_path="model.pth"):
    if os.path.exists(save_path):
        print(f"Model already trained. Loading from {save_path}")
        model.load_state_dict(torch.load(save_path))
        return

    # no saved model found. training from given model state

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")

    torch.save(model.state_dict(), save_path)
    print(f"Training complete. Model saved to {save_path}")

# Evaluate models
def evaluate(model):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    acc = correct / total
    return acc

## Define helper functions to measure latency

In [16]:
class Timer:
    def __init__(self):
        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.starter = torch.cuda.Event(enable_timing=True)
            self.ender = torch.cuda.Event(enable_timing=True)

    def start(self):
        if self.use_cuda:
            self.starter.record()
        else:
            self.start_time = time.time()

    def stop(self):
        if self.use_cuda:
            self.ender.record()
            torch.cuda.synchronize()
            return self.starter.elapsed_time(self.ender)  # ms
        else:
            return (time.time() - self.start_time) * 1000  # ms

def estimate_latency(model, example_inputs, repetitions=50):
    timer = Timer()
    timings = np.zeros((repetitions, 1))

    # warm-up
    for _ in range(5):
        _ = model(example_inputs)

    with torch.no_grad():
        for rep in range(repetitions):
            timer.start()
            _ = model(example_inputs)
            elapsed = timer.stop()
            timings[rep] = elapsed

    return np.mean(timings), np.std(timings)

In [17]:

# Measure size
def get_size(model):
    torch.save(model.state_dict(), "temp.p")
    size = os.path.getsize("temp.p") / 1e6
    os.remove("temp.p")
    return size


## Train and Evaluate full model

In [18]:
train(model, train_loader, epochs=10, save_path="full_model_resnet50.pth")

# Save the fine-tuned original model
original_model = deepcopy(model)

Model already trained. Loading from full_model_resnet50.pth


In [19]:
# Function to compress layers
def compress_layer(layer, epsilon=0.10):
    if isinstance(layer, nn.Linear):
        # Handle Linear layers
        W = layer.weight.data.cpu()
        U, S, Vh = torch.linalg.svd(W, full_matrices=False)
        energy = torch.cumsum(S**2, dim=0) / torch.sum(S**2)
        rank = torch.searchsorted(energy, 1 - epsilon).item() + 1
        old_size = W.numel()
        new_size = rank * (W.shape[0] + W.shape[1])
        if new_size < old_size:
            print(f"Compressing Linear layer: old size = {old_size}, new size = {new_size}")
            U_r = U[:, :rank] @ torch.diag(S[:rank])
            V_r = Vh[:rank, :]
            compressed_layer = nn.Sequential(
                nn.Linear(W.shape[1], rank, bias=False),
                nn.Linear(rank, W.shape[0], bias=True)
            )
            compressed_layer[0].weight.data = V_r.to(device)
            compressed_layer[1].weight.data = U_r.to(device)
            compressed_layer[1].bias.data = layer.bias.data.to(device)
            return compressed_layer
    elif isinstance(layer, nn.Conv2d):
        # Handle Conv2d layers
        W = layer.weight.data.cpu()  # shape: [out_channels, in_channels, kH, kW]
        OC, IC, kH, kW = W.shape
        W_flat = W.view(OC, -1)  # shape: [OC, IC*kH*kW]
        U, S, Vh = torch.linalg.svd(W_flat, full_matrices=False)
        energy = torch.cumsum(S**2, dim=0) / torch.sum(S**2)
        rank = torch.searchsorted(energy, 1 - epsilon).item() + 1
        old_size = W.numel()
        new_size = rank * (IC * kH * kW + OC)
        if new_size < old_size:
            print(f"Compressing Conv2d layer: old size = {old_size}, new size = {new_size}")
            U_r = U[:, :rank] @ torch.diag(S[:rank])
            V_r = Vh[:rank, :]
            conv1 = nn.Conv2d(
                in_channels=IC,
                out_channels=rank,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=False
            )
            conv2 = nn.Conv2d(
                in_channels=rank,
                out_channels=OC,
                kernel_size=(kH, kW),
                stride=layer.stride,
                padding=layer.padding,
                bias=(layer.bias is not None)
            )
            conv1.weight.data = V_r.view(rank, IC, kH, kW).to(device)
            conv2.weight.data = U_r.view(OC, rank, 1, 1).to(device)
            if layer.bias is not None:
                conv2.bias.data = layer.bias.data.to(device)
            return nn.Sequential(conv1, conv2)
    return layer  # Return the original layer if compression is not beneficial

In [20]:
# Compress the model
for name, module in model.named_modules():
    if isinstance(module, (nn.Linear, nn.Conv2d)):
        if '.' in name:  # Check if the module has a parent
            parent, attr = name.rsplit('.', 1)
            parent_module = model
            for part in parent.split('.'):
                parent_module = getattr(parent_module, part)
        else:  # Handle top-level modules
            parent_module = model
            attr = name
        setattr(parent_module, attr, compress_layer(module, epsilon=0.50))

# Save the compressed model before fine-tuning
compressed_model = deepcopy(model)

Compressing Conv2d layer: old size = 1728, new size = 455
Compressing Conv2d layer: old size = 4096, new size = 1536
Compressing Conv2d layer: old size = 36864, new size = 14080
Compressing Conv2d layer: old size = 16384, new size = 6400
Compressing Conv2d layer: old size = 16384, new size = 6080
Compressing Conv2d layer: old size = 16384, new size = 6400
Compressing Conv2d layer: old size = 36864, new size = 14720
Compressing Conv2d layer: old size = 16384, new size = 6400
Compressing Conv2d layer: old size = 16384, new size = 6400
Compressing Conv2d layer: old size = 36864, new size = 14720
Compressing Conv2d layer: old size = 16384, new size = 6400
Compressing Conv2d layer: old size = 32768, new size = 12288
Compressing Conv2d layer: old size = 147456, new size = 58880
Compressing Conv2d layer: old size = 65536, new size = 24960
Compressing Conv2d layer: old size = 131072, new size = 47616
Compressing Conv2d layer: old size = 65536, new size = 25600
Compressing Conv2d layer: old siz

In [21]:
train(model, train_loader, epochs=5, save_path=f"compressed_model_final_tuning.pth")

Epoch 1/5, Loss: 1.2146
Epoch 2/5, Loss: 0.8451
Epoch 3/5, Loss: 0.6818
Epoch 4/5, Loss: 0.5658
Epoch 5/5, Loss: 0.4714
Training complete. Model saved to compressed_model_final_tuning.pth


In [22]:
# Compare models
acc_orig = evaluate(original_model)
acc_comp = evaluate(compressed_model)
acc_tuned_comp = evaluate(model)

example_input = torch.rand(128, 3, 32, 32).to(device)
orig_latency_mu, orig_latency_std = estimate_latency(original_model, example_input)
comp_latency_mu, comp_latency_std = estimate_latency(compressed_model, example_input)
tuned_latency_mu, tuned_latency_std = estimate_latency(model, example_input)

size_orig = get_size(original_model)
size_comp = get_size(compressed_model)
size_tuned_comp = get_size(model)

print(f"Original -> acc: {100*acc_orig:.2f}%, latency: {orig_latency_mu:.2f} ± {orig_latency_std:.2f} ms, size: {size_orig:.2f}MB")
print(f"Compressed -> acc: {100%acc_comp:.2f}%, latency: {comp_latency_mu:.2f} ± {comp_latency_std:.2f} ms, size: {size_comp:.2f}MB")
print(f"Tuned Compressed -> acc: {100*acc_tuned_comp:.2f}%, {tuned_latency_mu:.2f} ± {tuned_latency_std:.2f} ms, size: {size_tuned_comp:.2f}MB")

Original -> acc: 72.73%, latency: 73.57 ± 0.06 ms, size: 94.38MB
Compressed -> acc: 0.10%, latency: 73.42 ± 0.04 ms, size: 37.06MB
Tuned Compressed -> acc: 72.70%, 73.49 ± 0.05 ms, size: 37.06MB
