## Basic Setup

In [None]:
import os
import tempfile
import time
from copy import deepcopy

import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.models import resnet50

# Pick the compute device once: CUDA when PyTorch reports one, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

ModuleNotFoundError: No module named 'torch'

In [None]:
# Start from a pretrained ResNet-50, replace its classifier head with a 10-way
# linear layer for CIFAR-10, and move the whole network to `device`.
# NOTE(review): newer torchvision releases appear to favor a `weights=` argument
# over `pretrained=` -- confirm against the installed version before changing.
model = resnet50(pretrained=True)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=10)
model = model.to(device)

In [None]:
# Build the CIFAR-10 datasets and loaders used by the training and evaluation
# cells. Training images are augmented (padded random crop + horizontal flip);
# test images are only converted to tensors.
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)
transform_test = transforms.Compose([transforms.ToTensor()])

# Both splits live under the same root and are downloaded on first use.
train_set = CIFAR10(root='./data', download=True, train=True, transform=transform_train)
test_set = CIFAR10(root='./data', download=True, train=False, transform=transform_test)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_set, shuffle=True, batch_size=128)
test_loader = DataLoader(dataset=test_set, shuffle=False, batch_size=128)


In [None]:
# Loss and optimizer used for fine-tuning: cross-entropy over the class logits,
# minimized with momentum SGD plus weight decay over all of `model`'s parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    params=model.parameters(),
    weight_decay=5e-4,
    momentum=0.9,
    lr=0.01,
)

def train(model, epochs=10, lr=0.01, momentum=0.9, weight_decay=5e-4):
    """Train ``model`` on ``train_loader`` with SGD and the module-level ``criterion``.

    A fresh SGD optimizer is built from ``model.parameters()`` on every call.
    An optimizer created earlier only tracks the parameter tensors that existed
    at that time, so layers swapped into the model afterwards (for example by
    the compression cell) would otherwise never be updated.

    Args:
        model: Network to optimize in place; expected to already live on ``device``.
        epochs: Number of full passes over ``train_loader``.
        lr: SGD learning rate.
        momentum: SGD momentum factor.
        weight_decay: L2 penalty coefficient passed to SGD.

    Prints the mean batch loss after every epoch. Returns ``None``.
    """
    sgd = torch.optim.SGD(
        model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay
    )
    # Guard the per-epoch average against an empty loader.
    batches_per_epoch = max(len(train_loader), 1)

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            sgd.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            sgd.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/batches_per_epoch:.4f}")



In [None]:
# Fine-tune the pretrained network on CIFAR-10 for 10 epochs.
train(model, epochs=10)

# Save the fine-tuned original model. A deep copy is taken because `model` is
# modified in place by the compression cell further down; this snapshot keeps
# the uncompressed weights available for the final comparison.
original_model = deepcopy(model)

In [None]:
# Low-rank (truncated SVD) compression of Linear and Conv2d layers.
def _truncation_rank(singular_values, epsilon):
    """Return the smallest rank that keeps at least ``1 - epsilon`` of the energy.

    Energy is the cumulative sum of squared singular values divided by their
    total. The result is clamped to ``[1, len(singular_values)]``.
    """
    energy = torch.cumsum(singular_values ** 2, dim=0)
    total = energy[-1].item()
    if total <= 0.0:
        # An all-zero matrix is reproduced exactly by any rank.
        return 1
    # Counting entries strictly below the threshold gives the index of the
    # first entry that reaches it (same result as a left-sided searchsorted).
    below_threshold = (energy / total) < (1.0 - epsilon)
    rank = int(below_threshold.sum().item()) + 1
    return max(1, min(rank, singular_values.numel()))


def _compress_linear(layer, epsilon):
    """Factor ``layer`` into ``Linear(in, r) -> Linear(r, out)`` when that is smaller."""
    weight = layer.weight.detach()
    if weight.numel() == 0:
        return layer
    out_features, in_features = weight.shape

    # The SVD runs on a float32 CPU copy; results are cast back at the end.
    U, S, Vh = torch.linalg.svd(weight.cpu().float(), full_matrices=False)
    rank = _truncation_rank(S, epsilon)
    if rank * (in_features + out_features) >= weight.numel():
        return layer  # factorization would not reduce the parameter count

    has_bias = layer.bias is not None
    reduce = nn.Linear(in_features, rank, bias=False)
    expand = nn.Linear(rank, out_features, bias=has_bias)
    with torch.no_grad():
        reduce.weight.copy_(Vh[:rank, :])
        # Scaling the columns of U by S equals U @ diag(S).
        expand.weight.copy_(U[:, :rank] * S[:rank])
        if has_bias:
            expand.bias.copy_(layer.bias.detach())
    return nn.Sequential(reduce, expand).to(device=weight.device, dtype=weight.dtype)


def _compress_conv2d(layer, epsilon):
    """Factor ``layer`` into a rank-``r`` spatial conv followed by a 1x1 conv."""
    weight = layer.weight.detach()
    # Grouped convolutions have a block-structured weight that this flat
    # factorization does not represent, so they are left untouched.
    if layer.groups != 1 or weight.numel() == 0:
        return layer
    out_channels, in_channels, k_h, k_w = weight.shape

    flat = weight.cpu().float().reshape(out_channels, -1)
    U, S, Vh = torch.linalg.svd(flat, full_matrices=False)
    rank = _truncation_rank(S, epsilon)
    if rank * (in_channels * k_h * k_w + out_channels) >= weight.numel():
        return layer  # factorization would not reduce the parameter count

    has_bias = layer.bias is not None
    # The rows of Vh span the (in_channels, kH, kW) axes, so they form the
    # spatial convolution and must carry the original stride/padding/dilation.
    spatial = nn.Conv2d(
        in_channels,
        rank,
        kernel_size=(k_h, k_w),
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        padding_mode=layer.padding_mode,
        bias=False,
    )
    # U @ diag(S) only mixes channels, so it becomes a 1x1 convolution.
    pointwise = nn.Conv2d(rank, out_channels, kernel_size=1, bias=has_bias)
    with torch.no_grad():
        spatial.weight.copy_(Vh[:rank, :].reshape(rank, in_channels, k_h, k_w))
        pointwise.weight.copy_((U[:, :rank] * S[:rank]).reshape(out_channels, rank, 1, 1))
        if has_bias:
            pointwise.bias.copy_(layer.bias.detach())
    return nn.Sequential(spatial, pointwise).to(device=weight.device, dtype=weight.dtype)


def compress_layer(layer, epsilon=0.10):
    """Return a low-rank replacement for ``layer``, or ``layer`` itself.

    ``nn.Linear`` and ungrouped ``nn.Conv2d`` layers are factorized with a
    truncated SVD of their (flattened) weight matrix. The rank is the smallest
    one whose squared singular values retain at least ``1 - epsilon`` of the
    total. The replacement is an ``nn.Sequential`` of two layers placed on the
    same device and dtype as the original weight; any bias is carried over to
    the second layer.

    Args:
        layer: Module to compress.
        epsilon: Fraction of spectral energy that may be discarded.

    Returns:
        The factorized ``nn.Sequential`` when it has fewer weight parameters
        than the original; otherwise the original ``layer`` object unchanged
        (also for grouped convolutions and any other module type).
    """
    if isinstance(layer, nn.Linear):
        return _compress_linear(layer, epsilon)
    if isinstance(layer, nn.Conv2d):
        return _compress_conv2d(layer, epsilon)
    return layer

In [None]:
# Compress the model: replace every Linear/Conv2d submodule with the result of
# compress_layer. The targets are collected up front so the module tree is not
# modified while named_modules() is still walking it.
compression_targets = [
    (name, module)
    for name, module in model.named_modules()
    if name and isinstance(module, (nn.Linear, nn.Conv2d))
]
for name, module in compression_targets:
    # rpartition yields an empty parent path for top-level attributes such as
    # "conv1" or "fc", which a two-way rsplit cannot unpack.
    parent_path, _, attr = name.rpartition('.')
    parent_module = model
    if parent_path:
        for part in parent_path.split('.'):
            parent_module = getattr(parent_module, part)
    setattr(parent_module, attr, compress_layer(module, epsilon=0.10))

# Save the compressed model before fine-tuning
compressed_model = deepcopy(model)

In [None]:

# Recover accuracy lost to compression with a short fine-tuning run on `model`,
# which at this point holds the compressed layers.
finetune_epochs = 5
train(model, epochs=finetune_epochs)


In [None]:
# Accuracy / latency measurement
def evaluate(model):
    """Score ``model`` on ``test_loader``.

    Puts the model in eval mode and runs every test batch with gradients
    disabled. The timer wraps the whole loop, so the reported latency includes
    batch loading and the transfer to ``device`` as well as the forward passes.

    Returns:
        ``(accuracy, latency)``: the fraction of correctly classified samples
        and the elapsed wall-clock seconds for the full pass.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    t_start = time.time()
    with torch.no_grad():
        for batch, targets in test_loader:
            batch = batch.to(device)
            targets = targets.to(device)
            # Index of the largest logit per sample is the predicted class.
            guesses = model(batch).max(1)[1]
            n_seen += targets.size(0)
            n_correct += (guesses == targets).sum().item()
    elapsed = time.time() - t_start
    return n_correct / n_seen, elapsed

# Measure size
def get_size(model):
    """Return the serialized size of ``model``'s state dict in megabytes (1e6 bytes).

    The state dict is saved into a private temporary directory that is removed
    on exit, even if saving or measuring fails. Nothing is written to the
    current working directory, so an existing ``temp.p`` there is never
    overwritten or deleted.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        checkpoint_path = os.path.join(scratch_dir, "temp.p")
        torch.save(model.state_dict(), checkpoint_path)
        return os.path.getsize(checkpoint_path) / 1e6


In [None]:
# Compare the three snapshots -- fine-tuned original, compressed, and compressed
# then fine-tuned -- on accuracy, evaluation time, and serialized size.
(
    (acc_orig, latency_orig),
    (acc_comp, latency_comp),
    (acc_tuned_comp, latency_tuned_comp),
) = [evaluate(candidate) for candidate in (original_model, compressed_model, model)]

size_orig, size_comp, size_tuned_comp = [
    get_size(candidate) for candidate in (original_model, compressed_model, model)
]

print(f"Original -> acc: {acc_orig:.4f}, latency: {latency_orig:.2f}s, size: {size_orig:.2f}MB")
print(f"Compressed -> acc: {acc_comp:.4f}, latency: {latency_comp:.2f}s, size: {size_comp:.2f}MB")
print(f"Tuned Compressed -> acc: {acc_tuned_comp:.4f}, latency: {latency_tuned_comp:.2f}s, size: {size_tuned_comp:.2f}MB")