## Basic Setup

In [None]:
# source for utility functions: get_device, get_cifar10_loaders, get_resnet50_for_cifar10
#                               train, evaluate, estimate_latency, get_size
%run utils.ipynb


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [None]:
import torch
import torch.nn as nn
from copy import deepcopy

# get device
device = get_device()

# get the CIFAR-10 data loaders
train_loader, val_loader, test_loader = get_cifar10_loaders()

# Get architecture for CIFAR-10 training
model = get_resnet50_for_cifar10(device)

Full train set size: 50000
Train ratio: 0.9
Train samples: 45000
Validation samples: 5000
Test samples: 10000
Number of training batches: 352
Number of validation batches: 40
Number of test batches: 79


## Train and Evaluate full model

In [9]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5)

train(
    model,
    train_loader,
    val_loader,
    optimizer,
    criterion,
    device,
    epochs=50,
    scheduler=scheduler,
    grad_clip=1.0,
    save_path="full_model_resnet50_best_model.pt",
    early_stopping_patience=5,
    resume=True,
)

# Save the fine-tuned original model
original_model = deepcopy(model)

🔁 Resumed training from epoch 4


                                                               

Epoch   5 | Train Loss: 0.4172 | Acc: 0.8527
          | Val   Loss: 0.6039 | Acc: 0.8130
          | ✅ New best model saved to 'full_model_resnet50_best_model.pt'


                                                               

Epoch   6 | Train Loss: 0.3214 | Acc: 0.8864
          | Val   Loss: 0.6077 | Acc: 0.8152
          | No improvement for 1 epoch(s)


                                                               

Epoch   7 | Train Loss: 0.2445 | Acc: 0.9149
          | Val   Loss: 0.6085 | Acc: 0.8206
          | No improvement for 2 epoch(s)


                                                               

Epoch   8 | Train Loss: 0.1817 | Acc: 0.9368
          | Val   Loss: 0.6411 | Acc: 0.8256
          | No improvement for 3 epoch(s)


                                                               

Epoch   9 | Train Loss: 0.1383 | Acc: 0.9521
          | Val   Loss: 0.6303 | Acc: 0.8366
          | No improvement for 4 epoch(s)


                                                                

Epoch  10 | Train Loss: 0.0373 | Acc: 0.9880
          | Val   Loss: 0.6574 | Acc: 0.8560
          | No improvement for 5 epoch(s)
🛑 Early stopping triggered after 5 epochs without improvement.
Training complete.


In [26]:
# Function to compress layers
def compress_layer(layer, epsilon=0.10):
    """
    Compresses a layer using SVD if the compression is beneficial.
    Args:
        layer (nn.Module): The layer to compress.
        epsilon (float): The energy threshold for compression.
    Returns:
        nn.Module: The compressed layer or the original layer if compression is not beneficial.
    """

    # handle Linear layers
    if isinstance(layer, nn.Linear):
        # get linear layer weight matrix
        W = layer.weight.data.cpu()
        
        # run SVD on flat weight matrix
        U, S, Vh = torch.linalg.svd(W, full_matrices=False)

        # find rank that capture the asked energy (1-epsilon)
        energy = torch.cumsum(S**2, dim=0) / torch.sum(S**2)
        rank = torch.searchsorted(energy, 1 - epsilon).item() + 1

        # check that factorization actually reduces number of parameters
        old_size = W.numel()
        new_size = rank * (W.shape[0] + W.shape[1])
        if new_size < old_size:
            # define low rank factorization from SVD and rank
            U_r = U[:, :rank] @ torch.diag(S[:rank])
            V_r = Vh[:rank, :]

            # define two linear layers to replace the original linear layer
            compressed_layer = nn.Sequential(
                nn.Linear(W.shape[1], rank, bias=False),
                nn.Linear(rank, W.shape[0], bias=True)
            )
            compressed_layer[0].weight.data = V_r.to(device)
            compressed_layer[1].weight.data = U_r.to(device)
            compressed_layer[1].bias.data = layer.bias.data.to(device)
            return compressed_layer, old_size, new_size
        
    # handle Conv2d layers
    elif isinstance(layer, nn.Conv2d):
        # get convolution weight 4d matrix, shape: [out_channels, in_channels, kH, kW]
        W = layer.weight.data.cpu()  
        OC, IC, kH, kW = W.shape

        # reshape to 2d matrix, with shape: [OC, IC*kH*kW]
        W_flat = W.view(OC, -1)

        # run SVD on flat weight matrix        
        U, S, Vh = torch.linalg.svd(W_flat, full_matrices=False)

        # find rank that capture the asked energy (1-epsilon)
        energy = torch.cumsum(S**2, dim=0) / torch.sum(S**2)
        rank = torch.searchsorted(energy, 1 - epsilon).item() + 1

        # check that factorization actually reduces number of parameters
        old_size = W.numel()
        new_size = rank * (IC * kH * kW + OC)
        if new_size < old_size:
            # define low rank factorization from SVD and rank
            U_r = U[:, :rank] @ torch.diag(S[:rank])
            V_r = Vh[:rank, :]

            # define two convolutional layers to replace the original convolutional layer
            conv1 = nn.Conv2d(
                in_channels=IC,
                out_channels=rank,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=False
            )
            conv2 = nn.Conv2d(
                in_channels=rank,
                out_channels=OC,
                kernel_size=(kH, kW),
                stride=layer.stride,
                padding=layer.padding,
                bias=(layer.bias is not None)
            )
            conv1.weight.data = V_r.view(rank, IC, kH, kW).to(device)
            conv2.weight.data = U_r.view(OC, rank, 1, 1).to(device)
            if layer.bias is not None:
                conv2.bias.data = layer.bias.data.to(device)
            return nn.Sequential(conv1, conv2), old_size, new_size

    return layer, 0, 0  # return the original layer if compression is not beneficial

In [27]:
def compress_model(model, epsilon=0.50):
    """
    Compresses the given model by applying SVD-based compression to Linear and Conv2d layers.
    
    Args:
        model (nn.Module): The model to compress.
        epsilon (float): The energy threshold for compression.
    
    Returns:
        nn.Module: The compressed model.
    """
   
    compressed_model = deepcopy(model)  # Create a copy of the input model

    total_old_size = 0
    total_new_size = 0

    for name, module in compressed_model.named_modules():
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            if '.' in name:  # Check if the module has a parent
                parent, attr = name.rsplit('.', 1)
                parent_module = compressed_model
                for part in parent.split('.'):
                    parent_module = getattr(parent_module, part)
            else:  # Handle top-level modules
                parent_module = compressed_model
                attr = name
            new_layer, old_size, new_size = compress_layer(module, epsilon)
            total_old_size += old_size
            total_new_size += new_size
            setattr(parent_module, attr, new_layer)
    
    return compressed_model, total_old_size, total_new_size

In [29]:
# Evaluate and print metrics for the original model
acc_orig = evaluate(original_model, test_loader, device)
example_input = torch.rand(128, 3, 32, 32).to(device)
orig_latency_mu, orig_latency_std = estimate_latency(original_model, example_input)
size_orig = get_size(original_model)
print(f"Original -> acc: {100*acc_orig:.2f}%, latency: {orig_latency_mu:.2f} ± {orig_latency_std:.2f} ms, size: {size_orig:.2f}MB")

# Iterate over epsilon values
for epsilon in [round(x * 0.1, 2) for x in range(1, 10)]:
    print(f"\nCompressing model with epsilon = {epsilon}...")
    
    # Compress the model
    compressed_model, total_old_size, total_new_size = compress_model(original_model, epsilon=epsilon)
    
    # Evaluate compressed model before fine-tuning
    acc_comp = evaluate(compressed_model, test_loader, device)
    print(f"Old size: {total_old_size}, New size: {total_new_size}, Parameter count reduction: {total_old_size-total_new_size}")
    print(f"Compressed -> acc before tuning: {100*acc_comp:.2f}%")
    
    # Fine-tune the compressed model
    optimizer = torch.optim.Adam(compressed_model.parameters(), lr=1e-3)
    criterion = torch.nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)
    
    train(
        compressed_model,
        train_loader,
        val_loader,
        optimizer,
        criterion,
        device,
        epochs=50,
        scheduler=scheduler,
        grad_clip=1.0,
        save_path=f"compressed_model_epsilon_{epsilon}_best_model.pt",
        early_stopping_patience=3,
        resume=False,
    )
    
    # Evaluate compressed model after fine-tuning
    acc_tuned_comp = evaluate(compressed_model, test_loader, device)
    comp_latency_mu, comp_latency_std = estimate_latency(compressed_model, example_input)
    size_comp = get_size(compressed_model)
    
    # Print metrics for the fine-tuned compressed model
    print(f"Compressed -> acc after tuning: {100*acc_tuned_comp:.2f}%, latency: {comp_latency_mu:.2f} ± {comp_latency_std:.2f} ms, size: {size_comp:.2f}MB")

Evaluating: 100%|██████████| 79/79 [00:06<00:00, 11.57batch/s]
Measuring latency: 100%|██████████| 50/50 [00:03<00:00, 12.74it/s]


Original -> acc: 84.63%, latency: 72.00 ± 0.06 ms, size: 94.38MB

Compressing model with epsilon = 0.1...


Evaluating: 100%|██████████| 79/79 [00:07<00:00, 10.14batch/s]


Old size: 23463616, New size: 17327424, Parameter count reduction: 6136192
Compressed -> acc before tuning: 34.17%


                                                               

Epoch   1 | Train Loss: 0.4461 | Acc: 0.8448
          | Val   Loss: 0.8771 | Acc: 0.7516
          | ✅ New best model saved to 'compressed_model_epsilon_0.1_best_model.pt'


                                                               

Epoch   2 | Train Loss: 0.3322 | Acc: 0.8840
          | Val   Loss: 0.9339 | Acc: 0.7312
          | No improvement for 1 epoch(s)


                                                               

Epoch   3 | Train Loss: 0.2629 | Acc: 0.9067
          | Val   Loss: 0.8698 | Acc: 0.7678
          | ✅ New best model saved to 'compressed_model_epsilon_0.1_best_model.pt'


                                                               

Epoch   4 | Train Loss: 0.2174 | Acc: 0.9227
          | Val   Loss: 0.7878 | Acc: 0.7864
          | ✅ New best model saved to 'compressed_model_epsilon_0.1_best_model.pt'


                                                               

Epoch   5 | Train Loss: 0.1782 | Acc: 0.9371
          | Val   Loss: 0.7534 | Acc: 0.8180
          | ✅ New best model saved to 'compressed_model_epsilon_0.1_best_model.pt'


                                                               

Epoch   6 | Train Loss: 0.1542 | Acc: 0.9458
          | Val   Loss: 0.6098 | Acc: 0.8380
          | ✅ New best model saved to 'compressed_model_epsilon_0.1_best_model.pt'


                                                               

Epoch   7 | Train Loss: 0.1343 | Acc: 0.9526
          | Val   Loss: 0.6638 | Acc: 0.8276
          | No improvement for 1 epoch(s)


                                                               

Epoch   8 | Train Loss: 0.1170 | Acc: 0.9596
          | Val   Loss: 0.7998 | Acc: 0.8248
          | No improvement for 2 epoch(s)


                                                               

Epoch   9 | Train Loss: 0.1061 | Acc: 0.9630
          | Val   Loss: 0.8311 | Acc: 0.8176
          | No improvement for 3 epoch(s)
🛑 Early stopping triggered after 3 epochs without improvement.
Training complete.


Evaluating: 100%|██████████| 79/79 [00:07<00:00, 10.13batch/s]
Measuring latency: 100%|██████████| 50/50 [00:04<00:00, 11.24it/s]


Compressed -> acc after tuning: 80.78%, latency: 84.57 ± 0.09 ms, size: 69.86MB

Compressing model with epsilon = 0.2...


Evaluating: 100%|██████████| 79/79 [00:07<00:00, 10.83batch/s]


Old size: 23467712, New size: 11210176, Parameter count reduction: 12257536
Compressed -> acc before tuning: 32.09%


                                                               

Epoch   1 | Train Loss: 0.5005 | Acc: 0.8270
          | Val   Loss: 1.1856 | Acc: 0.7060
          | ✅ New best model saved to 'compressed_model_epsilon_0.2_best_model.pt'


                                                               

Epoch   2 | Train Loss: 0.3590 | Acc: 0.8740
          | Val   Loss: 0.8112 | Acc: 0.7510
          | ✅ New best model saved to 'compressed_model_epsilon_0.2_best_model.pt'


                                                               

Epoch   3 | Train Loss: 0.2845 | Acc: 0.8992
          | Val   Loss: 0.8876 | Acc: 0.7540
          | No improvement for 1 epoch(s)


                                                               

Epoch   4 | Train Loss: 0.2337 | Acc: 0.9173
          | Val   Loss: 0.6625 | Acc: 0.8162
          | ✅ New best model saved to 'compressed_model_epsilon_0.2_best_model.pt'


                                                               

Epoch   5 | Train Loss: 0.1936 | Acc: 0.9324
          | Val   Loss: 0.7539 | Acc: 0.8068
          | No improvement for 1 epoch(s)


                                                               

Epoch   6 | Train Loss: 0.1604 | Acc: 0.9426
          | Val   Loss: 0.8684 | Acc: 0.7918
          | No improvement for 2 epoch(s)


                                                               

Epoch   7 | Train Loss: 0.1430 | Acc: 0.9496
          | Val   Loss: 0.7463 | Acc: 0.8120
          | No improvement for 3 epoch(s)
🛑 Early stopping triggered after 3 epochs without improvement.
Training complete.


Evaluating: 100%|██████████| 79/79 [00:07<00:00, 10.83batch/s]
Measuring latency: 100%|██████████| 50/50 [00:04<00:00, 12.19it/s]


Compressed -> acc after tuning: 80.18%, latency: 78.12 ± 0.09 ms, size: 45.37MB

Compressing model with epsilon = 0.3...


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 11.55batch/s]


Old size: 23467712, New size: 7052645, Parameter count reduction: 16415067
Compressed -> acc before tuning: 27.84%


                                                               

Epoch   1 | Train Loss: 0.5560 | Acc: 0.8097
          | Val   Loss: 0.7305 | Acc: 0.7640
          | ✅ New best model saved to 'compressed_model_epsilon_0.3_best_model.pt'


                                                               

Epoch   2 | Train Loss: 0.3905 | Acc: 0.8650
          | Val   Loss: 0.6281 | Acc: 0.8052
          | ✅ New best model saved to 'compressed_model_epsilon_0.3_best_model.pt'


                                                               

Epoch   3 | Train Loss: 0.3089 | Acc: 0.8924
          | Val   Loss: 0.6043 | Acc: 0.8246
          | ✅ New best model saved to 'compressed_model_epsilon_0.3_best_model.pt'


                                                               

Epoch   4 | Train Loss: 0.2500 | Acc: 0.9128
          | Val   Loss: 0.7441 | Acc: 0.7994
          | No improvement for 1 epoch(s)


                                                               

Epoch   5 | Train Loss: 0.2092 | Acc: 0.9258
          | Val   Loss: 0.6862 | Acc: 0.8172
          | No improvement for 2 epoch(s)


                                                               

Epoch   6 | Train Loss: 0.1747 | Acc: 0.9390
          | Val   Loss: 0.6719 | Acc: 0.8220
          | No improvement for 3 epoch(s)
🛑 Early stopping triggered after 3 epochs without improvement.
Training complete.


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 11.81batch/s]
Measuring latency: 100%|██████████| 50/50 [00:03<00:00, 13.49it/s]


Compressed -> acc after tuning: 81.17%, latency: 70.42 ± 0.08 ms, size: 28.74MB

Compressing model with epsilon = 0.4...


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 11.88batch/s]


Old size: 23467712, New size: 4350693, Parameter count reduction: 19117019
Compressed -> acc before tuning: 23.57%


                                                               

Epoch   1 | Train Loss: 0.6129 | Acc: 0.7937
          | Val   Loss: 0.8399 | Acc: 0.7372
          | ✅ New best model saved to 'compressed_model_epsilon_0.4_best_model.pt'


                                                               

Epoch   2 | Train Loss: 0.4266 | Acc: 0.8524
          | Val   Loss: 0.6459 | Acc: 0.7890
          | ✅ New best model saved to 'compressed_model_epsilon_0.4_best_model.pt'


                                                               

Epoch   3 | Train Loss: 0.3436 | Acc: 0.8819
          | Val   Loss: 0.5853 | Acc: 0.8182
          | ✅ New best model saved to 'compressed_model_epsilon_0.4_best_model.pt'


                                                               

Epoch   4 | Train Loss: 0.2736 | Acc: 0.9048
          | Val   Loss: 0.6692 | Acc: 0.8118
          | No improvement for 1 epoch(s)


                                                               

Epoch   5 | Train Loss: 0.2305 | Acc: 0.9197
          | Val   Loss: 0.5705 | Acc: 0.8338
          | ✅ New best model saved to 'compressed_model_epsilon_0.4_best_model.pt'


                                                               

Epoch   6 | Train Loss: 0.1989 | Acc: 0.9312
          | Val   Loss: 1.0855 | Acc: 0.7498
          | No improvement for 1 epoch(s)


                                                               

Epoch   7 | Train Loss: 0.1701 | Acc: 0.9408
          | Val   Loss: 0.6112 | Acc: 0.8348
          | No improvement for 2 epoch(s)


                                                               

Epoch   8 | Train Loss: 0.1438 | Acc: 0.9506
          | Val   Loss: 0.8148 | Acc: 0.8114
          | No improvement for 3 epoch(s)
🛑 Early stopping triggered after 3 epochs without improvement.
Training complete.


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 11.99batch/s]
Measuring latency: 100%|██████████| 50/50 [00:03<00:00, 13.81it/s]


Compressed -> acc after tuning: 79.75%, latency: 68.78 ± 0.06 ms, size: 17.93MB

Compressing model with epsilon = 0.5...


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 12.32batch/s]


Old size: 23467712, New size: 2651109, Parameter count reduction: 20816603
Compressed -> acc before tuning: 14.76%


                                                               

Epoch   1 | Train Loss: 0.7369 | Acc: 0.7494
          | Val   Loss: 0.8405 | Acc: 0.7260
          | ✅ New best model saved to 'compressed_model_epsilon_0.5_best_model.pt'


                                                               

Epoch   2 | Train Loss: 0.5223 | Acc: 0.8220
          | Val   Loss: 0.6998 | Acc: 0.7806
          | ✅ New best model saved to 'compressed_model_epsilon_0.5_best_model.pt'


                                                               

Epoch   3 | Train Loss: 0.4289 | Acc: 0.8539
          | Val   Loss: 0.7971 | Acc: 0.7670
          | No improvement for 1 epoch(s)


                                                               

Epoch   4 | Train Loss: 0.3634 | Acc: 0.8802
          | Val   Loss: 0.7921 | Acc: 0.7742
          | No improvement for 2 epoch(s)


                                                               

Epoch   5 | Train Loss: 0.3036 | Acc: 0.8962
          | Val   Loss: 0.7270 | Acc: 0.8006
          | No improvement for 3 epoch(s)
🛑 Early stopping triggered after 3 epochs without improvement.
Training complete.


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 12.50batch/s]
Measuring latency: 100%|██████████| 50/50 [00:03<00:00, 14.47it/s]


Compressed -> acc after tuning: 78.98%, latency: 65.52 ± 0.03 ms, size: 11.14MB

Compressing model with epsilon = 0.6...


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 12.35batch/s]


Old size: 23467712, New size: 1591744, Parameter count reduction: 21875968
Compressed -> acc before tuning: 11.10%


                                                               

Epoch   1 | Train Loss: 0.9198 | Acc: 0.6764
          | Val   Loss: 0.9328 | Acc: 0.6856
          | ✅ New best model saved to 'compressed_model_epsilon_0.6_best_model.pt'


                                                               

Epoch   2 | Train Loss: 0.6555 | Acc: 0.7787
          | Val   Loss: 0.8928 | Acc: 0.7176
          | ✅ New best model saved to 'compressed_model_epsilon_0.6_best_model.pt'


                                                               

Epoch   3 | Train Loss: 0.5443 | Acc: 0.8169
          | Val   Loss: 0.6799 | Acc: 0.7898
          | ✅ New best model saved to 'compressed_model_epsilon_0.6_best_model.pt'


                                                               

Epoch   4 | Train Loss: 0.4776 | Acc: 0.8407
          | Val   Loss: 0.7771 | Acc: 0.7708
          | No improvement for 1 epoch(s)


                                                               

Epoch   5 | Train Loss: 0.4127 | Acc: 0.8583
          | Val   Loss: 0.6792 | Acc: 0.8066
          | ✅ New best model saved to 'compressed_model_epsilon_0.6_best_model.pt'


                                                               

Epoch   6 | Train Loss: 0.3661 | Acc: 0.8745
          | Val   Loss: 0.7140 | Acc: 0.8004
          | No improvement for 1 epoch(s)


                                                               

Epoch   7 | Train Loss: 0.3264 | Acc: 0.8885
          | Val   Loss: 0.6911 | Acc: 0.8106
          | No improvement for 2 epoch(s)


                                                               

Epoch   8 | Train Loss: 0.2917 | Acc: 0.9009
          | Val   Loss: 0.6338 | Acc: 0.8176
          | ✅ New best model saved to 'compressed_model_epsilon_0.6_best_model.pt'


                                                               

Epoch   9 | Train Loss: 0.2628 | Acc: 0.9100
          | Val   Loss: 0.7146 | Acc: 0.8018
          | No improvement for 1 epoch(s)


                                                                

Epoch  10 | Train Loss: 0.2380 | Acc: 0.9192
          | Val   Loss: 0.7504 | Acc: 0.8228
          | No improvement for 2 epoch(s)


                                                                

Epoch  11 | Train Loss: 0.2097 | Acc: 0.9286
          | Val   Loss: 0.7755 | Acc: 0.8248
          | No improvement for 3 epoch(s)
🛑 Early stopping triggered after 3 epochs without improvement.
Training complete.


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 12.61batch/s]
Measuring latency: 100%|██████████| 50/50 [00:03<00:00, 14.59it/s]


Compressed -> acc after tuning: 81.14%, latency: 64.47 ± 0.05 ms, size: 6.90MB

Compressing model with epsilon = 0.7...


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 12.52batch/s]


Old size: 23467712, New size: 906688, Parameter count reduction: 22561024
Compressed -> acc before tuning: 10.00%


                                                               

Epoch   1 | Train Loss: 1.3905 | Acc: 0.4367
          | Val   Loss: 1.3247 | Acc: 0.4902
          | ✅ New best model saved to 'compressed_model_epsilon_0.7_best_model.pt'


                                                               

Epoch   2 | Train Loss: 1.1162 | Acc: 0.6055
          | Val   Loss: 1.4810 | Acc: 0.5576
          | No improvement for 1 epoch(s)


                                                               

Epoch   3 | Train Loss: 0.9700 | Acc: 0.6760
          | Val   Loss: 1.1312 | Acc: 0.6742
          | ✅ New best model saved to 'compressed_model_epsilon_0.7_best_model.pt'


                                                               

Epoch   4 | Train Loss: 0.8738 | Acc: 0.7152
          | Val   Loss: 1.0535 | Acc: 0.6850
          | ✅ New best model saved to 'compressed_model_epsilon_0.7_best_model.pt'


                                                               

Epoch   5 | Train Loss: 0.7948 | Acc: 0.7456
          | Val   Loss: 0.9522 | Acc: 0.7026
          | ✅ New best model saved to 'compressed_model_epsilon_0.7_best_model.pt'


                                                               

Epoch   6 | Train Loss: 0.7348 | Acc: 0.7683
          | Val   Loss: 1.0356 | Acc: 0.7174
          | No improvement for 1 epoch(s)


                                                               

Epoch   7 | Train Loss: 0.6766 | Acc: 0.7870
          | Val   Loss: 0.8800 | Acc: 0.7318
          | ✅ New best model saved to 'compressed_model_epsilon_0.7_best_model.pt'


                                                               

Epoch   8 | Train Loss: 0.6343 | Acc: 0.8021
          | Val   Loss: 0.9950 | Acc: 0.7582
          | No improvement for 1 epoch(s)


                                                               

Epoch   9 | Train Loss: 0.5789 | Acc: 0.8192
          | Val   Loss: 0.9425 | Acc: 0.7634
          | No improvement for 2 epoch(s)


                                                                

Epoch  10 | Train Loss: 0.5412 | Acc: 0.8330
          | Val   Loss: 0.9732 | Acc: 0.7606
          | No improvement for 3 epoch(s)
🛑 Early stopping triggered after 3 epochs without improvement.
Training complete.


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 12.70batch/s]
Measuring latency: 100%|██████████| 50/50 [00:03<00:00, 14.66it/s]


Compressed -> acc after tuning: 75.46%, latency: 64.04 ± 0.06 ms, size: 4.16MB

Compressing model with epsilon = 0.8...


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 12.67batch/s]


Old size: 23467712, New size: 482213, Parameter count reduction: 22985499
Compressed -> acc before tuning: 10.00%


                                                               

Epoch   1 | Train Loss: 1.5837 | Acc: 0.3614
          | Val   Loss: 1.4618 | Acc: 0.3984
          | ✅ New best model saved to 'compressed_model_epsilon_0.8_best_model.pt'


                                                               

Epoch   2 | Train Loss: 1.3018 | Acc: 0.5002
          | Val   Loss: 2.1693 | Acc: 0.4306
          | No improvement for 1 epoch(s)


                                                               

Epoch   3 | Train Loss: 1.1738 | Acc: 0.5642
          | Val   Loss: 1.2146 | Acc: 0.5266
          | ✅ New best model saved to 'compressed_model_epsilon_0.8_best_model.pt'


                                                               

Epoch   4 | Train Loss: 1.0855 | Acc: 0.5970
          | Val   Loss: 1.1796 | Acc: 0.5838
          | ✅ New best model saved to 'compressed_model_epsilon_0.8_best_model.pt'


                                                               

Epoch   5 | Train Loss: 1.0291 | Acc: 0.6253
          | Val   Loss: 1.0823 | Acc: 0.6046
          | ✅ New best model saved to 'compressed_model_epsilon_0.8_best_model.pt'


                                                               

Epoch   6 | Train Loss: 0.9722 | Acc: 0.6462
          | Val   Loss: 1.1848 | Acc: 0.5826
          | No improvement for 1 epoch(s)


                                                               

Epoch   7 | Train Loss: 0.9174 | Acc: 0.6678
          | Val   Loss: 1.1866 | Acc: 0.6330
          | No improvement for 2 epoch(s)


                                                               

Epoch   8 | Train Loss: 0.8772 | Acc: 0.6857
          | Val   Loss: 0.9860 | Acc: 0.6580
          | ✅ New best model saved to 'compressed_model_epsilon_0.8_best_model.pt'


                                                               

Epoch   9 | Train Loss: 0.8345 | Acc: 0.7020
          | Val   Loss: 1.0079 | Acc: 0.6666
          | No improvement for 1 epoch(s)


                                                                

Epoch  10 | Train Loss: 0.7959 | Acc: 0.7177
          | Val   Loss: 1.2625 | Acc: 0.6650
          | No improvement for 2 epoch(s)


                                                                

Epoch  11 | Train Loss: 0.7693 | Acc: 0.7313
          | Val   Loss: 1.1046 | Acc: 0.6966
          | No improvement for 3 epoch(s)
🛑 Early stopping triggered after 3 epochs without improvement.
Training complete.


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 12.78batch/s]
Measuring latency: 100%|██████████| 50/50 [00:03<00:00, 14.79it/s]


Compressed -> acc after tuning: 68.11%, latency: 63.51 ± 0.07 ms, size: 2.46MB

Compressing model with epsilon = 0.9...


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 12.23batch/s]


Old size: 23467712, New size: 210624, Parameter count reduction: 23257088
Compressed -> acc before tuning: 10.00%


                                                               

Epoch   1 | Train Loss: 1.9139 | Acc: 0.1928
          | Val   Loss: 1.8109 | Acc: 0.2346
          | ✅ New best model saved to 'compressed_model_epsilon_0.9_best_model.pt'


                                                               

Epoch   2 | Train Loss: 1.7823 | Acc: 0.2393
          | Val   Loss: 2.1895 | Acc: 0.2314
          | No improvement for 1 epoch(s)


                                                               

Epoch   3 | Train Loss: 1.7274 | Acc: 0.2713
          | Val   Loss: 1.7329 | Acc: 0.2652
          | ✅ New best model saved to 'compressed_model_epsilon_0.9_best_model.pt'


                                                               

Epoch   4 | Train Loss: 1.6910 | Acc: 0.2885
          | Val   Loss: 1.6790 | Acc: 0.2984
          | ✅ New best model saved to 'compressed_model_epsilon_0.9_best_model.pt'


                                                               

Epoch   5 | Train Loss: 1.6520 | Acc: 0.3144
          | Val   Loss: 1.6842 | Acc: 0.2912
          | No improvement for 1 epoch(s)


                                                               

Epoch   6 | Train Loss: 1.6225 | Acc: 0.3276
          | Val   Loss: 1.6374 | Acc: 0.3094
          | ✅ New best model saved to 'compressed_model_epsilon_0.9_best_model.pt'


                                                               

Epoch   7 | Train Loss: 1.5886 | Acc: 0.3424
          | Val   Loss: 1.6002 | Acc: 0.3298
          | ✅ New best model saved to 'compressed_model_epsilon_0.9_best_model.pt'


                                                               

Epoch   8 | Train Loss: 1.5730 | Acc: 0.3500
          | Val   Loss: 1.5541 | Acc: 0.3568
          | ✅ New best model saved to 'compressed_model_epsilon_0.9_best_model.pt'


                                                               

Epoch   9 | Train Loss: 1.5557 | Acc: 0.3587
          | Val   Loss: 1.5770 | Acc: 0.3556
          | No improvement for 1 epoch(s)


                                                                

Epoch  10 | Train Loss: 1.5352 | Acc: 0.3704
          | Val   Loss: 1.5570 | Acc: 0.3484
          | No improvement for 2 epoch(s)


                                                                

Epoch  11 | Train Loss: 1.5219 | Acc: 0.3750
          | Val   Loss: 1.6016 | Acc: 0.3384
          | No improvement for 3 epoch(s)
🛑 Early stopping triggered after 3 epochs without improvement.
Training complete.


Evaluating: 100%|██████████| 79/79 [00:06<00:00, 12.38batch/s]
Measuring latency: 100%|██████████| 50/50 [00:03<00:00, 14.22it/s]


Compressed -> acc after tuning: 35.11%, latency: 66.13 ± 0.06 ms, size: 1.37MB
