### Imports

In [1]:
import torch
import torch.nn as nn
import torch.functional as F
from data.dataset import (
    cifar10_trainloader,
    ciaf10_testloader,
    cifar100_trainloader,
    ciaf100_testloader,
)
from pruning.GraSP import saliency_scores, rank_by_saliency, apply_mask

  import pynvml  # type: ignore[import]


### Model and Dataloading

In [2]:
# Build two VGG16-BN networks from the torch.hub repo without pretrained weights
# (pretrained=False): one CIFAR-10 variant and one CIFAR-100 variant.
model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_vgg16_bn", pretrained=False)
model100 = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar100_vgg16_bn", pretrained=False)
CEloss = nn.CrossEntropyLoss()
# NOTE(review): this optimizer tracks the parameters of `model` only; it must be
# rebuilt whenever `model` is rebound to a different network.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-6)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Only `model` is moved to the device in this cell; `model100` is moved in the
# CIFAR-100 pruning cell.
model = model.to(device)
# NOTE(review): no later cell visible in this notebook reads this global `epochs`.
epochs = 5

Using cache found in C:\Users\Fatim_Sproj/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master
Using cache found in C:\Users\Fatim_Sproj/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master


In [3]:
# CIFAR-10 loaders use the helpers' default arguments; the CIFAR-100 loaders
# explicitly request batch_size=256.
# `ciaf10_testloader` / `ciaf100_testloader` are the names as imported from
# data.dataset in the imports cell (spelling kept as-is).
train10 = cifar10_trainloader()
test10= ciaf10_testloader()
train100 = cifar100_trainloader(batch_size=256)
test100 =  ciaf100_testloader(batch_size=256)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


### Training until 20% accuracy

In [4]:

from torch.utils.data import Subset
import random

# Seed Python's global RNG so the same 500 training indices are sampled each run.
random.seed(42)
indices = random.sample(range(len(train10.dataset)), 500)
subset_500 = Subset(train10.dataset, indices)
# This small loader is what the pruning cell passes to saliency_scores.
# NOTE(review): shuffle=True takes its batch order from torch's RNG, which is not
# seeded here, so batch composition can differ between runs.
subset_loader = torch.utils.data.DataLoader(subset_500, batch_size=64, shuffle=True)


In [5]:
def train_until(model, loss_fn, target_acc, train_loader, test_loader, device, epochs=50, optimizer=None):
    """Train ``model`` until validation accuracy reaches ``target_acc`` or ``epochs`` run out.

    Args:
        model: network to train; it is updated in place.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        target_acc: accuracy in percent; training stops after the first epoch
            whose validation accuracy is >= this value.
        train_loader: iterable of ``(inputs, targets)`` training batches.
        test_loader: iterable of ``(inputs, targets)`` batches used for validation.
        device: device the batches are moved to before the forward pass.
        epochs: maximum number of epochs to run.
        optimizer: optimizer to step. When omitted, an Adam optimizer over
            ``model.parameters()`` with lr=1e-6 is created, so the function no
            longer depends on a notebook-global ``optimizer`` that may be bound
            to a different model.

    Returns:
        The same ``model`` object that was passed in.
    """
    if optimizer is None:
        # Same settings as the notebook's setup cell, but always tied to the
        # model that is actually being trained here.
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-6)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        for (x, y) in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = loss_fn(outputs, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for (x_val, y_val) in test_loader:
                x_val, y_val = x_val.to(device), y_val.to(device)
                preds = model(x_val)
                predicted = preds.argmax(dim=1)
                correct += (predicted == y_val).sum().item()
                total += y_val.size(0)

        # Guard the divisions so an empty loader reports 0 instead of raising.
        val_acc = 100 * correct / total if total > 0 else 0.0
        avg_loss = running_loss / num_batches if num_batches > 0 else 0.0
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f} | Val Acc: {val_acc:.2f}%")
        if val_acc >= target_acc:
            print(f"Stopping early at epoch {epoch+1} (val acc = {val_acc:.2f}%)")
            break
    return model


In [6]:
def train_until_masked(model, loss_fn, optimizer, target_acc, train_loader, test_loader, device, mask_dict, epochs=50):
    """Train ``model`` while re-applying a pruning mask after every optimizer step.

    Args:
        model: network to train; it is updated in place.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        optimizer: optimizer stepping the parameters of ``model``.
        target_acc: accuracy in percent; training stops after the first epoch
            whose validation accuracy is >= this value.
        train_loader: iterable of ``(inputs, targets)`` training batches.
        test_loader: iterable of ``(inputs, targets)`` batches used for validation.
        device: device the batches are moved to before the forward pass.
        mask_dict: mapping from parameter name (as yielded by
            ``model.named_parameters()``) to a multiplicative mask, or ``None``
            to train without masking.
        epochs: maximum number of epochs to run.

    Returns:
        The same ``model`` object that was passed in.
    """
    # Resolve the (parameter, mask) pairs once. `None` means "no mask" instead
    # of failing on `name in mask_dict`, and each mask is aligned to its
    # parameter's device/dtype so a mask loaded elsewhere still multiplies.
    masked_params = []
    if mask_dict is not None:
        for name, param in model.named_parameters():
            if name in mask_dict:
                mask = torch.as_tensor(mask_dict[name], device=param.device, dtype=param.dtype)
                masked_params.append((param, mask))

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0

        for (x, y) in train_loader:
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()
            outputs = model(x)
            loss = loss_fn(outputs, y)
            loss.backward()
            optimizer.step()
            # The step may have written non-zero values into masked positions;
            # multiply by the mask again to reset them.
            with torch.no_grad():
                for param, mask in masked_params:
                    param.mul_(mask)

            running_loss += loss.item()
            num_batches += 1

        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for (x_val, y_val) in test_loader:
                x_val, y_val = x_val.to(device), y_val.to(device)
                preds = model(x_val)
                predicted = preds.argmax(dim=1)
                correct += (predicted == y_val).sum().item()
                total += y_val.size(0)

        # Guard the divisions so an empty loader reports 0 instead of raising.
        val_acc = 100 * correct / total if total > 0 else 0.0
        avg_loss = running_loss / num_batches if num_batches > 0 else 0.0
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f} | Val Acc: {val_acc:.2f}%")
        if val_acc >= target_acc:
            print(f"Stopping early at epoch {epoch+1} (val acc = {val_acc:.2f}%)")
            break
    return model


### GraSP pruning (CIFAR-10)

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Pruning schedule ---
initial_target_acc = 20               # stage-0 training stops at this validation accuracy (%)
final_target_sparsity = 0.8           # sparsity requested at the last stage
stage_fractions = [0.5, 0.75, 1.0]    # each stage targets fraction * final_target_sparsity
target_accuracies = [40, 60]          # retraining goals (%) for every stage except the last
epochs_per_stage = 50

# Every stage but the final one is retrained, so each needs an accuracy target.
# Fail now rather than with an IndexError after earlier stages have finished.
assert len(target_accuracies) >= len(stage_fractions) - 1, \
    "target_accuracies needs one entry per retrained stage"

CEloss = torch.nn.CrossEntropyLoss()

print("\n[Stage 0] Training randomly initialized model to ~20% accuracy")
# NOTE(review): no optimizer is passed here, so which optimizer gets stepped
# depends on the `train_until` definition currently live in the kernel. The
# CIFAR-100 section redefines `train_until` with a required `optimizer`
# argument; re-run the CIFAR-10 definition cell before re-running this one.
# NOTE(review): test10 is used as the early-stopping signal throughout.
model = train_until(
    model=model,
    loss_fn=CEloss,
    target_acc=initial_target_acc,
    train_loader=train10,
    test_loader=test10,
    device=device,
    epochs=epochs_per_stage
)
torch.save(model.state_dict(), "stage0_trained_model.pt")
print("Saved Stage 0 trained model.\n")

current_mask = None
current_sparsity = 0.0

for stage_idx, fraction in enumerate(stage_fractions):
    target_sparsity = fraction * final_target_sparsity
    print(f"\n=== Pruning Stage {stage_idx + 1} ===")
    print(f"Target sparsity: {target_sparsity*100:.1f}%")

    # Stage k starts from the checkpoint written at the end of stage k-1
    # (stage 0 is the initial training run above). The former conditional
    # produced this same filename in both branches.
    prev_stage_model = f"stage{stage_idx}_trained_model.pt"
    # weights_only=True restricts unpickling to tensors/containers, which is
    # all a state_dict holds, and silences torch.load's FutureWarning.
    model.load_state_dict(torch.load(prev_stage_model, map_location=device, weights_only=True))
    model.to(device)

    print("→ Computing saliency scores...")
    scores = saliency_scores(model, subset_loader, device, CEloss)

    mask, thresh = rank_by_saliency(
        scores=scores,
        current_mask=current_mask,
        current_sparsity=current_sparsity,
        target_sparsity=target_sparsity
    )

    if mask is not None:
        apply_mask(model, mask)
        print(f"→ Applied pruning mask up to {target_sparsity*100:.1f}% sparsity (threshold={thresh}).")

    # Reset BatchNorm running statistics after the weights were changed by the mask.
    for m in model.modules():
        if isinstance(m, torch.nn.BatchNorm2d):
            m.reset_running_stats()

    # The last stage is saved right after pruning and is not retrained here.
    if stage_idx < len(stage_fractions) - 1:
        optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
        model = train_until_masked(
            model=model,
            loss_fn=CEloss,
            optimizer=optimizer,
            target_acc=target_accuracies[stage_idx],
            train_loader=train10,
            test_loader=test10,
            device=device,
            mask_dict=mask if mask is not None else current_mask,
            epochs=epochs_per_stage
        )

    torch.save(model.state_dict(), f"stage{stage_idx+1}_trained_model.pt")
    if mask is not None:
        torch.save(mask, f"stage{stage_idx+1}_mask.pt")
        current_mask = mask

    current_sparsity = target_sparsity
    print(f"Stage {stage_idx + 1} complete and saved.")

print("\n=== Final Profiling ===")
model.load_state_dict(torch.load(f"stage{len(stage_fractions)}_trained_model.pt", map_location=device, weights_only=True))



[Stage 0] Training randomly initialized model to ~20% accuracy
Epoch [1/50] - Loss: 2.3017 | Val Acc: 16.74%
Epoch [2/50] - Loss: 2.2971 | Val Acc: 20.46%
Stopping early at epoch 2 (val acc = 20.46%)
Saved Stage 0 trained model.


=== Pruning Stage 1 ===
Target sparsity: 40.0%
→ Computing saliency scores...


  model.load_state_dict(torch.load(prev_stage_model, map_location=device))


[rank_by_saliency] Pruned 6101431 / 15253578 (target 6101431). Kept 9152147 params. threshold=5.49452e-07
→ Applied pruning mask up to 40.0% sparsity (threshold=5.494516130966076e-07).
Epoch [1/50] - Loss: 1.6394 | Val Acc: 48.50%
Stopping early at epoch 1 (val acc = 48.50%)
Stage 1 complete and saved.

=== Pruning Stage 2 ===
Target sparsity: 60.0%
→ Computing saliency scores...
[rank_by_saliency] Pruned 9152148 / 15253578 (target 9152147). Kept 6101430 params. threshold=0.000128909
→ Applied pruning mask up to 60.0% sparsity (threshold=0.00012890863581560552).
Epoch [1/50] - Loss: 1.1848 | Val Acc: 65.23%
Stopping early at epoch 1 (val acc = 65.23%)
Stage 2 complete and saved.

=== Pruning Stage 3 ===
Target sparsity: 80.0%
→ Computing saliency scores...
[rank_by_saliency] Pruned 12202863 / 15253578 (target 12202862). Kept 3050715 params. threshold=0.000803289
→ Applied pruning mask up to 80.0% sparsity (threshold=0.000803289411123842).
Stage 3 complete and saved.

=== Final Profilin

  model.load_state_dict(torch.load(f"stage{len(stage_fractions)}_trained_model.pt", map_location=device))


<All keys matched successfully>

In [27]:
import os
import torch
from torch import nn, optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Rebinds the global `model` to a fresh CIFAR-10 VGG16-BN and loads the
# stage-3 pruned weights into it.
model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_vgg16_bn", pretrained=False)
# NOTE(review): absolute, machine-specific path. The pruning cell writes
# "stage3_trained_model.pt" relative to the working directory, so the file was
# presumably moved into intermediate_models by hand — confirm.
modelpath = r"C:\Users\Fatim_Sproj\Desktop\Fatim\Spring 2025\aiedge\Pruning\intermediate_models\stage3_trained_model.pt"
model.load_state_dict(torch.load(modelpath, map_location=device))
model.to(device)

# Re-apply the final pruning mask if its file exists; otherwise fine-tune unmasked.
try:
    maskpath = r"C:\Users\Fatim_Sproj\Desktop\Fatim\Spring 2025\aiedge\Pruning\intermediate_models\stage3_mask.pt"
    final_mask = torch.load(maskpath, map_location=device)
    from pruning.GraSP import apply_mask
    apply_mask(model, final_mask)
    print("Applied final pruning mask before fine-tuning.")
except FileNotFoundError:
    # Only a missing mask file is tolerated; any other load error propagates.
    final_mask = None
    print("No final mask found. Continuing without reapplying mask.")

# Clear BatchNorm running statistics before fine-tuning.
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        m.reset_running_stats()

CEloss = nn.CrossEntropyLoss()
# Rebinds the global `optimizer` to the model loaded in this cell.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Evaluated once as the default of finetune's `best_ckpt_path` parameter when
# that function is defined below.
best_ckpt_path = "best_finetuned_model10.pt"

def evaluate(model, loader, device):
    """Return the top-1 accuracy of ``model`` over ``loader``, in percent.

    The model is switched to eval mode and left there. Each batch is moved to
    ``device`` before the forward pass, and gradients are disabled throughout.

    Args:
        model: network to evaluate.
        loader: iterable of ``(inputs, targets)`` batches.
        device: device the batches are moved to.

    Returns:
        Accuracy as a float in [0, 100]; 0.0 when ``loader`` yields no samples.
    """
    model.eval()
    n_seen, n_hit = 0, 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Index of the largest logit along dim 1 is the predicted class.
            guesses = model(inputs).max(1).indices
            n_seen += targets.size(0)
            n_hit += (guesses == targets).sum().item()
    if n_seen > 0:
        return 100.0 * n_hit / n_seen
    return 0.0

def finetune(model, train_loader, test_loader, loss_fn, optimizer, epochs, device, mask=None, best_ckpt_path=best_ckpt_path):
    """Fine-tune ``model`` for ``epochs`` epochs, checkpointing on every accuracy improvement.

    When ``mask`` is given it is passed to ``apply_mask(model, mask)`` after
    each optimizer step and once more just before a checkpoint is written.
    After every epoch the model is scored with ``evaluate`` on ``test_loader``;
    a strictly better score saves a checkpoint dict (epoch, model state,
    optimizer state, best accuracy) to ``best_ckpt_path``.

    Args:
        model: network to fine-tune; updated in place.
        train_loader: iterable of ``(inputs, targets)`` training batches.
        test_loader: iterable of ``(inputs, targets)`` batches used for scoring.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        optimizer: optimizer stepping the parameters of ``model``.
        epochs: number of epochs to run (epochs are numbered from 1).
        device: device the batches are moved to.
        mask: optional mask object forwarded to ``apply_mask``.
        best_ckpt_path: file path the best checkpoint is written to.

    Returns:
        Tuple ``(best_epoch, best_acc)``; ``(-1, -1.0)`` if no epoch ran.
    """
    has_mask = mask is not None
    best_acc, best_epoch = -1.0, -1

    for epoch in range(1, epochs + 1):
        model.train()
        loss_sum, steps = 0.0, 0

        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            step_loss = loss_fn(model(inputs), targets)
            step_loss.backward()
            optimizer.step()

            if has_mask:
                apply_mask(model, mask)

            loss_sum += step_loss.item()
            steps += 1

        # Mean loss over the batches actually seen (0.0 for an empty loader).
        avg_loss = loss_sum / steps if steps > 0 else 0.0
        test_acc = evaluate(model, test_loader, device)

        print(f"Epoch [{epoch}/{epochs}] - Train Loss: {avg_loss:.4f} - Test Acc: {test_acc:.2f}%")

        # Nothing to save unless this epoch strictly beats the best so far.
        if not test_acc > best_acc:
            continue

        best_acc, best_epoch = test_acc, epoch

        if has_mask:
            apply_mask(model, mask)

        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_acc': best_acc,
            },
            best_ckpt_path,
        )
        print(f"--> New best model saved (epoch {epoch}, acc {best_acc:.2f}%) to: {best_ckpt_path}")

    print(f"Finished fine-tuning. Best epoch: {best_epoch} with Test Acc: {best_acc:.2f}%")
    return best_epoch, best_acc

# Fine-tune the pruned CIFAR-10 model for 50 epochs; the best-scoring epoch is
# checkpointed to best_ckpt_path from inside finetune.
best_epoch, best_acc = finetune(
    model=model,
    train_loader=train10,
    test_loader=test10,
    loss_fn=CEloss,
    optimizer=optimizer,
    epochs=50,
    device=device,
    mask=final_mask,
    best_ckpt_path=best_ckpt_path
)

# This saves the weights as they are after the last epoch, which is not
# necessarily the best-accuracy epoch stored in the checkpoint above.
final_state_path = "finetuned_model10.pt"
if final_mask is not None:
    apply_mask(model, final_mask)
torch.save(model.state_dict(), final_state_path)
print(f"Final fine-tuned model saved to: {final_state_path}")

Using cache found in C:\Users\Fatim_Sproj/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master
  model.load_state_dict(torch.load(modelpath, map_location=device))
  final_mask = torch.load(maskpath, map_location=device)


Applied final pruning mask before fine-tuning.
Epoch [1/50] - Train Loss: 0.9047 - Test Acc: 72.73%
--> New best model saved (epoch 1, acc 72.73%) to: best_finetuned_model10.pt
Epoch [2/50] - Train Loss: 0.7952 - Test Acc: 73.83%
--> New best model saved (epoch 2, acc 73.83%) to: best_finetuned_model10.pt
Epoch [3/50] - Train Loss: 0.7260 - Test Acc: 76.22%
--> New best model saved (epoch 3, acc 76.22%) to: best_finetuned_model10.pt
Epoch [4/50] - Train Loss: 0.6764 - Test Acc: 78.53%
--> New best model saved (epoch 4, acc 78.53%) to: best_finetuned_model10.pt
Epoch [5/50] - Train Loss: 0.6341 - Test Acc: 78.92%
--> New best model saved (epoch 5, acc 78.92%) to: best_finetuned_model10.pt
Epoch [6/50] - Train Loss: 0.5963 - Test Acc: 80.01%
--> New best model saved (epoch 6, acc 80.01%) to: best_finetuned_model10.pt
Epoch [7/50] - Train Loss: 0.5578 - Test Acc: 80.54%
--> New best model saved (epoch 7, acc 80.54%) to: best_finetuned_model10.pt
Epoch [8/50] - Train Loss: 0.5309 - Test Ac

### CIFAR-100

In [29]:

from torch.utils.data import Subset
import random

# Seed Python's global RNG so the same 500 CIFAR-100 training indices are sampled each run.
random.seed(42)
# Rebinds the globals `indices`, `subset_500` and `subset_loader` that the
# CIFAR-10 section defined earlier.
indices = random.sample(range(len(train100.dataset)), 500)
subset_500 = Subset(train100.dataset, indices)
# NOTE(review): shuffle=True takes its batch order from torch's RNG, which is not
# seeded here, so batch composition can differ between runs.
subset_loader = torch.utils.data.DataLoader(subset_500, batch_size=64, shuffle=True)


In [30]:
def train_until(model, loss_fn, optimizer, target_acc, train_loader, test_loader, device, epochs=50):
    """Run train/validate epochs until ``target_acc`` is reached or ``epochs`` is exhausted.

    Each epoch performs one optimisation pass over ``train_loader`` followed by
    an accuracy measurement over ``test_loader``. The loop exits after the first
    epoch whose validation accuracy (in percent) is >= ``target_acc``.

    Args:
        model: network to train; updated in place.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        optimizer: optimizer stepping the parameters of ``model``.
        target_acc: early-stopping threshold, in percent.
        train_loader: sized iterable of ``(inputs, targets)`` training batches.
        test_loader: iterable of ``(inputs, targets)`` validation batches.
        device: device the batches are moved to.
        epochs: maximum number of epochs.

    Returns:
        The same ``model`` object that was passed in.
    """
    for epoch in range(epochs):
        # ---- optimisation pass ----
        model.train()
        loss_total = 0.0
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            batch_loss = loss_fn(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()
            loss_total += batch_loss.item()

        # ---- validation pass ----
        model.eval()
        hits = 0
        seen = 0
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                guesses = model(inputs).argmax(dim=1)
                hits += (guesses == targets).sum().item()
                seen += targets.size(0)

        val_acc = 100 * hits / seen
        avg_loss = loss_total / len(train_loader)
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f} | Val Acc: {val_acc:.2f}%")
        if val_acc >= target_acc:
            print(f"Stopping early at epoch {epoch+1} (val acc = {val_acc:.2f}%)")
            break
    return model

def train_until_masked(model, loss_fn, optimizer, target_acc, train_loader, test_loader, device, mask_dict, epochs=50):
    """Train ``model`` while re-applying a pruning mask after every optimizer step.

    Args:
        model: network to train; it is updated in place.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        optimizer: optimizer stepping the parameters of ``model``.
        target_acc: accuracy in percent; training stops after the first epoch
            whose validation accuracy is >= this value.
        train_loader: iterable of ``(inputs, targets)`` training batches.
        test_loader: iterable of ``(inputs, targets)`` batches used for validation.
        device: device the batches are moved to before the forward pass.
        mask_dict: mapping from parameter name (as yielded by
            ``model.named_parameters()``) to a multiplicative mask, or ``None``
            to train without masking.
        epochs: maximum number of epochs to run.

    Returns:
        The same ``model`` object that was passed in.
    """
    # Resolve the (parameter, mask) pairs once. `None` means "no mask" instead
    # of failing on `name in mask_dict`, and each mask is aligned to its
    # parameter's device/dtype so a mask loaded elsewhere still multiplies.
    masked_params = []
    if mask_dict is not None:
        for name, param in model.named_parameters():
            if name in mask_dict:
                mask = torch.as_tensor(mask_dict[name], device=param.device, dtype=param.dtype)
                masked_params.append((param, mask))

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        for (x, y) in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = loss_fn(outputs, y)
            loss.backward()
            optimizer.step()
            # The step may have written non-zero values into masked positions;
            # multiply by the mask again to reset them.
            with torch.no_grad():
                for param, mask in masked_params:
                    param.mul_(mask)

            running_loss += loss.item()
            num_batches += 1

        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for (x_val, y_val) in test_loader:
                x_val, y_val = x_val.to(device), y_val.to(device)
                preds = model(x_val)
                predicted = preds.argmax(dim=1)
                correct += (predicted == y_val).sum().item()
                total += y_val.size(0)

        # Guard the divisions so an empty loader reports 0 instead of raising.
        val_acc = 100 * correct / total if total > 0 else 0.0
        avg_loss = running_loss / num_batches if num_batches > 0 else 0.0
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f} | Val Acc: {val_acc:.2f}%")
        if val_acc >= target_acc:
            print(f"Stopping early at epoch {epoch+1} (val acc = {val_acc:.2f}%)")
            break
    return model

In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model100 = model100.to(device)

# --- Pruning schedule ---
initial_target_acc = 50               # stage-0 training stops at this validation accuracy (%)
final_target_sparsity = 0.7           # sparsity requested at the last stage
stage_fractions = [0.4, 0.65, 1.0]    # each stage targets fraction * final_target_sparsity
target_accuracies = [60, 70]          # retraining goals (%) for every stage except the last
epochs_per_stage = 50

# Every stage but the final one is retrained, so each needs an accuracy target.
# Fail now rather than with an IndexError after earlier stages have finished.
assert len(target_accuracies) >= len(stage_fractions) - 1, \
    "target_accuracies needs one entry per retrained stage"

CEloss = torch.nn.CrossEntropyLoss()

optimizer_100 = torch.optim.Adam(model100.parameters(), lr=1e-5)

# The message is derived from initial_target_acc; it used to say "~20%" while
# the actual stopping threshold above is 50.
# NOTE(review): the recorded output shows 60.84% after a single epoch, which
# suggests model100 already carried trained weights in the kernel — confirm by
# re-running from a fresh kernel.
print(f"\n[Stage 0] Training CIFAR-100 model to ~{initial_target_acc}% accuracy")
model100 = train_until(
    model=model100,
    loss_fn=CEloss,
    optimizer=optimizer_100,
    target_acc=initial_target_acc,
    train_loader=train100,
    test_loader=test100,
    device=device,
    epochs=epochs_per_stage
)
torch.save(model100.state_dict(), "stage0_trained_model100.pt")
print("Saved Stage 0 trained model for CIFAR-100.\n")

current_mask = None
current_sparsity = 0.0

for stage_idx, fraction in enumerate(stage_fractions):
    target_sparsity = fraction * final_target_sparsity
    print(f"\n=== Pruning Stage {stage_idx + 1} ===")
    print(f"Target sparsity: {target_sparsity*100:.1f}%")

    # Stage k starts from the checkpoint written at the end of stage k-1
    # (stage 0 is the initial training run above). The former conditional
    # produced this same filename in both branches.
    prev_stage_model = f"stage{stage_idx}_trained_model100.pt"
    # weights_only=True restricts unpickling to tensors/containers, which is
    # all a state_dict holds, and silences torch.load's FutureWarning.
    model100.load_state_dict(torch.load(prev_stage_model, map_location=device, weights_only=True))
    model100.to(device)

    print("→ Computing saliency scores...")
    scores = saliency_scores(model100, subset_loader, device, CEloss)

    mask, thresh = rank_by_saliency(
        scores=scores,
        current_mask=current_mask,
        current_sparsity=current_sparsity,
        target_sparsity=target_sparsity
    )

    if mask is not None:
        apply_mask(model100, mask)
        print(f"→ Applied pruning mask up to {target_sparsity*100:.1f}% sparsity (threshold={thresh}).")

    # Reset BatchNorm running statistics after the weights were changed by the mask.
    for m in model100.modules():
        if isinstance(m, torch.nn.BatchNorm2d):
            m.reset_running_stats()

    # The last stage is saved right after pruning and is not retrained here.
    if stage_idx < len(stage_fractions) - 1:
        optimizer_stage = torch.optim.Adam(model100.parameters(), lr=1e-4)
        model100 = train_until_masked(
            model=model100,
            loss_fn=CEloss,
            optimizer=optimizer_stage,
            target_acc=target_accuracies[stage_idx],
            train_loader=train100,
            test_loader=test100,
            device=device,
            mask_dict=mask if mask is not None else current_mask,
            epochs=epochs_per_stage
        )
    torch.save(model100.state_dict(), f"stage{stage_idx+1}_trained_model100.pt")
    if mask is not None:
        torch.save(mask, f"stage{stage_idx+1}_mask100.pt")
        current_mask = mask
    current_sparsity = target_sparsity
    print(f"Stage {stage_idx + 1} complete and saved.")
print("\n=== Final Profiling ===\n")
model100.load_state_dict(torch.load(f"stage{len(stage_fractions)}_trained_model100.pt", map_location=device, weights_only=True))



[Stage 0] Training CIFAR-100 model to ~20% accuracy
Epoch [1/50] - Loss: 0.3833 | Val Acc: 60.84%
Stopping early at epoch 1 (val acc = 60.84%)
Saved Stage 0 trained model for CIFAR-100.


=== Pruning Stage 1 ===
Target sparsity: 28.0%
→ Computing saliency scores...


  model100.load_state_dict(torch.load(prev_stage_model, map_location=device))


[rank_by_saliency] Pruned 4283929 / 15299748 (target 4283929). Kept 11015819 params. threshold=2.21005e-05
→ Applied pruning mask up to 28.0% sparsity (threshold=2.2100510250311345e-05).
Epoch [1/50] - Loss: 0.5293 | Val Acc: 57.32%
Epoch [2/50] - Loss: 0.5029 | Val Acc: 56.45%
Epoch [3/50] - Loss: 0.4974 | Val Acc: 58.46%
Epoch [4/50] - Loss: 0.4696 | Val Acc: 57.89%
Epoch [5/50] - Loss: 0.4567 | Val Acc: 58.67%
Epoch [6/50] - Loss: 0.4467 | Val Acc: 57.95%
Epoch [7/50] - Loss: 0.4358 | Val Acc: 58.14%
Epoch [8/50] - Loss: 0.4184 | Val Acc: 57.90%
Epoch [9/50] - Loss: 0.4161 | Val Acc: 58.83%
Epoch [10/50] - Loss: 0.3913 | Val Acc: 58.91%
Epoch [11/50] - Loss: 0.3923 | Val Acc: 58.69%
Epoch [12/50] - Loss: 0.3953 | Val Acc: 58.01%
Epoch [13/50] - Loss: 0.3807 | Val Acc: 58.40%
Epoch [14/50] - Loss: 0.3671 | Val Acc: 58.38%
Epoch [15/50] - Loss: 0.3660 | Val Acc: 58.47%
Epoch [16/50] - Loss: 0.3633 | Val Acc: 58.38%
Epoch [17/50] - Loss: 0.3494 | Val Acc: 59.71%
Epoch [18/50] - Loss: 0

  model100.load_state_dict(torch.load(f"stage{len(stage_fractions)}_trained_model100.pt", map_location=device))


<All keys matched successfully>

In [31]:
import os
import torch
from torch import nn, optim


# Rebinds the global `model` (previously the CIFAR-10 network) to a fresh
# CIFAR-100 VGG16-BN and loads the stage-3 pruned weights into it.
# NOTE(review): `device` is not defined in this cell; it must already exist in
# the kernel from an earlier cell.
model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar100_vgg16_bn", pretrained=False)
# NOTE(review): absolute, machine-specific path. The pruning cell writes
# "stage3_trained_model100.pt" relative to the working directory, so the file
# was presumably moved into intermediate_models by hand — confirm.
modelpath = r"C:\Users\Fatim_Sproj\Desktop\Fatim\Spring 2025\aiedge\Pruning\intermediate_models\stage3_trained_model100.pt"
model.load_state_dict(torch.load(modelpath, map_location=device))
model.to(device)

# Re-apply the final pruning mask if its file exists; otherwise fine-tune unmasked.
try:
    maskpath = r"C:\Users\Fatim_Sproj\Desktop\Fatim\Spring 2025\aiedge\Pruning\intermediate_models\stage3_mask100.pt"
    final_mask = torch.load(maskpath, map_location=device)
    from pruning.GraSP import apply_mask
    apply_mask(model, final_mask)
    print("Applied final pruning mask before fine-tuning.")
except FileNotFoundError:
    # Only a missing mask file is tolerated; any other load error propagates.
    final_mask = None
    print("No final mask found. Continuing without reapplying mask.")

# Clear BatchNorm running statistics before fine-tuning.
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        m.reset_running_stats()

CEloss = nn.CrossEntropyLoss()
# Learning rate here is 1e-3, ten times the 1e-4 used by the CIFAR-10 fine-tune cell.
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Evaluated once as the default of finetune's `best_ckpt_path` parameter when
# that function is (re)defined below.
best_ckpt_path = "best_finetuned_model100.pt"

def evaluate(model, loader, device):
    """Compute top-1 accuracy (percent) of ``model`` on the batches in ``loader``.

    Puts the model in eval mode (and leaves it there), moves each batch to
    ``device``, and counts predictions with gradients disabled.

    Args:
        model: network to evaluate.
        loader: iterable of ``(inputs, targets)`` batches.
        device: device the batches are moved to.

    Returns:
        Accuracy as a float in [0, 100]; 0.0 when no samples were seen.
    """
    model.eval()
    sample_count = 0
    hit_count = 0
    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            logits = model(batch_x)
            # max over dim 1 yields (values, indices); the indices are the class guesses.
            top_class = logits.max(1).indices
            sample_count += batch_y.size(0)
            hit_count += (top_class == batch_y).sum().item()
    if sample_count <= 0:
        return 0.0
    return 100.0 * hit_count / sample_count

def finetune(model, train_loader, test_loader, loss_fn, optimizer, epochs, device, mask=None, best_ckpt_path=best_ckpt_path):
    """Fine-tune ``model`` and keep a checkpoint of the best-scoring epoch.

    For each of ``epochs`` epochs (numbered from 1) the model is trained over
    ``train_loader`` and then scored with ``evaluate`` on ``test_loader``. When
    ``mask`` is given, ``apply_mask(model, mask)`` is called after every
    optimizer step and again right before a checkpoint is written. A strictly
    higher score writes a dict with the epoch, model state, optimizer state and
    accuracy to ``best_ckpt_path``.

    Args:
        model: network to fine-tune; updated in place.
        train_loader: iterable of ``(inputs, targets)`` training batches.
        test_loader: iterable of ``(inputs, targets)`` batches used for scoring.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        optimizer: optimizer stepping the parameters of ``model``.
        epochs: number of epochs to run.
        device: device the batches are moved to.
        mask: optional mask object forwarded to ``apply_mask``.
        best_ckpt_path: file path the best checkpoint is written to.

    Returns:
        Tuple ``(best_epoch, best_acc)``; ``(-1, -1.0)`` if no epoch ran.
    """
    masked = mask is not None
    best_epoch = -1
    best_acc = -1.0

    for epoch in range(1, epochs + 1):
        model.train()
        batch_losses = 0.0
        batch_count = 0

        for features, labels in train_loader:
            features = features.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            logits = model(features)
            current_loss = loss_fn(logits, labels)
            current_loss.backward()
            optimizer.step()

            if masked:
                apply_mask(model, mask)

            batch_losses += current_loss.item()
            batch_count += 1

        # Mean loss over the batches actually seen (0.0 for an empty loader).
        if batch_count > 0:
            avg_loss = batch_losses / batch_count
        else:
            avg_loss = 0.0

        test_acc = evaluate(model, test_loader, device)

        print(f"Epoch [{epoch}/{epochs}] - Train Loss: {avg_loss:.4f} - Test Acc: {test_acc:.2f}%")

        improved = test_acc > best_acc
        if improved:
            best_epoch = epoch
            best_acc = test_acc

            if masked:
                apply_mask(model, mask)

            snapshot = dict([
                ('epoch', epoch),
                ('model_state_dict', model.state_dict()),
                ('optimizer_state_dict', optimizer.state_dict()),
                ('best_acc', best_acc),
            ])
            torch.save(snapshot, best_ckpt_path)
            print(f"--> New best model saved (epoch {epoch}, acc {best_acc:.2f}%) to: {best_ckpt_path}")

    print(f"Finished fine-tuning. Best epoch: {best_epoch} with Test Acc: {best_acc:.2f}%")
    return best_epoch, best_acc

# Fine-tune the pruned CIFAR-100 model for 80 epochs; the best-scoring epoch is
# checkpointed to best_ckpt_path from inside finetune.
best_epoch, best_acc = finetune(
    model=model,
    train_loader=train100,
    test_loader=test100,
    loss_fn=CEloss,
    optimizer=optimizer,
    epochs=80,
    device=device,
    mask=final_mask,
    best_ckpt_path=best_ckpt_path
)

# This saves the weights as they are after the last epoch, which is not
# necessarily the best-accuracy epoch stored in the checkpoint above.
final_state_path = "finetuned_model100.pt"
if final_mask is not None:
    apply_mask(model, final_mask)
torch.save(model.state_dict(), final_state_path)
print(f"Final fine-tuned model saved to: {final_state_path}")

Using cache found in C:\Users\Fatim_Sproj/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master
  model.load_state_dict(torch.load(modelpath, map_location=device))
  final_mask = torch.load(maskpath, map_location=device)


Applied final pruning mask before fine-tuning.
Epoch [1/80] - Train Loss: 2.7483 - Test Acc: 36.06%
--> New best model saved (epoch 1, acc 36.06%) to: best_finetuned_model100.pt
Epoch [2/80] - Train Loss: 2.4600 - Test Acc: 40.27%
--> New best model saved (epoch 2, acc 40.27%) to: best_finetuned_model100.pt
Epoch [3/80] - Train Loss: 2.3003 - Test Acc: 41.16%
--> New best model saved (epoch 3, acc 41.16%) to: best_finetuned_model100.pt
Epoch [4/80] - Train Loss: 2.1902 - Test Acc: 43.07%
--> New best model saved (epoch 4, acc 43.07%) to: best_finetuned_model100.pt
Epoch [5/80] - Train Loss: 2.1164 - Test Acc: 43.83%
--> New best model saved (epoch 5, acc 43.83%) to: best_finetuned_model100.pt
Epoch [6/80] - Train Loss: 2.0095 - Test Acc: 44.61%
--> New best model saved (epoch 6, acc 44.61%) to: best_finetuned_model100.pt
Epoch [7/80] - Train Loss: 1.9513 - Test Acc: 42.79%
Epoch [8/80] - Train Loss: 1.8848 - Test Acc: 47.32%
--> New best model saved (epoch 8, acc 47.32%) to: best_finet