# Imports ⤵️

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, random_split
from torchvision import models
from torchvision.models import mobilenet_v2
from tqdm import tqdm
from torchsummary import summary
import time
import os

# Loading the dataset (CIFAR-10) 📊

In [2]:
# Fix the seed to ensure reproducibility
torch.manual_seed(42)

# Data augmentation for training (applied only to the train dataset)
train_transform = transforms.Compose([
    transforms.Resize((224, 224)), # Resize to MobileNet input size
    transforms.RandomHorizontalFlip(), # Flip horizontally
    transforms.RandomRotation(15), # Random rotation 15 degress
    transforms.ToTensor(), # Convert to tensor
    transforms.Normalize((0.5,), (0.5,)) # Normalize (mean=0.5, std=0.5)
])

# No augmentation for validation/test (only resizing and normalization)
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
# Download CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
test_data = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)

# Split the test_data into test (6k) and dev (4k)
test_size = 6000
dev_size = 4000
test_dataset, dev_dataset = random_split(test_data, [test_size, dev_size])

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
dev_loader = DataLoader(dev_dataset, batch_size=64, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Print dataset sizes
print(f"Train set size: {len(train_dataset)}")
print(f"Dev set size: {len(dev_dataset)}")
print(f"Test set size: {len(test_dataset)}")

100%|██████████| 170M/170M [00:55<00:00, 3.06MB/s] 


Train set size: 50000
Dev set size: 4000
Test set size: 6000


# Defining the model architecture 📐👷🏻‍♀️ and loading the pre-trained weights 🧠

In [16]:
# Load the pre-trained MobileNet model
base_model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)

# Unfreeze some of the top layers
for param in base_model.features[:-10].parameters():
    param.requires_grad = False

# Modify the classifier for CIFAR-10 (10 classes)
class MobileNetV2(nn.Module):
    def __init__(self, base_model):
        super(MobileNetV2, self).__init__()
        self.features = base_model.features
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Linear(1280, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.5),
            nn.Linear(64, 10)  # CIFAR-10 has 10 classes
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

model = MobileNetV2(base_model)

In [17]:
model_weights_path = '../pytorch_models/mobilenet_cifar10.pth'
# load the full model
model.load_state_dict(torch.load(model_weights_path))
# Set the model to evaluation mode
model.eval()
print("Model weights loaded successfully!")

Model weights loaded successfully!


## Setting device and model summary

In [19]:
# Ensure the model is on the correct device (cuda or cpu)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [20]:
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
             ReLU6-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             288
       BatchNorm2d-5         [-1, 32, 112, 112]              64
             ReLU6-6         [-1, 32, 112, 112]               0
            Conv2d-7         [-1, 16, 112, 112]             512
       BatchNorm2d-8         [-1, 16, 112, 112]              32
  InvertedResidual-9         [-1, 16, 112, 112]               0
           Conv2d-10         [-1, 96, 112, 112]           1,536
      BatchNorm2d-11         [-1, 96, 112, 112]             192
            ReLU6-12         [-1, 96, 112, 112]               0
           Conv2d-13           [-1, 96, 56, 56]             864
      BatchNorm2d-14           [-1, 96,

# Required functions for training and evaluation

In [6]:
# Training loop
def train_model(model, train_loader, dev_loader, criterion, optimizer, num_epochs=10, mask_enforcer=None):
    for epoch in range(num_epochs):
        ### Training Phase ###
        model.train()  # Set model to training mode
        running_loss = 0.0
        correct, total = 0, 0

        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=True)
        for images, labels in loop:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Enforce pruning mask if provided
            if mask_enforcer:
                mask_enforcer.enforce()

            # Compute training metrics
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            # Update progress bar
            loop.set_postfix(train_loss=running_loss / total, train_acc=100. * correct / total)

        train_loss = running_loss / len(train_loader)
        train_acc = 100. * correct / total

        ### Validation (Dev) Phase ###
        model.eval()  # Set model to evaluation mode
        dev_loss, dev_correct, dev_total = 0.0, 0, 0

        with torch.no_grad():
            for images, labels in dev_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                dev_loss += loss.item()
                _, predicted = outputs.max(1)
                dev_total += labels.size(0)
                dev_correct += predicted.eq(labels).sum().item()

        dev_loss /= len(dev_loader)
        dev_acc = 100. * dev_correct / dev_total

        print(f"Epoch {epoch + 1}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Dev Loss: {dev_loss:.4f}, Dev Acc: {dev_acc:.2f}%")

    print("Training complete!")

In [7]:
def evaluate_model(model, test_loader, criterion, device):
    model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    correct = 0
    total = 0
    start_time = time.time()
    with torch.no_grad():  # No gradients for validation (Disable gradient calculations for efficiency)
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Update loss
            test_loss += loss.item()

            # Get predictions
            _, predicted = outputs.max(1)

            # Update metrics
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    end_time = time.time()
    inference_time = end_time - start_time
    avg_loss = test_loss / len(test_loader)
    accuracy = 100. * correct / total

    #print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.2f}%")
    return avg_loss, accuracy, inference_time

# Model Evaluation (before pruning)

In [8]:
criterion = nn.CrossEntropyLoss()
avgLoss, acc, inf_time = evaluate_model(model, test_loader, criterion, device)
# Print evaluation results
print(f"Average test Loss: {avgLoss:.4f}, Test Accuracy: {acc:.2f}%, Inference Time for {len(test_dataset)} images: {inf_time:.2f} seconds")

Average test Loss: 0.2781, Test Accuracy: 91.35%, Inference Time for 6000 images: 10.53 seconds


# Applying pruning
following this link: https://nvidia.github.io/TensorRT-Model-Optimizer/guides/2_pruning.html#pruning-fine-tuning

In [None]:
import modelopt.torch.prune as mtp
# Checking available pruning methods
print(dir(mtp))

['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'config', 'fastnas', 'gradnas', 'mcore_gpt_minitron', 'mode', 'modelopt', 'plugins', 'prune', 'pruning']


How does FastNAS work?

- Step 1: Randomly generates multiple smaller sub-models from the original model.
- Step 2: Measures the FLOPs of each sub-model.
- Step 3: Evaluates each sub-model using the score_func (accuracy).
- Step 4: Selects the best-performing pruned model that meets the FLOPs constraint.
- Step 5: Outputs the pruned model (pruned_model) and results (prune_res).

In [8]:
import modelopt.torch.prune as mtp
import modelopt.torch.opt as mto

# Wrap your original validation function to only take the model as input.
# This function acts as the score function to rank models.
def score_func(model):
    accuracy, _ , _ = evaluate_model(model, test_loader, criterion, device)
    return accuracy

# Define a dummy input with similar shape as that of your input data
dummy_input = torch.randn(1, 3, 224, 244).to(device)

# Prune the model to at most 50% of the original FLOPs
prune_constraints = {"flops": "50%"}

pruned_model, prune_res = mtp.prune(
    model=model,
    mode="fastnas",
    constraints=prune_constraints,
    dummy_input=dummy_input,
    config={
        "data_loader": train_loader,  # training data is used for calibrating BN layers
        "score_func": score_func,  # validation score is used to rank the subnets
        # checkpoint to store the search state and resume or re-run the search with different constraint
        "checkpoint": "modelopt_fastnas_search_checkpoint.pth",
    },
)

# Save the pruned model.
mto.save(pruned_model, "modelopt_pruned_model.pth")

  from .autonotebook import tqdm as notebook_tqdm
2025-03-05 22:33:26.961274: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741206807.067552   15919 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741206807.099374   15919 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-05 22:33:27.355086: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.



Profiling the following subnets from the given model: ('min', 'centroid', 'max').
--------------------------------------------------------------------------------


[3m                                                                             [0m
[3m                              Profiling Results                              [0m
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃[1m [0m[1mConstraint  [0m[1m [0m┃[1m [0m[1mmin         [0m[1m [0m┃[1m [0m[1mcentroid    [0m[1m [0m┃[1m [0m[1mmax         [0m[1m [0m┃[1m [0m[1mmax/min ratio[0m[1m [0m┃
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ flops        │ 55.46M       │ 156.84M      │ 336.89M      │ 6.07          │
│ params       │ 455.21K      │ 1.12M        │ 2.56M        │ 5.62          │
└──────────────┴──────────────┴──────────────┴──────────────┴───────────────┘
[3m                                              [0m
[3m            Constraints Evaluation            [0m
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
┃[1m              [0m┃[1m              [0m┃[1m [0m[1mSatisfiable [0m[1m [

Collecting pre-search statistics: 100%|██████████| 213/213 [06:56<00:00,  1.96s/it, cur=features.17.conv.2.out_channels(320/320): 0.00]  
[num_satisfied] = 1:   0%|          | 1/5000 [00:05<7:05:12,  5.10s/it]


[best_subnet_constraints] = {'params': '455.21K', 'flops': '55.46M'}


In [9]:
import modelopt.torch.prune as mtp
import modelopt.torch.opt as mto

  from .autonotebook import tqdm as notebook_tqdm
2025-04-16 10:16:30.335146: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744791390.450741     930 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744791390.483893     930 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-16 10:16:30.764288: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [21]:
# Load the pruned weights
pruned_model_path = "../pytorch_models/modelopt_pruned_model.pth"
# get size of pruned model
pruned_model_size = os.path.getsize(pruned_model_path) / (1024 * 1024)
print(f"Size of pruned model: {pruned_model_size:.2f} MB")


Size of pruned model: 1.93 MB


In [22]:
base_pruned = models.mobilenet_v2(weights=None)
model = MobileNetV2(base_pruned)
# Restore the pruned architecture and weights
pruned_model = mto.restore(model, "../pytorch_models/modelopt_pruned_model.pth")

In [23]:
pruned_model.to(device)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(32, eps=

In [24]:
summary(pruned_model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
             ReLU6-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             288
       BatchNorm2d-5         [-1, 32, 112, 112]              64
             ReLU6-6         [-1, 32, 112, 112]               0
            Conv2d-7         [-1, 16, 112, 112]             512
       BatchNorm2d-8         [-1, 16, 112, 112]              32
  InvertedResidual-9         [-1, 16, 112, 112]               0
           Conv2d-10         [-1, 32, 112, 112]             512
      BatchNorm2d-11         [-1, 32, 112, 112]              64
            ReLU6-12         [-1, 32, 112, 112]               0
           Conv2d-13           [-1, 32, 56, 56]             288
      BatchNorm2d-14           [-1, 32,

# Evaluate Model after pruning

In [25]:
criterion = nn.CrossEntropyLoss()
# Evaluate pruned model
avgLoss, acc, inf_time = evaluate_model(pruned_model, test_loader, criterion, device)
# Print evaluation results
print(f"Average test Loss: {avgLoss:.4f}, Test Accuracy: {acc:.2f}%, Inference Time for {len(test_dataset)} images: {inf_time:.2f} seconds")

Average test Loss: 6.4287, Test Accuracy: 9.73%, Inference Time for 6000 images: 5.12 seconds


# Fine tune the pruned model

In [26]:
# fine tine the pruned model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
train_model(pruned_model, train_loader, dev_loader, criterion, optimizer, num_epochs=50)

Epoch 1/50: 100%|██████████| 782/782 [01:49<00:00,  7.15it/s, train_acc=21.9, train_loss=0.0552]


Epoch 1: Train Loss: 3.5282, Train Acc: 21.93%, Dev Loss: 2.1358, Dev Acc: 29.98%


Epoch 2/50: 100%|██████████| 782/782 [01:49<00:00,  7.14it/s, train_acc=29.7, train_loss=0.0339]


Epoch 2: Train Loss: 2.1706, Train Acc: 29.68%, Dev Loss: 1.6870, Dev Acc: 38.27%


Epoch 3/50: 100%|██████████| 782/782 [01:50<00:00,  7.06it/s, train_acc=35.3, train_loss=0.0287]


Epoch 3: Train Loss: 1.8349, Train Acc: 35.26%, Dev Loss: 1.5453, Dev Acc: 42.77%


Epoch 4/50: 100%|██████████| 782/782 [01:51<00:00,  7.01it/s, train_acc=39.4, train_loss=0.0262]


Epoch 4: Train Loss: 1.6746, Train Acc: 39.44%, Dev Loss: 1.4498, Dev Acc: 45.95%


Epoch 5/50: 100%|██████████| 782/782 [01:51<00:00,  6.99it/s, train_acc=43, train_loss=0.0244]  


Epoch 5: Train Loss: 1.5612, Train Acc: 42.99%, Dev Loss: 1.3786, Dev Acc: 48.27%


Epoch 6/50: 100%|██████████| 782/782 [01:49<00:00,  7.13it/s, train_acc=47.1, train_loss=0.0229]


Epoch 6: Train Loss: 1.4665, Train Acc: 47.08%, Dev Loss: 1.2868, Dev Acc: 52.83%


Epoch 7/50: 100%|██████████| 782/782 [01:50<00:00,  7.11it/s, train_acc=50.2, train_loss=0.0218]


Epoch 7: Train Loss: 1.3941, Train Acc: 50.16%, Dev Loss: 1.2250, Dev Acc: 55.55%


Epoch 8/50: 100%|██████████| 782/782 [01:50<00:00,  7.07it/s, train_acc=53, train_loss=0.0207]  


Epoch 8: Train Loss: 1.3260, Train Acc: 53.01%, Dev Loss: 1.1593, Dev Acc: 57.62%


Epoch 9/50: 100%|██████████| 782/782 [01:49<00:00,  7.11it/s, train_acc=55.4, train_loss=0.0199]


Epoch 9: Train Loss: 1.2718, Train Acc: 55.43%, Dev Loss: 1.1149, Dev Acc: 59.20%


Epoch 10/50: 100%|██████████| 782/782 [01:49<00:00,  7.17it/s, train_acc=57.4, train_loss=0.0191]


Epoch 10: Train Loss: 1.2212, Train Acc: 57.43%, Dev Loss: 1.0721, Dev Acc: 60.77%


Epoch 11/50: 100%|██████████| 782/782 [01:57<00:00,  6.68it/s, train_acc=59.4, train_loss=0.0183]


Epoch 11: Train Loss: 1.1703, Train Acc: 59.39%, Dev Loss: 1.0335, Dev Acc: 62.30%


Epoch 12/50: 100%|██████████| 782/782 [01:52<00:00,  6.95it/s, train_acc=61, train_loss=0.0176]  


Epoch 12: Train Loss: 1.1268, Train Acc: 60.97%, Dev Loss: 0.9821, Dev Acc: 64.75%


Epoch 13/50: 100%|██████████| 782/782 [01:53<00:00,  6.91it/s, train_acc=62.4, train_loss=0.0171]


Epoch 13: Train Loss: 1.0903, Train Acc: 62.44%, Dev Loss: 0.9569, Dev Acc: 64.85%


Epoch 14/50: 100%|██████████| 782/782 [01:52<00:00,  6.93it/s, train_acc=63.5, train_loss=0.0166]


Epoch 14: Train Loss: 1.0634, Train Acc: 63.48%, Dev Loss: 0.9410, Dev Acc: 65.88%


Epoch 15/50: 100%|██████████| 782/782 [01:47<00:00,  7.29it/s, train_acc=64.3, train_loss=0.0163]


Epoch 15: Train Loss: 1.0397, Train Acc: 64.34%, Dev Loss: 0.9124, Dev Acc: 67.35%


Epoch 16/50: 100%|██████████| 782/782 [01:46<00:00,  7.31it/s, train_acc=65.3, train_loss=0.0158]


Epoch 16: Train Loss: 1.0095, Train Acc: 65.26%, Dev Loss: 0.8902, Dev Acc: 67.90%


Epoch 17/50: 100%|██████████| 782/782 [01:47<00:00,  7.29it/s, train_acc=66.1, train_loss=0.0154]


Epoch 17: Train Loss: 0.9848, Train Acc: 66.09%, Dev Loss: 0.8818, Dev Acc: 68.28%


Epoch 18/50: 100%|██████████| 782/782 [01:47<00:00,  7.29it/s, train_acc=66.7, train_loss=0.0151]


Epoch 18: Train Loss: 0.9669, Train Acc: 66.67%, Dev Loss: 0.8589, Dev Acc: 68.85%


Epoch 19/50: 100%|██████████| 782/782 [01:47<00:00,  7.28it/s, train_acc=67.3, train_loss=0.015] 


Epoch 19: Train Loss: 0.9569, Train Acc: 67.34%, Dev Loss: 0.8371, Dev Acc: 70.72%


Epoch 20/50: 100%|██████████| 782/782 [01:47<00:00,  7.29it/s, train_acc=68, train_loss=0.0146]  


Epoch 20: Train Loss: 0.9344, Train Acc: 68.01%, Dev Loss: 0.8139, Dev Acc: 71.30%


Epoch 21/50: 100%|██████████| 782/782 [01:47<00:00,  7.30it/s, train_acc=68.9, train_loss=0.0143]


Epoch 21: Train Loss: 0.9123, Train Acc: 68.91%, Dev Loss: 0.8174, Dev Acc: 71.20%


Epoch 22/50: 100%|██████████| 782/782 [01:47<00:00,  7.29it/s, train_acc=69.4, train_loss=0.014] 


Epoch 22: Train Loss: 0.8977, Train Acc: 69.36%, Dev Loss: 0.8006, Dev Acc: 71.97%


Epoch 23/50: 100%|██████████| 782/782 [01:54<00:00,  6.85it/s, train_acc=69.9, train_loss=0.0138]


Epoch 23: Train Loss: 0.8802, Train Acc: 69.95%, Dev Loss: 0.7771, Dev Acc: 72.85%


Epoch 24/50: 100%|██████████| 782/782 [01:47<00:00,  7.25it/s, train_acc=70.2, train_loss=0.0137]


Epoch 24: Train Loss: 0.8738, Train Acc: 70.15%, Dev Loss: 0.7688, Dev Acc: 72.60%


Epoch 25/50: 100%|██████████| 782/782 [01:46<00:00,  7.31it/s, train_acc=71.3, train_loss=0.0133]


Epoch 25: Train Loss: 0.8500, Train Acc: 71.30%, Dev Loss: 0.7519, Dev Acc: 72.90%


Epoch 26/50: 100%|██████████| 782/782 [01:47<00:00,  7.28it/s, train_acc=71.5, train_loss=0.0132]


Epoch 26: Train Loss: 0.8431, Train Acc: 71.49%, Dev Loss: 0.7315, Dev Acc: 74.28%


Epoch 27/50: 100%|██████████| 782/782 [01:47<00:00,  7.27it/s, train_acc=72, train_loss=0.013]   


Epoch 27: Train Loss: 0.8292, Train Acc: 71.96%, Dev Loss: 0.7338, Dev Acc: 74.42%


Epoch 28/50: 100%|██████████| 782/782 [01:47<00:00,  7.27it/s, train_acc=72.4, train_loss=0.0128]


Epoch 28: Train Loss: 0.8182, Train Acc: 72.36%, Dev Loss: 0.7349, Dev Acc: 74.25%


Epoch 29/50: 100%|██████████| 782/782 [01:47<00:00,  7.26it/s, train_acc=73, train_loss=0.0126]  


Epoch 29: Train Loss: 0.8063, Train Acc: 72.97%, Dev Loss: 0.7145, Dev Acc: 75.00%


Epoch 30/50: 100%|██████████| 782/782 [01:47<00:00,  7.28it/s, train_acc=73.4, train_loss=0.0124]


Epoch 30: Train Loss: 0.7940, Train Acc: 73.38%, Dev Loss: 0.6887, Dev Acc: 75.97%


Epoch 31/50: 100%|██████████| 782/782 [01:48<00:00,  7.23it/s, train_acc=73.7, train_loss=0.0123]


Epoch 31: Train Loss: 0.7861, Train Acc: 73.67%, Dev Loss: 0.7022, Dev Acc: 75.25%


Epoch 32/50: 100%|██████████| 782/782 [01:47<00:00,  7.25it/s, train_acc=73.8, train_loss=0.0122]


Epoch 32: Train Loss: 0.7779, Train Acc: 73.80%, Dev Loss: 0.6938, Dev Acc: 75.30%


Epoch 33/50: 100%|██████████| 782/782 [01:47<00:00,  7.26it/s, train_acc=74.1, train_loss=0.0121]


Epoch 33: Train Loss: 0.7711, Train Acc: 74.14%, Dev Loss: 0.6772, Dev Acc: 76.00%


Epoch 34/50: 100%|██████████| 782/782 [01:47<00:00,  7.26it/s, train_acc=74.5, train_loss=0.0119]


Epoch 34: Train Loss: 0.7605, Train Acc: 74.50%, Dev Loss: 0.6629, Dev Acc: 76.20%


Epoch 35/50: 100%|██████████| 782/782 [01:47<00:00,  7.25it/s, train_acc=74.6, train_loss=0.0117]


Epoch 35: Train Loss: 0.7497, Train Acc: 74.64%, Dev Loss: 0.6607, Dev Acc: 76.95%


Epoch 36/50: 100%|██████████| 782/782 [01:48<00:00,  7.24it/s, train_acc=75.4, train_loss=0.0116]


Epoch 36: Train Loss: 0.7414, Train Acc: 75.43%, Dev Loss: 0.6562, Dev Acc: 76.42%


Epoch 37/50: 100%|██████████| 782/782 [01:47<00:00,  7.24it/s, train_acc=75.4, train_loss=0.0116]


Epoch 37: Train Loss: 0.7402, Train Acc: 75.45%, Dev Loss: 0.6506, Dev Acc: 76.90%


Epoch 38/50: 100%|██████████| 782/782 [01:47<00:00,  7.27it/s, train_acc=75.9, train_loss=0.0114]


Epoch 38: Train Loss: 0.7277, Train Acc: 75.92%, Dev Loss: 0.6392, Dev Acc: 77.20%


Epoch 39/50: 100%|██████████| 782/782 [01:48<00:00,  7.23it/s, train_acc=76, train_loss=0.0113]  


Epoch 39: Train Loss: 0.7218, Train Acc: 76.01%, Dev Loss: 0.6336, Dev Acc: 77.38%


Epoch 40/50: 100%|██████████| 782/782 [01:48<00:00,  7.24it/s, train_acc=76.2, train_loss=0.0112]


Epoch 40: Train Loss: 0.7161, Train Acc: 76.21%, Dev Loss: 0.6342, Dev Acc: 77.85%


Epoch 41/50: 100%|██████████| 782/782 [01:48<00:00,  7.24it/s, train_acc=76.4, train_loss=0.011] 


Epoch 41: Train Loss: 0.7047, Train Acc: 76.37%, Dev Loss: 0.6255, Dev Acc: 78.38%


Epoch 42/50: 100%|██████████| 782/782 [01:47<00:00,  7.25it/s, train_acc=76.6, train_loss=0.011] 


Epoch 42: Train Loss: 0.7024, Train Acc: 76.61%, Dev Loss: 0.6272, Dev Acc: 78.00%


Epoch 43/50: 100%|██████████| 782/782 [01:47<00:00,  7.26it/s, train_acc=76.9, train_loss=0.0109]


Epoch 43: Train Loss: 0.6946, Train Acc: 76.94%, Dev Loss: 0.6170, Dev Acc: 78.47%


Epoch 44/50: 100%|██████████| 782/782 [01:47<00:00,  7.26it/s, train_acc=77.5, train_loss=0.0106]


Epoch 44: Train Loss: 0.6801, Train Acc: 77.50%, Dev Loss: 0.6038, Dev Acc: 78.40%


Epoch 45/50: 100%|██████████| 782/782 [01:48<00:00,  7.22it/s, train_acc=77.5, train_loss=0.0107]


Epoch 45: Train Loss: 0.6832, Train Acc: 77.52%, Dev Loss: 0.6043, Dev Acc: 79.03%


Epoch 46/50: 100%|██████████| 782/782 [01:48<00:00,  7.23it/s, train_acc=77.8, train_loss=0.0106]


Epoch 46: Train Loss: 0.6759, Train Acc: 77.77%, Dev Loss: 0.6078, Dev Acc: 78.90%


Epoch 47/50: 100%|██████████| 782/782 [01:47<00:00,  7.26it/s, train_acc=78, train_loss=0.0105]  


Epoch 47: Train Loss: 0.6682, Train Acc: 78.01%, Dev Loss: 0.5925, Dev Acc: 79.25%


Epoch 48/50: 100%|██████████| 782/782 [01:47<00:00,  7.25it/s, train_acc=78.5, train_loss=0.0103]


Epoch 48: Train Loss: 0.6577, Train Acc: 78.46%, Dev Loss: 0.5933, Dev Acc: 79.30%


Epoch 49/50: 100%|██████████| 782/782 [01:48<00:00,  7.19it/s, train_acc=78.3, train_loss=0.0103]


Epoch 49: Train Loss: 0.6570, Train Acc: 78.31%, Dev Loss: 0.5895, Dev Acc: 79.42%


Epoch 50/50: 100%|██████████| 782/782 [01:48<00:00,  7.23it/s, train_acc=78.7, train_loss=0.0102]


Epoch 50: Train Loss: 0.6499, Train Acc: 78.66%, Dev Loss: 0.5765, Dev Acc: 79.97%
Training complete!


In [30]:
# fine tine the pruned model for more epochs
optimizer = torch.optim.Adam(pruned_model.parameters(), lr=1e-3)
train_model(pruned_model, train_loader, dev_loader, criterion, optimizer, num_epochs=10)

Epoch 1/10: 100%|██████████| 782/782 [01:46<00:00,  7.32it/s, train_acc=72.1, train_loss=0.0132]


Epoch 1: Train Loss: 0.8446, Train Acc: 72.05%, Dev Loss: 0.7944, Dev Acc: 72.33%


Epoch 2/10: 100%|██████████| 782/782 [01:46<00:00,  7.33it/s, train_acc=73.8, train_loss=0.0125]


Epoch 2: Train Loss: 0.8009, Train Acc: 73.78%, Dev Loss: 0.7054, Dev Acc: 75.58%


Epoch 3/10: 100%|██████████| 782/782 [01:46<00:00,  7.31it/s, train_acc=75.3, train_loss=0.0119]


Epoch 3: Train Loss: 0.7608, Train Acc: 75.28%, Dev Loss: 0.7007, Dev Acc: 76.42%


Epoch 4/10: 100%|██████████| 782/782 [01:47<00:00,  7.28it/s, train_acc=76, train_loss=0.0115]  


Epoch 4: Train Loss: 0.7373, Train Acc: 75.97%, Dev Loss: 0.6505, Dev Acc: 78.03%


Epoch 5/10: 100%|██████████| 782/782 [01:47<00:00,  7.26it/s, train_acc=76.9, train_loss=0.0111]


Epoch 5: Train Loss: 0.7114, Train Acc: 76.94%, Dev Loss: 0.6188, Dev Acc: 79.80%


Epoch 6/10: 100%|██████████| 782/782 [01:47<00:00,  7.29it/s, train_acc=77.6, train_loss=0.0108]


Epoch 6: Train Loss: 0.6911, Train Acc: 77.59%, Dev Loss: 0.6204, Dev Acc: 80.15%


Epoch 7/10: 100%|██████████| 782/782 [01:48<00:00,  7.21it/s, train_acc=78.3, train_loss=0.0105]


Epoch 7: Train Loss: 0.6689, Train Acc: 78.35%, Dev Loss: 0.6748, Dev Acc: 77.12%


Epoch 8/10: 100%|██████████| 782/782 [01:47<00:00,  7.28it/s, train_acc=79, train_loss=0.0103]  


Epoch 8: Train Loss: 0.6576, Train Acc: 78.98%, Dev Loss: 0.6213, Dev Acc: 79.70%


Epoch 9/10: 100%|██████████| 782/782 [01:47<00:00,  7.25it/s, train_acc=78.3, train_loss=0.0105]


Epoch 9: Train Loss: 0.6731, Train Acc: 78.35%, Dev Loss: 0.5518, Dev Acc: 81.92%


Epoch 10/10: 100%|██████████| 782/782 [01:47<00:00,  7.25it/s, train_acc=79.8, train_loss=0.00979]


Epoch 10: Train Loss: 0.6257, Train Acc: 79.78%, Dev Loss: 0.5733, Dev Acc: 81.17%
Training complete!


In [32]:
# fine tine the pruned model for more epochs
optimizer = torch.optim.Adam(pruned_model.parameters(), lr=1e-3)
train_model(pruned_model, train_loader, dev_loader, criterion, optimizer, num_epochs=10)

Epoch 1/10: 100%|██████████| 782/782 [01:45<00:00,  7.38it/s, train_acc=80.2, train_loss=0.00969]


Epoch 1: Train Loss: 0.6199, Train Acc: 80.22%, Dev Loss: 0.5908, Dev Acc: 79.92%


Epoch 2/10: 100%|██████████| 782/782 [01:46<00:00,  7.34it/s, train_acc=80.5, train_loss=0.00951]


Epoch 2: Train Loss: 0.6080, Train Acc: 80.48%, Dev Loss: 0.5432, Dev Acc: 81.70%


Epoch 3/10: 100%|██████████| 782/782 [01:46<00:00,  7.32it/s, train_acc=80.9, train_loss=0.00933]


Epoch 3: Train Loss: 0.5967, Train Acc: 80.93%, Dev Loss: 0.5004, Dev Acc: 83.62%


Epoch 4/10: 100%|██████████| 782/782 [01:46<00:00,  7.32it/s, train_acc=81.3, train_loss=0.00917]


Epoch 4: Train Loss: 0.5863, Train Acc: 81.30%, Dev Loss: 0.5523, Dev Acc: 81.25%


Epoch 5/10: 100%|██████████| 782/782 [01:46<00:00,  7.31it/s, train_acc=81.6, train_loss=0.00896]


Epoch 5: Train Loss: 0.5731, Train Acc: 81.58%, Dev Loss: 0.5116, Dev Acc: 82.60%


Epoch 6/10: 100%|██████████| 782/782 [01:47<00:00,  7.30it/s, train_acc=81.9, train_loss=0.00884]


Epoch 6: Train Loss: 0.5653, Train Acc: 81.87%, Dev Loss: 0.4841, Dev Acc: 83.88%


Epoch 7/10: 100%|██████████| 782/782 [01:47<00:00,  7.27it/s, train_acc=82.3, train_loss=0.00863]


Epoch 7: Train Loss: 0.5516, Train Acc: 82.32%, Dev Loss: 0.4782, Dev Acc: 84.15%


Epoch 8/10: 100%|██████████| 782/782 [01:47<00:00,  7.27it/s, train_acc=82.8, train_loss=0.00851]


Epoch 8: Train Loss: 0.5439, Train Acc: 82.80%, Dev Loss: 0.4675, Dev Acc: 84.42%


Epoch 9/10: 100%|██████████| 782/782 [01:48<00:00,  7.19it/s, train_acc=83, train_loss=0.00841]  


Epoch 9: Train Loss: 0.5378, Train Acc: 82.98%, Dev Loss: 0.4800, Dev Acc: 83.53%


Epoch 10/10: 100%|██████████| 782/782 [01:48<00:00,  7.23it/s, train_acc=83.1, train_loss=0.0083] 


Epoch 10: Train Loss: 0.5305, Train Acc: 83.13%, Dev Loss: 0.5113, Dev Acc: 82.97%
Training complete!


# Evaluate after fine Tuning

In [33]:
criterion = nn.CrossEntropyLoss()
# Evaluate pruned model
avgLoss, acc, inf_time = evaluate_model(pruned_model, test_loader, criterion, device)
# Print evaluation results
print(f"Average test Loss: {avgLoss:.4f}, Test Accuracy: {acc:.2f}%, Inference Time for {len(test_dataset)} images: {inf_time:.2f} seconds")

Average test Loss: 0.5165, Test Accuracy: 82.98%, Inference Time for 6000 images: 5.19 seconds


In [36]:
model_weights_path = "../pytorch_models/mobilenet_cifar10.pth"
pruned_weights_path = "../pytorch_models/mobileNet_pruned_finetuned_weights.pth"
# Compare model sizes
original_model_size = os.path.getsize(model_weights_path) / (1024 * 1024)
print(f"Size of original model: {original_model_size:.2f} MB")
pruned_model_size = os.path.getsize(pruned_weights_path) / (1024 * 1024)
print(f"Size of pruned model: {pruned_model_size:.2f} MB")
# Calculate the size reduction
size_reduction = original_model_size - pruned_model_size
print(f"Size reduction: {size_reduction:.2f} MB")
# Calculate the percentage reduction
percentage_reduction = (size_reduction / original_model_size) * 100
print(f"Percentage reduction: {percentage_reduction:.2f}%")

Size of original model: 10.14 MB
Size of pruned model: 1.91 MB
Size reduction: 8.24 MB
Percentage reduction: 81.19%


# Save the model

In [34]:
model_save_path = "./mobileNet_pruned_finetuned_weights.pth"
# Save model weights (recommended approach)
torch.save(pruned_model.state_dict(), model_save_path)
print(f"Model weights saved successfully at: {model_save_path}")

Model weights saved successfully at: ./mobileNet_pruned_finetuned_weights.pth


In [35]:
full_model_save_path = "./mobileNet_pruned_finetuned.pth"
# Save the entire model (optional if you want to keep the architecture too)
torch.save(pruned_model, full_model_save_path)
print(f"Full model saved successfully at: {full_model_save_path}")

Full model saved successfully at: ./mobileNet_pruned_finetuned.pth
