# 🔧 02 - Quantize MobileNetV2 with PyTorch

This notebook applies **post-training static quantization** to a MobileNetV2 model trained on CIFAR-10.  
It compares accuracy and model size before and after quantization.


In [24]:
import torch
import torch.nn as nn
import torch.quantization
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import os


In [25]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [26]:
device = torch.device("cpu")  # Quantization only works on CPU

# Set quantization engine for Windows CPU
import torch.backends.quantized
print("Supported quantization engines:", torch.backends.quantized.supported_engines)

torch.backends.quantized.engine = 'fbgemm'  # fbgemm is supported on Windows CPU

print(f"Using device: {device} | Quantization engine: {torch.backends.quantized.engine}")


Supported quantization engines: ['qnnpack', 'none', 'onednn', 'x86', 'fbgemm']
Using device: cpu | Quantization engine: fbgemm


In [27]:
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

testset = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False)


In [28]:
model_fp32 = models.mobilenet_v2(pretrained=False)
model_fp32.classifier[1] = nn.Linear(model_fp32.last_channel, 10)
model_fp32.load_state_dict(torch.load(
    "/content/drive/MyDrive/AI_MODEL_OPTIMIZATION/models/mobilenetv2_cifar10_baseline.pth",
    map_location=device
))

model_fp32.eval()
print(" Loaded baseline FP32 model.")




 Loaded baseline FP32 model.


In [29]:
def evaluate(model, dataloader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in dataloader:
            # Ensure the model is on the correct device
            images = images.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

In [31]:
acc_fp32 = evaluate(model_fp32, testloader, device)
print(f" Baseline FP32 Accuracy: {acc_fp32:.2f}%")

 Baseline FP32 Accuracy: 58.01%


In [34]:
import torch.quantization

model_to_quantize = models.mobilenet_v2(pretrained=False)
model_to_quantize.classifier[1] = nn.Linear(model_to_quantize.last_channel, 10)
model_to_quantize.load_state_dict(torch.load(
    "/content/drive/MyDrive/AI_MODEL_OPTIMIZATION/models/mobilenetv2_cifar10_baseline.pth",
    map_location=device
))
model_to_quantize.eval()

# Apply dynamic quantization
model_dynamic_quantized = torch.quantization.quantize_dynamic(
    model_to_quantize,  # the original model
    {nn.Linear},        # layers to quantize
    dtype=torch.qint8   # quantized dtype
)

print(" Model dynamically quantized.")




 Model dynamically quantized.


In [35]:
acc_int8 = evaluate(model_to_quantize, testloader, device)
print(f" Quantized INT8 Accuracy: {acc_int8:.2f}%")

 Quantized INT8 Accuracy: 58.01%


In [36]:
os.makedirs("../models", exist_ok=True)
torch.save(model_to_quantize.state_dict(), "../models/mobilenetv2_quantized.pth")
print(" Saved to '../models/mobilenetv2_quantized.pth'")


 Saved to '../models/mobilenetv2_quantized.pth'


In [38]:
size_fp32 = os.path.getsize("/content/drive/MyDrive/AI_MODEL_OPTIMIZATION/models/mobilenetv2_cifar10_baseline.pth") / 1e6
size_int8 = os.path.getsize("../models/mobilenetv2_quantized.pth") / 1e6

print(f" FP32 Model Size: {size_fp32:.2f} MB")
print(f" INT8 Model Size: {size_int8:.2f} MB")
print(f" Size Reduction: {(size_fp32 - size_int8) / size_fp32 * 100:.2f}%")


 FP32 Model Size: 9.19 MB
 INT8 Model Size: 9.19 MB
 Size Reduction: 0.03%
