In [1]:
# Mount Google Drive into the Colab filesystem; the pretrained checkpoint is
# later read from a path under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# install the newest version of torch, torchvision, and timm
!pip3 uninstall --yes torch torchaudio torchvision torchtext torchdata timm
!pip3 install torch torchaudio torchvision torchtext torchdata timm

[0mCollecting torch
  Using cached torch-2.3.0-cp310-cp310-manylinux1_x86_64.whl (779.1 MB)
Collecting torchaudio
  Using cached torchaudio-2.3.0-cp310-cp310-manylinux1_x86_64.whl (3.3 MB)
Collecting torchvision
  Using cached torchvision-0.18.0-cp310-cp310-manylinux1_x86_64.whl (7.0 MB)
Collecting torchtext
  Using cached torchtext-0.18.0-cp310-cp310-manylinux1_x86_64.whl (2.0 MB)
Collecting torchdata
  Using cached torchdata-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.7 MB)
Collecting timm
  Using cached timm-0.9.16-py3-none-any.whl (2.2 MB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.w

In [5]:
import torch
from torch._export import capture_pre_autograd_graph

from torchvision.models import mobilenet_v2
# from torchvision.models.quantization import mobilenet_v2
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import os
from torch import nn
from torch.optim import *
from torch.optim.lr_scheduler import *
import copy

In [6]:
def evaluate_model(model, data_loader, device):
    """Compute and print the top-1 accuracy of ``model`` over ``data_loader``.

    Args:
        model: Callable module; it is moved to ``device`` via ``model.to``.
            Its train/eval mode is not changed by this function.
        data_loader: Iterable yielding ``(images, labels)`` batches of tensors.
        device: Device on which both inputs and labels are placed.

    Returns:
        The accuracy as a percentage (``100 * correct / total``).

    Raises:
        ZeroDivisionError: If ``data_loader`` yields no samples.
    """
    model.to(device)
    n_correct, n_seen = 0, 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_images, batch_labels in data_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_images)
            # Index of the largest score along dim 1 is the predicted class.
            top1 = torch.max(logits, 1)[1]
            n_seen += batch_labels.size(0)
            n_correct += top1.eq(batch_labels).sum().item()
    accuracy = 100 * n_correct / n_seen
    print(f'Accuracy of the model on the test images: {accuracy}%')
    return accuracy


In [7]:
def prepare_data(batch_size):
    """Build CIFAR10 train and test loaders with a shared preprocessing pipeline.

    The dataset is downloaded into ``./data`` if needed. Every image is resized
    to 224x224 (to match the MobileNet input size), converted to a tensor and
    normalized per channel.

    Args:
        batch_size: Batch size used for both loaders.

    Returns:
        ``(train_loader, test_loader)``. The train loader shuffles; the test
        loader keeps dataset order and drops the last incomplete batch.
    """
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    # Both splits share root, download flag and transform; only `train` differs.
    shared_kwargs = dict(root='./data', download=True, transform=preprocess)
    train_set = CIFAR10(train=True, **shared_kwargs)
    test_set = CIFAR10(train=False, **shared_kwargs)

    return (
        DataLoader(train_set, batch_size=batch_size, shuffle=True),
        DataLoader(test_set, batch_size=batch_size, shuffle=False, drop_last=True),
    )

# Build both loaders once at module level. Later cells use `test_loader` for
# calibration, as the export example input, and for the final evaluation.
batch_size = 16
train_loader, test_loader = prepare_data(batch_size)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 29783563.00it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [13]:
def calibrate(model, data_loader, device):
    """Run every batch of ``data_loader`` through ``model`` without gradients.

    Only the inputs are used: the second element of each batch (the labels) is
    ignored, so it is neither moved to ``device`` nor required to be a tensor.
    The model's train/eval mode is not changed here; the caller controls it.

    Args:
        model: Callable invoked once per batch with the input tensor.
        data_loader: Iterable yielding ``(images, labels)`` pairs.
        device: Device the images are moved to before the forward pass.

    Returns:
        None. Outputs of the forward passes are discarded.
    """
    with torch.no_grad():
        for images, _ in data_loader:
            model(images.to(device))

In [35]:
def print_size_of_model(model):
    """Print the serialized size of ``model.state_dict()`` in megabytes.

    The state dict is saved to a uniquely named temporary file, so an existing
    ``temp.p`` in the working directory is never overwritten or deleted. The
    temporary file is removed even if saving or measuring fails.

    Args:
        model: Object exposing ``state_dict()``.

    Returns:
        None. The size is reported on stdout only.
    """
    import tempfile

    fd, tmp_path = tempfile.mkstemp(suffix=".p")
    # Only the path is needed; torch.save reopens the file itself.
    os.close(fd)
    try:
        torch.save(model.state_dict(), tmp_path)
        print('Size (MB):', os.path.getsize(tmp_path)/1e6)
    finally:
        os.remove(tmp_path)

In [30]:

from torch.ao.quantization.quantize_pt2e import (
  prepare_pt2e,
  convert_pt2e,
)
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
  XNNPACKQuantizer,
  get_symmetric_quantization_config,
)

# Step 1. load the float model and capture its graph
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'device: {device}')
weight_path = '/content/drive/MyDrive/mobilenetv2_0.963.pth'
# SECURITY: this file holds a fully pickled module (the loaded object is used
# directly as the model), so loading it executes arbitrary pickle code. Only
# load checkpoints you trust. `weights_only=False` is spelled out because the
# default of this flag differs between torch releases.
model = torch.load(weight_path, map_location=torch.device('cpu'), weights_only=False)
model.to(device)
print(next(model.parameters()).is_cuda)
model.eval()
example_inputs = (torch.randn(1, 3, 224, 224).to(device),)
exported_model = capture_pre_autograd_graph(model, example_inputs)

# Step 2. quantization
# The quantizer is backend-specific; it expresses how the model should be
# quantized (here: the XNNPACK symmetric configuration applied globally).
quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
prepared_model = prepare_pt2e(exported_model, quantizer)
# Run calibration batches through the prepared model before converting.
# NOTE(review): calibration uses test_loader, which is also used for the final
# accuracy measurement; consider calibrating on held-out training data instead.
calibrate(prepared_model, test_loader, device)
ptq_quantized_model = convert_pt2e(prepared_model)
# The converted model uses aten ops doing integer computations when possible.

device: cuda:0
True


In [33]:
# Re-export the converted model as an ExportedProgram and write it to disk.
pt2e_quantized_model_file_path =  "./mobilenet_quantized.pt"

# One real batch of test images is the example input for tracing.
first_batch = next(iter(test_loader))
example_inputs = (first_batch[0].to(device),)

# Switch the exported graph to eval behaviour before capturing it.
torch.ao.quantization.move_exported_model_to_eval(ptq_quantized_model)
quantized_ep = torch.export.export(ptq_quantized_model, example_inputs)

# torch.export.save serializes the ExportedProgram to the given path.
torch.export.save(quantized_ep, pt2e_quantized_model_file_path)
print('Modle has been export!')

Modle has been export!


In [None]:
# Reload the ExportedProgram from the exact path the export cell wrote to.
# The previous hard-coded "/content/mobilenet_quantized.pth" did not match the
# saved "./mobilenet_quantized.pt", so the load could not find the file.
loaded_quantized_ep = torch.export.load(pt2e_quantized_model_file_path)
# .module() turns the ExportedProgram back into a callable module.
loaded_quantized_model = loaded_quantized_ep.module()
acc = evaluate_model(loaded_quantized_model, test_loader, device)
print_size_of_model(loaded_quantized_model)