In [21]:
from torch._export import capture_pre_autograd_graph

from torch.ao.quantization.quantize_pt2e import (
  prepare_pt2e,
  convert_pt2e,
  prepare_qat_pt2e
)

from torch.ao.quantization.quantizer.xnnpack_quantizer import (
  XNNPACKQuantizer,
  get_symmetric_quantization_config,
)

In [22]:
import torch
from torchvision.models import mobilenet_v2
# from torchvision.models.quantization import mobilenet_v2
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import os
from torch import nn
from torch.optim import *
from torch.optim.lr_scheduler import *
import copy

weight_path = '/home/aa35037123/Wesley/edge_ai/lab3/mobilenetv2_0.963.pth'
model = torch.load(weight_path, map_location=torch.device('cpu'))
model.to('cpu')
model.eval()

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [23]:
def prepare_data(batch_size):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize images to match MobileNet input size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    train_set = CIFAR10(root='./data', train=True, download=True, transform=transform)
    test_set = CIFAR10(root='./data', train=False, download=True, transform=transform)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False,drop_last=True)
    return train_loader, test_loader

In [24]:
def evaluate_model(model, data_loader,device):

    model.to(device)
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy of the model on the test images: {accuracy}%')
    return accuracy

def train_one_epoch(model, criterion, optimizer, data_loader, device):

    cnt = 0

    for image, target in data_loader:
        cnt += 1
        image, target = image.to(device), target.to(device)
        output = model(image)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return

def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

In [25]:
batch_size = 16
train_loader, test_loader = prepare_data(batch_size)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'device: {device}')
model.eval()
print_size_of_model(model)
# evaluate_model(model, test_loader,device)

Files already downloaded and verified
Files already downloaded and verified
device: cpu
Size (MB): 9.169412


In [26]:
def quantize_ptq_model(model: nn.Module) -> None:
    ############### YOUR CODE STARTS HERE ###############

    # model.to(device)
    # Step 1. program capture
    example_inputs = (torch.randn(1, 3, 224, 224),)
    # example_inputs = (example_input.to(device) for example_input in example_inputs)
    model = capture_pre_autograd_graph(model, example_inputs)
    # Step 2. set quantizatizer
    # prepare_pt2e folds BatchNorm operators into preceding Conv2d operators, and inserts observers in appropriate places in the model.
    quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
    # Step 3. prepare pt2e
    model = prepare_pt2e(model, quantizer)
    # Step 4. convert model
    model = convert_pt2e(model)
    ############### YOUR CODE ENDS HERE #################
    return model


ptq_model = quantize_ptq_model(model)
torch.ao.quantization.move_exported_model_to_eval(ptq_model)



GraphModule()

In [27]:
def prepare_qat_model(model: nn.Module) -> None:
    ############### YOUR CODE STARTS HERE ###############

    # Step 1. program capture
    example_inputs = (torch.randn(1, 3, 224, 224),)
    model = capture_pre_autograd_graph(model, example_inputs)
    # Step 2. set quantizatizer
    quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
    # Step 3. prepare qat pt2e
    model = prepare_qat_pt2e(model, quantizer)

    ############### YOUR CODE ENDS HERE #################

    return model

In [28]:

############### YOUR CODE STARTS HERE ###############
num_epochs = 3
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
############### YOUR CODE ENDS HERE #################

num_observer_update_epochs = 4
num_batch_norm_update_epochs = 3
num_epochs_between_evals = 2

criterion = nn.CrossEntropyLoss()

weight_path = '/home/aa35037123/Wesley/edge_ai/lab3/mobilenetv2_0.963.pth'
model = torch.load(weight_path, map_location="cpu")
prepared_model  = prepare_qat_model(model)
prepared_model = prepared_model.to(device)

# QAT takes time and one needs to train over a few epochs.
for epoch in range(num_epochs):
    train_one_epoch(prepared_model, criterion, optimizer, train_loader, device)

    # Optionally disable observer/batchnorm stats after certain number of epochs
    if epoch >= num_observer_update_epochs:
        print("Disabling observer for subseq epochs, epoch = ", epoch)
        prepared_model.apply(torch.ao.quantization.disable_observer)
    if epoch >= num_batch_norm_update_epochs:
        print("Freezing BN for subseq epochs, epoch = ", epoch)
        for n in prepared_model.graph.nodes:
            # Args: input, weight, bias, running_mean, running_var, training, momentum, eps
            # set the `training` flag to False here to freeze BN stats
            if n.target in [
                torch.ops.aten._native_batch_norm_legit.default,
                torch.ops.aten.cudnn_batch_norm.default,
            ]:
                new_args = list(n.args)
                new_args[5] = False
                n.args = new_args
        prepared_model.recompile()

    # Check the quantized accuracy every N epochs

    if (epoch + 1) % num_epochs_between_evals == 0:
        prepared_model_copy = copy.deepcopy(prepared_model)
        quantized_model = convert_pt2e(prepared_model_copy)
        acc = evaluate_model(quantized_model, test_loader,device)


Accuracy of the model on the test images: 89.83%


In [29]:
qat_quantized_model = convert_pt2e(prepared_model)
torch.ao.quantization.move_exported_model_to_eval(qat_quantized_model)

GraphModule()

In [30]:
# Export the model and Save ExportedProgram
file_path =  "./mobilenet_quantized.pt"
# capture the model to get an ExportedProgram
example_inputs = (torch.randn(1, 3, 224, 224),)
quantized_ep = torch.export.export(qat_quantized_model, example_inputs)
# use torch.export.save to save an ExportedProgram
torch.export.save(quantized_ep, file_path)

In [31]:
from torch._export import capture_pre_autograd_graph
from torch.export import export, ExportedProgram
from executorch.exir import EdgeProgramManager, to_edge, ExecutorchProgramManager
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir.passes import ScalarToTensorPass
from executorch.exir import EdgeCompileConfig

example_args = (torch.randn(1, 3, 224, 224),)

aten_dialect: ExportedProgram = export(qat_quantized_model, example_args)
print("Exportation succeed")

edge_program: EdgeProgramManager = to_edge(aten_dialect, compile_config=EdgeCompileConfig(_check_ir_validity=False))
edge_program = edge_program.to_backend(XnnpackPartitioner)
print("Edge Dialect graph generation succeed")

executorch_program: ExecutorchProgramManager = edge_program.to_executorch()

with open("./quantized_mobilenet.pte", "wb") as file:
    file.write(executorch_program.buffer)

ImportError: cannot import name 'get_lifted_tensor_constant' from 'torch._export.utils' (/home/aa35037123/miniconda3/envs/lab3/lib/python3.10/site-packages/torch/_export/utils.py)