In [1]:
import time

import numpy as np
import torch
import torch.ao.quantization.quantize_fx as quantize_fx
import torch.utils.data
import torchvision.datasets as datasets
import torchvision.transforms as T
from models import resnet50
from torch.ao.quantization import get_default_qconfig_mapping
from torch.utils.data import Subset

In [2]:
# Seed NumPy's global RNG so the randomly drawn evaluation subset is reproducible.
np.random.seed(0)

# Pretrained ResNet-50 from the project-local `models` module, put into inference mode.
model = resnet50(pretrained=True)
model.eval()

# Per-channel statistics used to normalise images after conversion to tensors.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]
transform = T.Compose(
    [
        T.ToTensor(),
        T.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
    ]
)

# CIFAR-10 test split (train=False), downloaded into ./data when missing.
full_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)

# Draw 1000 indices uniformly at random; randint samples independently,
# so the same index may appear more than once.
subset_indices = np.random.randint(low=0, high=len(full_dataset), size=1000)
subset_dataset = Subset(full_dataset, subset_indices)

# One image per batch, in fixed order, so per-sample predictions line up between runs.
loader = torch.utils.data.DataLoader(subset_dataset, batch_size=1, shuffle=False)

Files already downloaded and verified


In [3]:
# FP32 inference: run the float model over the subset and record the
# predicted class index for every batch.
answers = []
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
with torch.no_grad():
    for images, _ in loader:
        outputs = model(images)
        # Index of the largest output along dim 1. `.item()` requires a
        # single-element tensor, i.e. exactly one image per batch.
        predicted = torch.argmax(outputs, dim=1)
        answers.append(predicted.item())
print(time.perf_counter() - start)
# Reference measurement: 7.85 s on an Intel Core i7-12700

6.966524839401245


In [4]:
# Preparation: build the default qconfig mapping for the "x86" backend and
# let prepare_fx insert observers into the model.
qconfig_mapping = get_default_qconfig_mapping("x86")
# prepare_fx documents `example_inputs` as a tuple of positional forward()
# arguments, so pass one preprocessed batch from the loader wrapped in a tuple
# instead of a raw `full_dataset.data[0]` entry, which bypasses `transform`.
example_images, _ = next(iter(loader))
model_prepared = quantize_fx.prepare_fx(model, qconfig_mapping, (example_images,))

# Calibration: feed the subset through the prepared model so the observers
# see representative activations. No gradients are needed.
with torch.no_grad():
    for images, _ in loader:
        model_prepared(images)

# Quantization: convert the calibrated model into its quantized form.
model_quantized = quantize_fx.convert_fx(model_prepared)
# Last expression of the cell, so the notebook displays the module's repr.
model_quantized.eval()



GraphModule(
  (conv1): QuantizedConvReLU2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=0.0030952803790569305, zero_point=0, padding=(3, 3))
  (maxpool): Identity()
  (layer1): Module(
    (0): Module(
      (conv1): QuantizedConvReLU2d(64, 64, kernel_size=(1, 1), stride=(1, 1), scale=0.0013514463789761066, zero_point=0)
      (conv2): QuantizedConvReLU2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.001981052104383707, zero_point=0, padding=(1, 1))
      (conv3): QuantizedConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=0.0037456878926604986, zero_point=64)
      (downsample): Module(
        (0): QuantizedConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=0.004810944199562073, zero_point=73)
      )
    )
    (1): Module(
      (conv1): QuantizedConvReLU2d(256, 64, kernel_size=(1, 1), stride=(1, 1), scale=0.001273467089049518, zero_point=0)
      (conv2): QuantizedConvReLU2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.0019142132950946689, zero_point=0, p

In [5]:
# INT8 inference: run the quantized model over the same loader and record
# the predicted class index for every batch.
answers_quantized = []
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
with torch.no_grad():
    for images, _ in loader:
        outputs = model_quantized(images)
        # Index of the largest output along dim 1. `.item()` requires a
        # single-element tensor, i.e. exactly one image per batch.
        predicted = torch.argmax(outputs, dim=1)
        answers_quantized.append(predicted.item())
print(time.perf_counter() - start)
# Reference measurement: 4.41 s on an Intel Core i7-12700

3.5624163150787354


In [6]:
# Agreement rate: fraction of samples for which the FP32 and INT8 models
# predicted the same class (this compares the two models with each other,
# not against the ground-truth labels).
matching = 0
for idx, fp32_prediction in enumerate(answers):
    if fp32_prediction == answers_quantized[idx]:
        matching += 1
print("acc:", matching / len(answers))
# -> 0.994

acc: 0.994
