In [5]:
import os
import time
import torch
from PIL import Image
from torchvision import models
from torchvision import transforms


# Preprocessing pipeline: resize, center-crop to 224x224, convert to a tensor,
# then normalize each of the three channels.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.48235, 0.45882, 0.40784],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Force three channels: Normalize above is configured with 3-element mean/std,
# so a grayscale or RGBA file would otherwise fail inside the transform.
image = Image.open("../datasets/images/cat.jpg").convert("RGB")
inputs = transform(image).unsqueeze(0)  # add the batch dimension

device = "cuda" if torch.cuda.is_available() else "cpu"

# Float (non-quantized) model. map_location lets a checkpoint that was saved
# from a GPU be loaded on a CPU-only machine.
model = models.vgg16(num_classes=2, pretrained=False)
model.load_state_dict(torch.load("../models/VGG16.pt", map_location=device))
model = model.to(device)
model.eval()

# Statically quantized TorchScript model; it is fed the CPU tensor below.
model_static_quantized = torch.jit.load("../models/PTSQ_VGG16.pt")

# --- Before quantization -----------------------------------------------------
# Move the input to the device before starting the clock so that only the
# forward pass is timed.
device_inputs = inputs.to(device)
with torch.no_grad():
    start_time = time.perf_counter()
    outputs = model(device_inputs)
    if device == "cuda":
        # CUDA kernels are launched asynchronously; wait for them to finish
        # so the measured interval covers the whole forward pass.
        torch.cuda.synchronize()
    end_time = time.perf_counter() - start_time

# File-size lookup and printing happen after the clock has been stopped.
file_size = os.path.getsize("../models/VGG16.pt") / 1e6
print("양자화 적용 전:")
print(f"출력 결과: {outputs}")
print(f"추론 시간: {end_time:.4f}s")
print(f"파일 크기: {file_size:.2f} MB")
print("\n")

# --- After quantization ------------------------------------------------------
with torch.no_grad():
    start_time = time.perf_counter()
    outputs = model_static_quantized(inputs)
    end_time = time.perf_counter() - start_time

file_size = os.path.getsize("../models/PTSQ_VGG16.pt") / 1e6
print("양자화 적용 후:")
print(f"출력 결과: {outputs}")
print(f"추론 시간: {end_time:.4f}s")
print(f"파일 크기: {file_size:.2f} MB")

FileNotFoundError: [Errno 2] No such file or directory: '../models/VGG16.pt'

In [6]:
import torch

# Choose the backend used for quantized operators ('fbgemm' is the alternative).
quantized_backend = torch.backends.quantized
quantized_backend.engine = 'qnnpack'

# Read the setting back to confirm which engine is active.
print("Current quantized engine:", quantized_backend.engine)


Current quantized engine: qnnpack


In [7]:
import torch
from torchvision import models

# Select the backend used for quantized operators.
torch.backends.quantized.engine = 'qnnpack'

# Baseline VGG16 (pretrained=False here, so the weights are randomly initialised).
device = torch.device('cpu')  # the quantized model is run on the CPU
original_model = models.vgg16(pretrained=False)
original_model.to(device)
# Switch to inference mode before quantizing: torchvision's VGG classifier
# contains Dropout layers, which would otherwise stay active and make the
# test output below differ from run to run for the same input.
original_model.eval()

# Apply dynamic quantization.
quantized_vgg16 = torch.quantization.quantize_dynamic(
    original_model,  # source model
    {torch.nn.Linear},  # layer types to quantize
    dtype=torch.qint8  # quantized data type
)

# Example input: one random 3x224x224 image.
dummy_input = torch.rand(1, 3, 224, 224).to(device)

# Smoke test: a single forward pass; no gradients are needed for inference.
with torch.no_grad():
    output = quantized_vgg16(dummy_input)
print(output)


tensor([[ 3.0256e-02,  2.8610e-02,  1.5141e-02, -1.1033e-02,  3.6020e-02,
         -4.4120e-03, -1.8670e-02,  9.5491e-03,  1.3924e-02,  4.2411e-03,
         -1.2143e-02,  4.3209e-02,  2.7170e-02,  3.7288e-02, -3.2777e-02,
          3.5690e-03, -4.2151e-02, -3.7039e-03, -1.7512e-02, -1.3442e-03,
         -3.6341e-02, -4.4383e-02, -1.4925e-02, -3.0506e-03,  2.1187e-02,
          1.9326e-02,  4.0454e-02, -2.9515e-02, -5.6498e-03,  8.1603e-03,
          1.8804e-02, -3.9759e-04,  2.7200e-02, -1.5769e-02,  2.6792e-02,
          3.6987e-02,  2.0566e-02,  1.0887e-02,  2.6035e-02,  1.0880e-02,
         -5.1104e-02,  3.8977e-02,  8.2212e-03, -1.3021e-03,  2.0488e-03,
          3.8707e-02, -1.7959e-02, -5.5744e-02, -4.0680e-03,  2.4547e-02,
          2.1758e-02,  1.1656e-02,  1.8289e-02,  2.9709e-03,  3.9912e-03,
          5.1492e-02, -3.6092e-04,  1.0764e-03,  1.6449e-02,  3.0676e-02,
         -2.9121e-02, -5.9305e-02,  6.8805e-03, -2.3615e-02, -4.8412e-02,
         -3.2174e-03,  1.7636e-02,  1.