## 데이터셋

- CIFAR 10
- train 데이터를 train 과 val로 split(8:2)
- 이미지 : 32 -> 128 resize
- RandomHorizontalFlip(p = 0.5)
- 정규화
- batch_size = 32 / 32

In [1]:
# modeling.ipynb : CIFAR-10 train만 받아서 train/val로 사용 (8:2, 계층분할)

import numpy as np
import torch, torchvision as tv, torchvision.transforms as T
from torchvision.transforms import InterpolationMode
from torch.utils.data import Subset, DataLoader

# --------------------
# 기본 설정 & 재현성
# --------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
SEED, VAL_RATIO = 42, 0.2

np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# DataLoader shuffle의 재현성 보장용
g = torch.Generator()
g.manual_seed(SEED)

# --------------------
# 정규화 통계
# - ImageNet 사전학습 모델 사용 시: IMAGENET_STATS
# - scratch 학습 시: CIFAR10_STATS
# --------------------
IMAGENET_MEAN, IMAGENET_STD = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
CIFAR10_MEAN,  CIFAR10_STD  = [0.4914,0.4822,0.4465], [0.2023,0.1994,0.2010]

USE_IMAGENET_NORM = True  # ← VGG/ResNet 등 ImageNet pretrained면 True
MEAN = IMAGENET_MEAN if USE_IMAGENET_NORM else CIFAR10_MEAN
STD  = IMAGENET_STD  if USE_IMAGENET_NORM else CIFAR10_STD

# --------------------
# Transforms
# --------------------
transform_train = T.Compose([
    T.Resize(128, interpolation=InterpolationMode.BILINEAR, antialias=True),
    T.RandomHorizontalFlip(p=0.5),
    T.ToTensor(),
    T.Normalize(MEAN, STD),
])

transform_val = T.Compose([
    T.Resize(128, interpolation=InterpolationMode.BILINEAR, antialias=True),
    T.ToTensor(),
    T.Normalize(MEAN, STD),
])

# --------------------
# 데이터 로딩 (train만 두 번 생성: 증강/비증강 분리용)
# --------------------
train_aug = tv.datasets.CIFAR10(root="./data", train=True, download=True,  transform=transform_train)
val_eval  = tv.datasets.CIFAR10(root="./data", train=True, download=False, transform=transform_val)

# --------------------
# 계층(클래스별) 분할
# --------------------
labels = np.array(train_aug.targets)             # 길이 50,000
train_idx, val_idx = [], []
rng = np.random.default_rng(SEED)

for c in np.unique(labels):
    idx = np.where(labels == c)[0]
    rng.shuffle(idx)
    n_val = max(1, int(round(len(idx) * VAL_RATIO)))
    val_idx.extend(idx[:n_val])
    train_idx.extend(idx[n_val:])

# Subset 구성: 같은 인덱스를 서로 다른 transform에 적용
train_ds = Subset(train_aug, train_idx)  # 증강 O
val_ds   = Subset(val_eval,  val_idx)    # 증강 X

# --------------------
# DataLoader
# --------------------
pin = torch.cuda.is_available()
num_workers = 0 # Changed from 2 to 0

train_loader = DataLoader(
    train_ds, batch_size=32, shuffle=True, generator=g,
    num_workers=num_workers, pin_memory=pin, drop_last=True,
    persistent_workers=(num_workers > 0)
)

val_loader = DataLoader(
    val_ds, batch_size=32, shuffle=False,
    num_workers=num_workers, pin_memory=pin,
    persistent_workers=(num_workers > 0)
)

print(f"train: {len(train_ds)} | val: {len(val_ds)}")  # 40000 / 10000
classes = train_aug.classes
print("classes:", classes)
print("device:", device)

100%|██████████| 170M/170M [00:12<00:00, 13.4MB/s]


train: 40000 | val: 10000
classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
device: cuda


## 학습 모델
- VGG11-BN (ImageNet1K 사전학습 가중치)
- 입력 크기에 맞춰 avgpool / classifier 재구성
- Full Fine-Tuning

In [2]:
import torch, torch.nn as nn
from torchvision.models import vgg11_bn, VGG11_BN_Weights

device = "cuda" if torch.cuda.is_available() else "cpu"
num_classes = 10
IMG_SIZE = 128  # 128 해상도

# 사전학습 모델 로드
vgg = vgg11_bn(weights=VGG11_BN_Weights.IMAGENET1K_V1)

# 실제 feature map 크기 계산
with torch.no_grad():
    dummy = torch.zeros(1, 3, IMG_SIZE, IMG_SIZE)
    c, h, w = vgg.features(dummy).shape[1:]   # c=512, h=w=4 (IMG_SIZE=128)

assert h >= 1 and w >= 1, "입력 해상도가 너무 작아요."

# avgpool과 classifier 갱신
vgg.avgpool = nn.AdaptiveAvgPool2d((h, w))            # 안전하게 고정
vgg.classifier[0] = nn.Linear(c * h * w, 4096)        # in_features만 교체
vgg.classifier[6] = nn.Linear(4096, num_classes)      # 출력 차원 교체

# Full fine-tuning
vgg.requires_grad_(True)  # 전체 학습

vgg = vgg.to(device)

Downloading: "https://download.pytorch.org/models/vgg11_bn-6002323d.pth" to /root/.cache/torch/hub/checkpoints/vgg11_bn-6002323d.pth


100%|██████████| 507M/507M [00:02<00:00, 236MB/s]


In [3]:
vgg

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

## 기본 모델 생성

In [4]:
import os, torch, torch.nn as nn

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(vgg.parameters(), lr=0.005, momentum=0.9, weight_decay=5e-4)

def train_one_epoch(model, loader):
    model.train()
    total, correct, run_loss = 0, 0, 0.0
    for x, y in loader:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        run_loss += loss.item() * x.size(0)
        pred = out.argmax(1)
        correct += (pred == y).sum().item()
        total += y.size(0)
    return run_loss / total, correct / total

@torch.no_grad()
def evaluate(model, loader):
    model.eval()
    total, correct, run_loss = 0, 0, 0.0
    for x, y in loader:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)
        out = model(x)
        loss = criterion(out, y)

        run_loss += loss.item() * y.size(0)
        pred = out.argmax(1)
        correct += (pred == y).sum().item()
        total += y.size(0)
    return run_loss / total, correct / total

In [5]:
# 학습
epochs = 5
best = 0.0
for e in range(1, epochs + 1):
    tr_loss, tr_acc = train_one_epoch(vgg, train_loader)
    te_loss, te_acc = evaluate(vgg, val_loader)

    if te_acc > best:
        best = te_acc
        torch.save(vgg.state_dict(), "mission_16_vgg11bn_fp32.pth")

    print(f"[Epoch {e:02d}] train loss {tr_loss:.4f} | acc {tr_acc*100:.2f}%  ||  "
          f"val loss {te_loss:.4f} | acc {te_acc*100:.2f}%")

print(f"Best FP32 val acc: {best*100:.2f}% (saved to mission_16_vgg11bn_fp32.pth)")

[Epoch 01] train loss 0.5679 | acc 80.54%  ||  val loss 0.3263 | acc 88.81%
[Epoch 02] train loss 0.2324 | acc 92.05%  ||  val loss 0.2300 | acc 92.44%
[Epoch 03] train loss 0.1571 | acc 94.73%  ||  val loss 0.2311 | acc 92.43%
[Epoch 04] train loss 0.1257 | acc 95.77%  ||  val loss 0.2362 | acc 92.43%
[Epoch 05] train loss 0.1009 | acc 96.63%  ||  val loss 0.2330 | acc 92.79%
Best FP32 val acc: 92.79% (saved to mission_16_vgg11bn_fp32.pth)


## PTQ(Post-Training Quantization)
- 모델을 훈련 종료 후 , 가중치를변환
### Dynamic quantization
- 모델의 **Linear 층 가중치**를 미리 **INT8**로 압축해 둡니다
- 추론할 때 들어오는 **입력 활성값**은 그 순간 **on-the-fly로 INT8 변환**해서
    정수 곱셈으로 빠르게 계산하고, 결과는 다시 **FP32**로 되돌립니다.

In [6]:
# Dynamic Quantization (Linear만 INT8로 변환)
import copy, torch.nn as nn, os

dq_model = torch.quantization.quantize_dynamic(
    copy.deepcopy(vgg).cpu().eval(),
    {nn.Linear},
    dtype=torch.qint8)

# 저장
torch.save(dq_model.state_dict(), "mission_16_vgg11bn_dynamic_int8.pth")

For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  dq_model = torch.quantization.quantize_dynamic(


### Static quantization
- 정적 양자화에서는 사전 Calibration을 통해 가중치와 활성값의 분포를 미리 추정하고, 추론 시 가중치는 int8로, 활성값은 저장된 분포(scale/zero_point)를 사용해 int8로 변환한다.

In [7]:
# --- Static Quantization (PTQ, INT8) - FX(Graph) 모드 / 128x128 입력
import copy, torch
from tqdm.auto import tqdm
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx
from torch.ao.quantization import get_default_qconfig_mapping

# 양자화 백엔진 선택 (x86 CPU → fbgemm / ARM → qnnpack)
torch.backends.quantized.engine = "fbgemm"

# 원본 FP32 모델을 CPU+eval 로 복제
fp32_cpu = copy.deepcopy(vgg).to("cpu", dtype=torch.float32).eval()

# prepare_fx가 참고할 '양자화 설계도' 가져오기(Observer/스킴 배치 규칙, x86=FBGEMM)
qmap = get_default_qconfig_mapping("fbgemm")

# FX 트레이싱에 사용할 예제 입력
example_inputs = (torch.randn(1, 3, 128, 128),)

# prepare_fx: 모델을 FX로 트레이싱하고, 각 텐서 경계에 Observer/Quant-DeQuant 스텁을 삽입
# - example_inputs로 그래프 캡처
# - Observer가 캘리브레이션 동안 활성/가중치 범위(min/max) 수집
# - 이 단계는 여전히 FP32로만 계산 (INT8 치환은 convert_fx에서 수행)
prepared = prepare_fx(fp32_cpu, qmap, example_inputs)

# Calibration: 대표 배치 몇 개를 흘려보내 통계 수집 (CPU 실행)
MAX_CALIB_BATCHES = 20
prepared.eval()
with torch.inference_mode():
    for i, (x, _) in tqdm(enumerate(train_loader), total=min(MAX_CALIB_BATCHES, len(train_loader)),
                          desc="Calibrating (CPU)", leave=False):
        prepared(x.cpu())
        if (i + 1) >= MAX_CALIB_BATCHES:
            break

# convert_fx: 실제 INT8 연산 모듈로 치환 (정적 양자화 완료)
int8_static = convert_fx(prepared).eval()

# 저장 (state_dict만 저장)
torch.save(int8_static.state_dict(), "mission_16_vgg11bn_static_int8_fx.pth")
print("saved: mission_vgg11bn_static_int8_fx.pth")

For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  prepared = prepare_fx(fp32_cpu, qmap, example_inputs)


Calibrating (CPU):   0%|          | 0/20 [00:00<?, ?it/s]

For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  int8_static = convert_fx(prepared).eval()


saved: mission_vgg11bn_static_int8_fx.pth


## QAT

forward에 **가짜 양자화(q→dq)**를 넣어 양자화 노이즈를 포함한 손실로 학습 →
그라디언트는 가중치를 업데이트해서 노이즈에 강건해지게 하고,
scale/zero_point는 observer가 런타임 분포를 보고 갱신(중간에 고정) → 최종 INT8 변환해도 정확도 유지.


In [8]:
# =========================================================
# QAT (조건 일치: FP32와 동일 하이퍼/학습 흐름)
#   - LR=0.05, momentum=0.9, WD=5e-4 (FP32와 동일)
# =========================================================

import copy
import torch
from torch.ao.quantization.quantize_fx import prepare_qat_fx, convert_fx

# reduce_range 경고 방지용: quant_min/max를 명시한 QConfig 사용
from torch.ao.quantization.fake_quantize import FakeQuantize
from torch.ao.quantization.observer import MovingAverageMinMaxObserver, PerChannelMinMaxObserver
from torch.ao.quantization.qconfig import QConfig
from torch.ao.quantization.qconfig_mapping import QConfigMapping

# -------------------------
# 0) 백엔드 & QConfig 구성
# -------------------------
torch.backends.quantized.engine = "fbgemm"  # x86 CPU 대상

# activation: uint8(per-tensor-affine), weight: int8(per-channel-symmetric)
act_fake = FakeQuantize.with_args(
    observer=MovingAverageMinMaxObserver,
    dtype=torch.quint8, qscheme=torch.per_tensor_affine,
    quant_min=0, quant_max=255
)
wgt_fake = FakeQuantize.with_args(
    observer=PerChannelMinMaxObserver,
    dtype=torch.qint8, qscheme=torch.per_channel_symmetric, ch_axis=0,
    quant_min=-128, quant_max=127
)
qmap_qat = QConfigMapping().set_global(QConfig(activation=act_fake, weight=wgt_fake))

# -------------------------
# 1) QAT 준비 (FX graph mode)
#    - FP32 vgg를 복사 → prepare_qat_fx
# -------------------------
example_inputs = (torch.randn(1, 3, IMG_SIZE, IMG_SIZE),)
fp32_for_qat = copy.deepcopy(vgg).to("cpu").train()
qat_model = prepare_qat_fx(fp32_for_qat, qmap_qat, example_inputs).to(device).train()

# -------------------------
# 2) 학습 설정 (FP32와 동일)
# -------------------------
qat_optimizer = torch.optim.SGD(
    qat_model.parameters(), lr=0.005, momentum=0.9, weight_decay=5e-4
)

def train_one_epoch_qat(model, loader, optimizer):
    """FP32와 동일 형태의 학습 루프 (observer/BN 동결 없음)"""
    model.train()
    total = correct = 0
    run_loss = 0.0
    for x, y in loader:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        run_loss += loss.item() * y.size(0)
        pred = out.argmax(1)
        correct += (pred == y).sum().item()
        total += y.size(0)
    return run_loss / total, correct / total

# -------------------------
# 3) 학습 실행 (에폭 수는 FP32와 동일)
# -------------------------
EPOCHS = 5
best_qat = 0.0
for e in range(1, EPOCHS + 1):
    tr_loss, tr_acc = train_one_epoch_qat(qat_model, train_loader, qat_optimizer)
    va_loss, va_acc = evaluate(qat_model, val_loader)  # 기존 evaluate() 재사용 (device 평가)

    if va_acc > best_qat:
        best_qat = va_acc
        torch.save(qat_model.state_dict(), "mission_16_vgg11bn_qat_fake_fp32_match.pth")

    print(f"[Epoch {e:02d}] train loss {tr_loss:.4f} | acc {tr_acc*100:.2f}%  ||  "
          f"val loss {va_loss:.4f} | acc {va_acc*100:.2f}%")

print(f"Best QAT(fake-FP32) val acc: {best_qat*100:.2f}% "
      f"(saved to mission_16_vgg11bn_qat_fake_fp32_match.pth)")

# ---------------------------------------------------------
# 4)  INT8로 변환 후 CPU에서 평가/저장
#    - 배포 기준 정확도는 여기 값으로 판단
# ---------------------------------------------------------
@torch.no_grad()
def evaluate_int8_cpu(model_int8, loader):
    model_int8.eval()
    total = correct = 0
    run_loss = 0.0
    for x, y in loader:
        out  = model_int8(x.cpu())
        loss = criterion(out, y.cpu())
        run_loss += loss.item() * y.size(0)
        pred = out.argmax(1)
        correct += (pred.cpu() == y.cpu()).sum().item()
        total   += y.size(0)
    return run_loss/total, correct/total

qat_cpu = copy.deepcopy(qat_model).to("cpu").eval()
int8_qat = convert_fx(qat_cpu).eval()
val_loss_int8, val_acc_int8 = evaluate_int8_cpu(int8_qat, val_loader)
print(f"[INT8] val loss {val_loss_int8:.4f} | acc {val_acc_int8*100:.2f}%")

torch.save(int8_qat.state_dict(), "mission_16_vgg11bn_qat_int8_fp32match.pth")
print("Saved INT8 state_dict → mission_16_vgg11bn_qat_int8_fp32match.pth")

For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  qat_model = prepare_qat_fx(fp32_for_qat, qmap_qat, example_inputs).to(device).train()


[Epoch 01] train loss 0.0803 | acc 97.24%  ||  val loss 0.2162 | acc 93.20%
[Epoch 02] train loss 0.0663 | acc 97.75%  ||  val loss 0.2706 | acc 92.24%
[Epoch 03] train loss 0.0661 | acc 97.75%  ||  val loss 0.2554 | acc 92.52%
[Epoch 04] train loss 0.0571 | acc 98.06%  ||  val loss 0.2551 | acc 92.52%
[Epoch 05] train loss 0.0526 | acc 98.19%  ||  val loss 0.2642 | acc 92.25%
Best QAT(fake-FP32) val acc: 93.20% (saved to mission_16_vgg11bn_qat_fake_fp32_match.pth)


For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  int8_qat = convert_fx(qat_cpu).eval()


[INT8] val loss 0.2651 | acc 92.26%
Saved INT8 state_dict → mission_16_vgg11bn_qat_int8_fp32match.pth


## FP32 모델을 ONNX로 내보내기

In [9]:
!pip -q install onnx onnxruntime onnxruntime-tools

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m116.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.5/16.5 MB[0m [31m117.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.7/212.7 kB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.5/55.5 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [10]:
import torch, torch.nn as nn, torchvision as tv

# 1) 가중치 로드
state = torch.load("mission_16_vgg11bn_fp32.pth", map_location="cpu")

# 2) 학습 때와 같은 구조로 VGG 재구성 (128x128 → 4x4 → 8192)
model = tv.models.vgg11_bn(num_classes=10)
model.avgpool = nn.Identity()                 # ← 7x7로 강제 풀링을 끔(4x4 유지)
model.classifier[0] = nn.Linear(512*4*4, 4096)  # ← 첫 FC 입력을 8192로 교체

# 3) 가중치 로드
model.load_state_dict(state)
model.eval().cpu()

# 4) ONNX export (입력 128×128 기준)
dummy = torch.randn(1, 3, 128, 128)
torch.onnx.export(
    model, dummy, "mission16_vgg11bn_fp32.onnx",
    input_names=["input"], output_names=["logits"],
    dynamic_axes={"input": {0: "batch"}, "logits": {0: "batch"}},
    opset_version=13, do_constant_folding=True,
)
print("saved: mission_vgg11bn_fp32.onnx")

  torch.onnx.export(


saved: mission_vgg11bn_fp32.onnx


## ONNX를 INT8로 동적 양자화

In [11]:
from onnxruntime.quantization import quantize_dynamic, QuantType

quantize_dynamic(
    model_input="mission16_vgg11bn_fp32.onnx",
    model_output="mission16_vgg11bn_int8_dynamic.onnx",
    weight_type=QuantType.QInt8,
    op_types_to_quantize=["MatMul", "Gemm"],  # Linear(FC)만
    per_channel=False,
)



## 정적 양자화

In [12]:
# === ONNX → ORT 정적 양자화(PTQ, INT8) ===
# pip install onnx onnxruntime onnxruntime-tools

import onnx, onnxruntime as ort
import torchvision as tv, torchvision.transforms as T
from torch.utils.data import DataLoader
from itertools import islice
from onnxruntime.quantization import (
    quantize_static, CalibrationDataReader,
    QuantType, CalibrationMethod, QuantFormat
)

FP32_ONNX = "mission16_vgg11bn_fp32.onnx"
INT8_ONNX = "mission16_vgg11bn_int8_static.onnx"
IMG_SIZE  = 128
BATCH     = 32
MAX_BATCH = 40   # 32 * 40 = 1280장

# shape inference로 메타정보 보강
onnx.save(onnx.shape_inference.infer_shapes(onnx.load(FP32_ONNX)), FP32_ONNX)

# 전처리: 학습/검증과 동일
tf = T.Compose([
    T.Resize(IMG_SIZE),
    T.ToTensor(),
    T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

# 캘리브레이션 데이터 , num_workers=0
calib_ds = tv.datasets.CIFAR10("./data", train=False, download=True, transform=tf)
calib_ld = DataLoader(calib_ds, batch_size=BATCH, shuffle=False, num_workers=0)

# CalibrationDataReader
class ORTCalib(CalibrationDataReader):
    def __init__(self, onnx_path, loader, max_batches):
        self.sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
        self.name = self.sess.get_inputs()[0].name
        self.loader = loader
        self.max_batches = max_batches
        self._reset()
    def _reset(self):
        self._it = iter(islice(self.loader, self.max_batches))
    def get_next(self):
        try:
            x, _ = next(self._it)
            return {self.name: x.numpy()}  # float32 NCHW
        except StopIteration:
            return None
    def rewind(self):
        self._reset()

dr = ORTCalib(FP32_ONNX, calib_ld, MAX_BATCH)

# 정적 양자화 실행: Conv/Gemm(MatMul) 모두 INT8 대상
quantize_static(
    model_input=FP32_ONNX,
    model_output=INT8_ONNX,
    calibration_data_reader=dr,
    activation_type=QuantType.QUInt8,           # 활성값 UINT8
    weight_type=QuantType.QInt8,                # 가중치 INT8
    calibrate_method=CalibrationMethod.MinMax,  # 필요시 Entropy/Percentile로 바꿔 비교
    per_channel=True,                           # Conv 가중치 per-channel
    op_types_to_quantize=["Conv","Gemm","MatMul"],
    quant_format=QuantFormat.QDQ,               # 최신 권장 포맷(QDQ)
)

print(f"saved: {INT8_ONNX}")



saved: mission16_vgg11bn_int8_static.onnx


## QAT UNNX 내보내기

In [13]:
import torch, os

onnx_path = "mission_16_vgg11bn_qat_int8_fp32match.onnx"

# 배치 크기만 동적, 나머지는 고정
dynamic_axes = {"input": {0: "batch"}, "logits": {0: "batch"}}

# 더 안전한 내보내기를 위해 eval + CPU
model_export = int8_qat.to("cpu").eval()

# 더 과한 폴딩이 양자화 Q/DQ를 망치지 않게끔 비활성화 권장
dummy = torch.randn(1, 3, IMG_SIZE, IMG_SIZE, dtype=torch.float32)

torch.onnx.export(
    model_export,
    dummy,
    onnx_path,
    export_params=True,
    opset_version=17,                      # ORT 최신 권장 (>=13)
    do_constant_folding=False,             # Q/DQ 패턴 보존
    input_names=["input"],
    output_names=["logits"],
    dynamic_axes=dynamic_axes,
)

print(f"[OK] Exported QAT INT8 model to ONNX → {os.path.abspath(onnx_path)}")

  torch.onnx.export(


[OK] Exported QAT INT8 model to ONNX → /content/mission_16_vgg11bn_qat_int8_fp32match.onnx
