# PTQ

In [6]:
import torch
import torch.nn as nn
import torch.quantization as quantization
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

class PTQModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.quant = quantization.QuantStub()
        self.conv1 = nn.Conv2d(1, 16, 3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(16, 16, 3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(16)
        self.relu2 = nn.ReLU()
        self.fc = nn.Linear(16 * 28 * 28, 10)
        self.dequant = quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        x = self.dequant(x)
        return x

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


def train_fp32_model(model, dataloader, criterion, optimizer, epochs=10):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for images, labels in dataloader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / len(dataloader):.4f}")


def evaluate_model(model, dataloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"Accuracy: {accuracy:.2f}%")
    return accuracy


def apply_ptq_with_calibration(model, calibration_loader):
    model.eval()
    model.qconfig = quantization.get_default_qconfig("x86")
    model_fused = quantization.fuse_modules(model, [["conv1", "bn1", "relu1"], ["conv2", "bn2", "relu2"]])
    model_prepared = quantization.prepare(model_fused)
    with torch.no_grad():
        for images, _ in calibration_loader:
            model_prepared(images)
    model_int8 = quantization.convert(model_prepared)
    return model_int8


model_fp32 = PTQModel()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_fp32.parameters(), lr=0.001)

print("Training FP32 model...")
train_fp32_model(model_fp32, train_loader, criterion, optimizer, epochs=5)

print("Evaluating FP32 model...")
evaluate_model(model_fp32, test_loader)

print("Applying PTQ with calibration...")
calibration_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)
model_int8 = apply_ptq_with_calibration(model_fp32, calibration_loader)

print("Evaluating INT8 model...")
evaluate_model(model_int8, test_loader)

Training FP32 model...
Epoch [1/5], Loss: 0.1524
Epoch [2/5], Loss: 0.0544
Epoch [3/5], Loss: 0.0380
Epoch [4/5], Loss: 0.0279
Epoch [5/5], Loss: 0.0196
Evaluating FP32 model...
Accuracy: 98.39%
Applying PTQ with calibration...
Evaluating INT8 model...
Accuracy: 98.35%


98.35

#QAT

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.ao.quantization as quantization

class QATModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.quant = quantization.QuantStub()
        self.conv1 = nn.Conv2d(1, 16, 3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(16, 16, 3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(16)
        self.relu2 = nn.ReLU()
        self.fc = nn.Linear(16 * 28 * 28, 10)
        self.dequant = quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        x = self.dequant(x)
        return x

def get_data_loaders(batch_size=64):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

def training_loop(model, train_loader, criterion, optimizer, num_epochs=1):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"Accuracy: {accuracy:.2f}%")

train_loader, test_loader = get_data_loaders()

model_fp32 = QATModel()
model_fp32.qconfig = quantization.get_default_qat_qconfig('x86')

model_fp32.eval()
model_fp32_fused = quantization.fuse_modules(model_fp32,[["conv1", "bn1", "relu1"], ["conv2", "bn2", "relu2"]])

model_fp32_prepared = quantization.prepare_qat(model_fp32_fused.train())

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_fp32_prepared.parameters(), lr=0.001)

print("Training QAT model...")
training_loop(model_fp32_prepared, train_loader, criterion, optimizer, num_epochs=5)

print("Converting to INT8 model...")
model_fp32_prepared.eval()
model_int8 = quantization.convert(model_fp32_prepared)

print("Evaluating INT8 model...")
evaluate_model(model_int8, test_loader)


Training QAT model...
Epoch [1/5], Loss: 0.2276
Epoch [2/5], Loss: 0.0575
Epoch [3/5], Loss: 0.0399
Epoch [4/5], Loss: 0.0300
Epoch [5/5], Loss: 0.0211
Converting to INT8 model...
Evaluating INT8 model...
Accuracy: 98.32%


양자화 성공적인지 확인

In [None]:
print(model_int8.conv1.weight)

<bound method Conv2d.weight of QuantizedConvReLU2d(1, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.016592519357800484, zero_point=0, padding=(1, 1))>
