In [1]:
import numpy as np

import torch
from torch.utils.data import DataLoader, Subset

import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR100
 
import torch.nn as nn
import torch.optim as optim


from torchvision.models import mobilenet_v2
import torch.quantization
from torch.quantization import QuantStub, DeQuantStub

from tqdm.auto import tqdm

In [2]:
# Sanity check: report whether PyTorch can see a CUDA device.
# The training helper defined later in this notebook moves every batch with .cuda().
print(torch.cuda.is_available())

True


In [19]:
# Shared preprocessing for both splits: resize to 224x224, convert to a
# tensor, then normalise each channel with the mean/std constants below.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Build both CIFAR-100 splits first (download=True fetches into ./data) ...
train_dataset = CIFAR100(root='./data', train=True, download=True, transform=transform)
test_dataset = CIFAR100(root='./data', train=False, download=True, transform=transform)

# ... then wrap them in loaders; only the training split is shuffled.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32, num_workers=4)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32, num_workers=4)

Files already downloaded and verified
Files already downloaded and verified


In [20]:
class InvertedBottleneck(nn.Module):
    """Inverted-residual block: optional 1x1 expansion, 3x3 depthwise conv, 1x1 projection.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the final 1x1 projection.
        t: Expansion factor; the hidden width is ``in_channels * t``. The
            expansion stage is only part of the block when ``t > 1``.
        stride: Stride of the depthwise convolution.

    The input is added back onto the block output only when ``stride == 1``
    and ``in_channels == out_channels``. The addition goes through a
    ``FloatFunctional`` module rather than a bare ``+``.
    """

    def __init__(self, in_channels, out_channels, t, stride = 1):
        super().__init__()

        hidden = in_channels * t

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride
        self.add = nn.quantized.FloatFunctional()

        # 1x1 conv widening the input to the hidden width.
        # Built unconditionally, but only placed in the block when t > 1.
        expand = nn.Sequential(
            nn.Conv2d(in_channels, hidden, kernel_size = 1, bias = False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace = True),
        )
        # 3x3 conv with one group per channel; this stage carries the stride.
        depthwise = nn.Sequential(
            nn.Conv2d(
                hidden,
                hidden,
                kernel_size = 3,
                stride = stride,
                padding = 1,
                groups = hidden,
                bias = False,
            ),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace = True),
        )
        # 1x1 projection to out_channels; no activation follows it.
        pointwise = nn.Sequential(
            nn.Conv2d(hidden, out_channels, kernel_size = 1, bias = False),
            nn.BatchNorm2d(out_channels),
        )

        stages = [expand, depthwise, pointwise] if t > 1 else [depthwise, pointwise]
        self.residual = nn.Sequential(*stages)

    def forward(self, x):
        out = self.residual(x)
        # Identity shortcut is only valid when the shape is unchanged.
        if self.stride == 1 and self.in_channels == self.out_channels:
            out = self.add.add(out, x)
        return out

In [23]:
class QuantizedMobileNetV2(nn.Module):
    """MobileNetV2-style classifier bracketed by ``QuantStub`` / ``DeQuantStub``.

    Layout: strided 3x3 stem conv -> seven stages of ``InvertedBottleneck``
    blocks -> 1x1 conv to 1280 channels -> global average pool -> dropout and
    a linear layer producing ``n_classes`` outputs.

    Args:
        n_classes: Size of the final linear layer's output.
    """

    def __init__(self, n_classes = 100):
        super().__init__()

        # Stubs applied to the network input and to the final output.
        self.quant = QuantStub()
        self.dequant = DeQuantStub()

        # Stem: 3 -> 32 channels, stride 2.
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size = 3, stride = 2, padding = 1, bias = False),
            nn.BatchNorm2d(32),
            nn.ReLU6(inplace = True),
        )

        # One row per stage: (in_channels, out_channels, t, n, stride).
        stage_specs = [
            (32, 16, 1, 1, 1),
            (16, 24, 6, 2, 2),
            (24, 32, 6, 3, 2),
            (32, 64, 6, 4, 2),
            (64, 96, 6, 3, 1),
            (96, 160, 6, 3, 2),
            (160, 320, 6, 1, 1),
        ]
        self.bottlenecks = nn.Sequential(
            *[
                self.make_stage(c_in, c_out, t = t, n = n, stride = s)
                for c_in, c_out, t, n, s in stage_specs
            ]
        )

        # Head conv: 320 -> 1280 channels.
        self.last_conv = nn.Sequential(
            nn.Conv2d(320, 1280, kernel_size = 1, bias = False),
            nn.BatchNorm2d(1280),
            nn.ReLU6(inplace = True),
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            # Train while randomly masking some of the features laid out along the channel axis.
            nn.Dropout(0.2),
            nn.Linear(1280, n_classes),
        )

    def forward(self, x):
        features = self.quant(x)
        for stage in (self.first_conv, self.bottlenecks, self.last_conv, self.avgpool):
            features = stage(features)

        # (N, C, 1, 1) -> (N, C)
        logits = self.fc(torch.flatten(features, 1))
        return self.dequant(logits)

    def make_stage(self, in_channels, out_channels, t, n, stride = 1):
        """Stack ``n`` bottleneck blocks into one ``nn.Sequential``.

        Only the first block changes the channel count and receives
        ``stride``; the remaining ``n - 1`` blocks map ``out_channels`` to
        ``out_channels`` with the block's default stride.
        """
        blocks = [InvertedBottleneck(in_channels, out_channels, t, stride)]
        blocks.extend(
            InvertedBottleneck(out_channels, out_channels, t) for _ in range(n - 1)
        )
        return nn.Sequential(*blocks)

In [25]:
# Model training function
def train_model(model, train_loader, criterion, optimizer, num_epochs=5):
    """Train ``model`` in place for ``num_epochs`` passes over ``train_loader``.

    Args:
        model: Module to optimise. Each batch is moved to the device holding
            the model's first parameter, so the model must already be on the
            intended device (CPU is used if it has no parameters).
        train_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer to step; it should wrap ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. Progress is reported through the tqdm bar and one ``print`` per
        epoch with the mean batch loss.
    """
    # Follow the model's device instead of hard-coding CUDA, so the same
    # helper also works for CPU-resident models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        batches_seen = 0
        tepoch = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}", unit="batch")

        for inputs, labels in tepoch:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            batches_seen += 1
            # Count batches ourselves: the bar's own counter is only advanced
            # after the loop body (and only on refresh), so dividing by it
            # overstates the running mean.
            tepoch.set_postfix(loss=running_loss / batches_seen)
        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        print(f"Epoch {epoch + 1}, Loss: {running_loss / max(batches_seen, 1)}")

# Model evaluation function
def evaluate_model(model, test_loader, is_cuda=True):
    """Compute top-1 accuracy of ``model`` over ``test_loader``.

    Args:
        model: Module to score; it is switched to eval mode.
        test_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        is_cuda: When true each batch is moved with ``.cuda()``, otherwise
            with ``.cpu()``.

    Returns:
        Fraction of correctly classified samples as a float.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    model.eval()
    n_correct, n_seen = 0, 0

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc="Evaluating", unit="batch"):
            inputs = inputs.cuda() if is_cuda else inputs.cpu()
            labels = labels.cuda() if is_cuda else labels.cpu()

            # Index of the largest logit along dim 1 is the predicted class.
            predicted = torch.max(model(inputs), 1)[1]
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

    accuracy = n_correct / n_seen
    print(f"Accuracy: {accuracy * 100:.2f}% | correct / total: {n_correct} / {n_seen}")
    return accuracy

In [26]:
# First 1000 training samples only, wrapped in their own shuffled loader.
sub_dataset = Subset(train_dataset, indices=range(1000))
sub_loader = DataLoader(
    dataset=sub_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)

In [27]:
print("Training the quantized model...")
quantized_model = QuantizedMobileNetV2(n_classes=100).cuda()

# Quantization setup.
# prepare_qat only simulates quantization during training when the qconfig
# entries are FakeQuantize modules. Bare observers just record value ranges
# and return the tensor unchanged, so the network would train in plain float.
# The same observers and dtypes as before are therefore wrapped in
# FakeQuantize, with quant_min/quant_max spelling out each dtype's 8-bit range.
quantized_model.qconfig = torch.quantization.QConfig(
    weight=torch.quantization.FakeQuantize.with_args(
        observer=torch.quantization.MinMaxObserver,
        quant_min=-128,
        quant_max=127,
        dtype=torch.qint8,
    ),
    activation=torch.quantization.FakeQuantize.with_args(
        observer=torch.quantization.MovingAverageMinMaxObserver,
        quant_min=0,
        quant_max=255,
        dtype=torch.quint8,
    ),
)

torch.quantization.prepare_qat(quantized_model, inplace=True)  # prepare for quantization-aware training

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(quantized_model.parameters(), lr=0.01, momentum=0.9)

train_model(quantized_model, sub_loader, criterion, optimizer, num_epochs=1)

Training the quantized model...


Epoch 1/1:   0%|          | 0/32 [00:17<?, ?batch/s]

Epoch 1, Loss: 4.676937982439995


In [28]:
# Convert the trained model to its quantized form (in place, on the CPU).
# train_model leaves the network in training mode; switch to eval mode first,
# as the PyTorch eager-mode quantization workflow does before convert.
torch.quantization.convert(quantized_model.cpu().eval(), inplace=True)



QuantizedMobileNetV2(
  (quant): Quantize(scale=tensor([0.0187]), zero_point=tensor([114]), dtype=torch.quint8)
  (dequant): DeQuantize()
  (first_conv): Sequential(
    (0): QuantizedConv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), scale=0.06207526847720146, zero_point=136, padding=(1, 1), bias=False)
    (1): QuantizedBatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): QuantizedReLU6(inplace=True)
  )
  (bottlenecks): Sequential(
    (0): Sequential(
      (0): InvertedBottleneck(
        (add): QFunctional(
          scale=1.0, zero_point=0
          (activation_post_process): Identity()
        )
        (residual): Sequential(
          (0): Sequential(
            (0): QuantizedConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.03298689052462578, zero_point=127, padding=(1, 1), groups=32, bias=False)
            (1): QuantizedBatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): QuantizedReLU6(

In [29]:
# Score the quantized model on the test split, keeping everything on the CPU.
print("Evaluating the quantized model...")
quantized_accuracy = evaluate_model(
    model=quantized_model.cpu(),
    test_loader=test_loader,
    is_cuda=False,
)

print(f"Quantized Model Accuracy: {quantized_accuracy * 100:.2f}%")

Evaluating the quantized model...


Evaluating:   0%|          | 0/313 [00:14<?, ?batch/s]

Accuracy: 1.08% | correct / total: 108 / 10000
Quantized Model Accuracy: 1.08%


In [None]:
print("Training and evaluating the original model...")
original_model = mobilenet_v2(pretrained=False)
original_model.classifier[1] = nn.Linear(original_model.last_channel, 100)  # replace the head to match CIFAR100's 100 classes
original_model = original_model.cuda()

# This model needs its own optimizer: the `optimizer` built in the QAT cell
# wraps quantized_model.parameters(), so passing it here would leave
# original_model's weights untouched for all five epochs.
# Same SGD hyper-parameters as the QAT run.
original_criterion = nn.CrossEntropyLoss()
original_optimizer = optim.SGD(original_model.parameters(), lr=0.01, momentum=0.9)

train_model(original_model, train_loader, original_criterion, original_optimizer, num_epochs=5)
original_accuracy = evaluate_model(original_model, test_loader)

print(f"Original Model Accuracy: {original_accuracy * 100:.2f}%")

Training and evaluating the original model...




In [None]:
# Disabled experiment: the code below sits inside a string literal, so it is
# never executed. It would apply post-training dynamic quantization (qint8)
# to original_model and rebind quantized_model to the result.
# NOTE(review): PyTorch's dynamic quantization is documented for Linear and
# recurrent layers; confirm whether listing Conv2d has any effect, and whether
# original_model must be moved to the CPU first, before enabling this.
'''
quantized_model = torch.quantization.quantize_dynamic(
    original_model,
    {torch.nn.Linear, torch.nn.Conv2d},
    dtype=torch.qint8
)
'''