### 📌前置工作

Ⅰ、从`torchvision.datasets`中加载数据集

In [None]:
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time
import copy
import torch.ao.quantization.quantize_fx as quantize_fx
import os
from torch.ao.quantization import (
  get_default_qconfig_mapping,
  get_default_qat_qconfig_mapping,
  QConfigMapping,
)

In [None]:
# 数据预处理：resize 和 normalization
transform = transforms.Compose([
    transforms.Resize(224),  # 因为 VGG 输入是 224x224
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# 加载训练集
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

# 加载测试集
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)
n_test = len(testset)
example_input = (testset[0])

Ⅱ、导入`Google Drive`，便于保存模型

In [None]:
from torchvision import models
import torch
import torch.nn as nn
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


<All keys matched successfully>

In [None]:
# basemodel.to(device)

device(type='cuda')

In [None]:
# next(basemodel.parameters()).device

device(type='cpu')

Ⅲ、定义一些预制函数

In [None]:
# 用于测试模型精度以及速度的函数
def test(model, test_loader, debug = False):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        start_time = time.time()
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        end_time = time.time()
    accuracy = 100 * correct / total
    time_cost = end_time - start_time
    if debug:
        print('Accuracy of the network on the %d test images: %.2f %%' % (n_test, accuracy))
        print('Time cost: %.2f s' % time_cost)
    return accuracy, time_cost

In [None]:
# 用于获取模型大小的函数，单位为 MB
def get_model_size(model):
    torch.save(model.state_dict(), "temp.pth")
    size = os.path.getsize("temp.pth")/1e6
    os.remove('temp.pth')
    return size

### 📌模型训练、保存

#### VGG11模型训练+查看大小

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# ---------- 1. 模型定义 ----------
# VGG11 配置
vgg11_cfg = [64, 'M', 128, 'M', 256, 'M', 512, 'M', 512, 'M']


class VGG11(nn.Module):
    def __init__(self):
        super(VGG11, self).__init__()
        self.features = self._make_layers(vgg11_cfg)
        self.classifier = nn.Linear(512, 10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [
                    nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                    nn.BatchNorm2d(x),
                    nn.ReLU(inplace=True)
                ]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

In [None]:
# ---------- 2. 数据预处理 ----------
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# ---------- 3. 初始化模型 ----------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG11().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=5e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

# ---------- 4. 训练 ----------
EPOCHS = 40
best_acc = 0.0

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    scheduler.step()

    # ---------- 测试 ----------
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    acc = 100. * correct / total
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {running_loss:.3f} - Test Acc: {acc:.2f}%")

    # 保存最好的模型
    if acc > best_acc:
        best_acc = acc
        from google.colab import drive
        drive.mount('/content/drive')
        # 定义保存路径
        save_path = "/content/drive/MyDrive/第二次尝试/VGG11_CIFAR10.pth"
        # 保存模型参数
        torch.save(model.state_dict(), save_path)

print("Training complete. Best accuracy: {:.2f}%".format(best_acc))

In [None]:
import os
# 用于获取模型大小的函数，单位为 MB
def get_model_size(model):
    torch.save(model.state_dict(), "temp.pth")
    size = os.path.getsize("temp.pth")/1e6
    os.remove('temp.pth')
    return size
print(get_model_size(model))

15.697388


#### VGG13模型训练+查看大小

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# ---------- 1. 模型定义 ----------
# VGG11 配置
vgg13_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']  # 正确


class VGG13(nn.Module):
    def __init__(self):
        super(VGG13, self).__init__()
        self.features = self._make_layers(vgg13_cfg)
        self.classifier = nn.Linear(512, 10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [
                    nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                    nn.BatchNorm2d(x),
                    nn.ReLU(inplace=True)
                ]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

In [None]:
# ---------- 2. 数据预处理 ----------
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# ---------- 3. 初始化模型 ----------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG13().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=5e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

# ---------- 4. 训练 ----------
EPOCHS = 40
best_acc = 0.0

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    scheduler.step()

    # ---------- 测试 ----------
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    acc = 100. * correct / total
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {running_loss:.3f} - Test Acc: {acc:.2f}%")

    # 保存最好的模型
    if acc > best_acc:
        best_acc = acc
        from google.colab import drive
        drive.mount('/content/drive')
        # 定义保存路径
        save_path = "/content/drive/MyDrive/第二次尝试/VGG13_CIFAR10.pth"
        # 保存模型参数
        torch.save(model.state_dict(), save_path)

print("Training complete. Best accuracy: {:.2f}%".format(best_acc))


100%|██████████| 170M/170M [00:03<00:00, 43.2MB/s]


Epoch 1/40 - Loss: 873.551 - Test Acc: 26.95%
Mounted at /content/drive
Epoch 2/40 - Loss: 640.761 - Test Acc: 46.87%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 3/40 - Loss: 493.378 - Test Acc: 52.91%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 4/40 - Loss: 383.448 - Test Acc: 69.04%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 5/40 - Loss: 311.752 - Test Acc: 70.43%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 6/40 - Loss: 267.469 - Test Acc: 74.12%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 7/40 - Loss: 239.215 - Test Acc: 73.96%
Epoch 8

In [None]:
import os
# 用于获取模型大小的函数，单位为 MB
def get_model_size(model):
    torch.save(model.state_dict(), "temp.pth")
    size = os.path.getsize("temp.pth")/1e6
    os.remove('temp.pth')
    return size
print(get_model_size(model))

37.707034


#### VGG16模型训练+查看大小

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# ---------- 1. 模型定义 ----------
# VGG11 配置
vgg16_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']


class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.features = self._make_layers(vgg16_cfg)
        self.classifier = nn.Linear(512, 10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [
                    nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                    nn.BatchNorm2d(x),
                    nn.ReLU(inplace=True)
                ]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

In [None]:
# ---------- 2. 数据预处理 ----------
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# ---------- 3. 初始化模型 ----------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG16().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=5e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

# ---------- 4. 训练 ----------
EPOCHS = 40
best_acc = 0.0

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    scheduler.step()

    # ---------- 测试 ----------
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    acc = 100. * correct / total
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {running_loss:.3f} - Test Acc: {acc:.2f}%")

    # 保存最好的模型
    if acc > best_acc:
        best_acc = acc
        from google.colab import drive
        drive.mount('/content/drive')
        # 定义保存路径
        save_path = "/content/drive/MyDrive/第二次尝试/VGG16_CIFAR10.pth"
        # 保存模型参数
        torch.save(model.state_dict(), save_path)

print("Training complete. Best accuracy: {:.2f}%".format(best_acc))


Epoch 1/40 - Loss: 936.768 - Test Acc: 21.27%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 2/40 - Loss: 742.455 - Test Acc: 32.46%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 3/40 - Loss: 628.141 - Test Acc: 44.63%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 4/40 - Loss: 507.688 - Test Acc: 53.15%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 5/40 - Loss: 407.624 - Test Acc: 55.21%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 6/40 - Loss: 340.069 - Test Acc: 63.81%
Drive already mounted at /content/drive; to attempt to forcibly remount, call d

In [None]:
import os
# 用于获取模型大小的函数，单位为 MB
def get_model_size(model):
    torch.save(model.state_dict(), "temp.pth")
    size = os.path.getsize("temp.pth")/1e6
    os.remove('temp.pth')
    return size
print(get_model_size(model))

58.971868


#### VGG19模型训练+查看大小

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# ---------- 1. 模型定义 ----------
# VGG11 配置
vgg19_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']

class VGG19(nn.Module):
    def __init__(self):
        super(VGG19, self).__init__()
        self.features = self._make_layers(vgg19_cfg)
        self.classifier = nn.Linear(512, 10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [
                    nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                    nn.BatchNorm2d(x),
                    nn.ReLU(inplace=True)
                ]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

In [None]:
# ---------- 2. 数据预处理 ----------
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# ---------- 3. 初始化模型 ----------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG19().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=5e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

# ---------- 4. 训练 ----------
EPOCHS = 40
best_acc = 0.0

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    scheduler.step()

    # ---------- 测试 ----------
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    acc = 100. * correct / total
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {running_loss:.3f} - Test Acc: {acc:.2f}%")

    # 保存最好的模型
    if acc > best_acc:
        best_acc = acc
        from google.colab import drive
        drive.mount('/content/drive')
        # 定义保存路径
        save_path = "/content/drive/MyDrive/第二次尝试/VGG19_CIFAR10.pth"
        # 保存模型参数
        torch.save(model.state_dict(), save_path)

print("Training complete. Best accuracy: {:.2f}%".format(best_acc))


100%|██████████| 170M/170M [00:13<00:00, 12.3MB/s]


Epoch 1/40 - Loss: 912.188 - Test Acc: 19.79%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 2/40 - Loss: 747.513 - Test Acc: 25.80%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 3/40 - Loss: 664.334 - Test Acc: 38.50%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 4/40 - Loss: 587.572 - Test Acc: 47.69%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 5/40 - Loss: 507.761 - Test Acc: 50.24%
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 6/40 - Loss: 419.427 - Test Acc: 65.42%
Drive already mounted at /content/drive; to attempt to forcibly remount, call d

### 📌训练结束，开始量化

#### vgg11

In [None]:
# 加载 CIFAR10 数据集
batch_size = 64

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='/bohr/cifar10-h7hf/v2', train=True, download=True, transform=transform_train)
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='/bohr/cifar10-h7hf/v2', train=False, download=True, transform=transform_test)
test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False)
n_test = len(testset)
example_input = (testset[0])

In [None]:
# 加载预训练模型
model_fp = VGG11()
model_fp.load_state_dict(torch.load('/content/drive/MyDrive/第二次尝试/VGG11_CIFAR10.pth', map_location=torch.device('cpu')))

In [None]:
model_to_quantize_ptsq = copy.deepcopy(model_fp)
model_to_quantize_ptsq.eval()

qconfig_mapping = get_default_qconfig_mapping("qnnpack")
model_prepared_ptsq = quantize_fx.prepare_fx(model_to_quantize_ptsq, qconfig_mapping, example_input)
# 这里使用全部的训练数据来对模型进行校准
# 注意：校准并不是训练，所以我们不需要对模型做反向传播
model_prepared_ptsq.eval()
with torch.no_grad():
    for images, labels in train_loader:
        model_prepared_ptsq(images)

model_quantized_ptsq = quantize_fx.convert_fx(model_prepared_ptsq)

In [None]:
# 首先我们将原始模型复制一份，以防止影响原始模型
model_to_quantize_ptdq = copy.deepcopy(model_fp)
# 然后我们将模型设置为 eval 模式，因为量化时和量化后我们都不会对模型做任何的训练
model_to_quantize_ptdq.eval()

# 量化模式的设置，这里我们使用的是动态量化配置
qconfig_mapping = QConfigMapping().set_global(torch.ao.quantization.default_dynamic_qconfig)

# 样例输入，用于推断模型的输出形状
example_input = (testset[0])

# 准备并量化模型
model_prepared_ptdq = quantize_fx.prepare_fx(model_to_quantize_ptdq, qconfig_mapping, example_input)
model_quantized_ptdq = quantize_fx.convert_fx(model_prepared_ptdq)

In [None]:
model_size = []
model_time_cost = []
model_accuracy = []
# 测试 PTSQ 模型
print('PTSQ model:')
accuracy, time_cost = test(model_quantized_ptsq, test_loader)
model_size.append(get_model_size(model_quantized_ptsq))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

PTSQ model:
Accuracy: 68.71%, Time cost: 3.56 s, Size: 3.93 MB


In [None]:
# 测试原始模型
print('Original model:')
accuracy, time_cost = test(model_fp, test_loader)
model_size.append(get_model_size(model_fp))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

Original model:
Accuracy: 68.81%, Time cost: 6.84 s, Size: 15.70 MB


In [None]:
# 测试 PTDQ 模型
print('PTDQ model:')
accuracy, time_cost = test(model_quantized_ptdq, test_loader)
model_size.append(get_model_size(model_quantized_ptdq))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

PTDQ model:
Accuracy: 68.81%, Time cost: 7.33 s, Size: 15.65 MB


#### VGG13

In [None]:
# 加载预训练模型
model_fp = VGG13()
model_fp.load_state_dict(torch.load('/content/drive/MyDrive/第二次尝试/VGG13_CIFAR10.pth', map_location=torch.device('cpu')))

<All keys matched successfully>

In [None]:
model_to_quantize_ptsq = copy.deepcopy(model_fp)
model_to_quantize_ptsq.eval()

qconfig_mapping = get_default_qconfig_mapping("qnnpack")
model_prepared_ptsq = quantize_fx.prepare_fx(model_to_quantize_ptsq, qconfig_mapping, example_input)
# 这里使用全部的训练数据来对模型进行校准
# 注意：校准并不是训练，所以我们不需要对模型做反向传播
model_prepared_ptsq.eval()
with torch.no_grad():
    for images, labels in train_loader:
        model_prepared_ptsq(images)

model_quantized_ptsq = quantize_fx.convert_fx(model_prepared_ptsq)

In [None]:
# 首先我们将原始模型复制一份，以防止影响原始模型
model_to_quantize_ptdq = copy.deepcopy(model_fp)
# 然后我们将模型设置为 eval 模式，因为量化时和量化后我们都不会对模型做任何的训练
model_to_quantize_ptdq.eval()

# 量化模式的设置，这里我们使用的是动态量化配置
qconfig_mapping = QConfigMapping().set_global(torch.ao.quantization.default_dynamic_qconfig)

# 样例输入，用于推断模型的输出形状
example_input = (testset[0])

# 准备并量化模型
model_prepared_ptdq = quantize_fx.prepare_fx(model_to_quantize_ptdq, qconfig_mapping, example_input)
model_quantized_ptdq = quantize_fx.convert_fx(model_prepared_ptdq)

In [None]:
model_size = []
model_time_cost = []
model_accuracy = []

# 测试原始模型
print('Original model:')
accuracy, time_cost = test(model_fp, test_loader)
model_size.append(get_model_size(model_fp))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

# 测试 PTDQ 模型
print('PTDQ model:')
accuracy, time_cost = test(model_quantized_ptdq, test_loader)
model_size.append(get_model_size(model_quantized_ptdq))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

# 测试 PTSQ 模型
print('PTSQ model:')
accuracy, time_cost = test(model_quantized_ptsq, test_loader)
model_size.append(get_model_size(model_quantized_ptsq))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

Original model:
Accuracy: 65.14%, Time cost: 14.73 s, Size: 37.71 MB
PTDQ model:
Accuracy: 65.13%, Time cost: 14.50 s, Size: 37.63 MB
PTSQ model:
Accuracy: 65.03%, Time cost: 4.80 s, Size: 9.43 MB


#### VGG16

In [None]:
# 加载预训练模型
model_fp = VGG16()
model_fp.load_state_dict(torch.load('/content/drive/MyDrive/第二次尝试/VGG16_CIFAR10.pth', map_location=torch.device('cpu')))

<All keys matched successfully>

In [None]:
model_to_quantize_ptsq = copy.deepcopy(model_fp)
model_to_quantize_ptsq.eval()

qconfig_mapping = get_default_qconfig_mapping("qnnpack")
model_prepared_ptsq = quantize_fx.prepare_fx(model_to_quantize_ptsq, qconfig_mapping, example_input)
# 这里使用全部的训练数据来对模型进行校准
# 注意：校准并不是训练，所以我们不需要对模型做反向传播
model_prepared_ptsq.eval()
with torch.no_grad():
    for images, labels in train_loader:
        model_prepared_ptsq(images)

model_quantized_ptsq = quantize_fx.convert_fx(model_prepared_ptsq)

In [None]:
# 首先我们将原始模型复制一份，以防止影响原始模型
model_to_quantize_ptdq = copy.deepcopy(model_fp)
# 然后我们将模型设置为 eval 模式，因为量化时和量化后我们都不会对模型做任何的训练
model_to_quantize_ptdq.eval()

# 量化模式的设置，这里我们使用的是动态量化配置
qconfig_mapping = QConfigMapping().set_global(torch.ao.quantization.default_dynamic_qconfig)

# 样例输入，用于推断模型的输出形状
example_input = (testset[0])

# 准备并量化模型
model_prepared_ptdq = quantize_fx.prepare_fx(model_to_quantize_ptdq, qconfig_mapping, example_input)
model_quantized_ptdq = quantize_fx.convert_fx(model_prepared_ptdq)

In [None]:
model_size = []
model_time_cost = []
model_accuracy = []

# 测试原始模型
print('Original model:')
accuracy, time_cost = test(model_fp, test_loader)
model_size.append(get_model_size(model_fp))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

# 测试 PTDQ 模型
print('PTDQ model:')
accuracy, time_cost = test(model_quantized_ptdq, test_loader)
model_size.append(get_model_size(model_quantized_ptdq))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

# 测试 PTSQ 模型
print('PTSQ model:')
accuracy, time_cost = test(model_quantized_ptsq, test_loader)
model_size.append(get_model_size(model_quantized_ptsq))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

Original model:
Accuracy: 75.53%, Time cost: 16.27 s, Size: 58.97 MB
PTDQ model:
Accuracy: 75.56%, Time cost: 16.06 s, Size: 58.87 MB
PTSQ model:
Accuracy: 75.61%, Time cost: 5.60 s, Size: 14.75 MB


#### vgg19

In [None]:
# 加载预训练模型
model_fp = VGG19()
model_fp.load_state_dict(torch.load('/content/drive/MyDrive/第二次尝试/VGG19_CIFAR10.pth', map_location=torch.device('cpu')))

<All keys matched successfully>

In [None]:
model_to_quantize_ptsq = copy.deepcopy(model_fp)
model_to_quantize_ptsq.eval()

qconfig_mapping = get_default_qconfig_mapping("qnnpack")
model_prepared_ptsq = quantize_fx.prepare_fx(model_to_quantize_ptsq, qconfig_mapping, example_input)
# 这里使用全部的训练数据来对模型进行校准
# 注意：校准并不是训练，所以我们不需要对模型做反向传播
model_prepared_ptsq.eval()
with torch.no_grad():
    for images, labels in train_loader:
        model_prepared_ptsq(images)

model_quantized_ptsq = quantize_fx.convert_fx(model_prepared_ptsq)

In [None]:
# 首先我们将原始模型复制一份，以防止影响原始模型
model_to_quantize_ptdq = copy.deepcopy(model_fp)
# 然后我们将模型设置为 eval 模式，因为量化时和量化后我们都不会对模型做任何的训练
model_to_quantize_ptdq.eval()

# 量化模式的设置，这里我们使用的是动态量化配置
qconfig_mapping = QConfigMapping().set_global(torch.ao.quantization.default_dynamic_qconfig)

# 样例输入，用于推断模型的输出形状
example_input = (testset[0])

# 准备并量化模型
model_prepared_ptdq = quantize_fx.prepare_fx(model_to_quantize_ptdq, qconfig_mapping, example_input)
model_quantized_ptdq = quantize_fx.convert_fx(model_prepared_ptdq)

In [None]:
model_size = []
model_time_cost = []
model_accuracy = []

# 测试原始模型
print('Original model:')
accuracy, time_cost = test(model_fp, test_loader)
model_size.append(get_model_size(model_fp))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

# 测试 PTDQ 模型
print('PTDQ model:')
accuracy, time_cost = test(model_quantized_ptdq, test_loader)
model_size.append(get_model_size(model_quantized_ptdq))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

# 测试 PTSQ 模型
print('PTSQ model:')
accuracy, time_cost = test(model_quantized_ptsq, test_loader)
model_size.append(get_model_size(model_quantized_ptsq))
model_time_cost.append(time_cost)
model_accuracy.append(accuracy)
print(f'Accuracy: {accuracy:.2f}%, Time cost: {time_cost:.2f} s, Size: {model_size[-1]:.2f} MB')

Original model:
Accuracy: 72.76%, Time cost: 20.76 s, Size: 80.24 MB
PTDQ model:
Accuracy: 72.80%, Time cost: 20.61 s, Size: 80.11 MB
PTSQ model:
Accuracy: 73.15%, Time cost: 6.47 s, Size: 20.07 MB


【Conclusion】
- 使用PTSQ之后模型的大小变为原来的1/4左右，但是PTDQ之后的模型大小变化不大；
- PTSQ模型的推理速度大幅提高，PTDQ变化不大；
- 二者准确率几乎和Original model的准确率一样；
