In [1]:
import torch
import torch.nn as nn
from torchinfo import summary

In [2]:
class VGG(nn.Module):
    """VGG-style classifier: a convolutional feature extractor plus a 3-layer MLP head.

    Args:
        features: Module applied first in ``forward``. Its output must have 512
            channels, because the first linear layer expects ``512 * 7 * 7``
            inputs after pooling and flattening.
        num_classes: Number of output logits. Defaults to 1000.
    """

    def __init__(self, features, num_classes = 1000):
        super().__init__()
        self.features = features
        # Pool the feature map to a fixed 7x7 grid so the classifier input size
        # no longer depends on the image resolution. For 224x224 inputs the
        # feature map is already 7x7, so this is an identity in that case.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace = True),
            nn.Dropout(),  # default drop probability is 0.5
            nn.Linear(4096, 4096),
            nn.ReLU(inplace = True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for input batch ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten everything from dim 1 onward, keeping the batch dimension (dim 0).
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [3]:
# Layer configurations per VGG variant. An integer is the output-channel count
# of a conv layer and 'M' marks a max-pooling layer; the values are meant to
# mirror the configuration table in the VGG paper.
cfgs = dict(
    vgg11=[
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    vgg13=[
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    vgg16=[
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    vgg19=[
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
)

In [4]:
def make_layers(cfg, in_channels = 3, batch_norm = False):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Iterable of layer specs. The string ``'M'`` adds a 2x2 max-pool
            with stride 2; any other value is used as the output-channel count
            of a 3x3 convolution (padding 1) followed by ReLU.
        in_channels: Channel count of the input images. Defaults to 3.
        batch_norm: If True, insert ``nn.BatchNorm2d`` between each convolution
            and its ReLU. Defaults to False (the original behaviour).

    Returns:
        ``nn.Sequential`` containing the layers in ``cfg`` order.
    """
    layers = []
    for v in cfg:
        if v == 'M':
            # Max-pooling layer: halves the spatial resolution.
            layers.append(nn.MaxPool2d(kernel_size = 2, stride = 2))
        else:
            # 3x3 convolution (+ optional batch norm) + ReLU.
            conv2d = nn.Conv2d(in_channels, v, kernel_size = 3, padding = 1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace = True)]
            else:
                layers += [conv2d, nn.ReLU(inplace = True)]
            # This layer's output channels are the next conv's input channels.
            in_channels = v
    return nn.Sequential(*layers)

In [5]:
# Factory helpers: each one builds a VGG from its matching configuration entry.
def vgg11(num_classes=1000):
    """Return a VGG built from ``cfgs['vgg11']`` with ``num_classes`` output logits."""
    feature_extractor = make_layers(cfgs['vgg11'])
    return VGG(feature_extractor, num_classes=num_classes)

def vgg13(num_classes=1000):
    """Return a VGG built from ``cfgs['vgg13']`` with ``num_classes`` output logits."""
    feature_extractor = make_layers(cfgs['vgg13'])
    return VGG(feature_extractor, num_classes=num_classes)

def vgg16(num_classes=1000):
    """Return a VGG built from ``cfgs['vgg16']`` with ``num_classes`` output logits."""
    feature_extractor = make_layers(cfgs['vgg16'])
    return VGG(feature_extractor, num_classes=num_classes)

def vgg19(num_classes=1000):
    """Return a VGG built from ``cfgs['vgg19']`` with ``num_classes`` output logits."""
    feature_extractor = make_layers(cfgs['vgg19'])
    return VGG(feature_extractor, num_classes=num_classes)

In [6]:
# Inspect the model structure and parameter counts; input_size is the shape of
# an example input batch.
custom_vgg16 = vgg16()
summary(custom_vgg16, input_size=(1, 3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
VGG                                      [1, 1000]                 --
├─Sequential: 1-1                        [1, 512, 7, 7]            --
│    └─Conv2d: 2-1                       [1, 64, 224, 224]         1,792
│    └─ReLU: 2-2                         [1, 64, 224, 224]         --
│    └─Conv2d: 2-3                       [1, 64, 224, 224]         36,928
│    └─ReLU: 2-4                         [1, 64, 224, 224]         --
│    └─MaxPool2d: 2-5                    [1, 64, 112, 112]         --
│    └─Conv2d: 2-6                       [1, 128, 112, 112]        73,856
│    └─ReLU: 2-7                         [1, 128, 112, 112]        --
│    └─Conv2d: 2-8                       [1, 128, 112, 112]        147,584
│    └─ReLU: 2-9                         [1, 128, 112, 112]        --
│    └─MaxPool2d: 2-10                   [1, 128, 56, 56]          --
│    └─Conv2d: 2-11                      [1, 256, 56, 56]          29

In [7]:
# Inspect the structure and parameter counts of torchvision's built-in VGG-16
# for comparison with the hand-written model.
from torchvision import models

reference_vgg16 = models.vgg16()
summary(reference_vgg16, input_size=(1, 3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
VGG                                      [1, 1000]                 --
├─Sequential: 1-1                        [1, 512, 7, 7]            --
│    └─Conv2d: 2-1                       [1, 64, 224, 224]         1,792
│    └─ReLU: 2-2                         [1, 64, 224, 224]         --
│    └─Conv2d: 2-3                       [1, 64, 224, 224]         36,928
│    └─ReLU: 2-4                         [1, 64, 224, 224]         --
│    └─MaxPool2d: 2-5                    [1, 64, 112, 112]         --
│    └─Conv2d: 2-6                       [1, 128, 112, 112]        73,856
│    └─ReLU: 2-7                         [1, 128, 112, 112]        --
│    └─Conv2d: 2-8                       [1, 128, 112, 112]        147,584
│    └─ReLU: 2-9                         [1, 128, 112, 112]        --
│    └─MaxPool2d: 2-10                   [1, 128, 56, 56]          --
│    └─Conv2d: 2-11                      [1, 256, 56, 56]          29

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from tqdm import *
import numpy as np
import sys

In [9]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [10]:
# Seed PyTorch's global random number generator so runs are repeatable.
SEED = 86
torch.manual_seed(SEED)

<torch._C.Generator at 0x22113512910>

In [11]:
# VGG-11 with 102 output classes, moved to the selected device.
model = vgg11(num_classes=102)
model = model.to(device)

# Plain SGD with momentum over all model parameters.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.002,
    momentum=0.9,
)

# Cross-entropy loss on the raw logits.
criterion = nn.CrossEntropyLoss()

In [12]:
# Training-time preprocessing: random augmentation, then tensor conversion and
# per-channel normalisation (the mean/std values match the widely used
# ImageNet statistics).
train_steps = [
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform_train = transforms.Compose(train_steps)

In [13]:
# Evaluation-time preprocessing: deterministic resize, tensor conversion and
# the same per-channel normalisation as used for training.
test_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform_test = transforms.Compose(test_steps)

In [14]:
# Training data: Flowers102 with the augmenting transform, shuffled batches of 64.
# NOTE(review): the training set is loaded from split="test" and the evaluation
# set below from split="train". This looks like a deliberate swap to train on
# the larger official split, but it is undocumented -- confirm it is intended.
train_dataset = datasets.Flowers102(root = '/data/flowers102/',
                                    split = "test",
                                    download = True,
                                   transform = transform_train)
train_loader = DataLoader(train_dataset, batch_size = 64, shuffle = True, num_workers = 4)

# Evaluation data: deterministic transform, fixed order (no shuffling).
test_dataset = datasets.Flowers102(root = '/data/flowers102/',
                                 split = "train",
                                 download = True,
                                 transform = transform_test)
test_loader = DataLoader(test_dataset, batch_size = 64, shuffle = False, num_workers = 4)

In [15]:
# Number of passes over the training data.
num_epochs = 200

# Per-epoch records filled in by the training loop: log10 of the summed
# training loss, and accuracy on the evaluation set.
loss_history, acc_history = [], []

In [None]:
# Train for num_epochs epochs; after each epoch evaluate on test_loader and
# record the log10 of the summed training loss and the evaluation accuracy.
for epoch in tqdm(range(num_epochs), file = sys.stdout):
    total_loss = 0
    total_correct = 0

    # ---- Training pass: one optimizer step per mini-batch ----
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # ---- Evaluation pass: no gradients, dropout disabled via eval() ----
    model.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Fixed: was `moedel(inputs)` (undefined name -> NameError).
            outputs = model(inputs)
            # Fixed: was `outputsargmax(1)` (missing dot -> NameError).
            # argmax over dim 1 picks the predicted class for each sample.
            total_correct += (outputs.argmax(1) == labels).sum().item()

    # Record training loss and evaluation accuracy for this epoch. The summed
    # loss can be large, so its base-10 logarithm is stored instead.
    loss_history.append(np.log10(total_loss))
    acc_history.append(total_correct / len(test_dataset))

    # Log progress every 10 epochs without breaking the tqdm progress bar.
    if epoch % 10 == 0:
        tqdm.write("Epoch: {0} Loss: {1} Acc: {2}".format(epoch, loss_history[-1], acc_history[-1]))

# Plot the recorded loss and accuracy curves with Matplotlib.
import matplotlib.pyplot as plt

for series, series_label in ((loss_history, 'loss'), (acc_history, 'accuracy')):
    plt.plot(series, label=series_label)
plt.legend()
plt.show()

# Report the accuracy recorded for the last epoch.
final_accuracy = acc_history[-1]
print("Accuracy:", final_accuracy)

  0%|          | 0/200 [00:00<?, ?it/s]