# 1. nn.Conv2d

## 1.1 卷积层的权值矩阵

In [1]:
import torch
import torch.nn as nn
# Fix the RNG seed so the randomly initialised layer weights are reproducible.
torch.manual_seed(123)

# Positional order of the arguments: in_channels, out_channels, kernel_size.
conv = nn.Conv2d(3, 16, 5)
print(conv.weight.size())  # [out_ch, in_ch, kernel_size, kernel_size]
print(conv.bias.size())  # one bias value per output channel

torch.Size([16, 3, 5, 5])
torch.Size([16])


## 1.2 输入和输出图像的shape

In [2]:
# Random batch laid out as [batch, channels, height, width].
img = torch.randn((1000, 3, 32, 32))
out = conv(img)
out.size()

torch.Size([1000, 16, 28, 28])

## 1.3 Padding

In [3]:
# Passing `padding` zero-fills the border on all four sides of the input image.
conv = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
img = torch.randn((1000, 3, 32, 32))  # [batch, channels, height, width]
out = conv(img)
out.size()

torch.Size([1000, 16, 32, 32])

In [4]:
# Demonstrate explicit zero padding on a 3x3 grid holding the integers 1..9.
img = torch.arange(1, 10).reshape(3, 3)
img = nn.ZeroPad2d(padding=2)(img)  # two rows / columns of zeros on every side
img

tensor([[0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 2, 3, 0, 0],
        [0, 0, 4, 5, 6, 0, 0],
        [0, 0, 7, 8, 9, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]])

# 2. 下采样：从大到小

In [5]:
pool = nn.MaxPool2d(kernel_size=2)
img = torch.randn((1000, 3, 32, 32))  # [batch, channels, height, width]
out = pool(img)
out.size()  # height and width are halved, the channel count is unchanged

torch.Size([1000, 3, 16, 16])

# 3. 网络层搭建

## 3.1 失败的尝试

In [6]:
model = nn.Sequential(
    # [b, 3, h, w] -> [b, 16, h, w]
    nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),
    nn.Tanh(),

    # [b, 16, h, w] -> [b, 16, h/2, w/2]
    nn.MaxPool2d(kernel_size=2),

    # [b, 16, h/2, w/2] -> [b, 8, h/2, w/2]
    nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1),
    nn.Tanh(),

    # [b, 8, h/2, w/2] -> [b, 8, h/4, w/4]
    nn.MaxPool2d(kernel_size=2),

    # === The step that flattens the last three dimensions is missing here ===
    # [b, 8, h/4, w/4] -> [b, 8 * 8 * 8]
    # (left out on purpose: this cell is the "failed attempt")

    # [b, 8 * 8 * 8] -> [b, 32]
    nn.Linear(in_features=8 * 8 * 8, out_features=32),
    nn.Tanh(),
    nn.Linear(in_features=32, out_features=2),
)

## 3.2 自定义Module

In [7]:
class MyNet(nn.Module):
    """Two conv / tanh / max-pool stages followed by a two-layer classifier.

    ``fc1`` takes 8 * 8 * 8 input features, which matches [b, 3, 32, 32]
    batches: each pooling stage halves height and width and ``conv2`` emits
    8 channels.
    """

    def __init__(self):
        super().__init__()

        # Every sub-module is bound directly as an attribute so that nn.Module
        # registers it and its weights appear in parameters(). Sub-modules kept
        # in a plain list or dict are not registered, so their weights would
        # never be updated; use nn.ModuleList / nn.ModuleDict when a container
        # is really needed.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(in_features=8 * 8 * 8, out_features=32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(in_features=32, out_features=2)

    def forward(self, x):
        """Map a [b, 3, h, w] batch to [b, 2] class scores."""
        # Stage 1: [b, 3, h, w] -> [b, 16, h/2, w/2]
        feats = self.conv1(x)
        feats = self.act1(feats)
        feats = self.pool1(feats)

        # Stage 2: [b, 16, h/2, w/2] -> [b, 8, h/4, w/4]
        feats = self.conv2(feats)
        feats = self.act2(feats)
        feats = self.pool2(feats)

        # Flatten everything but the batch dimension: [b, 8, h/4, w/4] -> [b, -1]
        flat = feats.view(feats.size(0), -1)

        hidden = self.act3(self.fc1(flat))
        return self.fc2(hidden)


# Smoke test: push one random batch through a freshly initialised network.
img = torch.randn((1000, 3, 32, 32))  # [batch, channels, height, width]
model = MyNet()
out = model(img)
out.size()  # [batch, 2]

torch.Size([1000, 2])

In [8]:
# Parameter count: all parameters first, then only those that require gradients.
print(sum(tensor.numel() for tensor in model.parameters()))
print(sum(tensor.numel() for tensor in filter(lambda t: t.requires_grad, model.parameters())))

18090
18090


## 3.3 Functional API

除了nn.Tanh()这样的网络层类，PyTorch还提供了相应的函数，
这些函数没有中间状态，即它的输出完全依赖于输入。

因此可以将上述自定义Module改写为：

In [9]:
import torch.nn.functional as F

class MyNet2(nn.Module):
    """Conv net written with the functional API for the stateless operations.

    Only layers that own parameters (two convolutions, two linear layers) are
    stored as sub-modules; tanh and max-pooling are called as plain functions
    inside ``forward``. ``fc1`` takes 8 * 8 * 8 features, so the network
    expects [b, 3, 32, 32] inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        """Map a [b, 3, 32, 32] batch to [b, 2] class scores.

        Raises:
            RuntimeError: if the flattened feature size does not match the
                input size of ``fc1`` (i.e. the images are not 32x32).
        """
        # [b, 3, h, w] -> [b, 16, h/2, w/2]
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        # [b, 16, h/2, w/2] -> [b, 8, h/4, w/4]
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)

        # Flatten per sample while pinning the batch dimension. Using
        # view(-1, 8 * 8 * 8) would let a wrongly sized input be silently
        # folded into extra batch rows instead of failing at fc1.
        out = out.view(out.shape[0], -1)

        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

# Smoke test: the functional variant must produce the same output shape.
img = torch.randn((1000, 3, 32, 32))  # [batch, channels, height, width]
model = MyNet2()
out = model(img)
out.size()  # [batch, 2]

torch.Size([1000, 2])

In [10]:
# Parameter count: all parameters first, then only those that require gradients.
print(sum(tensor.numel() for tensor in model.parameters()))
print(sum(tensor.numel() for tensor in filter(lambda t: t.requires_grad, model.parameters())))

18090
18090


# 4. 训练

## 4.1 训练主函数

In [11]:
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` and print the mean batch loss after every epoch.

    Args:
        n_epochs: number of full passes over ``train_loader``.
        optimizer: optimizer holding the parameters of ``model``.
        model: callable mapping a batch of images to outputs.
        loss_fn: called as ``loss_fn(input=outputs, target=labels)``.
        train_loader: iterable of ``(imgs, labels)`` batches that supports ``len()``.
    """
    for epoch_idx in range(n_epochs):
        running_loss = 0.0
        for batch_imgs, batch_labels in train_loader:
            predictions = model(batch_imgs)
            batch_loss = loss_fn(input=predictions, target=batch_labels)

            # Update the model parameters from the gradients of this batch.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            # float(tensor) and tensor.item() are interchangeable here.
            running_loss += float(batch_loss)

        mean_loss = running_loss / len(train_loader)
        print(f"epoch: {epoch_idx} training loss: {mean_loss}")

## 4.2 数据集的加载（同02）

In [12]:
from torchvision import datasets
from torchvision import transforms

DATA_PATH = "./data/datasets"

# Both splits share one preprocessing pipeline: convert to a tensor, then
# normalise each channel with the given mean / std (presumably the CIFAR-10
# channel statistics - confirm against the notebook that computed them).
cifar10_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4915, 0.4823, 0.4468],
                         [0.2470, 0.2435, 0.2616]),
])
cifar10_train = datasets.CIFAR10(DATA_PATH, train=True, download=True,
                                 transform=cifar10_transform)
cifar10_val = datasets.CIFAR10(DATA_PATH, train=False, download=True,
                               transform=cifar10_transform)

# Keep two of the original classes and relabel them: 0 (airplane) -> 0, 2 (bird) -> 1.
label_map = {0: 0, 2: 1}
cifar2_train = [(img, label_map[label]) for img, label in cifar10_train if label in label_map]
cifar2_val = [(img, label_map[label]) for img, label in cifar10_val if label in label_map]

# Shuffle the training batches only; validation order stays fixed.
train_loader = torch.utils.data.DataLoader(dataset=cifar2_train, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=cifar2_val, batch_size=64, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


## 4.3 训练

In [13]:
# Fresh network, plain SGD and cross-entropy loss for the two-class problem.
loss_fn = nn.CrossEntropyLoss()
model = MyNet2()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2)

training_loop(
    n_epochs=10,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
)

epoch: 0 training loss: 0.5722224509260457
epoch: 1 training loss: 0.48653622778357974
epoch: 2 training loss: 0.459078013327471
epoch: 3 training loss: 0.43603592256831514
epoch: 4 training loss: 0.40733177210115323
epoch: 5 training loss: 0.38148498990733154
epoch: 6 training loss: 0.36367156275898027
epoch: 7 training loss: 0.3476295379126907
epoch: 8 training loss: 0.33781792821398204
epoch: 9 training loss: 0.33118677186738155


# 5. 测试

In [14]:
def validate(model, train_loader, val_loader):
    """Print the classification accuracy of ``model`` on both loaders.

    The model is evaluated in eval mode (so layers such as dropout or batch
    norm behave deterministically) and its previous training flag is restored
    afterwards. Batches are moved to the device of the model's parameters, so
    the function also works for a model that lives on the GPU.

    Args:
        model: nn.Module returning [b, n_classes] scores.
        train_loader: iterable of ``(imgs, labels)`` batches, reported as "train".
        val_loader: iterable of ``(imgs, labels)`` batches, reported as "val".
    """
    # Device of the model's parameters; None when the model has no parameters,
    # in which case the batches are left where they are.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    was_training = model.training
    model.eval()
    try:
        for name, loader in [('train', train_loader), ('val', val_loader)]:
            correct = total = 0

            # No gradients are needed for evaluation.
            with torch.no_grad():
                for imgs, labels in loader:
                    if model_device is not None:
                        imgs = imgs.to(device=model_device)
                        labels = labels.to(device=model_device)
                    outputs = model(imgs)
                    # Index of the highest score is the predicted class.
                    _, pred = torch.max(outputs, dim=1)
                    total += labels.shape[0]
                    correct += int((pred == labels).sum())

            # An empty loader has no defined accuracy; report nan instead of
            # raising ZeroDivisionError.
            accuracy = correct / total if total else float("nan")
            print(f"Accuracy {name}: {accuracy}")
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

# Report accuracy of the trained model on the training and validation splits.
validate(model=model, train_loader=train_loader, val_loader=val_loader)

Accuracy train: 0.8565
Accuracy val: 0.8515


# 6. 保存和加载模型

## 6.1 保存模型到文件

In [15]:
# Persist only the parameter values (the state_dict), not the module object.
torch.save(obj=model.state_dict(), f="./data/conv.pt")

## 6.2 从文件载入模型

In [16]:
# The file stores parameter values only, so the architecture has to be rebuilt
# first and must match the one that was saved.
loaded_model = MyNet2()
# NOTE: map_location is an argument of torch.load(), not of load_state_dict();
# it selects the device the stored tensors are mapped to while loading. Mapping
# to the CPU lets the checkpoint load on machines without a GPU as well.
# NOTE(security): torch.load() unpickles the file, so only load checkpoints
# from a trusted source (recent PyTorch versions also offer weights_only=True).
loaded_model.load_state_dict(torch.load("./data/conv.pt", map_location="cpu"))

<All keys matched successfully>

# 7. 使用GPU训练

Module和Tensor都可以调用to()方法将其转移到GPU上。
二者的区别是：Module.to()是原地操作（模型自身被修改并返回），而Tensor.to()返回一个新的Tensor，必须重新赋值（例如imgs = imgs.to(device)）。
另外，按照PyTorch文档的建议，应先把模型的所有参数移到GPU上，再用model.parameters()创建Optimizer。

In [17]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [18]:
# Same training procedure as before; the only change is that every batch is
# moved to the training device first.
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, device=None):
    """Train ``model`` and print the mean batch loss after every epoch.

    Args:
        n_epochs: number of full passes over ``train_loader``.
        optimizer: optimizer holding the parameters of ``model``.
        model: nn.Module mapping a batch of images to outputs.
        loss_fn: called as ``loss_fn(input=outputs, target=labels)``.
        train_loader: iterable of ``(imgs, labels)`` tensor batches that
            supports ``len()``.
        device: device the batches are moved to. When None (the default) the
            device of the model's first parameter is used, so the function no
            longer depends on a notebook-global ``device`` variable.
    """
    if device is None:
        # Follow the model: batches must live where the parameters live.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(n_epochs):
        total_loss = 0.0
        for imgs, labels in train_loader:
            # Tensor.to() returns a new tensor, hence the re-assignment.
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(input=outputs, target=labels)

            # Update the model parameters from the gradients of this batch.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"epoch: {epoch} training loss: {total_loss / len(train_loader)}")

loss_fn = nn.CrossEntropyLoss()
# Move the parameters to the target device before the optimizer is created.
model = MyNet2().to(device=device)
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2)

training_loop(
    n_epochs=10,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
)

epoch: 0 training loss: 0.6250360036731526
epoch: 1 training loss: 0.5023902062397854
epoch: 2 training loss: 0.4668117000418863
epoch: 3 training loss: 0.4298659238466032
epoch: 4 training loss: 0.39260457864232884
epoch: 5 training loss: 0.36830267585386894
epoch: 6 training loss: 0.3514456562935167
epoch: 7 training loss: 0.3410620111378895
epoch: 8 training loss: 0.33337449135294384
epoch: 9 training loss: 0.32586679868637375
