In [1]:
import torch
from torch import nn
import d2lzh_pytorch as d2l
# 二维卷积

In [2]:
# 2D cross-correlation: apply the 2x2 kernel K to the 3x3 input X with the d2l
# helper; the resulting tensor is displayed as the cell output.
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
K = torch.tensor([[0, 1], [2, 3]])
d2l.corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

In [3]:
# 2D convolutional layer
class Conv2D(nn.Module):
    """Layer that cross-correlates its input with a learnable kernel and adds a bias.

    Both the kernel and the single-element bias are drawn with ``torch.randn``
    at construction time and registered as parameters of the module.
    """

    def __init__(self, kernel_size):
        """Create the layer.

        Args:
            kernel_size: shape handed to ``torch.randn`` to build the kernel,
                e.g. ``(1, 2)``.
        """
        super().__init__()
        # Draw the kernel before the bias so the random stream is consumed in
        # the same order as attribute registration.
        initial_kernel = torch.randn(kernel_size)
        initial_bias = torch.randn(1)
        self.weight = nn.Parameter(initial_kernel)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Return ``d2l.corr2d(x, self.weight)`` shifted by the bias."""
        response = d2l.corr2d(x, self.weight)
        return response + self.bias

In [4]:
# Detecting object edges in an image
X = torch.ones(6, 8) # build the "image": a 6x8 tensor of ones
X[:, 2: 6] = 0 # zero out columns 2-5, leaving ones only in the outer columns

In [5]:
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [6]:
K = torch.tensor([[1, -1]]) # build the 1x2 kernel (weights +1 and -1 on horizontally adjacent elements)

In [7]:
Y = d2l.corr2d(X, K)

In [8]:
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [9]:
# Learn the kernel from data: fit a fresh Conv2D so that conv2d(X) approximates Y
conv2d = Conv2D(kernel_size=(1, 2))
step = 20  # number of gradient-descent iterations
lr = 0.01  # learning rate
for i in range(step):
    Y_hat = conv2d(X)
    l = ((Y_hat - Y) ** 2).sum()  # sum of squared errors
    l.backward()
    # Gradient-descent update followed by a gradient reset. The update runs
    # under no_grad() so the in-place change to the parameters is not recorded
    # by autograd (the supported replacement for mutating `.data`).
    with torch.no_grad():
        for param in conv2d.parameters():
            param.sub_(lr * param.grad)
            param.grad.zero_()
    if (i + 1) % 5 == 0:
        print('Step %d, loss %.3f' % (i + 1, l.item()))

Step 5, loss 8.089
Step 10, loss 1.816
Step 15, loss 0.457
Step 20, loss 0.122


In [10]:
conv2d.weight.data

tensor([[ 0.9041, -0.9212]])

In [11]:
conv2d.bias.data

tensor([0.0096])

In [12]:
# 卷积层的两个超参数：填充和步幅

In [15]:
# Padding
def comp_conv2d(conv2d, X):
    """Apply ``conv2d`` to a 2D tensor and return the result without the leading dims.

    ``X`` is viewed with two extra leading dimensions of size 1 (batch size and
    number of channels), passed through ``conv2d``, and the first two
    dimensions of the output are then dropped again.
    """
    batched = X.view(1, 1, *X.shape)  # the two leading 1s are batch size and channel count
    result = conv2d(batched)
    trailing_shape = result.shape[2:]
    return result.view(trailing_shape)

# 3x3 kernel with padding=1: the output shape displayed for this cell equals
# the 8x8 input shape.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)

X = torch.rand(8, 8)
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

In [16]:
# Use a kernel of height 5 and width 3, with padding of 2 on both sides of the
# height and 1 on both sides of the width.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 3), padding=(2, 1))
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

In [17]:
# Stride: same 3x3 kernel and padding=1 as before, now with stride=2
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)
comp_conv2d(conv2d, X).shape

torch.Size([4, 4])

In [18]:
# Kernel size, padding and stride may each differ per axis; every tuple is (height, width)
conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))
comp_conv2d(conv2d, X).shape

torch.Size([2, 2])

In [19]:
# 多输入通道和多输出通道

In [20]:
# Multiple input channels
# On each channel the 2D input is cross-correlated with the 2D kernel; the
# per-channel results are then summed to produce the output.
def corr2d_multi_in(X, K):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    Args:
        X: input indexed as ``X[channel, :, :]``.
        K: kernel indexed as ``K[channel, :, :]``; channel ``c`` of ``K`` is
            applied to channel ``c`` of ``X``.

    Returns:
        The sum over channels of ``d2l.corr2d(X[c], K[c])``.
    """
    # Compute one result per index along dimension 0 (the channel dimension)
    # of X and K, then accumulate them into the first one.
    channel_count = X.shape[0]
    partials = [d2l.corr2d(X[c, :, :], K[c, :, :]) for c in range(channel_count)]
    total = partials[0]
    for extra in partials[1:]:
        total += extra
    return total

In [21]:
# Two-channel 3x3 input and a matching two-channel 2x2 kernel
X = torch.tensor([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
              [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
K = torch.tensor([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])

corr2d_multi_in(X, K)

tensor([[ 56.,  72.],
        [104., 120.]])

In [22]:
# Multiple output channels
def corr2d_multi_in_out(X, K):
    """Cross-correlate ``X`` with every kernel in ``K`` and stack the results.

    Args:
        X: multi-channel input accepted by ``corr2d_multi_in``.
        K: kernels whose dimension 0 enumerates the output channels.

    Returns:
        One ``corr2d_multi_in`` result per entry of ``K``, stacked along a new
        leading dimension.
    """
    # Walk dimension 0 of K, cross-correlating each kernel with the input X;
    # the collected results are merged with torch.stack.
    per_output = []
    for kernel in K:
        per_output.append(corr2d_multi_in(X, kernel))
    return torch.stack(per_output)

In [23]:
# Build a kernel with 3 output channels by stacking K, K + 1 and K + 2.
# NOTE(review): K is rebound from its own previous value, so re-running this
# cell stacks again and yields a different shape.
K = torch.stack([K, K + 1, K + 2])
K.shape

torch.Size([3, 2, 2, 2])

In [24]:
corr2d_multi_in_out(X, K)

tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])

In [25]:
# 1x1 convolutional layer
# If the channel dimension is treated as the feature dimension and the elements
# along height and width as data samples, a 1x1 convolutional layer is
# equivalent to a fully connected layer.
def corr2d_multi_in_out_1x1(X, K):
    """Apply a 1x1 multi-input, multi-output kernel as a matrix multiplication.

    Args:
        X: input of shape ``(c_i, h, w)``.
        K: kernel whose dimension 0 is ``c_o`` and which holds ``c_o * c_i``
            elements in total, e.g. shape ``(c_o, c_i, 1, 1)``.

    Returns:
        Tensor of shape ``(c_o, h, w)`` equal to ``K`` (as a ``c_o x c_i``
        matrix) times ``X`` (as a ``c_i x (h * w)`` matrix).
    """
    c_i, h, w = X.shape
    c_o = K.shape[0]
    # reshape() instead of view(): view() raises on non-contiguous tensors
    # (e.g. after permute/transpose or slicing), reshape() copies when needed.
    X = X.reshape(c_i, h * w)
    K = K.reshape(c_o, c_i)
    Y = torch.mm(K, X)  # the matrix multiplication of a fully connected layer
    return Y.reshape(c_o, h, w)

In [26]:
# Run the matrix-multiplication version and the general multi-channel version
# on the same random input and 1x1 kernel so their results can be compared.
# NOTE(review): no random seed is set, so the values differ on every run.
X = torch.rand(3, 3, 3)
K = torch.rand(2, 3, 1, 1)

Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)

In [27]:
Y1

tensor([[[0.4697, 0.2185, 0.4592],
         [0.4612, 0.4797, 0.5083],
         [0.1256, 0.4560, 0.5009]],

        [[0.7776, 0.2152, 0.8716],
         [0.5785, 0.6921, 0.8775],
         [0.3986, 0.6708, 0.5825]]])

In [28]:
Y2

tensor([[[0.4697, 0.2185, 0.4592],
         [0.4612, 0.4797, 0.5083],
         [0.1256, 0.4560, 0.5009]],

        [[0.7776, 0.2152, 0.8716],
         [0.5785, 0.6921, 0.8775],
         [0.3986, 0.6708, 0.5825]]])

In [29]:
# True when the norm of the difference between the two results is below 1e-6
(Y1 - Y2).norm().item() < 1e-6

True

In [30]:
# 池化层：它的提出是为了缓解卷积层对位置的过度敏感性。

In [31]:
# Max pooling and average pooling
def pool2d(X, pool_size, mode='max'):
    """Pool a 2D tensor with a sliding window (stride 1, no padding).

    Args:
        X: 2D tensor; it is converted to float before pooling.
        pool_size: ``(p_h, p_w)`` height and width of the pooling window.
        mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.

    Returns:
        Float tensor of shape ``(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)``
        whose element ``[i, j]`` is the max/mean of ``X[i:i+p_h, j:j+p_w]``.

    Raises:
        ValueError: if ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    # Resolve the reduction once, up front. An unsupported mode is rejected
    # here rather than silently producing an all-zero result.
    if mode == 'max':
        reduce_window = torch.max
    elif mode == 'avg':
        reduce_window = torch.mean
    else:
        raise ValueError("unsupported pooling mode %r; expected 'max' or 'avg'" % (mode,))
    X = X.float()
    p_h, p_w = pool_size
    Y = torch.zeros(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = reduce_window(X[i: i + p_h, j: j + p_w])
    return Y

In [32]:
# Max pooling (the default mode) with a 2x2 window on a 3x3 input
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
pool2d(X, (2, 2))

tensor([[4., 5.],
        [7., 8.]])

In [33]:
# Average pooling with the same 2x2 window
pool2d(X, (2, 2), 'avg')

tensor([[2., 3.],
        [5., 6.]])

In [34]:
# Padding and stride in pooling layers
# Input holding the values 0..15, viewed with shape (1, 1, 4, 4).
X = torch.arange(16, dtype=torch.float).view((1, 1, 4, 4))

In [35]:
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]]])

In [36]:
# NOTE(review): this rebinds the name pool2d, shadowing the pool2d function
# defined earlier in the notebook.
pool2d = nn.MaxPool2d(3) # pooling window of shape (3, 3); the stride defaults to the same shape, (3, 3).

In [37]:
pool2d(X)

tensor([[[[10.]]]])

In [38]:
# Padding and stride can also be specified explicitly
pool2d = nn.MaxPool2d(3, padding=1, stride=2)

In [39]:
pool2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]]]])

In [40]:
# Non-square window with per-axis padding and stride; every tuple is (height, width)
pool2d = nn.MaxPool2d((2, 4), padding=(1, 2), stride=(2, 3))

In [41]:
pool2d(X)

tensor([[[[ 1.,  3.],
          [ 9., 11.],
          [13., 15.]]]])

In [42]:
# Multiple channels in pooling layers
# A pooling layer pools each input channel separately instead of summing the
# inputs across channels as a convolutional layer does. The number of output
# channels therefore equals the number of input channels.
# NOTE(review): X is rebound from its own previous value, so re-running this
# cell concatenates again along dim 1.
X = torch.cat((X, X + 1), dim=1)

In [43]:
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]],

         [[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])

In [44]:
pool2d = nn.MaxPool2d(3, padding=1, stride=2)

In [45]:
pool2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]],

         [[ 6.,  8.],
          [14., 16.]]]])