In [28]:
import torch
from torch import nn
from d2l import torch as d2l

In [29]:
def corr2d(X, K):  #@save
    """Compute the 2D cross-correlation of an input with a kernel.

    Args:
        X: 2D input tensor of shape ``(H, W)``.
        K: 2D kernel tensor of shape ``(h, w)``.

    Returns:
        A tensor of shape ``(H - h + 1, W - w + 1)`` whose entry ``[i, j]`` is
        the sum of the element-wise product of ``K`` with the ``h x w`` window
        of ``X`` whose top-left corner is at ``(i, j)``. The result lives on
        ``X``'s device. Its dtype is the default floating dtype unless ``X`` or
        ``K`` needs a wider one (e.g. float64 or complex inputs).
    """
    h, w = K.shape
    # Promote against the default dtype: integer / half inputs still produce
    # the default float dtype, while float64 / complex inputs are no longer
    # silently truncated when written into the output.
    out_dtype = torch.promote_types(
        torch.promote_types(X.dtype, K.dtype), torch.get_default_dtype())
    # Allocate on the input's device so the result can be combined with other
    # tensors (such as a bias parameter) that live on that device.
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1),
                    dtype=out_dtype, device=X.device)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # Y[i, j] indexes the element in a single step; Y[i][j] would
            # first build an intermediate view of row i.
            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()
    return Y

In [30]:
# Worked example: a 3x3 input holding 0..8 and a 2x2 kernel holding 0..3.
X = torch.arange(9.0).reshape(3, 3)
K = torch.arange(4.0).reshape(2, 2)
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

In [31]:
class Conv2D(nn.Module):
    """Single-channel 2D cross-correlation layer with a learnable kernel and bias.

    Args:
        kernel_size: Kernel shape as a ``(height, width)`` pair, or a single
            int for a square kernel.
    """

    def __init__(self, kernel_size):
        super().__init__()
        # corr2d unpacks a 2D kernel shape, so expand a bare int to a square
        # kernel instead of creating a 1-D weight that fails in forward().
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)
        self.weight = nn.Parameter(torch.rand(kernel_size))
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Cross-correlate the 2D input ``x`` with the kernel and add the bias."""
        return corr2d(x, self.weight) + self.bias

In [32]:
# 图像中的目标边缘检测
X = torch.ones((6, 8))
X[:, 2:6] = 0 # 切片赋值
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [33]:
# A 1x2 kernel that subtracts each pixel's right-hand neighbour from it, so
# the output is non-zero only where horizontally adjacent values differ.
K = torch.tensor([1.0, -1.0]).reshape(1, 2)
Y = corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [34]:
corr2d(X.T, K)  # .T transposes the 2D image

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [35]:
# Construct a two-dimensional convolutional layer with 1 output channel and a
# kernel of shape (1, 2). For the sake of simplicity, we ignore the bias here.
# nn.LazyConv2d defers creating its weight until the first forward call, when
# the number of input channels is inferred from the input.
conv2d = nn.LazyConv2d(1, kernel_size=(1, 2), bias=False)

# The two-dimensional convolutional layer uses four-dimensional input and
# output in the format of (example, channel, height, width), where the batch
# size (number of examples in the batch) and the number of channels are both 1
X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))
lr = 3e-2  # Learning rate

for i in range(10):
    Y_hat = conv2d(X)  # Forward pass; the first call also initializes the kernel
    l = (Y_hat - Y) ** 2  # Element-wise squared error
    loss = l.sum()  # Reduce once; reused for backward() and for logging
    conv2d.zero_grad()  # Clear the gradient left over from the previous step
    loss.backward()
    # Update the kernel with a plain gradient-descent step. The in-place
    # update runs under no_grad so autograd does not record it, instead of
    # writing through `.data`, which bypasses autograd's tracking.
    with torch.no_grad():
        conv2d.weight -= lr * conv2d.weight.grad
    if (i + 1) % 2 == 0:  # Report the loss every second epoch
        print(f'epoch {i + 1}, loss {loss:.3f}')

epoch 2, loss 1.263
epoch 4, loss 0.272
epoch 6, loss 0.070
epoch 8, loss 0.022
epoch 10, loss 0.008




In [36]:
# Show the learned kernel as a (1, 2) tensor, detached from the autograd graph.
conv2d.weight.detach().reshape((1, 2))

tensor([[ 0.9861, -1.0034]])

In [None]:
''' Exercise
1. 直接应用，应该可以检测得到边缘，转置 X 的话边缘检测的方向发生改变
转置 K 的话，卷积核变为 (2, 1) 形状，检测的是水平方向的边缘（相邻两行之间的差异）；注意这并不等同于严格的卷积操作——严格卷积需要将核上下、左右翻转，而不是转置
2. 这些核以后再设计
3. 略
4. 将输入张量和卷积核都展平为矩阵形式，然后进行乘法操作
'''