In [1]:
# 二维互相关运算
import torch
from torch import nn
from d2l import torch as d2l

def corr2d(X, K):
    """Compute the 2D cross-correlation of an input with a kernel.

    The kernel is slid over every position where it fits entirely inside the
    input (no padding, stride 1). Each output element is the sum of the
    element-wise product of the kernel and the input window beneath it.

    Args:
        X: 2D input tensor of shape (xh, xw).
        K: 2D kernel tensor of shape (kh, kw); must not be larger than X.

    Returns:
        Tensor of shape (xh - kh + 1, xw - kw + 1) in the default dtype,
        allocated on the same device as ``X``.
    """
    xh, xw = X.shape
    kh, kw = K.shape
    # Allocate the output next to the input so the result can be combined
    # with other tensors on that device (e.g. a layer's bias on the GPU).
    Y = torch.zeros((xh - kh + 1, xw - kw + 1), device=X.device)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # Window of X currently covered by the kernel, weighted and summed.
            Y[i, j] = (X[i:i + kh, j:j + kw] * K).sum()

    return Y

In [2]:
# Hand-checkable example: X is [[0, 1, 2], [3, 4, 5], [6, 7, 8]] and
# K is [[0, 1], [2, 3]], both floating-point tensors.
X = torch.arange(9.0).reshape(3, 3)
K = torch.arange(4.0).reshape(2, 2)
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

In [3]:
# 2D convolutional layer
class Conv2D(nn.Module):
    """Minimal 2D convolutional layer built on ``corr2d``.

    Holds a learnable kernel and a learnable scalar bias; the forward pass
    cross-correlates the input with the kernel and adds the bias.
    """

    def __init__(self, kernel_size):
        """Create the layer's parameters.

        Args:
            kernel_size: Kernel shape as ``(height, width)``. A single int
                ``k`` is accepted as shorthand for a square ``(k, k)`` kernel.
        """
        super().__init__()
        if isinstance(kernel_size, int):
            # corr2d needs a 2D kernel; a bare int would otherwise produce
            # a 1D weight that cannot be used in forward().
            kernel_size = (kernel_size, kernel_size)
        # Kernel is initialised from torch.rand; bias starts at zero.
        self.weight = nn.Parameter(torch.rand(kernel_size))
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Return the cross-correlation of ``x`` with the kernel, plus the bias.

        Args:
            x: 2D input tensor.
        """
        return corr2d(x, self.weight) + self.bias

In [4]:
# 6x8 test "image": two columns of ones on each side of a four-column band of zeros.
X = torch.cat([torch.ones(6, 2), torch.zeros(6, 4), torch.ones(6, 2)], dim=1)
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [6]:
# Vertical edge detection: a 1x2 kernel whose cross-correlation yields the
# difference between horizontally adjacent elements.
K = torch.tensor([[1.0, -1.0]])
K

tensor([[ 1., -1.]])

In [7]:
# Apply the edge kernel to the image: the output is 1 where ones change to
# zeros, -1 where zeros change back to ones, and 0 elsewhere.
Y = corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [10]:
# Learn the 1x2 kernel from the (X, Y) pair with plain gradient descent.
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)
# Reshape to (batch, channels, height, width), the layout nn.Conv2d works with.
X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))

lr = 3e-2  # learning rate

for i in range(20):
    Y_hat = conv2d(X)
    l = (Y_hat - Y)**2  # element-wise squared error
    total_loss = l.sum()
    conv2d.zero_grad()
    total_loss.backward()
    # Update the kernel in place without recording the step in autograd;
    # this replaces the discouraged route through the `.data` attribute.
    with torch.no_grad():
        conv2d.weight -= lr * conv2d.weight.grad
    if (i+1) % 2 == 0:
        print(f'batch {i+1}, loss {total_loss:.3f}')

batch 2, loss 19.485
batch 4, loss 6.868
batch 6, loss 2.626
batch 8, loss 1.044
batch 10, loss 0.423
batch 12, loss 0.172
batch 14, loss 0.070
batch 16, loss 0.029
batch 18, loss 0.012
batch 20, loss 0.005


In [12]:
# Inspect the learned kernel; it should be close to the [[1, -1]] kernel
# that was used to generate Y.
conv2d.weight

Parameter containing:
tensor([[[[ 1.0070, -0.9927]]]], requires_grad=True)

# Conv
- deformable conv
- pixel shuffle conv

# 填充+步幅
$o = \lfloor (n + p - k + s) / s \rfloor$，其中 $n$ 为输入尺寸，$k$ 为卷积核尺寸，$p$ 为总填充（两侧填充之和），$s$ 为步幅。

机器学习本质是一个语义压缩算法，将图片的信息压缩成一个个语义标签。