## 图像卷积

互相关运算

In [1]:
import torch
from torch import nn
from d2l import torch as d2l


def corr2d(X, K):
    """Compute the 2-D cross-correlation of an input with a kernel.

    The kernel is slid over every position at which it fits entirely inside
    the input (no padding, stride 1). Each output element is the sum of the
    element-wise product of the kernel and the input window beneath it.

    :param X: 2-D input tensor of shape (n_h, n_w)
    :param K: 2-D kernel tensor of shape (h, w)
    :return: feature map of shape (n_h - h + 1, n_w - w + 1), created with
        the default dtype on the same device as ``X``
    """
    h, w = K.shape
    out_h = X.shape[0] - h + 1
    out_w = X.shape[1] - w + 1
    # Allocate the result on X's device: a CPU-only buffer could not be
    # combined afterwards with tensors that live on the input's device.
    Y = torch.zeros((out_h, out_w), device=X.device)
    for i in range(out_h):
        for j in range(out_w):
            # Window of X whose top-left corner is (i, j), same size as K.
            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()
    return Y

In [2]:
# Sanity-check corr2d on a small example that is easy to verify by hand.
X = torch.arange(9.0).reshape(3, 3)  # [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
K = torch.arange(4.0).reshape(2, 2)  # [[0, 1], [2, 3]]
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

In [3]:
# 二维卷积层实现
class Conv2D(nn.Module):
    """2-D convolutional layer built on top of ``corr2d``.

    Holds a learnable kernel (``weight``) of shape ``kernel_size``, drawn
    uniformly from [0, 1), and a learnable scalar ``bias`` initialised to 0.
    """

    def __init__(self, kernel_size):
        super().__init__()
        initial_kernel = torch.rand(kernel_size)
        initial_bias = torch.zeros(1)
        # Register weight before bias so the parameter order stays the same.
        self.weight = nn.Parameter(initial_kernel)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Cross-correlate ``x`` with the kernel, then add the bias."""
        response = corr2d(x, self.weight)
        return response + self.bias

In [4]:
# A simple use of the convolution: detecting colour edges in an image.
# 1. Edge between rows. Where two vertically adjacent pixels are equal the
#    kernel's +1 and -1 cancel to 0; the output is non-zero only where they
#    differ.
X = torch.zeros((6, 8))
X[:2] = 1  # top two rows are 1, the rest stay 0
K = torch.tensor([1.0, -1.0]).reshape(2, 1)  # 2x1 kernel
Y = corr2d(X, K)
K, X, Y

(tensor([[ 1.],
         [-1.]]),
 tensor([[1., 1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1., 1.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]),
 tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 1., 1., 1., 1., 1., 1., 1.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]))

In [5]:
# 2. Edges between columns: columns 2..5 are 0, the outer columns are 1.
X = torch.cat((torch.ones(6, 2), torch.zeros(6, 4), torch.ones(6, 2)), dim=1)
K = K.transpose(0, 1)  # turn the 2x1 kernel into a 1x2 kernel
Y = corr2d(X, K)
K, X, Y

(tensor([[ 1., -1.]]),
 tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
         [1., 1., 0., 0., 0., 0., 1., 1.],
         [1., 1., 0., 0., 0., 0., 1., 1.],
         [1., 1., 0., 0., 0., 0., 1., 1.],
         [1., 1., 0., 0., 0., 0., 1., 1.],
         [1., 1., 0., 0., 0., 0., 1., 1.]]),
 tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
         [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
         [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
         [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
         [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
         [ 0.,  1.,  0.,  0.,  0., -1.,  0.]]))

In [6]:
# Learning a kernel from data: given input X and target output Y, recover K.
# One input channel and one output channel (i.e. a greyscale image), no bias.
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# nn.Conv2d works on 4-D tensors: (batch, channels, height, width).
X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))
lr = 3e-2  # learning rate

for i in range(20):
    # No mini-batches here: each step convolves the whole of X once.
    Y_hat = conv2d(X)
    l = (Y_hat - Y) ** 2  # element-wise squared error
    loss = l.sum()  # summed once and reused for backward() and logging
    conv2d.zero_grad()
    loss.backward()
    # Gradient-descent step on the kernel. Updating in place under no_grad()
    # keeps the step out of the autograd graph without going through `.data`,
    # which would bypass autograd's in-place safety checks.
    with torch.no_grad():
        conv2d.weight.sub_(lr * conv2d.weight.grad)
    if (i + 1) % 2 == 0:
        print(f'epoch {i + 1}, loss {loss:.3f}')

# The learned kernel should end up close to the true (1, -1).
print(conv2d.weight.detach().reshape(1, 2))

epoch 2, loss 4.082
epoch 4, loss 0.688
epoch 6, loss 0.117
epoch 8, loss 0.020
epoch 10, loss 0.004
epoch 12, loss 0.001
epoch 14, loss 0.000
epoch 16, loss 0.000
epoch 18, loss 0.000
epoch 20, loss 0.000
tensor([[ 0.9996, -1.0001]])


---
### 填充和步幅

In [7]:
# 在所有侧边填充一个像素
def comp_conv2d(conv2d, X):
    """Run a convolution layer on a single unbatched tensor.

    ``X`` is given leading batch and channel axes of size 1, passed through
    ``conv2d``, and the two leading axes are removed from the result.

    :param conv2d: callable applied to the 4-D tensor (e.g. ``nn.Conv2d``)
    :param X: input tensor, intended to be 2-D with shape (n_h, n_w)
    :return: the layer output reshaped to its trailing (spatial) dimensions
    """
    # (n_h, n_w) -> (1, 1, n_h, n_w)
    batched = X.reshape(1, 1, *X.shape)
    out = conv2d(batched)
    # Keep only the dimensions after batch and channel; their sizes are
    # whatever the layer produced for its kernel, padding and stride.
    spatial_shape = out.shape[2:]
    return out.reshape(spatial_shape)


# padding=1 adds one pixel on every side of the input.
# The kernel's height and width are both 3.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)
X = torch.rand(8, 8)
# X is padded from (8, 8) to (10, 10), so the output shape is
# (10 - 3 + 1, 10 - 3 + 1) = (8, 8).
Y = comp_conv2d(conv2d, X)
X, Y, X.shape, Y.shape

(tensor([[0.0084, 0.3842, 0.5325, 0.4041, 0.5534, 0.8698, 0.0440, 0.6768],
         [0.2961, 0.0579, 0.9123, 0.0516, 0.7883, 0.8041, 0.0073, 0.1028],
         [0.2554, 0.4194, 0.6112, 0.6528, 0.9175, 0.3694, 0.2766, 0.2828],
         [0.2789, 0.9239, 0.8523, 0.1387, 0.6025, 0.4809, 0.5303, 0.4265],
         [0.3759, 0.8523, 0.4456, 0.3210, 0.7283, 0.6718, 0.0126, 0.5152],
         [0.9906, 0.8544, 0.5672, 0.0470, 0.4552, 0.7114, 0.3671, 0.2222],
         [0.5748, 0.1254, 0.5790, 0.5916, 0.6218, 0.5297, 0.4932, 0.3525],
         [0.7351, 0.7504, 0.4128, 0.4635, 0.2427, 0.8678, 0.8258, 0.8920]]),
 tensor([[-0.0864, -0.2221, -0.2981, -0.2426, -0.4050, -0.3004, -0.0799, -0.0691],
         [-0.2368, -0.1842, -0.3493, -0.5087, -0.1423, -0.2999, -0.3807, -0.0961],
         [-0.3239, -0.4584, -0.1481, -0.1404, -0.3212, -0.4446, -0.3585, -0.1928],
         [-0.1991, -0.2063, -0.3219, -0.3682, -0.3267, -0.2061, -0.1629, -0.3027],
         [-0.3640, -0.2896, -0.2635, -0.1147, -0.2574, -0.4576, -0

In [8]:
# Padding can also be set separately for height (top/bottom) and width
# (left/right).
conv2d = nn.Conv2d(
    in_channels=1, out_channels=1, kernel_size=(5, 3), padding=(2, 1)
)
# X is padded from (8, 8) to (12, 10), so the output shape is
# (12 - 5 + 1, 10 - 3 + 1) = (8, 8).
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

In [9]:
# Setting the stride: with stride 2 the 3x3 kernel moves two pixels at a time
# over the padded (10, 10) input, giving floor((10 - 3) / 2) + 1 = 4 per axis.
conv2d = nn.Conv2d(
    in_channels=1, out_channels=1, kernel_size=3, stride=2, padding=1
)
comp_conv2d(conv2d, X).shape

torch.Size([4, 4])

In [10]:
# A slightly more involved example with different settings per axis.
# Height: floor((8 + 2*0 - 3) / 3) + 1 = 2
# Width:  floor((8 + 2*1 - 5) / 4) + 1 = 2
conv2d = nn.Conv2d(
    in_channels=1,
    out_channels=1,
    kernel_size=(3, 5),
    stride=(3, 4),
    padding=(0, 1),
)
comp_conv2d(conv2d, X).shape

torch.Size([2, 2])