# 动手学深度学习 6.2节 图像卷积 示例代码

互相关运算

In [1]:
import torch
from torch import nn
from d2l import torch as d2l


def corr2d(X:torch.Tensor, K:torch.Tensor) -> torch.Tensor:
	"""Compute the 2-D cross-correlation of an input with a kernel.

	The kernel is slid over every position where it fits entirely inside the
	input (no padding, stride 1). Each output entry is the sum of the
	element-wise product of the kernel and the input window under it.

	Args:
		X (torch.Tensor): 2-D input tensor of shape (H, W).
		K (torch.Tensor): 2-D kernel tensor of shape (h, w).

	Returns:
		torch.Tensor: Output of shape (H - h + 1, W - w + 1), in the default
		dtype and on the same device as ``X``.
	"""

	# Kernel height and width; unpacking fails if K is not 2-D.
	h, w = K.shape
	# Allocate the result on X's device so a non-CPU input does not yield a
	# CPU result that later clashes with parameters living on that device.
	Y = torch.zeros(
		(X.shape[0] - h + 1, X.shape[1] - w + 1),
		device=X.device,
	)
	for i in range(Y.shape[0]):
		for j in range(Y.shape[1]):
			# Window of X with its top-left corner at (i, j), same size as K.
			Y[i, j] = (X[i:i+h, j:j+w] * K).sum()

	return Y

# Worked example: a 3x3 input holding the values 0..8 and a 2x2 kernel
# holding the values 0..3, both as floating-point tensors.
X = torch.arange(9.0).reshape(3, 3)
K = torch.arange(4.0).reshape(2, 2)

# Cross-correlate the input with the kernel; the bare expression leaves the
# result as the value of the cell.
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

卷积层

In [2]:
class Conv2D(nn.Module):
	"""Convolution layer with a learnable kernel and a learnable bias.

	The forward pass applies ``corr2d`` with the kernel and adds the bias.
	"""

	def __init__(self, kernel_size):
		"""Create the layer's parameters.

		Args:
			kernel_size: Shape of the kernel, passed on to ``torch.rand``.
		"""
		super().__init__()
		# Kernel entries start as uniform random samples.
		initial_kernel = torch.rand(kernel_size)
		self.weight = nn.Parameter(initial_kernel)
		# A single bias value, starting at zero.
		initial_bias = torch.zeros(1)
		self.bias = nn.Parameter(initial_bias)

	def forward(self, X):
		"""Cross-correlate ``X`` with the kernel and add the bias.

		The forward computation of this layer is a cross-correlation.
		"""
		response = corr2d(X, self.weight)
		return response + self.bias

学习卷积核

In [8]:
# Input image: six identical rows with ones on both sides of a band of zeros,
# so every row contains one 1 -> 0 edge and one 0 -> 1 edge.
X = torch.tensor([[1., 1., 0., 0., 0., 0., 1., 1.]] * 6)

# Target edge map: 1 at the 1 -> 0 edge, -1 at the 0 -> 1 edge, 0 elsewhere.
# Each entry equals X[:, j] - X[:, j + 1], i.e. the response of a [1, -1] kernel.
Y = torch.tensor([[0., 1., 0., 0., 0., -1., 0.]] * 6)

# One input channel, one output channel, a 1x2 kernel and no bias.
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# nn.Conv2d takes input laid out as (batch, channel, height, width).
X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))
lr = 3e-2
num_epochs = 10

for i in range(num_epochs):
	Y_hat = conv2d(X)
	# Summed squared error; reduced once and reused for backward and logging.
	loss = ((Y_hat - Y) ** 2).sum()
	conv2d.zero_grad()
	loss.backward()
	# Plain gradient-descent step. Updating in place under no_grad keeps the
	# step out of the autograd graph without going through `.data`.
	with torch.no_grad():
		# `grad` is typed as optional but is populated by backward() above.
		conv2d.weight.sub_(lr * conv2d.weight.grad)	# type: ignore
	if (i + 1) % 2 == 0:
		print(f'batch {i + 1}, loss {loss:.3f}')

# The learned kernel; it should be close to [1, -1].
print(conv2d.weight.reshape((1, 2)))

batch 2, loss 10.489
batch 4, loss 2.257
batch 6, loss 0.582
batch 8, loss 0.181
batch 10, loss 0.065
tensor([[ 1.0097, -0.9601]], grad_fn=<ViewBackward0>)
