In [12]:
import torch
import torch.nn as nn
import torch.optim as optim

In [5]:
def corr2d(X, K):
    """Compute the 2-D cross-correlation of input ``X`` with kernel ``K``.

    The kernel is placed at every position where it fits entirely inside
    ``X``; at each position the overlapping elements are multiplied
    element-wise and summed. This is an element-wise product followed by a
    sum, not a matrix multiplication.

    Args:
        X: 2-D input tensor.
        K: 2-D kernel (filter) tensor.

    Returns:
        Tensor of shape ``(X.shape[0] - h + 1, X.shape[1] - w + 1)`` where
        ``(h, w)`` is the kernel shape, allocated with ``torch.zeros``.
    """
    kernel_h, kernel_w = K.shape
    # Output extent along each axis follows the rule L - k + 1.
    out_rows = X.shape[0] - kernel_h + 1
    out_cols = X.shape[1] - kernel_w + 1
    Y = torch.zeros((out_rows, out_cols))
    for row in range(out_rows):
        for col in range(out_cols):
            # Input patch currently covered by the kernel.
            window = X[row: row + kernel_h, col: col + kernel_w]
            Y[row, col] = (window * K).sum()
    return Y

In [6]:
# Sanity check: cross-correlate a 3x3 input holding 0..8 with a 2x2 kernel
# holding 0..3 (the kernel is bound to the name Y in this cell).
X = torch.arange(9).reshape(3, 3)
Y = torch.arange(4).reshape(2, 2)
print(corr2d(X, Y))

tensor([[19., 25.],
        [37., 43.]])


In [18]:
class Conv(nn.Module):
    """Minimal 2-D convolution layer built on top of ``corr2d``.

    Only the kernel shape is supplied by the caller; the kernel values and the
    scalar bias are drawn from ``torch.randn`` and registered as learnable
    parameters.
    """

    def __init__(self, kernel_size):
        """Create the layer.

        Args:
            kernel_size: Shape of the kernel, e.g. ``(1, 2)``.
        """
        super().__init__()
        # Wrapping a tensor in nn.Parameter registers it with the module, so
        # it is returned by .parameters() and receives gradients.
        kernel_init = torch.randn(kernel_size)
        bias_init = torch.randn(1)
        self.weight = nn.Parameter(kernel_init)
        self.bias = nn.Parameter(bias_init)

    def forward(self, x):
        """Cross-correlate ``x`` with the kernel, then add the bias."""
        response = corr2d(x, self.weight)
        return response + self.bias


conv = Conv(kernel_size=(1, 2))

In [8]:
# Simulate a 6x8 black-and-white image: two columns of ones on each side and
# a band of four zero columns in the middle.
X = torch.cat((torch.ones(6, 2), torch.zeros(6, 4), torch.ones(6, 2)), dim=1)
print(X)

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])


In [10]:
# Kernel of shape (1, 2). The original literal `[[1, -1])` had an unbalanced
# bracket and could not be parsed.
# Cross-correlating with [1, -1] yields 0 where two horizontally adjacent
# elements are equal, 1 at a 1 -> 0 transition and -1 at a 0 -> 1 transition.
K = torch.tensor([[1, -1]])

Y = corr2d(X, K)
print(Y)

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])


In [21]:
# Fit conv's kernel and bias to the target Y with hand-written gradient descent.
EPOCH = 20
LR = 0.01

for i in range(EPOCH):
    out = conv(X)

    # Sum of squared errors between the layer output and the target.
    loss = ((out - Y) ** 2).sum()
    loss.backward()

    # Update the parameters in place without recording the update in the
    # autograd graph. torch.no_grad() is the supported way to do this;
    # writing through `.data` bypasses autograd's correctness checks.
    with torch.no_grad():
        # Gradient descent step.
        conv.weight -= LR * conv.weight.grad
        conv.bias -= LR * conv.bias.grad

        # Reset gradients: backward() accumulates into .grad on every call.
        conv.weight.grad.zero_()
        conv.bias.grad.zero_()

    if (i + 1) % 5 == 0:
        print('step %d, loss %.3f' % (i + 1, loss.item()))

step 5, loss 7.492
step 10, loss 2.017
step 15, loss 0.554
step 20, loss 0.153


最后的结果和之前设置的$K$比较接近

In [22]:
# Show the learned kernel and bias as plain tensors detached from autograd.
print(conv.weight.detach())
print(conv.bias.detach())

tensor([[ 0.8973, -0.9041]])
tensor([0.0038])


### Conv2 与 Conv 的定义完全相同，不同之处只是训练方式：改用 `nn.MSELoss` 和 `optim.SGD`，而不是手写梯度下降。

In [43]:
class Conv2(nn.Module):
    """Single-kernel 2-D convolution layer with a learnable kernel and bias.

    The kernel values are random at construction time, so the constructor
    takes only the kernel's shape.
    """

    def __init__(self, kernel_size):
        """Initialise the kernel and the bias from ``torch.randn``.

        Args:
            kernel_size: Shape of the kernel, e.g. ``(1, 2)``.
        """
        super().__init__()
        # Kernel is drawn first, then the bias.
        self.weight = nn.Parameter(data=torch.randn(kernel_size))
        self.bias = nn.Parameter(data=torch.randn(1))

    def forward(self, x):
        """Return the cross-correlation of ``x`` with the kernel plus the bias."""
        feature_map = corr2d(x, self.weight)
        feature_map = feature_map + self.bias
        return feature_map


conv2 = Conv2(kernel_size=(1, 2))

In [45]:
# Same fitting task as the hand-written loop, driven by a built-in loss and
# optimizer: mean squared error with plain SGD.
loss_func = nn.MSELoss()
optimizer = optim.SGD(params=conv2.parameters(), lr=0.01)

for epoch in range(EPOCH):
    # Clear gradients left over from the previous iteration.
    optimizer.zero_grad()

    out = conv2(X)
    loss = loss_func(out, Y)

    loss.backward()
    optimizer.step()

    # Report on every 5th step (epoch is zero-based).
    if epoch % 5 == 4:
        print('step %d, loss %.3f' % (epoch + 1, loss.item()))

step 5, loss 0.121
step 10, loss 0.097
step 15, loss 0.080
step 20, loss 0.066


### 本节笔记
1. filter与输入相乘的方式，是相同位置上的元素相乘再求和，需要和矩阵乘法区分。此为互相关运算。
2. 在class内部，用 `nn.Parameter(torch.randn(shape))` 定义卷积层的weight和bias，使它们成为可学习的参数。
3. 输入数组经过filter后的结果称为**特征图**。某时刻输入中被filter覆盖的区域称为感受野。**我们可以通过更深的卷积神经网络使特征图中单个元素的感受野变得更加广阔，从而捕捉输入上更大尺寸的特征。因为特征图从左往右（由浅层到深层）是层层缩小的。**