## 卷积操作和卷积层的定义

In [5]:
import torch
from torch import nn
import d2l


def corr2d(X, K):
    """Compute the 2-D cross-correlation of ``X`` with kernel ``K``.

    The kernel is slid over every position where it fits entirely inside
    ``X`` (no padding, step of one); each output element is the sum of the
    element-wise product of the kernel and the window it covers.

    :param X: 2-D input (the "image").
    :param K: 2-D kernel; ``K.shape`` must unpack into (height, width).
    :return: tensor of shape
        ``(X.shape[0] - height + 1, X.shape[1] - width + 1)``.
    """
    k_rows, k_cols = K.shape
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = torch.zeros((out_rows, out_cols))
    for r in range(out_rows):
        # Rows covered by the kernel for this output row.
        row_band = X[r:r + k_rows]
        for c in range(out_cols):
            window = row_band[:, c:c + k_cols]
            Y[r, c] = (window * K).sum()
    return Y


# 3x3 example input.
X = torch.tensor([
    [0.0, 1.0, 2.0],
    [3.0, 4.0, 5.0],
    [6.0, 7.0, 8.0]
])
# 2x2 example kernel.
kernel = torch.tensor([
    [0.0, 1.0],
    [2.0, 3.0]
])

# Cross-correlate the example input with the kernel; a 2x2 kernel on a 3x3 input gives a 2x2 result.
corr2d(X, kernel)

tensor([[19., 25.],
        [37., 43.]])

In [6]:
class Conv2d(nn.Module):
    """Minimal 2-D convolutional layer built on ``corr2d``.

    Holds a randomly initialised kernel of shape ``kernel_size`` and a
    one-element bias initialised to zero, both registered as parameters.
    """

    def __init__(self, kernel_size):
        super().__init__()
        initial_kernel = torch.rand(kernel_size)
        initial_bias = torch.zeros(1)
        self.weight = nn.Parameter(initial_kernel)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Cross-correlate ``x`` with the kernel, then add the bias."""
        correlated = corr2d(x, self.weight)
        return correlated + self.bias

## 利用1*2卷积核对图像进行边缘检测

使用X模拟图像

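构造卷积核：高度为 1、宽度为 2。注意：卷积核必须是二维张量（形状为 (1, 2)），而不能是一维向量，因为 `corr2d` 会用 `K.shape` 解包出高度和宽度。（下面的单元实际启用的是 2×1 的 `K_horiz`，用于检测水平边缘；1×2 的 `K_vert` 被注释掉了。）
进行互相关运算：如果相邻的两个元素相同，输出为 0；否则输出非零值（取绝对值后为 1）。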

In [28]:
# Start from an all-ones 6x8 "image".
X = torch.ones((6, 8))
Y = torch.tensor([[1.0, -1.0]])  # NOTE(review): not used in this cell — presumably left over from the 1x2 kernel variant; confirm
# Vertical-edge variant (disabled):
# X[:, 2:6] = 0
# K_vert = torch.tensor([[1.0, -1.0]])  # this kernel can only detect vertical edges

# Zero out rows 2 and 3 so the image contains horizontal edges.
X[2:4, :] = 0
K_horiz = torch.tensor([[1.0],
                        [-1.0]])  # this kernel can only detect horizontal edges
# Take the absolute value so both edge directions show up as positive responses.
result = corr2d(X, K_horiz).abs()
X, result

(tensor([[1., 1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1., 1.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1., 1.]]),
 tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 1., 1., 1., 1., 1., 1., 1.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 1., 1., 1., 1., 1., 1., 1.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]))

In [29]:
# Collect the (row, column) index of every entry of `result` that equals 1.0.
edge = []
for i, row_values in enumerate(result):
    for j, value in enumerate(row_values):
        if value == 1.0:
            edge.append((i, j))
edge

[(1, 0),
 (1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (1, 5),
 (1, 6),
 (1, 7),
 (3, 0),
 (3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7)]

### 学习卷积核

In [32]:
# Build a convolutional layer with a single input channel and a single output channel, kernel shape 1x2, no bias
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1, 2), bias=False)

X = torch.ones((6, 8))
K = torch.tensor([[1.0, -1.0]])
X[:, 2:6] = 0
# Target output: cross-correlation of X with the known kernel K.
Y = corr2d(X, K)
# The 2-D convolutional layer uses 4-D input and output: (batch size, channels, height, width)
X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))

lr = 3e-2  # learning rate

for i in range(10):
    Y_hat = conv2d(X)
    loss = (Y - Y_hat) ** 2  # element-wise squared error
    conv2d.zero_grad()
    loss.sum().backward()
    # Update the kernel with a manual gradient-descent step
    conv2d.weight.data[:] -= lr * conv2d.weight.grad
    if (i + 1) % 2 == 0:
        print(f'epoch{i + 1},loss={loss.sum():.3f}')

epoch2,loss=0.835
epoch4,loss=0.185
epoch6,loss=0.050
epoch8,loss=0.016
epoch10,loss=0.006


In [33]:
# Display the layer's current kernel weights.
conv2d.weight.data

tensor([[[[ 1.0033, -0.9883]]]])

## 多通道计算

### 多通道输入单通道输出

In [7]:
import torch
import lmy


def corr2d_multi_in(X, K):
    """Multi-input-channel cross-correlation producing a single output channel.

    Pairs each slice of ``X`` with the matching slice of ``K`` (iterating
    both along their first axis), applies ``lmy.corr2d`` to every pair and
    adds the per-channel results together.
    """
    total = 0  # same starting value the built-in ``sum`` uses
    for x, k in zip(X, K):
        # Out-of-place add so no per-channel result is mutated.
        total = total + lmy.corr2d(x, k)
    return total


# Two-channel 3x3 example input (first axis is the channel).
X = torch.tensor([
    [
        [0.0, 1.0, 2.0],
        [3.0, 4.0, 5.0],
        [6.0, 7.0, 8.0],
    ],
    [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
        [7.0, 8.0, 9.0],
    ],
])
# Matching two-channel 2x2 kernel.
KER = torch.tensor([
    [
        [0.0, 1.0],
        [2.0, 3.0],
    ],
    [
        [1.0, 2.0],
        [3.0, 4.0],
    ],
])
corr2d_multi_in(X, KER)

tensor([[ 56.,  72.],
        [104., 120.]])

### 多输出通道

In [20]:
from icecream import ic


def corr2d_multi_in_out(X, K):
    """Multi-input, multi-output-channel cross-correlation.

    Iterates over the first axis of ``K`` (one multi-channel kernel per
    output channel), applies ``corr2d_multi_in`` with each kernel, and stacks
    the per-output-channel results along a new leading axis.
    """
    tmp = []
    for k in K:
        tmp.append(corr2d_multi_in(X, k))
    # Debug trace of the per-output-channel results; the variable name appears in the icecream output.
    ic(tmp)
    return torch.stack(tmp, dim=0)


# Base two-channel 2x2 kernel.
K = torch.tensor([
    [
        [0.0, 1.0],
        [2.0, 3.0],
    ],
    [
        [1.0, 2.0],
        [3.0, 4.0],
    ],
])
# Stack three shifted copies of K to form a kernel with three output channels.
K1 = torch.stack((K, K + 1, K + 2), 0)
K.shape, K1.shape, corr2d_multi_in_out(X, K1)

ic| tmp: [tensor([[ 56.,  72.],
                 [104., 120.]]),
          tensor([[ 76., 100.],
                 [148., 172.]]),
          tensor([[ 96., 128.],
                 [192., 224.]])]


(torch.Size([2, 2, 2]),
 torch.Size([3, 2, 2, 2]),
 tensor([[[ 56.,  72.],
          [104., 120.]],
 
         [[ 76., 100.],
          [148., 172.]],
 
         [[ 96., 128.],
          [192., 224.]]]))

## 1X1卷积层
* 不能够提取相邻像素的相关特征
* 经常包含在复杂深层的网络设计中
* 唯一的计算发生在通道上
* 用来调整网络层的通道数和控制模型复杂度

In [None]:
def corr2d_multi_in_out_1x1(X, K):
    """Apply a 1x1 convolution as a single matrix multiplication.

    With a 1x1 kernel no neighbouring pixels are combined; the only
    computation is a linear mix of the input channels at each pixel, so the
    whole operation reduces to ``K_matrix @ X_flattened``.

    :param X: input of shape ``(c_i, h, w)``.
    :param K: kernel that can be reshaped to ``(c_o, c_i)``, e.g. of shape
        ``(c_o, c_i, 1, 1)``.
    :return: tensor of shape ``(c_o, h, w)``.
    """
    c_i, h, w = X.shape
    c_o = K.shape[0]
    # Flatten the spatial dimensions: each column holds one pixel's channel values.
    pixels = X.reshape((c_i, h * w))
    # A 1x1 kernel is a (c_o, c_i) channel-mixing matrix applied at every pixel.
    channel_mix = K.reshape((c_o, c_i))
    Y = torch.mm(channel_mix, pixels)
    return Y.reshape((c_o, h, w))


# Random input of shape (3, 3, 3), matching the (c_i, h, w) unpacking in corr2d_multi_in_out_1x1.
X = torch.normal(0, 1, (3, 3, 3))
# Random kernel of shape (2, 3, 1, 1): 2 output channels, 3 input channels, 1x1 window.
K = torch.normal(0, 1, (2, 3, 1, 1))
corr2d_multi_in_out_1x1(X, K)

## 汇聚层 Pooling Layer
* 降低卷积层对位置的敏感性
* 对特征表示进行空间降采样（降低空间分辨率）

处理图像时，我们希望逐渐降低隐藏表示的空间分辨率并聚集信息。这样，随着网络层数的加深，每个神经元的感受野（即它所对应的输入区域）就越大。
而机器学习任务通常与全局图像问题有关（例如：图片中是否有猫？），所以最后一层的神经元应该对整个输入全局敏感。通过逐渐聚合信息、生成越来越粗糙的映射，最终实现学习全局表示的目标，同时将卷积层的所有优势保留在中间层。

### 最大汇聚层和平均汇聚层
与卷积层类似，汇聚层运算符由一个固定形状的窗口组成，该窗口根据其步幅大小在输入的所有区域上滑动，为固定形状窗口（汇聚窗口）遍历每个位置计算一个输出。
However, unlike the cross-correlation computed between the input and the kernel in a convolutional layer, a pooling layer has no parameters. Instead, **the pooling operation is deterministic**: we typically compute either the maximum or the mean of the elements in the pooling window. These operations are called **maximum pooling（最大汇聚层）** and **average pooling（平均汇聚层）**, respectively.

![image-20220406180512205](md.assets/image-20220406180512205.png)

下面的pool2d函数，实现汇聚层的前向传播

In [2]:
import torch
import torch.nn as nn


def pool2d(X, pool_size, mode='max'):
    """Forward pass of a 2-D pooling layer (no padding, step of one).

    Slides a ``pool_size`` window over ``X`` and reduces each window to a
    single value.

    :param X: 2-D input tensor.
    :param pool_size: ``(height, width)`` of the pooling window.
    :param mode: ``'max'`` for maximum pooling or ``'mean'`` for average
        pooling.
    :return: tensor of shape
        ``(X.shape[0] - height + 1, X.shape[1] - width + 1)``, or ``None``
        (after printing ``modeError``) when ``mode`` is not recognised.
    """
    pool_height, pool_width = pool_size

    # Resolve the reduction once, before allocating or looping, so an unknown
    # mode is always reported (even when the output would be empty) and the
    # mode is not re-tested for every output element.
    if mode == 'max':
        reduce_window = torch.max
    elif mode == "mean":
        reduce_window = torch.mean
    else:
        print("modeError")
        return None

    Y = torch.zeros((X.shape[0] - pool_height + 1, X.shape[1] - pool_width + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = reduce_window(X[i:i + pool_height, j:j + pool_width])
    return Y


# 3x3 example input.
X = torch.tensor([
    [0.0, 1.0, 2.0],
    [3.0, 4.0, 5.0],
    [6.0, 7.0, 8.0]
])
# Compare maximum pooling (the default mode) and mean pooling with a 2x2 window.
pool2d(X, (2, 2)),pool2d(X, (2, 2),mode='mean')

(tensor([[4., 5.],
         [7., 8.]]),
 tensor([[2., 3.],
         [5., 6.]]))

### 填充与步幅

In [3]:
# Values 0..15 arranged as a 4x4 image with leading batch and channel dimensions of size 1.
X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]]])

在默认情况下，`torch.nn.MaxPool2d` 的步幅与汇聚窗口的大小相同。因此，如果我们使用 3×3 的汇聚窗口，步幅默认为 (3, 3)。

In [5]:
# 3x3 max pooling with the default stride.
# NOTE: this rebinds the name `pool2d`, shadowing the pool2d function defined earlier in this file.
pool2d = nn.MaxPool2d((3, 3))
pool2d(X)

MaxPool2d(kernel_size=(3, 3), stride=(3, 3), padding=0, dilation=1, ceil_mode=False)


tensor([[[[10.]]]])

In [7]:
# Non-square 2x3 pooling window with an explicit stride of 1 and no padding.
pool2d = nn.MaxPool2d((2, 3),stride=1,padding=0)
pool2d(X)

tensor([[[[ 6.,  7.],
          [10., 11.],
          [14., 15.]]]])

### Pooling 多通道
在处理多通道输入数据时，汇聚层在每个输入通道上单独运算，而不是像卷积层一样在通道上对输入进行汇总。 这意味着汇聚层的输出通道数与输入通道数相同。

In [8]:
# Concatenate X and X + 1 along dim 1 (the channel axis in the 4-D layout used here) to build a two-channel input.
X = torch.cat((X, X + 1), dim=1)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]],

         [[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])

In [9]:
# 3x3 window, padding 1, stride 2; the output below keeps one pooled map per input channel.
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
pool2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]],

         [[ 6.,  8.],
          [14., 16.]]]])