In [50]:
%config ZMQInteractiveShell.ast_node_interactivity = "all"
%pprint

Pretty printing has been turned OFF


## 二维卷积

卷积神经网络是含有卷积层的神经网络
- 我们用得最多就是二维的卷积层(有高宽两个维度)
- 多输入通道数和多输出通道数的卷积层都是在这上面进行扩展

### 卷积和互相关运算

通常来说,我们在卷积神经网络上使用的卷积运算并不是真正的卷积运算,其通常是使用互相关运算
- 卷积运算是先将卷积核上下左右翻转后,再对输入数据进行互相关运算
- 但由于深度学习中,卷积核的参数都是学习出来的,无论使用互相关运算或者是真正的卷积运算都不影响模型预测时的输出
- 卷积核其实就是一个特征提取器,运算后的输出可以看成是输入在空间维度(高和宽)上某一级的表征
- 考虑单通道输入和输出,将输入的高宽分别记为$I_h$和$I_w$,卷积核的高宽分别记为$K_h$和$K_w$,输出的的高宽分别记为$O_h$和$O_w$,对高宽进行的padding分别记为$p_h$和$p_w$,对高宽进行的stride分别记为$s_h$和$s_w$,那么对于以下几种情况的卷积输出分别为:
    - 无padding和stride: $O_h = I_h - k_h + 1$, $O_w = I_w - k_w + 1$
    - 有padding和无stride: $O_h = I_h - k_h + p_h + 1$, $O_h = I_w- k_w + p_w + 1$
    - 无padding和有stride: $(O_h = I_h - k_h)/s_h + 1$, $(O_w = I_w - k_w)/s_w + 1$
    - 有padding和有stride: $(O_h = I_h - k_h + p_h)/s_h + 1$, $(O_w = I_w - k_w + p_w)/s_w + 1$
        - 其实第四条就能包括前三条,这里只是列得仔细点,另外p指的是两边一共padding的数量,有的书是用2p(这实际上是指单边的padding数量)

通过我们会将padding设为$k-1$,这样就能获得和输入同shape的tensor(这也叫等宽卷积)
- 卷积核通常也是奇数,这样两端的padding一样,否则为偶数时,一边的padding需要向上取整,另一边padding要向下取整
- 目前多用小的卷积核(像1x1, 3x3等)

我们可以通过更深的网络结构来让感受野变得更加广阔,从而捕捉输入上更大尺寸特征

### 卷积个人实现

在二维互相关运算中(如无特殊说明,深度学习中的卷积就是指互相关运算)
- 就是卷积窗口从输入数组的最上方开始,从左到右,从上到下的顺序,依次做滑窗运算

In [8]:
import torch
import torch.nn as nn

In [21]:
def conv2d(x, k):
    """
    功能: 实现卷积操作(无padding/无stride)
    参数 x: 输入数据
    参数 k; 传入一个卷积核
    """
    # 获取卷积核的大小
    h, w = k.shape
    # 定义输出的shpe
    y = torch.rand((x.shape[0] - h + 1, x.shape[1] - w + 1))
    # 卷积运算
    for i in range(y.shape[0]):
        for j in range(y.shape[1]):
            y[i, j] = (x[i:i+h, j:j+w] * k).sum()
            
    return y

In [6]:
x = torch.arange(9).view(3, 3)
k = torch.arange(4).view(2, 2)

# 卷积运算
conv2d(x, k)

tensor([[19., 25.],
        [37., 43.]])

### 自定义卷积层

In [9]:
import torch.nn as nn

In [31]:
class Conv2D(nn.Module):
    """自定义实现卷积层"""
    def __init__(self, kernel_size):
        super(Conv2D, self).__init__()
        self.weight = nn.Parameter(torch.rand(kernel_size))
        self.bias = nn.Parameter(torch.zeros(1))
        
    def forward(self, x):
        return conv2d(x, self.weight) + self.bias

卷积窗口形状为pxq的卷积层称为pxq卷积层
- 说明卷积核的高和宽分别为p和q

### 图像的物体边缘检测

用卷积层来检测图像中的物体边缘(找到像素变化的位置)

In [13]:
# 构建一个6*8的图像,中间4列为黑,其余为白
x = torch.ones(6, 8)
x[:, 2:6] = 0
x

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

因为实际上是测相邻的元素是否有变化,因此可以定义一个1*2的卷积核[[-1, 1]],只要相邻两行出现变化,卷积计算出来的就不为0,如果没有变化就为0

#### 自定义卷积核的方式

In [51]:
k = torch.tensor([[-1, 1]])
y = conv2d(x, k)
y

tensor([[ 0., -1.,  0.,  0.,  0.,  1.,  0.],
        [ 0., -1.,  0.,  0.,  0.,  1.,  0.],
        [ 0., -1.,  0.,  0.,  0.,  1.,  0.],
        [ 0., -1.,  0.,  0.,  0.,  1.,  0.],
        [ 0., -1.,  0.,  0.,  0.,  1.,  0.],
        [ 0., -1.,  0.,  0.,  0.,  1.,  0.]])

卷积层可以通过重复使用卷积核有效地表征局部空间

#### 网络训练

In [44]:
import sys
sys.path.append("../d2l_func/")
from optim import sgd
from sqdm import sqdm

In [35]:
def squared_loss(y_pred, y):
    return ((y_pred - y)**2).sum()

In [85]:
model  = Conv2D(kernel_size=(1, 2))
loss = squared_loss

process_bar = sqdm()
for epoch in range(50):
    print(f"Epoch [{epoch+1}/{50}]")
    y_pred = model(x)
    l = loss(y_pred, y)
    l.backward()
    
    sgd([model.weight, model.bias], lr=0.01, weight_decay=0.1)
#     _ = model.weight.grad.data.zero_()
#     _ = model.bias.grad.data.zero_()
    _ = model.weight.grad.fill_(0)
    _ = model.bias.grad.fill_(0)
    
    process_bar.show_process(data_num=1, batch_size=1, train_loss=l.item())
    print("\n")

Epoch [1/50]
1/1 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 23.8259, train_score: -, test_loss: -, test_score: -

Epoch [2/50]
1/1 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 9.5828, train_score: -, test_loss: -, test_score: -

Epoch [3/50]
1/1 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 6.6669, train_score: -, test_loss: -, test_score: -

Epoch [4/50]
1/1 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 4.9749, train_score: -, test_loss: -, test_score: -

Epoch [5/50]
1/1 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 3.7512, train_score: -, test_loss: -, test_score: -

Epoch [6/50]
1/1 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 2.8428, train_score: -, test_loss: -, test_score: -

Epoch [7/50]
1/1 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 2.1634, train_score: -, test_loss: -, test_score: -

Epoch [8/50]
1/1 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 1.6526, train_score: -, test_loss: -, test_score: -

Epoch [9/50]
1/1 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss:

In [86]:
# result
model.weight
model.bias

Parameter containing:
tensor([[-0.9904,  0.9904]], requires_grad=True)

Parameter containing:
tensor([-5.5702e-06], requires_grad=True)

结果和自定义的[[-1, 1]]差不多

### 带padding卷积的个人实现

In [89]:
import numpy as np

In [90]:
a = np.arange(16).reshape(2, 8)
a

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15]])

In [92]:
np.pad(a, ((1, 1), (2, 2)))

array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  0,  0],
       [ 0,  0,  8,  9, 10, 11, 12, 13, 14, 15,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]])

In [95]:
def conv2d_padding(x, k, padding):
    """
    function: 实现带padding的卷积
    params x: 输入张量
    params k: 巻积核
    params padding: 传入padding的元组(h, w),高padding多少,宽padding多少,如果为整数就高宽一样
    """
    assert padding > 0
    if padding == 0:
        return conv2d(x, k)
    else:
        if isinstance(padding, int):
            h = w = padding
        else:
            h, w = padding
        x = x.numpy()
        x = torch.from_numpy(np.pad(x, ((w, w), (h, h))))
        return conv2d(x, k)

In [97]:
# 等宽巻积
x = torch.rand(8, 8)
k = torch.rand(3, 3)

conv2d_padding(x, k, padding=1).shape

torch.Size([8, 8])

### 带stride的巻积个人实现

In [131]:
def conv2d_padding_stride(x, k, padding, stride=1):
    """
    function: 实现带padding和stride的巻积
    params x: 输入张量
    params k: 巻积核
    params padding: 传入padding的元组(h, w),高padding多少,宽padding多少,如果为整数就高宽一样
    params stride: 默认为1, 传入(h, w)或者整数
    """
    if stride == 1:
        return conv2d_padding(x, k, padding)
    else:
        kh, kw = k.shape
        if isinstance(padding, int):
            ph = pw = padding
        else:
            ph, pw = padding
            
        if isinstance(stride, int):
            sh = sw = stride
        else:
            sh, sw = stride
        
        y = np.zeros(np.round((x.shape[0] - kh + ph + sh)/sh), 
                        np.round((x.shape[1] - kw + pw + sw)/sw))
        y = torch.from_numpy(y)
        
        # 对x加padding
        x = x.numpy()
        x = torch.from_numpy(np.pad(x, ((pw, pw), (ph, ph))))
        
        for i in range(0, y.shape[0], sh):
            for j in range(0, y.shape[1], sw):
                y[i, j] = (x[i:(i+h), j:(j+w)] * k).sum()
            
        return y

In [119]:
np.floor(2.3)

2.0

In [132]:
x.shape
k = torch.rand(3, 5)
conv2d_padding_stride(x, k, (0, 1), (3, 4))

torch.Size([8, 8])

TypeError: 'numpy.float64' object cannot be interpreted as an integer