# 卷积操作

In [1]:
import torch 
import numpy as np
import torch.nn as nn

## 填充（padding）

填充（padding）是指在输入高和宽的两侧填充元素（通常是0元素)。

通常填充操作的目的是使得输入的特征([w_i, h_i, c_i])与输出的特征([w_o, h_o, c_o])满足[w_i, h_i]=[w_o, h_o], 即卷积操作保持特征的宽度和高度的大小。
* **在卷积操作之前**, 在特征的高方向填充$p_h = k_h - 1$行零行, 在宽方向填充$p_w = k_w - 1$列零列。
* 当卷积核的大小k为奇数，上下方向分别填充$\frac{k-1}{2}$行零行; 左右方向同理进行填充。
* 当卷积核的大小k为偶数，上方填充$\lceil \frac{k-1}{2}\rceil$行零行，下方填充$\lfloor \frac{k-1}{2}\rfloor$行零行; 左右方向同理进行填充。


In [2]:
def corr2d(x, k, padding=0):
    """
    with padding 
    """
    if isinstance(padding, tuple) is False:
        padding = (padding, padding)

    padding_size = padding[0] + padding[1]

    p_x = torch.zeros(x.shape[0] + padding_size, x.shape[1] + padding_size)  # padding x
    p_x[padding[0]:padding[0] + x.shape[0], padding[0]:padding[0] + x.shape[1]] = x

    h, w = k.shape
    
    # output_shape = (x.shape[0] + padding_size - h + 1, x.shape[1] + padding_size - w + 1)
    output_shape = (p_x.shape[0] - h + 1, p_x.shape[1] - w + 1)

    y = torch.zeros(output_shape)

    for i in range(y.shape[0]):
        for j in range(y.shape[1]):
            y[i, j] = (p_x[i:i + h, j:j + w] * k).sum()

    return y

In [3]:
# odd size
x = torch.arange(25).view(5, 5)
k = torch.ones(3, 3)
y = corr2d(x, k)
y_p1 = corr2d(x, k, padding=1)

print(x)
print(k)
print(y)
print(y_p1)

tensor([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[ 54.,  63.,  72.],
        [ 99., 108., 117.],
        [144., 153., 162.]])
tensor([[ 12.,  21.,  27.,  33.,  24.],
        [ 33.,  54.,  63.,  72.,  51.],
        [ 63.,  99., 108., 117.,  81.],
        [ 93., 144., 153., 162., 111.],
        [ 72., 111., 117., 123.,  84.]])


In [4]:
# even size
k = torch.ones(4, 4)
y_p2 = corr2d(x, k, padding=(2,1))
print(y_p2)

tensor([[ 12.,  21.,  32.,  40.,  33.],
        [ 33.,  54.,  78.,  90.,  72.],
        [ 64., 102., 144., 160., 126.],
        [104., 162., 224., 240., 186.],
        [ 93., 144., 198., 210., 162.]])


## 步幅 (stride)

输出特征大小: $h = \lfloor \frac{n_h - k_h + p_h}{s_h} \rfloor +1$， $s = \lfloor \frac{n_w - k_w + p_w}{s_w} \rfloor +1$
* no-padding: $h = \lfloor \frac{n_h - k_h}{s_h} \rfloor +1$， $s = \lfloor \frac{n_w - k_w}{s_w} \rfloor +1$
* padding: $h = \lfloor \frac{n_h-1}{s_h} \rfloor +1$， $s = \lfloor \frac{n_w-1}{s_w} \rfloor +1$

In [5]:
def corr2d(x, k, padding=0, stride=1):
    """
    with padding
    """

    if isinstance(padding, tuple) is False:
        padding = (padding, padding)

    if isinstance(stride, tuple) is False:
        stride = (stride, stride)

    padding_size = padding[0] + padding[1]

    p_x = torch.zeros(x.shape[0] + padding_size, x.shape[1] + padding_size)  # padding x
    p_x[padding[0]:padding[0] + x.shape[0], padding[0]:padding[0] + x.shape[1]] = x

    h, w = k.shape

    output_shape = (int(np.floor((x.shape[0] - h + padding_size) / stride[0]) + 1),
                    int(np.floor((x.shape[1] - w + padding_size) / stride[1]) + 1))

    y = torch.zeros(output_shape)

    for i in range(y.shape[0]):
        for j in range(y.shape[1]):
            y[i, j] = (p_x[i * stride[0]:i * stride[0] + h, j * stride[1]:j * stride[1] + w] * k).sum()

    return y

In [6]:
k = torch.ones(3, 3)
# h = w = int(np.floor((5 - 3 + 0) / 2) + 1) = 2 
y_p0_s2 = corr2d(x, k, padding=0, stride=2)
print(y_p0_s2)

tensor([[ 54.,  72.],
        [144., 162.]])


In [7]:
# h = w = int(np.floor((5 - 3 + 2*1) / 2) + 1) = 2 
y_p1_s2 = corr2d(x, k, padding=1, stride=2)
print(y_p1_s2)

tensor([[ 12.,  27.,  24.],
        [ 63., 108.,  81.],
        [ 72., 117.,  84.]])


## 多通道

In [8]:
# multi input
def corr2d_multi_input_single_out(x, k):
    """
    NCHW
    x:[C, H， W]
    k:[C, K, K]
    """
    res = None
    for i in range(x.shape[0]):
        if res is None:
            res = corr2d(x[i, :, :], k[i, :, :])
        else:
            res += corr2d(x[i, :, :], k[i, :, :])
        
        return res


In [9]:
x = torch.arange(3*5*5).view(3, 5, 5)
k = torch.arange(3*3*3).view(3, 3, 3)
y = corr2d_multi_input_single_out(x, k)
print(y.shape)


torch.Size([3, 3])


In [10]:
# multi_output
def corr2d_multi_in_multi_out(x, k):
    # stack
    return torch.stack([corr2d_multi_input_single_out(x, k) for k in k])

In [11]:
x = torch.arange(3*5*5).view(3, 5, 5)
k = torch.arange(4*3*3*3).view(4, 3, 3, 3)
y = corr2d_multi_in_multi_out(x, k)
print(y.shape)

torch.Size([4, 3, 3])


## 池化（pooling）

In [12]:
# pooling
def pool2d(x, pool_size, mode='max'):
    x = x.float()  # convert to float
    p_h, p_w = pool_size
    y = torch.zeros(x.shape[0] - p_h + 1, x.shape[1] - p_w + 1)
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            if mode == 'max':
                y[i, j] = x[i: i + p_h, j: j + p_w].max()
            elif mode == 'avg':
                x[i, j] = x[i: i + p_h, j: j + p_w].mean()       
    return y

In [13]:
x = torch.arange(5*5).view(5, 5)
pool_size = (2, 2)
y = pool2d(x, pool_size)
print(y)

IndexError: index 4 is out of bounds for dimension 1 with size 4

In [None]:
def pool2d_multi_in_multi_out(X, pool_size, mode='max'):
    # stack
    return torch.stack([pool2d(x, pool_size, mode) for x in X])

In [None]:
x = torch.arange(3*5*5).view(3, 5, 5)
pool_size = (2, 2)
y = pool2d_multi_in_multi_out(x, pool_size)
print(y)