In [3]:
import numpy as np

In [None]:
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold every sliding window of a 4-D batch into one row of a 2-D matrix.

    Args:
        input_data: Array of shape (batch_size, channels, height, width).
        filter_h: Window height.
        filter_w: Window width.
        stride: Step between neighbouring windows, used on both spatial axes.
        pad: Number of zeros added on each side of both spatial axes before
            the windows are taken.

    Returns:
        A float64 array of shape
        (batch_size * out_h * out_w, channels * filter_h * filter_w) with
        out_h = (height + 2 * pad - filter_h) // stride + 1 and
        out_w = (width + 2 * pad - filter_w) // stride + 1.
        Rows are ordered by (batch, window row, window column); the values
        inside a row are ordered by (channel, filter row, filter column).

    Raises:
        ValueError: If ``stride`` is smaller than 1 or ``pad`` is negative.
    """
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")
    if pad < 0:
        raise ValueError(f"pad must be >= 0, got {pad}")

    batch_size, channels, height, width = input_data.shape

    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # Zero-pad only the two spatial axes; batch and channel axes stay as-is.
    padded = np.pad(
        input_data,
        [(0, 0), (0, 0), (pad, pad), (pad, pad)],
        mode="constant",
    )

    col = np.zeros((batch_size, channels, filter_h, filter_w, out_h, out_w))

    # Loop over positions inside the filter rather than over output
    # positions: each strided slice then collects, for one filter offset,
    # the matching element of every window at once.
    for fh in range(filter_h):
        h_end = fh + stride * out_h
        for fw in range(filter_w):
            w_end = fw + stride * out_w
            col[:, :, fh, fw, :, :] = padded[:, :, fh:h_end:stride, fw:w_end:stride]

    # (N, C, FH, FW, out_h, out_w) -> (N, out_h, out_w, C, FH, FW) so that
    # each flattened row is one window with the channel axis varying slowest.
    col = col.transpose(0, 4, 5, 1, 2, 3).reshape(
        batch_size * out_h * out_w, channels * filter_h * filter_w
    )

    return col

In [5]:
# Shape check for one image and for a batch of ten. A 7x7 input with a 5x5
# window has 3 * 3 window positions per image, and every row holds
# 3 channels * 5 * 5 = 75 values, so the printed shapes are (9, 75) and (90, 75).
for n_images in (1, 10):
    x1 = np.random.rand(n_images, 3, 7, 7)
    col = im2col(x1, 5, 5)
    print(col.shape)

(9, 75)
(90, 75)


In [None]:
class Convolution:
    """Convolution layer whose forward pass is an im2col matrix product.

    ``stride`` and ``pad`` are stored on the instance, but ``forward``
    currently only implements the stride=1, pad=0 case.
    """

    def __init__(self, W, b, stride=1, pad=0) -> None:
        # W has shape (FN, C, FH, FW): filter count, channels, filter height/width.
        self.W = W
        # b is added to the (rows, FN) product, so it must broadcast against FN.
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Convolve ``x`` with the stored filters.

        Args:
            x: Input of shape (N, C, H, W), N being the batch size.

        Returns:
            Array of shape (N, FN, H - FH + 1, W - FW + 1).

        Raises:
            ValueError: If the channel count of ``x`` differs from that of
                the filters.
        """
        N, C, H, W = x.shape  # N = batch size
        FN, FC, FH, FW = self.W.shape  # FN = number of filters = output channels
        if C != FC:
            raise ValueError(
                f"input has {C} channels but the filters expect {FC}"
            )

        # One row per window position: (N * out_h * out_w, C * FH * FW).
        col = im2col(x, FH, FW, stride=1, pad=0)
        # One column per filter: (C * FH * FW, FN). .T equals transpose(1, 0).
        col_W = self.W.reshape(FN, C * FH * FW).T
        out = np.dot(col, col_W) + self.b

        out_h = H - FH + 1  # only valid for stride=1, pad=0
        out_w = W - FW + 1
        # The last axis of the product indexes filters, so it has length FN
        # (not FW); move it to axis 1 to get the (N, FN, out_h, out_w) layout.
        out = out.reshape(N, out_h, out_w, FN).transpose(0, 3, 1, 2)

        return out

In [7]:
class Pooling:
    """Max-pooling layer built on im2col.

    ``stride`` and ``pad`` are stored on the instance, but ``forward`` only
    implements the stride=1, pad=0 case.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0) -> None:
        # A convolution receives its window size through the weight array;
        # pooling has no weights, so the window height/width are passed directly.
        self.pool_h, self.pool_w = pool_h, pool_w
        self.stride, self.pad = stride, pad

    def forward(self, x):
        """Return the window-wise maximum of ``x``.

        Args:
            x: Input of shape (N, C, H, W).

        Returns:
            Array of shape (N, C, H - pool_h + 1, W - pool_w + 1).
        """
        batch, channels, height, width = x.shape
        window_size = self.pool_h * self.pool_w

        # im2col rows still mix all channels of a window; splitting each row
        # into chunks of one window gives one row per (window, channel) pair.
        patches = im2col(x, self.pool_h, self.pool_w, stride=1, pad=0)
        per_channel = patches.reshape(-1, window_size)

        pooled = per_channel.max(axis=1)

        # Only valid for stride=1, pad=0.
        out_h = height - self.pool_h + 1
        out_w = width - self.pool_w + 1

        # Rows come out ordered (N, out_h, out_w, C); move C to axis 1.
        return pooled.reshape(batch, out_h, out_w, channels).transpose(0, 3, 1, 2)