### 筆記


In [None]:
import numpy as np

def im2col(input_feat: np.ndarray, N, kh, kw, out_h, out_w, stride):
    """Unfold every sliding window of a batch into one row of a 2-D matrix.

    Args:
        input_feat: Array indexed as (sample, channel, row, column); any
            padding must already have been applied by the caller.
        N: Number of samples to unfold (leading axis of ``input_feat``).
        kh: Window height.
        kw: Window width.
        out_h: Number of window positions along the row axis.
        out_w: Number of window positions along the column axis.
        stride: Step between neighbouring windows, in both directions.

    Returns:
        Array of shape (N * out_h * out_w, C * kh * kw). Rows are ordered
        sample-major, then by window row, then by window column; each row is
        one flattened (C, kh, kw) patch.
    """
    # Top-left corner offsets of the windows along each spatial axis.
    row_starts = [stride * r for r in range(out_h)]
    col_starts = [stride * c for c in range(out_w)]

    # Each gathered patch has shape (C, kh, kw).
    patches = [
        input_feat[sample, :, top:top + kh, left:left + kw]
        for sample in range(N)
        for top in row_starts
        for left in col_starts
    ]

    return np.array(patches).reshape(N * out_h * out_w, -1)

def convolution(input_feat: np.ndarray, filter: np.ndarray, kh, kw, stride=1, padding=0, bias=None):
    """Compute a 2-D convolution (cross-correlation) as im2col + one matmul.

    Args:
        input_feat: Input batch of shape (N, C, H, W).
        filter: Kernels of shape (out_C, in_C, kh, kw).
        kh: Kernel height.
        kw: Kernel width.
        stride: Step between neighbouring windows, in both directions.
        padding: Number of zero rows/columns added on every side of H and W.
        bias: ``None`` for no bias; otherwise a scalar or an array-like
            holding one value per output channel, e.g. shape (out_C, 1) or
            (out_C,).

    Returns:
        Array of shape (N, out_C, out_h, out_w) where
        ``out_h = (H + 2 * padding - kh) // stride + 1`` and likewise for
        ``out_w``.

    Raises:
        ValueError: If the kernel does not fit inside the padded input.
    """
    N, _, H, W = input_feat.shape
    out_c = filter.shape[0]

    # Floor division (rather than int(a / b), which truncates toward zero) so
    # that a kernel larger than the padded input yields a non-positive size
    # and is reported below instead of silently becoming an output size of 1.
    out_h = (H + 2 * padding - kh) // stride + 1
    out_w = (W + 2 * padding - kw) // stride + 1
    if out_h <= 0 or out_w <= 0:
        raise ValueError(
            f'kernel ({kh}, {kw}) does not fit the padded input '
            f'({H + 2 * padding}, {W + 2 * padding})'
        )

    if padding:
        input_feat = np.pad(
            input_feat,
            ((0, 0), (0, 0), (padding, padding), (padding, padding)),
            'constant',
            constant_values=0,
        )

    # im2col_feat -> (N*out_h*out_w, C*kh*kw)
    im2col_feat = im2col(input_feat, N, kh, kw, out_h, out_w, stride)

    # weights -> (out_c, C*kh*kw)
    weights = filter.reshape(out_c, -1)

    # w @ x.T -> (out_c, N*out_h*out_w)
    out_feat = weights @ im2col_feat.T
    if bias is not None:
        # Force a column vector so the bias is added per output channel
        # (axis 0) regardless of whether it arrived as (out_C, 1), (out_C,),
        # a list or a scalar.
        out_feat = out_feat + np.asarray(bias).reshape(-1, 1)

    # Reshaping straight to (N, out_c, out_h, out_w) would scramble the
    # values, because the columns are ordered sample-major. Split the second
    # axis into (N, out_h, out_w) first, then move the sample axis to the
    # front.
    return out_feat.reshape(out_c, N, out_h, out_w).transpose((1, 0, 2, 3))
    
            

In [52]:
import torch
import torch.nn.functional as F

# Sanity check: run the NumPy convolution on small random integer tensors and
# compare the result with torch.nn.functional.conv2d.
#   input feat: (bs, in_c, feat_h, feat_w) -> (2, 3, 3, 3)
#   filter:     (out_c, in_c, kh, kw)      -> (3, 3, 2, 2)
#   bias:       (out_c, 1)
bs, in_c, out_c = 2, 3, 3
feat_h = feat_w = 3
kh = kw = 2
padding, stride = 0, 1

# The input is drawn before the filter; keep this order so results stay
# reproducible under a fixed seed.
input_feat = np.random.randint(0, 20, size=(bs, in_c, feat_h, feat_w))
filter = np.random.randint(0, 5, size=(out_c, in_c, kh, kw))
# A random bias could be used instead, e.g.
# bias = np.random.randint(0, 5, size=(out_c, 1))
bias = np.zeros((out_c, 1))

out = convolution(
    input_feat,
    filter,
    kh=kh,
    kw=kw,
    stride=stride,
    padding=padding,
    bias=bias,
)

# Reference result from PyTorch; conv2d expects a 1-D bias, hence squeeze(1).
out_t = F.conv2d(
    torch.tensor(input_feat),
    torch.tensor(filter),
    bias=torch.tensor(bias).squeeze(1),
    stride=stride,
    padding=padding,
)

# Uncomment to inspect the inputs as well:
# print(f'input feat:\n{input_feat}')
# print('======================')
# print(f'filter\n:{filter}')
# print('======================')
print(f'my out:\n{out}')
print(f'torch out:\n{out_t.numpy()}')
np.allclose(out, out_t)

my out:
[[[[238. 236.]
   [233. 181.]]

  [[137. 149.]
   [102. 141.]]

  [[249. 265.]
   [243. 186.]]]


 [[[272. 213.]
   [302. 305.]]

  [[112. 176.]
   [132. 166.]]

  [[309. 278.]
   [284. 241.]]]]
torch out:
[[[[238 236]
   [233 181]]

  [[137 149]
   [102 141]]

  [[249 265]
   [243 186]]]


 [[[272 213]
   [302 305]]

  [[112 176]
   [132 166]]

  [[309 278]
   [284 241]]]]


True

In [None]:
from my_mlp import MLP, ReLU
import torch.nn as nn
# Reference only: the PyTorch layer that the Conv2d class below mirrors. It is
# deliberately not instantiated, because nn.Conv2d() without in_channels,
# out_channels and kernel_size raises TypeError and would abort this cell.
nn.Conv2d

class Conv2d(MLP):
    """2-D convolution layer built on the MLP base class.

    The layer owns one weight matrix of shape
    (in_channel * kh * kw, out_channel) followed by a ReLU. ``forward``
    unfolds its input with im2col so the convolution becomes the plain
    ``X @ w + b`` product that the MLP-style loop already performs.

    NOTE(review): MLP.__init__ is not called here (the original did not call
    it either and its signature is not visible from this file) — confirm
    whether the base class needs initialising.
    """

    def __init__(self,
                 in_channel: int,
                 out_channel: int,
                 kernel_size: tuple,
                 stride: int,
                 padding: int,
                 bias=False
                 ):
        """Create the layer parameters.

        Args:
            in_channel: Number of input channels.
            out_channel: Number of output channels (filters).
            kernel_size: Sequence whose first two items are (kh, kw).
            stride: Step between neighbouring windows, in both directions.
            padding: Zero padding added on every side of H and W.
            bias: Accepted but not consulted here.
                NOTE(review): whether a bias is created is decided by
                ``weight_init`` — confirm against my_mlp.
        """
        kh = kernel_size[0]
        kw = kernel_size[1]

        # Kept on the instance because forward() needs them to unfold its
        # input into columns.
        self.kernel_size = (kh, kw)
        self.stride = stride
        self.padding = padding

        params_set_list = [(in_channel * kh * kw, out_channel), ReLU]
        self.params = self.weight_init(params_set_list)
        self.velocity = {
            'w': [np.zeros_like(w) for w in self.params['w']],
            'b': [np.zeros_like(b) for b in self.params['b']]
        }

    @staticmethod
    def _im2col(input_feat: np.ndarray, kh, kw, stride, padding):
        """Pad ``input_feat`` and unfold its sliding windows into rows.

        Returns:
            Tuple ``(cols, out_h, out_w)`` where ``cols`` has shape
            (N * out_h * out_w, C * kh * kw) with rows ordered sample-major,
            then by window row, then by window column.

        Raises:
            ValueError: If the kernel does not fit inside the padded input.
        """
        N, C, H, W = input_feat.shape
        out_h = (H + 2 * padding - kh) // stride + 1
        out_w = (W + 2 * padding - kw) // stride + 1
        if out_h <= 0 or out_w <= 0:
            raise ValueError(
                f'kernel ({kh}, {kw}) does not fit the padded input '
                f'({H + 2 * padding}, {W + 2 * padding})'
            )

        if padding:
            input_feat = np.pad(
                input_feat,
                ((0, 0), (0, 0), (padding, padding), (padding, padding)),
                'constant',
                constant_values=0,
            )

        # Gather per sample so that every row is one complete (C, kh, kw)
        # patch and the row order is (n, ih, iw).
        patches = [
            input_feat[n, :, stride * ih:stride * ih + kh, stride * iw:stride * iw + kw]
            for n in range(N)
            for ih in range(out_h)
            for iw in range(out_w)
        ]
        # Explicit column count so an empty batch still reshapes cleanly.
        cols = np.array(patches).reshape(N * out_h * out_w, C * kh * kw)
        return cols, out_h, out_w

    @staticmethod
    def convolution(input_feat: np.ndarray, filter: np.ndarray, kh, kw, stride=1, padding=0, bias=None):
        """Compute a 2-D convolution (cross-correlation) via im2col.

        Declared static because it uses no instance state; this lets it be
        called both as ``Conv2d.convolution(...)`` and
        ``self.convolution(...)`` with the same arguments.

        Args:
            input_feat: Input batch of shape (N, C, H, W).
            filter: Kernels of shape (out_C, in_C, kh, kw).
            kh: Kernel height.
            kw: Kernel width.
            stride: Step between neighbouring windows.
            padding: Zero padding added on every side of H and W.
            bias: ``None`` for no bias; otherwise a scalar or an array-like
                with one value per output channel, e.g. shape (out_C, 1).

        Returns:
            Array of shape (N, out_C, out_h, out_w).

        Raises:
            ValueError: If the kernel does not fit inside the padded input.
        """
        N = input_feat.shape[0]
        out_c = filter.shape[0]

        # cols -> (N*out_h*out_w, C*kh*kw)
        cols, out_h, out_w = Conv2d._im2col(input_feat, kh, kw, stride, padding)

        # w @ x.T: (out_c, C*kh*kw) @ (C*kh*kw, N*out_h*out_w)
        out_feat = filter.reshape(out_c, -1) @ cols.T
        if bias is not None:
            # Column vector so the bias is added per output channel.
            out_feat = out_feat + np.asarray(bias).reshape(-1, 1)

        # Columns are ordered sample-major, so split them into
        # (N, out_h, out_w) first and then bring the sample axis to the
        # front; reshaping directly to (N, out_c, ...) would scramble values.
        return out_feat.reshape(out_c, N, out_h, out_w).transpose((1, 0, 2, 3))

    def forward(self, params: dict, X) -> dict:
        """Run the layer and record every pre- and post-activation.

        Args:
            params: Dict with parallel lists under 'w', 'b' and 'act_func'.
            X: Input batch of shape (N, C, H, W).

        Returns:
            Dict with lists 'I' (pre-activations) and 'Y' (activations), one
            entry per (w, b, act_func) triple. Entries are 2-D, with one row
            per (sample, output row, output column) position.
            NOTE(review): outputs are left in this unfolded layout on the
            assumption that the inherited backward pass expects 2-D
            activations — confirm against my_mlp.
        """
        forward_saved = {'I': [], 'Y': []}

        # Unfold (N, C, H, W) into (N*out_h*out_w, C*kh*kw) so that the
        # convolution is exactly the matmul performed in the loop below.
        kh, kw = self.kernel_size
        layer_input, _, _ = self._im2col(X, kh, kw, self.stride, self.padding)

        for w, b, act_func in zip(params['w'], params['b'], params['act_func']):
            I = np.matmul(layer_input, w) + b
            Y = act_func.forward(I)
            forward_saved['I'].append(I)
            forward_saved['Y'].append(Y)
            layer_input = Y

        return forward_saved
    

In [43]:
# Scratch check of bias broadcasting: flatten `out` into two rows and add
# `bias` after wrapping it in an extra axis and transposing.
# NOTE(review): this relies on `out` and `bias` left over from an earlier
# notebook run (the recorded result below is 2 x 9); presumably `bias` was a
# length-2 1-D sequence at that time — confirm before reusing.
out.reshape(2, -1) + np.array([bias]).T

array([[ 14,  96,  83, 178, 282, 287, 122, 242, 345],
       [ 35, 192, 136, 222, 225, 277, 120, 235, 358]])