In [None]:
import numpy as np

samples = np.random.randint(0, 256,(10, 3, 28, 28)) # N x C x H x W
samples.shape

(10, 3, 28, 28)

In [None]:
import numpy as np

def padding(input_data, pad=0):
    _, _, h, w = input_data.shape
    padded_data = []
    
    for data in input_data:
        temp_data = []
        for c_data in data:
            padded_c_data = np.zeros((h + 2*pad, w + 2*pad))
            padded_c_data[pad:pad+h, pad:pad+w] = c_data
            temp_data.append(list(padded_c_data))

        padded_data.append(temp_data)

    return np.array(padded_data)

def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    input_data = padding(input_data, pad=pad)
    
    data_h = input_data.shape[2]
    data_w = input_data.shape[3]
    
    cols = []
    
    end_w_index = data_w - filter_w
    end_h_index = data_h - filter_h
    
    for data in input_data:
        for h_index in range(0, end_h_index+1, stride):
            for w_index in range(0, end_w_index+1, stride):
                feature_map = data[:, h_index:h_index+filter_h, w_index:w_index+filter_w]
                cols.append(list(feature_map.reshape(-1,)))

    return np.array(cols)

def col2im(cols, input_shape, filter_h, filter_w, stride=1, pad=0):
    data_n, data_c, data_h, data_w = input_shape
    output_h = int((data_h + 2 * pad - filter_h) // stride + 1)
    output_w = int((data_w + 2 * pad - filter_w) // stride + 1)

    img = np.zeros((data_n, data_c, data_h + 2 * pad, data_w + 2 * pad))

    cols_reshaped = cols.reshape(data_n, output_h, output_w, data_c, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)

    for h_index in range(filter_h):
        h_end = h_index + stride * output_h
        for w_index in range(filter_w):
            w_end = w_index + stride * output_w
            img[:, :, h_index:h_end:stride, w_index:w_end:stride] += cols_reshaped[:, :, h_index, w_index, :, :]

    return img[:, :, pad:pad + data_h, pad:pad + data_w]


class Convolution:
    def __init__(self, W, b, stride=1, pad=0):
        self.W = W              # (N x C x H x W)
        self.b = b
        self.stride = stride
        self.pad = pad
        
    def forward(self, x):
        filter_num, c, filter_h, filter_w = self.W.shape
        data_num, _, h, w = x.shape
        
        output_h = int((h + 2*self.pad - filter_h) / self.stride + 1)
        output_w = int((w + 2*self.pad - filter_w) / self.stride + 1)
        
        cols_data = im2col(x, filter_h, filter_w, stride=self.stride, pad=self.pad)
        print(f"shape of cols_img : {cols_data.shape}")
        cols_W = self.W.reshape(filter_num, -1)
        print(f"shape of cols_W : {cols_W.T.shape}")
        output = np.dot(cols_data, cols_W.T) + self.b
        print(f"shape of output : {output.shape}")
        
        return output.T.reshape(data_num, filter_num, output_h, output_w)
    
    def backward(self, dout):
        grads = {}
        grads["W"] = np.dot(self.X.T, dout)
        grads["X"] = np.dot(dout, self.W.T)
        grads["b"] = np.sum(dout, axis=0)

In [None]:
import numpy as np

class Pooling:
    def __init__(self, pooling_h=2, pooling_w=2, stride=2, pad=0):
        self.pooling_h = pooling_h
        self.pooling_w = pooling_w
        self.stride = stride
        self.pad = pad
        self.OH = None
        self.OW = None
        
    def forward(self, x):
        data_n, data_c, data_h, data_w = x.shape
        output_h = int((data_h + 2*self.pad - self.pooling_h) / self.stride + 1)
        self.OH = output_h
        output_w = int((data_w + 2*self.pad - self.pooling_w) / self.stride + 1)
        self.OW = output_w
        
        cols = im2col(x, self.pooling_h, self.pooling_w, stride=self.stride, pad=self.pad)
        cols = cols.reshape(-1, self.pooling_h*self.pooling_w)
        pooling_res = np.max(cols, axis=1)
        pooling_res = pooling_res.reshape(data_n, output_h, output_w, data_c).transpose(0, 3, 1, 2)
        
        return pooling_res
    
    def backward(self, dout):
        dout = dout.transpose(0, 2, 3, 1).flatten()

        dcol = np.zeros_like(self.col)
        dcol[np.arange(len(self.arg_max)), self.arg_max] = dout

        dcol = dcol.reshape(self.col.shape[0], -1)
        return col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)

In [None]:
import numpy as np
from collections import OrderedDict
from layer import Affine, ReLU, Softmax_with_Loss

def padding(input_data, pad=0):
    _, _, h, w = input_data.shape
    padded_data = []
    
    for data in input_data:
        temp_data = []
        for c_data in data:
            padded_c_data = np.zeros((h + 2*pad, w + 2*pad))
            padded_c_data[pad:pad+h, pad:pad+w] = c_data
            temp_data.append(list(padded_c_data))

        padded_data.append(temp_data)

    return np.array(padded_data)

def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    input_data = padding(input_data, pad=pad)
    
    data_h = input_data.shape[2]
    data_w = input_data.shape[3]
    
    cols = []
    
    end_w_index = data_w - filter_w
    end_h_index = data_h - filter_h
    
    for data in input_data:
        for h_index in range(0, end_h_index+1, stride):
            for w_index in range(0, end_w_index+1, stride):
                feature_map = data[:, h_index:h_index+filter_h, w_index:w_index+filter_w]
                cols.append(list(feature_map.reshape(-1,)))

    return np.array(cols)

def col2im(cols, input_shape, filter_h, filter_w, stride=1, pad=0):
    data_n, data_c, data_h, data_w = input_shape
    output_h = int((data_h + 2 * pad - filter_h) // stride + 1)
    output_w = int((data_w + 2 * pad - filter_w) // stride + 1)

    img = np.zeros((data_n, data_c, data_h + 2 * pad, data_w + 2 * pad))

    cols_reshaped = cols.reshape(data_n, output_h, output_w, data_c, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)

    for h_index in range(filter_h):
        h_end = h_index + stride * output_h
        for w_index in range(filter_w):
            w_end = w_index + stride * output_w
            img[:, :, h_index:h_end:stride, w_index:w_end:stride] += cols_reshaped[:, :, h_index, w_index, :, :]

    return img[:, :, pad:pad + data_h, pad:pad + data_w]

class Convolution:
    def __init__(self, W, b, stride=1, pad=0):
        self.W = W              # (N x C x H x W)
        self.b = b
        self.stride = stride
        self.pad = pad
        
    def forward(self, x):
        filter_num, filter_c, filter_h, filter_w = self.W.shape
        data_num, _, h, w = x.shape
        
        output_h = int((h + 2*self.pad - filter_h) / self.stride + 1)
        output_w = int((w + 2*self.pad - filter_w) / self.stride + 1)
        
        cols_data = im2col(x, filter_h, filter_w, stride=self.stride, pad=self.pad)
        print(f"shape of cols_img : {cols_data.shape}")
        cols_W = self.W.reshape(filter_num, -1)
        print(f"shape of cols_W : {cols_W.T.shape}")
        output = np.dot(cols_data, cols_W.T) + self.b
        print(f"shape of output : {output.shape}")
        
        return output.T.reshape(data_num, filter_num, output_h, output_w)
    
    def backward(self, dout):
        FN, C, FH, FW = self.W.shape
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)  # (N*out_h*out_w, FN)

        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.col.T, dout)  # (C*FH*FW, FN)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

        dcol = np.dot(dout, self.col_W.T)  # (N*out_h*out_w, C*FH*FW)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)

        return dx
        
class Pooling:
    def __init__(self, pooling_h=2, pooling_w=2, stride=2, pad=0):
        self.pooling_h = pooling_h
        self.pooling_w = pooling_w
        self.stride = stride
        self.pad = pad
        self.OH = None
        self.OW = None
        
    def forward(self, x):
        data_n, data_c, data_h, data_w = x.shape
        output_h = int((data_h + 2*self.pad - self.pooling_h) / self.stride + 1)
        self.OH = output_h
        output_w = int((data_w + 2*self.pad - self.pooling_w) / self.stride + 1)
        self.OW = output_w
        
        cols = im2col(x, self.pooling_h, self.pooling_w, stride=self.stride, pad=self.pad)
        cols = cols.reshape(-1, self.pooling_h*self.pooling_w)
        pooling_res = np.max(cols, axis=1)
        pooling_res = pooling_res.reshape(data_n, output_h, output_w, data_c).transpose(0, 3, 1, 2)
        
        return pooling_res
    
    def backward(self, dout):
        dout = dout.transpose(0, 2, 3, 1).flatten()

        dcol = np.zeros_like(self.col)
        dcol[np.arange(len(self.arg_max)), self.arg_max] = dout

        dcol = dcol.reshape(self.col.shape[0], -1)
        return col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
    
class SimpleConvNet:
    def __init__(self, input_dim, conv_param, hidden_size=100, output_size=10, weight_init_std=0.001):
        self.data_c = input_dim[0]
        self.data_h = input_dim[1]
        self.data_w = input_dim[2]
        
        self.filter_num = conv_param["filter_num"]
        self.filter_h = conv_param["filter_size"][0]
        self.filter_w = conv_param["filter_size"][1]
        
        self.stride = conv_param["stride"]
        self.pad = conv_param["pad"]
        
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.weight_init_std = weight_init_std
        
        self.conv_output_h = None
        self.conv_output_w = None
        
        self.pooling_output_h = None
        self.pooling_output_w = None
        
        self.params = {}
        self.layers = OrderedDict()
        
        # input : N x C x H x W
        # output : N x FN x OH x OW
        # OH = (H + 2*pad - filter_h)/stride + 1
        # OW = (W + 2*pad - filter_w)/stride + 1
        self.params["W1"] = np.random.randn(self.filter_num, self.data_c, self.filter_h, self.filter_w) * self.weight_init_std
        self.params["B1"] = np.random.randn(self.filter_num) * self.weight_init_std
        self.layers["Conv"] = Convolution(self.params["W1"], self.params["B1"], stride=self.stride, pad=self.pad)
        self.conv_output_h = int((self.data_h + 2*self.pad - self.filter_h)/self.stride + 1)
        self.conv_output_w = int((self.data_w + 2*self.pad - self.filter_w)/self.stride + 1)
        
        self.layers["ReLU1"] = ReLU()
        
        # input : N x FN x OH x OW
        # output : N x FN x Pooling_OH x Pooling_OW
        # Pooling_OH = (OH + 2*pad - pooling_h)/stride + 1 -> (OH - 2)/2 + 1 
        # Pooling_OW = (OW + 2*pad - pooling_h)/stride + 1
        pooling_h = 2
        pooling_w = 2
        pooling_stride = 2
        pooling_pad = 0
        self.layers["Pooling"] = Pooling(pooling_h, pooling_w, pooling_stride, pooling_pad)
        self.pooling_output_h = int((self.conv_output_h + 2*pooling_pad - pooling_h)/pooling_stride + 1)
        self.pooling_output_w = int((self.conv_output_w + 2*pooling_pad - pooling_w)/pooling_stride + 1)
        
        # input : N x FN x OH x OW
        # output : N x FN x Pooling_OH x Pooling_OW
        # OH = (OH + 2*pad - pooling_h)/stride + 1
        # OW = (OW + 2*pad - pooling_h)/stride + 1
        feature_size = self.filter_num*self.pooling_output_h*self.pooling_output_w
        self.params["W2"] = np.random.randn(feature_size, self.hidden_size) * self.weight_init_std
        self.params["B2"] = np.random.randn(self.hidden_size) * self.weight_init_std
        self.layers["Affine1"] = Affine(self.params["W2"], self.params["B2"])
        self.layers["ReLU2"] = ReLU()
        
        
        self.params["W3"] = np.random.randn(self.hidden_size, self.output_size) * self.weight_init_std
        self.params["B3"] = np.random.randn(self.output_size)
        self.layers["Affine2"] = Affine(self.params["W3"], self.params["B3"])
        
        self.last_layer = Softmax_with_Loss()
    
    def predict(self, x):
        data_n = x.shape[0]
        f = self.layers["Conv"].forward(x)
        f = self.layers["ReLU1"].forward(f)
        f = self.layers["Pooling"].forward(f)
        f = f.reshape(data_n, -1)
        h = self.layers["Affine1"].forward(f)
        h = self.layers["ReLU2"].forward(h)
        y = self.layers["Affine2"].forward(h)
        
        return y
    
    def loss(self, x, t):
        y = self.predict(x)
        loss = self.last_layer.forward(y, t)
        return loss
    
    def gradient(self, x, t):
        grads = {}
    
        # Forward pass 저장용
        self.loss(x, t)  # 내부적으로 forward 수행됨
    
        # Backward from Softmax
        dout = self.last_layer.backward()
    
        # Affine2 backward
        temp_grads = self.layers["Affine2"].backward(dout)
        grads["W3"] = temp_grads["W"]
        grads["B3"] = temp_grads["b"]
        dout = temp_grads["X"]
    
        # ReLU2 backward
        dout = self.layers["ReLU2"].backward(dout)
    
        # Affine1 backward
        temp_grads = self.layers["Affine1"].backward(dout)
        grads["W2"] = temp_grads["W"]
        grads["B2"] = temp_grads["b"]
        dout = temp_grads["X"]
    
        # Reshape to N x C x H x W before Pooling backward
        dout = dout.reshape(-1, self.filter_num, self.pooling_output_h, self.pooling_output_w)
    
        # Pooling backward
        dout = self.layers["Pooling"].backward(dout)
    
        # ReLU1 backward
        dout = self.layers["ReLU1"].backward(dout)
    
        # Conv backward
        temp_grads = self.layers["Conv"].backward(dout)
        grads["W1"] = temp_grads["W"]
        grads["B1"] = temp_grads["b"]
    
        return grads

In [262]:
# 배치 사이즈 10, 채널 3, 높이 28, 너비 28
x = np.random.rand(10, 3, 28, 28)

conv_param = {
    "filter_num": 16,
    "filter_size": (3, 3),
    "stride": 1,
    "pad": 0  # padding 1 → 크기 유지
}

In [263]:
network = SimpleConvNet(input_dim=x.shape, conv_param=conv_param, hidden_size=100, output_size=10)

In [None]:
out = network.predict(x)
print("출력 shape:", out.shape)

shape of cols_img : (6760, 27)
shape of cols_W : (27, 16)
shape of output : (6760, 16)
출력 shape: (10, 10)


: 

In [252]:
np.zeros((10, 16, 13, 13)).reshape(10, -1).shape

(10, 2704)

In [None]:
import numpy as np

x = np.arange(2*3*5*5).reshape(2, 3, 5, 5)
x.reshape(-1, )

array([[[[  0,   1,   2,   3,   4],
         [  5,   6,   7,   8,   9],
         [ 10,  11,  12,  13,  14],
         [ 15,  16,  17,  18,  19],
         [ 20,  21,  22,  23,  24]],

        [[ 25,  26,  27,  28,  29],
         [ 30,  31,  32,  33,  34],
         [ 35,  36,  37,  38,  39],
         [ 40,  41,  42,  43,  44],
         [ 45,  46,  47,  48,  49]],

        [[ 50,  51,  52,  53,  54],
         [ 55,  56,  57,  58,  59],
         [ 60,  61,  62,  63,  64],
         [ 65,  66,  67,  68,  69],
         [ 70,  71,  72,  73,  74]]],


       [[[ 75,  76,  77,  78,  79],
         [ 80,  81,  82,  83,  84],
         [ 85,  86,  87,  88,  89],
         [ 90,  91,  92,  93,  94],
         [ 95,  96,  97,  98,  99]],

        [[100, 101, 102, 103, 104],
         [105, 106, 107, 108, 109],
         [110, 111, 112, 113, 114],
         [115, 116, 117, 118, 119],
         [120, 121, 122, 123, 124]],

        [[125, 126, 127, 128, 129],
         [130, 131, 132, 133, 134],
         [135, 1