# 2. 합성곱 신경망 내부 구조

## 2.3  합성곱/풀링 구현

### 컨볼루션 층 테스트

In [None]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.layers import Convolution

# Smoke test for the project's Convolution layer on a tiny, easy-to-check input.
# One image, one channel, 4x4 pixels holding 0..15.
image = np.arange(16).reshape(1,1,4,4)
print(image.shape)
print(image)

# A single 2x2 all-ones filter over one channel, so each output value is
# presumably the sum of a 2x2 window plus the bias (verify against Convolution).
W = np.ones((1,1,2,2))
print(W)
# One bias entry with value 3.
b = np.full((1,), 3)
print(b)
conv = Convolution(W, b)
out = conv.forward(image)
print(out.shape)
print(out)

### im2col 함수 테스트

In [14]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.util import im2col

# Check the shape im2col returns for several input / filter sizes.
# Rows = N*OH*OW (one per filter position), columns = C*FH*FW (one per patch element).
x1 = np.random.rand(1, 3, 7, 7)
col1 = im2col(x1, 5, 5)
print(col1.shape)  # (9, 75) => (N*OH*OW, C*FH*FW)

x2 = np.random.rand(10, 3, 7, 7)
col2 = im2col(x2, 5, 5)
print(col2.shape)  # (90, 75)

# NOTE: col2 is reused (overwritten) for the remaining cases.
x3 = np.random.rand(2, 4, 5, 5)
col2 = im2col(x3, 2, 2)
print(col2.shape)  # (32, 16)

x4 = np.random.rand(1, 1, 4, 4)
col2 = im2col(x4, 2, 2)
print(col2.shape)  # (9,4)

(9, 75)
(90, 75)
(32, 16)
(9, 4)


### np.pad()의 동작

In [20]:
import numpy as np
# Demonstrates np.pad on a 4-D array: only the last two axes get `pad`
# entries before and after; the first two axes are never padded.
pad=0
input_data = np.arange(16).reshape((1,1,4,4))
print(input_data.shape)
print(input_data)
# With pad=0 every (before, after) pair is (0, 0), so img equals input_data.
img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
print(img)

(1, 1, 4, 4)
[[[[ 0  1  2  3]
   [ 4  5  6  7]
   [ 8  9 10 11]
   [12 13 14 15]]]]
[[[[ 0  1  2  3]
   [ 4  5  6  7]
   [ 8  9 10 11]
   [12 13 14 15]]]]


#### 배열 슬라이싱과 인덱싱의 차이점

In [22]:
# Integer indexing drops the axis (scalar result); slicing keeps it (length-1 array).
a = np.array([1,2,3,4])
print(a[1])    # 2
print(a[1:2])  # [2]

2
[2]


In [28]:
# Same comparison on a 2-D array: a[1] is a 1-D row, a[1:2] stays 2-D.
a = np.array([[1,2],
              [3,4]])

# shape[0] and len() both report the size of the first axis.
print(a.shape[0])
print(len(a))
print(a[1])    # [3,4]
print(a[1:2])  # [[3,4]]

2
2
[3 4]
[[3 4]]


In [31]:
# start:stop:step slicing on the two spatial axes: rows 0..2 and columns 0..2,
# i.e. the top-left 3x3 window of the 4x4 image.
img = np.arange(16).reshape(1,1,4,4)
print(img[:,:,0:3:1,0:3:1].shape)
print(img[:,:,0:3:1,0:3:1])

(1, 1, 3, 3)
[[[[ 0  1  2]
   [ 4  5  6]
   [ 8  9 10]]]]


### input 채널이 1개인 경우

In [44]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
# Convolution by hand for one 1-channel 4x4 image and one 2x2 filter (stride 1):
# unfold the image into a patch matrix, then do a single matrix product.
pad = 0
input_data = np.arange(16).reshape((1, 1, 4, 4))
pad_spec = [(0, 0), (0, 0), (pad, pad), (pad, pad)]
img = np.pad(input_data, pad_spec, 'constant')

# Axes of col: (N, C, filter_y, filter_x, out_y, out_x).
col = np.zeros((1, 1, 2, 2, 3, 3))
for fy in range(2):
    for fx in range(2):
        # For filter offset (fy, fx), copy the 3x3 grid of pixels that this
        # filter element touches across all 3x3 output positions.
        col[:, :, fy, fx, :, :] = img[:, :, fy:fy + 3, fx:fx + 3]

# Move the output positions in front, then flatten: one row per output
# position, one column per (channel, filter_y, filter_x) element.
col = col.transpose(0, 4, 5, 1, 2, 3).reshape(1*3*3, -1)

W = np.ones((1, 1, 2, 2))
print(W)
# Flatten the filter into a column so the convolution is a plain dot product.
col_W = W.reshape(1, -1).T
print(col_W.shape)
b = np.full((1,), 3)
out = np.dot(col, col_W) + b
print(out)
# Back to (N, out_h, out_w, FN); the channel-first transpose is left out here.
out = out.reshape(1, 3, 3, -1)
print(out.shape)
print(out)

[[[[1. 1.]
   [1. 1.]]]]
(4, 1)
[[13.]
 [17.]
 [21.]
 [29.]
 [33.]
 [37.]
 [45.]
 [49.]
 [53.]]
(1, 3, 3, 1)
[[[[13.]
   [17.]
   [21.]]

  [[29.]
   [33.]
   [37.]]

  [[45.]
   [49.]
   [53.]]]]


### input 채널이 2개인 경우

In [None]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
# Convolution by hand for one 2-channel 4x4 image and one 2x2 filter (stride 1).
pad=0
input_data = np.arange(32).reshape((1,2,4,4))
img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
# Axes of col: (N, C, filter_y, filter_x, out_y, out_x).
col = np.zeros((1, 2, 2, 2, 3, 3))

# For each filter offset, copy the 3x3 grid of pixels that offset touches
# across all output positions; both channels are copied at once.
col[:, :, 0, 0, :, :] = img[:, :, 0:3:1, 0:3:1]
col[:, :, 0, 1, :, :] = img[:, :, 0:3:1, 1:4:1]
col[:, :, 1, 0, :, :] = img[:, :, 1:4:1, 0:3:1]
col[:, :, 1, 1, :, :] = img[:, :, 1:4:1, 1:4:1]

# -> (N, out_y, out_x, C, filter_y, filter_x)
col = col.transpose(0, 4, 5, 1, 2, 3)
# One row per output position, one column per (C, filter_y, filter_x) element.
# Fixed: this previously reshaped an undefined name `ret` (NameError).
col = col.reshape( 1*3*3, -1 )
print(col.shape)
print(col)

W = np.ones((1,2,2,2))
# Flatten the filter to a (C*FH*FW, FN) matrix so it lines up with col's columns.
col_W = W.reshape(1, -1).T
b = np.full((1,),3)
out = np.dot(col, col_W)
out = out + b
# (N*OH*OW, FN) -> (N, OH, OW, FN) -> (N, FN, OH, OW)
out = out.reshape(1, 3, 3, -1).transpose(0, 3, 1, 2)
print( out )

### input 그림이 1개, 채널이 2개, 필터가 3개인 경우

In [None]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
# Convolution by hand for one 2-channel 4x4 image and three 2x2 filters (stride 1).
pad=0
input_data = np.arange(32).reshape((1,2,4,4))
img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
# Axes of col: (N, C, filter_y, filter_x, out_y, out_x).
col = np.zeros((1, 2, 2, 2, 3, 3))

# For each filter offset, copy the 3x3 grid of pixels that offset touches
# across all output positions; both channels are copied at once.
col[:, :, 0, 0, :, :] = img[:, :, 0:3:1, 0:3:1]
col[:, :, 0, 1, :, :] = img[:, :, 0:3:1, 1:4:1]
col[:, :, 1, 0, :, :] = img[:, :, 1:4:1, 0:3:1]
col[:, :, 1, 1, :, :] = img[:, :, 1:4:1, 1:4:1]

# -> (N, out_y, out_x, C, filter_y, filter_x)
col = col.transpose(0, 4, 5, 1, 2, 3)
# One row per output position, one column per (C, filter_y, filter_x) element.
# Fixed: this previously reshaped an undefined name `ret` (NameError).
col = col.reshape( 1*3*3, -1 )
print(col.shape)
print(col)

# Three filters (all ones, all twos, all threes), each spanning both channels:
# shape (FN, C, FH, FW) = (3, 2, 2, 2).
W = np.array([[[[1,1],[1,1]],[[1,1],[1,1]]],
              [[[2,2],[2,2]],[[2,2],[2,2]]],
              [[[3,3],[3,3]],[[3,3],[3,3]]]])
# Flatten to (C*FH*FW, FN): one column per filter.
col_W = W.reshape(3, -1).T
# One bias per filter; the values are the same as the former broadcast (1,) bias.
b = np.full((3,),3)
out = np.dot(col, col_W)
out = out + b
# (N*OH*OW, FN) -> (N, OH, OW, FN) -> (N, FN, OH, OW)
out = out.reshape(1, 3, 3, -1).transpose(0, 3, 1, 2)
print( out.shape )
print(out)

### input 그림이 2개, 채널이 2개, 필터가 3개인 경우

In [None]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
# Convolution by hand for two 2-channel 4x4 images and three 2x2 filters (stride 1).
pad = 0
input_data = np.arange(64).reshape((2, 2, 4, 4))
pad_spec = [(0, 0), (0, 0), (pad, pad), (pad, pad)]
img = np.pad(input_data, pad_spec, 'constant')

# Axes of col: (N, C, filter_y, filter_x, out_y, out_x).
col = np.zeros((2, 2, 2, 2, 3, 3))
for fy in range(2):
    for fx in range(2):
        # Pixels touched by filter element (fy, fx) over every output position.
        col[:, :, fy, fx, :, :] = img[:, :, fy:fy + 3, fx:fx + 3]

# -> (N, out_y, out_x, C, filter_y, filter_x), then one row per output position.
col = col.transpose(0, 4, 5, 1, 2, 3)
col = col.reshape(2*3*3, -1)
print(col.shape)
print(col)

# Three all-ones filters, flattened to one column per filter.
W = np.ones((3, 2, 2, 2)).reshape(3, -1).T
b = np.full((1,), 3)
out_temp = np.dot(col, W) + b
# (N*OH*OW, FN) -> (N, OH, OW, FN) -> (N, FN, OH, OW)
out = out_temp.reshape(2, 3, 3, -1)
out = out.transpose(0, 3, 1, 2)
print(out.shape)
print(out)

### 합성곱 연산의 정확한 고찰

In [45]:
import sys, os
sys.path.append(os.pardir)
import numpy as np

# Straightforward formulation: loop over every OUTPUT position (3x3 = 9
# iterations) and copy the 2x2 input window that the filter covers there.
filter_h=2
filter_w=2
out_h=3
out_w=3
input_data = np.arange(16).reshape((1,1,4,4))
print(input_data.shape)
# Axes of col here: (N, C, out_y, out_x, filter_y, filter_x).
col = np.zeros((1, 1,out_h, out_w, filter_h, filter_w))
print(col.shape)

for y in range(out_h):
    y_max = y + filter_h
    for x in range(out_w):
        x_max = x + filter_w
        col[:, :, y, x, :, :] = input_data[:, :, y:y_max, x:x_max]
# Both sides of the assignment have shape (1, 1, filter_h, filter_w) = (1, 1, 2, 2).

print(col)
# ret = col.transpose(0, 4, 5, 1, 2, 3)
# ret1 = ret.reshape( 3*3, -1 )
# print(ret1)

(1, 1, 4, 4)
(1, 1, 3, 3, 2, 2)
[[[[[[ 0.  1.]
     [ 4.  5.]]

    [[ 1.  2.]
     [ 5.  6.]]

    [[ 2.  3.]
     [ 6.  7.]]]


   [[[ 4.  5.]
     [ 8.  9.]]

    [[ 5.  6.]
     [ 9. 10.]]

    [[ 6.  7.]
     [10. 11.]]]


   [[[ 8.  9.]
     [12. 13.]]

    [[ 9. 10.]
     [13. 14.]]

    [[10. 11.]
     [14. 15.]]]]]]


In [47]:
import sys, os
sys.path.append(os.pardir)
import numpy as np

# Same unfolding, but looping over FILTER offsets (2x2 = 4 iterations) and
# copying a whole 3x3 grid of input pixels per iteration.
filter_h=2
filter_w=2
out_h=3
out_w=3
input_data = np.arange(16).reshape((1,1,4,4))
print(input_data.shape)
# Axes of col here: (N, C, filter_y, filter_x, out_y, out_x).
col = np.zeros((1, 1,filter_h, filter_w,out_h, out_w ))
print(col.shape)

for y in range(filter_h):
    y_max = y + out_h
    for x in range(filter_w):
        x_max = x + out_w
        col[:, :, y, x, :, :] = input_data[:, :, y:y_max, x:x_max]
# Both sides of the assignment have shape (1, 1, out_h, out_w) = (1, 1, 3, 3).

# print(col)
# Reorder to (N, out_y, out_x, C, filter_y, filter_x): the printed values match
# the per-output-position loop, only with the channel axis in a different place.
col = col.transpose(0, 4, 5, 1, 2, 3)
print(col)

(1, 1, 4, 4)
(1, 1, 2, 2, 3, 3)
[[[[[[ 0.  1.]
     [ 4.  5.]]]


   [[[ 1.  2.]
     [ 5.  6.]]]


   [[[ 2.  3.]
     [ 6.  7.]]]]



  [[[[ 4.  5.]
     [ 8.  9.]]]


   [[[ 5.  6.]
     [ 9. 10.]]]


   [[[ 6.  7.]
     [10. 11.]]]]



  [[[[ 8.  9.]
     [12. 13.]]]


   [[[ 9. 10.]
     [13. 14.]]]


   [[[10. 11.]
     [14. 15.]]]]]]


In [None]:
import numpy as np
def my_im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold a 4-D image batch into a 2-D matrix of filter-sized patches.

    Args:
        input_data: array of shape (N, C, H, W).
        filter_h: filter height.
        filter_w: filter width.
        stride: step between neighbouring filter positions.
        pad: number of zeros added on each side of the two spatial axes.

    Returns:
        Float array of shape (N*out_h*out_w, C*filter_h*filter_w); each row
        holds the input values under one filter position.
    """
    batch, channels, height, width = input_data.shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    spatial_pad = (pad, pad)
    padded = np.pad(input_data, [(0, 0), (0, 0), spatial_pad, spatial_pad], 'constant')
    patches = np.zeros((batch, channels, filter_h, filter_w, out_h, out_w))

    # Loop over filter offsets rather than output positions: each iteration
    # copies a strided (out_h, out_w) grid of pixels in one assignment.
    for fy in range(filter_h):
        rows = slice(fy, fy + stride * out_h, stride)
        for fx in range(filter_w):
            cols = slice(fx, fx + stride * out_w, stride)
            patches[:, :, fy, fx, :, :] = padded[:, :, rows, cols]

    # (N, C, FH, FW, OH, OW) -> (N, OH, OW, C, FH, FW), then one row per position.
    patches = patches.transpose(0, 4, 5, 1, 2, 3)
    return patches.reshape(batch * out_h * out_w, -1)

In [None]:
# Smaller alternative inputs for inspecting the result by eye (uncomment one):
# input_data = np.arange(9).reshape((1,1,3,3))  
# input_data = np.arange(18).reshape((2,1,3,3))            
# input_data = np.arange(18).reshape((1,2,3,3))            
# input_data = np.arange(54).reshape((2,3,3,3))    
# Realistic size: one 3-channel 448x640 image with a 5x5 filter gives
# out_h = 444, out_w = 636, so 444*636 = 282384 rows and 3*5*5 = 75 columns.
input_data = np.random.randn(1,3,448,640)
my_col = my_im2col(input_data, 5, 5)            
print(my_col.shape)   # (282384, 75)     

### Convolution test

In [None]:
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
from common.layers import *

# Convolution layer on a 3x3 input with one 2x2 all-ones filter and bias 3.
w = np.ones((1,1,2,2))
# print(w)
b = np.full((1,), 3)
# print(b)
conv = Convolution(w, b)

# NOTE(review): output is presumably 2x2 with default stride/pad - confirm
# against common.layers.Convolution.
x = np.arange(9).reshape((1,1,3,3))
out = conv.forward(x)
print(out)

In [None]:
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
from common.layers import *

# Same layer configuration as the 3x3 test, applied to a 4x4 input.
w = np.ones((1,1,2,2))
# print(w)
b = np.full((1,), 3)
# print(b)
conv = Convolution(w, b)

x = np.arange(16).reshape((1,1,4,4))
out = conv.forward(x)
print(out)

In [None]:
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
from common.layers import *

# Same 3x3 input and 2x2 all-ones filter, but with pad=1 passed to the layer.
w = np.ones((1,1,2,2))
# print(w)
b = np.full((1,), 3)
# print(b)
# NOTE(review): pad=1 presumably zero-pads each spatial side, giving a larger
# output - confirm against common.layers.Convolution.
conv = Convolution(w, b, pad=1)

x = np.arange(9).reshape((1,1,3,3))
out = conv.forward(x)
print(out)

### 합성곱 미분

In [None]:
def my_col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    """Fold a patch matrix back into an image batch, summing overlaps.

    Args:
        col: 2-D array with N*out_h*out_w*C*filter_h*filter_w elements,
            reshaped internally to (N, out_h, out_w, C, filter_h, filter_w).
        input_shape: target image shape (N, C, H, W).
        filter_h: filter height.
        filter_w: filter width.
        stride: step between neighbouring filter positions.
        pad: zero padding that was applied on each spatial side.

    Returns:
        Array of shape (N, C, H, W). A pixel covered by several filter
        positions receives the sum of all patch values written to it.
    """
    batch, channels, height, width = input_shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    col = col.reshape(batch, out_h, out_w, channels, filter_h, filter_w)
    # Walkthrough output: show the patches before they are folded back.
    print(col.shape)
    print(col)
    # -> (N, C, FH, FW, OH, OW)
    patches = col.transpose(0, 3, 4, 5, 1, 2)

    # Work on the padded canvas (plus stride-1 slack so strided slices fit).
    padded_h = height + 2 * pad + stride - 1
    padded_w = width + 2 * pad + stride - 1
    canvas = np.zeros((batch, channels, padded_h, padded_w))

    for fy in range(filter_h):
        rows = slice(fy, fy + stride * out_h, stride)
        for fx in range(filter_w):
            cols = slice(fx, fx + stride * out_w, stride)
            # Accumulate: overlapping windows add their contributions.
            canvas[:, :, rows, cols] += patches[:, :, fy, fx, :, :]

    # Strip the padding border again.
    return canvas[:, :, pad:height + pad, pad:width + pad]

In [None]:
# Backward pass of a convolution, done by hand.
# col is the im2col matrix of arange(16) viewed as a 4x4 image with a 2x2
# filter: one row per output position, row-major over the 3x3 output.
# Fixed: the row for window (2, 0) was [7,9,12,13]; the window actually holds
# [8,9,12,13], and the typo corrupted dW.
col = np.array([[0,1,4,5],
                [1,2,5,6],
                [2,3,6,7],
                [4,5,8,9],
                [5,6,9,10],
                [6,7,10,11],
                [8,9,12,13],
                [9,10,13,14],
                [10,11,14,15]])
# Upstream gradient, shape (N, FN, OH, OW).
dout = np.arange(9).reshape(1,1,3,3)
print(dout)
W = np.ones((1,1,2,2))
# The filter flattened to (C*FH*FW, FN), as used in the forward dot product.
col_W = np.ones((4,1))
FN, C, FH, FW = W.shape
# (N, FN, OH, OW) -> (N, OH, OW, FN) -> (N*OH*OW, FN), matching col's row order.
dout = dout.transpose(0,2,3,1).reshape(-1, FN)
# Bias gradient: sum of the upstream gradient over all output positions.
db = np.sum(dout, axis=0)
# Weight gradient: col^T . dout, then reshaped back to the filter layout.
dW = np.dot(col.T, dout)
dW = dW.transpose(1, 0).reshape(FN, C, FH, FW)
# Gradient w.r.t. the patch matrix, folded back into image shape.
dcol = np.dot(dout, col_W.T)
dx=my_col2im(dcol, (1,1,4,4), 2, 2)
print(dx)

In [None]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.util import im2col, col2im

# Unfold a 4x4 image with the project's im2col, then fold it back with my_col2im.
x1 = np.arange(16).reshape((1,1,4,4))
col = im2col(x1, 2, 2)
print(x1)
print(col)

# my_col2im accumulates overlapping patches with +=, so x2 is not a copy of
# x1: a pixel covered by several 2x2 windows is summed once per window.
x2 = my_col2im(col,x1.shape, 2, 2)
print(x2)

### 풀링 소스 분석

In [None]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.util import im2col
from common.util import col2im
from common.layers import Convolution

class MyPooling:
    """Max-pooling layer that prints its intermediate arrays for study.

    forward() unfolds the input with my_im2col and takes the maximum of each
    pooling window; backward() routes each upstream gradient to the position
    that held the maximum and folds the result back with my_col2im.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        """Store the window size (pool_h x pool_w), stride and padding."""
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        # Cached by forward() for use in backward().
        self.x = None
        self.arg_max = None

    def forward(self, x):
        """Max-pool x of shape (N, C, H, W); returns (N, C, out_h, out_w)."""
        N, C, H, W = x.shape
        # Fixed: include the padding so the output size agrees with what
        # my_im2col produces; without it any pad > 0 broke the reshape below.
        # For pad == 0 this gives the same values as before.
        out_h = (H + 2*self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2*self.pad - self.pool_w) // self.stride + 1

        col = my_im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        print('col', col)
        # One row per (position, channel) window, one column per window element.
        col = col.reshape(-1, self.pool_h*self.pool_w)
        print('col', col)

        # Remember which element of each window was the maximum.
        arg_max = np.argmax(col, axis=1)
        print('arg_max', arg_max)
        out = np.max(col, axis=1)
        print('out', out)
        # Rows are ordered (N, out_h, out_w, C); move channels to axis 1.
        out = out.reshape(N, out_h, out_w, C)
        print('out', out)
        out = out.transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max

        return out

    def backward(self, dout):
        """Propagate dout of shape (N, C, out_h, out_w) back to the input shape."""
        # Match the (N, out_h, out_w, C) row order used in forward().
        dout = dout.transpose(0, 2, 3, 1)

        pool_size = self.pool_h * self.pool_w
        # Each window passes its gradient only to the element that was the max.
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        print('dmax', dmax)
        dmax = dmax.reshape(dout.shape + (pool_size,))

        # Collapse to one row per output position, as my_col2im expects.
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        print('dcol', dcol)
        dx = my_col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)

        return dx

### 채널이 1개인 경우 맥스 풀링 테스트

In [None]:
# One image, one channel, 4x4 pixels.
x = np.array([[[[ 7, 11, 13, 15],
                [ 3,  4,  2,  3],
                [ 1,  2, 17,  9],
                [ 1,  8,  3, 10]]]])
print(x)
# 2x2 window with stride 2: four non-overlapping windows.
pool = MyPooling(2,2,2)
out = pool.forward(x)
print(out)

### 채널이 2개인 경우 맥스 풀링 테스트

In [None]:
# One image, two channels, 4x4 pixels each.
x = np.array([[[[ 7, 11, 13, 15],
                [ 3,  4,  2,  3],
                [ 1,  2, 17,  9],
                [ 1,  8,  3, 10]],
               [[ 1,  5, 13,  6],
                [ 9,  4,  8,  7],
                [ 2, 10,  5,  9],
                [ 3,  7,  3, 16]]]])
print(x)
# 2x2 window with stride 2; `pool` is reused by the backward cell that follows.
pool = MyPooling(2,2,2)
out = pool.forward(x)
print(out)

In [None]:
# Upstream gradient 1..8 shaped (1, 2, 2, 2), i.e. the shape of the pooled
# output from the two-channel forward pass. Relies on `pool` still holding the
# x and arg_max cached by that forward call.
dout = np.arange(1,9).reshape(1,2,2,2)
# print(dout.shape)
dx = pool.backward(dout)
print(dx)