# 2. 합성곱 신경망 내부 구조

## 2.3  합성곱/풀링 구현

### 컨볼루션 층 테스트

In [1]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.layers import Convolution

# One image with one channel, 4x4 pixels, holding the values 0..15.
image = np.arange(16).reshape(1,1,4,4)
print(image.shape)
print(image)

# One 2x2 filter of ones over one input channel, plus a single bias value of 3.
W = np.ones((1,1,2,2))
print(W)
b = np.full((1,), 3)
print(b)
# No stride/pad arguments are passed, so the layer's own defaults apply.
conv = Convolution(W, b)
out = conv.forward(image)
# Each printed output value is the sum of one 2x2 window plus the bias,
# e.g. 0 + 1 + 4 + 5 + 3 = 13 for the top-left window.
print(out.shape)
print(out)

(1, 1, 4, 4)
[[[[ 0  1  2  3]
   [ 4  5  6  7]
   [ 8  9 10 11]
   [12 13 14 15]]]]
[[[[1. 1.]
   [1. 1.]]]]
[3]
(1, 1, 3, 3)
[[[[13. 17. 21.]
   [29. 33. 37.]
   [45. 49. 53.]]]]


### im2col 함수 테스트

In [2]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.util import im2col

# Shape check for im2col: as the printed shapes show, the result has
# N*out_h*out_w rows and C*filter_h*filter_w columns.

# 1 image, 3 channels, 7x7 input, 5x5 filter -> 3x3 windows -> (9, 75)
x1 = np.random.rand(1, 3, 7, 7)
col1 = im2col(x1, 5, 5)
print(col1.shape)

# Same geometry with a batch of 10 images -> (90, 75)
x2 = np.random.rand(10, 3, 7, 7)
col2 = im2col(x2, 5, 5)
print(col2.shape)

# 2 images, 4 channels, 5x5 input, 2x2 filter -> 4x4 windows -> (32, 16)
x3 = np.random.rand(2, 4, 5, 5)
col3 = im2col(x3, 2, 2)  # own name, so col2 keeps holding the x2 result
print(col3.shape)

(9, 75)
(90, 75)
(32, 16)


### np.pad()의 동작

In [3]:
import numpy as np
# Width of the border added on each side of the two spatial axes.
pad=1
input_data = np.arange(9).reshape((1,1,3,3))
print(input_data)
# One (before, after) pair per axis: the first two axes get (0,0), i.e. no
# padding; the last two axes get `pad` entries on both sides.
# As the printed result shows, the 3x3 image becomes 5x5 with a zero border.
img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
print(img)

[[[[0 1 2]
   [3 4 5]
   [6 7 8]]]]
[[[[0 0 0 0 0]
   [0 0 1 2 0]
   [0 3 4 5 0]
   [0 6 7 8 0]
   [0 0 0 0 0]]]]


### input 채널이 1개인 경우

In [4]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
# Hand-unrolled im2col + matrix-multiply convolution:
# 1 image, 1 channel, 3x3 input, one 2x2 filter, stride 1, no padding.
pad=0
input_data = np.array([[[[0,1,2],
                         [3,4,5],
                         [6,7,8]]]])
img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
# col axes: (N, C, filter_y, filter_x, out_y, out_x)
col = np.zeros((1, 1, 2, 2, 2, 2))

# print(img.shape)
# print(img)
# print(col.shape)
# print(img[:, :, 0:2:1, 0:2:1])
# print(img[:, :, 0:2:1, 0:2:1].shape)
# print(col[:, :, 0, 0, :, :].shape)
# For each filter offset (y, x), copy the 2x2 grid of input pixels that this
# filter element touches across all output positions.
col[:, :, 0, 0, :, :] = img[:, :, 0:2:1, 0:2:1]
col[:, :, 0, 1, :, :] = img[:, :, 0:2:1, 1:3:1]
col[:, :, 1, 0, :, :] = img[:, :, 1:3:1, 0:2:1]
col[:, :, 1, 1, :, :] = img[:, :, 1:3:1, 1:3:1]

# print(col)
# Reorder to (N, out_y, out_x, C, filter_y, filter_x) so that each output
# position becomes one contiguous row after the reshape.
ret = col.transpose(0, 4, 5, 1, 2, 3)
# print(ret.shape)
# print(ret)
# Rows = N*out_h*out_w = 1*2*2; columns = C*filter_h*filter_w.
col = ret.reshape( 1*2*2, -1 )
print(col.shape)
print(col)

W = np.ones((1,1,2,2))
print(W)
# Flatten each filter into one column: (FN, C*FH*FW) -> (C*FH*FW, FN).
col_W = W.reshape(1, -1).T
print(col_W.shape)
b = np.full((1,),3)
# Convolution as a single matrix product plus bias.
out = np.dot(col, col_W) + b
# (N*out_h*out_w, FN) -> (N, out_h, out_w, FN) -> (N, FN, out_h, out_w)
out = out.reshape(1, 2, 2, -1).transpose(0, 3, 1, 2)
print( out )

(4, 4)
[[0. 1. 3. 4.]
 [1. 2. 4. 5.]
 [3. 4. 6. 7.]
 [4. 5. 7. 8.]]
[[[[1. 1.]
   [1. 1.]]]]
(4, 1)
[[[[11. 15.]
   [23. 27.]]]]


### input 채널이 2개인 경우

In [5]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
# Hand-unrolled im2col + matrix-multiply convolution:
# 1 image, 2 channels, 3x3 input, one 2x2 filter spanning both channels.
pad=0
input_data = np.arange(18).reshape((1,2,3,3))
img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
# col axes: (N, C, filter_y, filter_x, out_y, out_x)
col = np.zeros((1, 2, 2, 2, 2, 2))

# print(img.shape)
# print(img)
# print(col.shape)
# print(img[:, :, 0:2:1, 0:2:1].shape)  # (1, 2, 2, 2)
# print(col[:, :, 0, 0, :, :].shape)
# One assignment per filter offset (y, x); both channels are copied at once.
col[:, :, 0, 0, :, :] = img[:, :, 0:2:1, 0:2:1]
col[:, :, 0, 1, :, :] = img[:, :, 0:2:1, 1:3:1]
col[:, :, 1, 0, :, :] = img[:, :, 1:3:1, 0:2:1]
col[:, :, 1, 1, :, :] = img[:, :, 1:3:1, 1:3:1]

# print(col)
# Reorder to (N, out_y, out_x, C, filter_y, filter_x) before flattening.
ret = col.transpose(0, 4, 5, 1, 2, 3)
# # print(ret.shape)
# # print(ret)
# Each row now holds the 2x2 window of channel 0 followed by that of channel 1.
col = ret.reshape( 1*2*2, -1 )
print(col.shape)
print(col)

W = np.ones((1,2,2,2))
# print(W)
# Flatten the filter into one column of length C*FH*FW = 8.
col_W = W.reshape(1, -1).T
print(col_W.shape)
b = np.full((1,),3)
out = np.dot(col, col_W) + b
# (N*out_h*out_w, FN) -> (N, out_h, out_w, FN) -> (N, FN, out_h, out_w)
out = out.reshape(1, 2, 2, -1).transpose(0, 3, 1, 2)
print( out )

(4, 8)
[[ 0.  1.  3.  4.  9. 10. 12. 13.]
 [ 1.  2.  4.  5. 10. 11. 13. 14.]
 [ 3.  4.  6.  7. 12. 13. 15. 16.]
 [ 4.  5.  7.  8. 13. 14. 16. 17.]]
(8, 1)
[[[[55. 63.]
   [79. 87.]]]]


### input 그림이 1개, 채널이 2개, 필터가 3개인 경우

In [6]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
# Hand-unrolled im2col + matrix-multiply convolution:
# 1 image, 2 channels, 3x3 input, three 2x2 filters, stride 1, no padding.
pad=0
input_data = np.arange(18).reshape((1,2,3,3))
img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
# col axes: (N, C, filter_y, filter_x, out_y, out_x)
col = np.zeros((1, 2, 2, 2, 2, 2))

# One assignment per filter offset (y, x); both channels are copied at once.
col[:, :, 0, 0, :, :] = img[:, :, 0:2:1, 0:2:1]
col[:, :, 0, 1, :, :] = img[:, :, 0:2:1, 1:3:1]
col[:, :, 1, 0, :, :] = img[:, :, 1:3:1, 0:2:1]
col[:, :, 1, 1, :, :] = img[:, :, 1:3:1, 1:3:1]

# Reorder to (N, out_y, out_x, C, filter_y, filter_x), then one row per
# output position.
ret = col.transpose(0, 4, 5, 1, 2, 3)
col = ret.reshape( 1*2*2, -1 )
print(col.shape)

W = np.ones((3,2,2,2))
# One column per filter: (FN, C*FH*FW) -> (C*FH*FW, FN).
col_W = W.reshape(3, -1).T
print(col_W.shape)
# One bias entry per filter, instead of a length-1 bias that only happens to
# broadcast across the three filter columns.
b = np.full((W.shape[0],), 3)
out = np.dot(col, col_W) + b
print(out.shape)
# (N*out_h*out_w, FN) -> (N, out_h, out_w, FN) -> (N, FN, out_h, out_w)
out = out.reshape(1, 2, 2, -1).transpose(0, 3, 1, 2)
print( out.shape )
print(out)

(4, 8)
(8, 3)
(4, 3)
(1, 3, 2, 2)
[[[[55. 63.]
   [79. 87.]]

  [[55. 63.]
   [79. 87.]]

  [[55. 63.]
   [79. 87.]]]]


### input 그림이 2개, 채널이 2개, 필터가 3개인 경우

In [7]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
# Hand-unrolled im2col + matrix multiply for a batch:
# 2 images, 2 channels, 3x3 input, three 2x2 filters. No bias is added here.
pad=0
input_data = np.arange(36).reshape((2,2,3,3))
img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
# col axes: (N, C, filter_y, filter_x, out_y, out_x)
col = np.zeros((2, 2, 2, 2, 2, 2))

# print(img.shape)
# print(img)
# print(col.shape)
# print(img[:, :, 0:2:1, 0:2:1])
# print(col)
# One assignment per filter offset (y, x); all images and channels at once.
col[:, :, 0, 0, :, :] = img[:, :, 0:2:1, 0:2:1]
col[:, :, 0, 1, :, :] = img[:, :, 0:2:1, 1:3:1]
col[:, :, 1, 0, :, :] = img[:, :, 1:3:1, 0:2:1]
col[:, :, 1, 1, :, :] = img[:, :, 1:3:1, 1:3:1]

# print(col)
# Reorder to (N, out_y, out_x, C, filter_y, filter_x) before flattening.
ret = col.transpose(0, 4, 5, 1, 2, 3)
# print(ret.shape)
# print(ret)
# Rows = N*out_h*out_w = 2*2*2: the first four rows belong to image 0,
# the last four to image 1.
ret1 = ret.reshape( 2*2*2, -1 )
# # print(ret1.shape)
print(ret1)

W = np.ones((3,2,2,2))
# # # print(W)
# W is rebound to its flattened form: one column per filter, shape (8, 3).
W = W.reshape(3,-1).T
# # # print(W)
out_temp = np.dot(ret1, W)
# (N*out_h*out_w, FN) -> (N, out_h, out_w, FN) -> (N, FN, out_h, out_w)
out = out_temp.reshape(2, 2, 2, -1).transpose(0, 3, 1, 2)
print(out.shape)

[[ 0.  1.  3.  4.  9. 10. 12. 13.]
 [ 1.  2.  4.  5. 10. 11. 13. 14.]
 [ 3.  4.  6.  7. 12. 13. 15. 16.]
 [ 4.  5.  7.  8. 13. 14. 16. 17.]
 [18. 19. 21. 22. 27. 28. 30. 31.]
 [19. 20. 22. 23. 28. 29. 31. 32.]
 [21. 22. 24. 25. 30. 31. 33. 34.]
 [22. 23. 25. 26. 31. 32. 34. 35.]]
(2, 3, 2, 2)


### 합성곱 연산의 정확한 고찰

In [8]:
import sys, os
sys.path.append(os.pardir)
import numpy as np

# Layout A: loop over OUTPUT positions and copy one filter-sized window each.
# 4x4 input, 2x2 filter, stride 1 -> 3x3 output positions.
filter_h=2
filter_w=2
out_h=3
out_w=3
input_data = np.arange(16).reshape((1,1,4,4))
print(input_data.shape)
# col axes: (N, C, out_y, out_x, filter_y, filter_x)
col = np.zeros((1, 1,out_h, out_w, filter_h, filter_w))
print(col.shape)

# out_h * out_w = 9 small assignments, one per window.
for y in range(out_h):
    y_max = y + filter_h
    for x in range(out_w):
        x_max = x + filter_w
        col[:, :, y, x, :, :] = input_data[:, :, y:y_max, x:x_max]
        # Both sides have shape (1, 1, 2, 2): (N, C, filter_h, filter_w).

print(col)
# ret = col.transpose(0, 4, 5, 1, 2, 3)
# ret1 = ret.reshape( 3*3, -1 )
# print(ret1)

(1, 1, 4, 4)
(1, 1, 3, 3, 2, 2)
[[[[[[ 0.  1.]
     [ 4.  5.]]

    [[ 1.  2.]
     [ 5.  6.]]

    [[ 2.  3.]
     [ 6.  7.]]]


   [[[ 4.  5.]
     [ 8.  9.]]

    [[ 5.  6.]
     [ 9. 10.]]

    [[ 6.  7.]
     [10. 11.]]]


   [[[ 8.  9.]
     [12. 13.]]

    [[ 9. 10.]
     [13. 14.]]

    [[10. 11.]
     [14. 15.]]]]]]


In [9]:
import sys, os
sys.path.append(os.pardir)
import numpy as np

# Layout B: loop over FILTER offsets and copy a whole out_h x out_w block each.
# Same geometry as before: 4x4 input, 2x2 filter, stride 1 -> 3x3 outputs.
filter_h=2
filter_w=2
out_h=3
out_w=3
input_data = np.arange(16).reshape((1,1,4,4))
print(input_data.shape)
# col axes: (N, C, filter_y, filter_x, out_y, out_x)
col = np.zeros((1, 1,filter_h, filter_w,out_h, out_w ))
print(col.shape)

# Only filter_h * filter_w = 4 assignments, each moving a 3x3 block.
for y in range(filter_h):
    y_max = y + out_h
    for x in range(filter_w):
        x_max = x + out_w
        col[:, :, y, x, :, :] = input_data[:, :, y:y_max, x:x_max]
        # Both sides have shape (1, 1, 3, 3): (N, C, out_h, out_w).

print(col)
# Reorder to (N, out_y, out_x, C, filter_y, filter_x): the innermost 2x2
# blocks are now the individual filter windows, as the second printout shows.
col = col.transpose(0, 4, 5, 1, 2, 3)
print(col)

(1, 1, 4, 4)
(1, 1, 2, 2, 3, 3)
[[[[[[ 0.  1.  2.]
     [ 4.  5.  6.]
     [ 8.  9. 10.]]

    [[ 1.  2.  3.]
     [ 5.  6.  7.]
     [ 9. 10. 11.]]]


   [[[ 4.  5.  6.]
     [ 8.  9. 10.]
     [12. 13. 14.]]

    [[ 5.  6.  7.]
     [ 9. 10. 11.]
     [13. 14. 15.]]]]]]
[[[[[[ 0.  1.]
     [ 4.  5.]]]


   [[[ 1.  2.]
     [ 5.  6.]]]


   [[[ 2.  3.]
     [ 6.  7.]]]]



  [[[[ 4.  5.]
     [ 8.  9.]]]


   [[[ 5.  6.]
     [ 9. 10.]]]


   [[[ 6.  7.]
     [10. 11.]]]]



  [[[[ 8.  9.]
     [12. 13.]]]


   [[[ 9. 10.]
     [13. 14.]]]


   [[[10. 11.]
     [14. 15.]]]]]]


In [10]:
import numpy as np
def my_im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold every filter-sized window of a 4-D batch into one row of a 2-D array.

    Parameters
    ----------
    input_data : array of shape (N, C, H, W)
    filter_h, filter_w : height and width of the sliding window
    stride : step between neighbouring windows
    pad : number of zeros added on each side of the two spatial axes

    Returns
    -------
    Array of shape (N*out_h*out_w, C*filter_h*filter_w); each row holds one
    window, channel by channel.
    """
    batch, channels, height, width = input_data.shape
    out_h = (height + 2*pad - filter_h)//stride + 1
    out_w = (width + 2*pad - filter_w)//stride + 1

    untouched, border = (0, 0), (pad, pad)
    padded = np.pad(input_data, [untouched, untouched, border, border], 'constant')
    # Axes: (N, C, filter_y, filter_x, out_y, out_x)
    patches = np.zeros((batch, channels, filter_h, filter_w, out_h, out_w))

    # One strided block copy per filter offset instead of one per window.
    for dy, dx in np.ndindex(filter_h, filter_w):
        rows = slice(dy, dy + stride*out_h, stride)
        cols = slice(dx, dx + stride*out_w, stride)
        patches[:, :, dy, dx, :, :] = padded[:, :, rows, cols]

    # (N, out_y, out_x, C, filter_y, filter_x) -> one row per output position.
    by_position = patches.transpose(0, 4, 5, 1, 2, 3)
    return by_position.reshape(batch*out_h*out_w, -1)
            
# Alternative small inputs, kept for manual experiments:
# input_data = np.arange(9).reshape((1,1,3,3))
# input_data = np.arange(18).reshape((2,1,3,3))
# input_data = np.arange(18).reshape((1,2,3,3))
# input_data = np.arange(54).reshape((2,3,3,3))
# Large case: 1 image, 3 channels, 448x640, 5x5 filter, stride 1, no padding.
# out_h = 448-5+1 = 444, out_w = 640-5+1 = 636 -> 444*636 = 282384 rows,
# and 3*5*5 = 75 columns.
input_data = np.random.randn(1,3,448,640)
my_col = my_im2col(input_data, 5, 5)
print(my_col.shape)   # (282384, 75)

(282384, 75)


### Convolution test

In [11]:
import sys, os
sys.path.append(os.pardir)  # allow importing modules from the parent directory
import numpy as np
from common.layers import *

# One 2x2 filter of ones over one channel, bias 3.
w = np.ones((1,1,2,2))
# print(w)
b = np.full((1,), 3)
# print(b)
conv = Convolution(w, b)

# 3x3 input holding 0..8; the printed 2x2 result is each window sum plus 3.
x = np.arange(9).reshape((1,1,3,3))
out = conv.forward(x)
print(out)

[[[[11. 15.]
   [23. 27.]]]]


In [12]:
import sys, os
sys.path.append(os.pardir)  # allow importing modules from the parent directory
import numpy as np
from common.layers import *

# One 2x2 filter of ones over one channel, bias 3.
w = np.ones((1,1,2,2))
# print(w)
b = np.full((1,), 3)
# print(b)
conv = Convolution(w, b)

# 4x4 input holding 0..15; the printed result is 3x3.
x = np.arange(16).reshape((1,1,4,4))
out = conv.forward(x)
print(out)

[[[[13. 17. 21.]
   [29. 33. 37.]
   [45. 49. 53.]]]]


In [13]:
import sys, os
sys.path.append(os.pardir)  # allow importing modules from the parent directory
import numpy as np
from common.layers import *

# One 2x2 filter of ones over one channel, bias 3.
w = np.ones((1,1,2,2))
# print(w)
b = np.full((1,), 3)
# print(b)
# Same filter as before, but with a padding of 1 around the input.
conv = Convolution(w, b, pad=1)

# 3x3 input holding 0..8; with pad=1 the printed result grows to 4x4.
# The top-left value is 3: an all-zero window sum plus the bias.
x = np.arange(9).reshape((1,1,3,3))
out = conv.forward(x)
print(out)

[[[[ 3.  4.  6.  5.]
   [ 6. 11. 15. 10.]
   [12. 23. 27. 16.]
   [ 9. 16. 18. 11.]]]]


### 합성곱 미분

In [14]:
def my_col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    """Fold an im2col-style 2-D array back into a 4-D image batch.

    Values that originate from overlapping windows are summed, which is the
    accumulation needed when propagating gradients back through im2col.

    Parameters
    ----------
    col : array of shape (N*out_h*out_w, C*filter_h*filter_w)
    input_shape : (N, C, H, W) of the image batch to rebuild
    filter_h, filter_w : height and width of the sliding window
    stride : step between neighbouring windows
    pad : padding that was applied on each side of the spatial axes

    Returns
    -------
    Array of shape (N, C, H, W).
    """
    batch, channels, height, width = input_shape
    out_h = (height + 2*pad - filter_h)//stride + 1
    out_w = (width + 2*pad - filter_w)//stride + 1

    # (N, out_y, out_x, C, filter_y, filter_x) -> (N, C, filter_y, filter_x, out_y, out_x)
    patches = col.reshape(batch, out_h, out_w, channels, filter_h, filter_w)
    patches = patches.transpose(0, 3, 4, 5, 1, 2)

    # The canvas includes the padding plus stride-1 spare rows/columns so the
    # strided slices below always match the (out_h, out_w) block shape.
    canvas_h = height + 2*pad + stride - 1
    canvas_w = width + 2*pad + stride - 1
    canvas = np.zeros((batch, channels, canvas_h, canvas_w))

    for dy, dx in np.ndindex(filter_h, filter_w):
        rows = slice(dy, dy + stride*out_h, stride)
        cols = slice(dx, dx + stride*out_w, stride)
        canvas[:, :, rows, cols] += patches[:, :, dy, dx, :, :]

    # Crop the padding (and spare border) away again.
    return canvas[:, :, pad:height + pad, pad:width + pad]

In [15]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.util import im2col, col2im

# Unfold a 4x4 image into 2x2 windows (stride 1), then fold it back.
x1 = np.arange(16).reshape((1,1,4,4))
col = im2col(x1, 2, 2)
print(x1)
# print(col)

# my_col2im SUMS overlapping contributions, so this is not an exact inverse:
# in the printed result a pixel covered by k windows appears multiplied by k
# (corners x1, edges x2, interior x4 -- e.g. 5 becomes 20).
x2 = my_col2im(col,x1.shape, 2, 2)
print(x2)

[[[[ 0  1  2  3]
   [ 4  5  6  7]
   [ 8  9 10 11]
   [12 13 14 15]]]]
[[[[ 0.  2.  4.  3.]
   [ 8. 20. 24. 14.]
   [16. 36. 40. 22.]
   [12. 26. 28. 15.]]]]


### 풀링 소스 분석

In [16]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.util import im2col
from common.util import col2im
from common.layers import Convolution

class MyPooling:
    """Max-pooling layer built on im2col/col2im that prints each intermediate step.

    forward() caches its input and the arg-max index of every pooling window
    so that backward() can route each upstream gradient to the element that
    produced the maximum.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        """Store the window size (pool_h x pool_w), the stride and the padding."""
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        self.x = None        # input of the most recent forward() call
        self.arg_max = None  # per-window index of the maximum, set by forward()

    def forward(self, x):
        """Apply max pooling to x of shape (N, C, H, W).

        Returns an array of shape (N, C, out_h, out_w).
        """
        N, C, H, W = x.shape
        # The padding must enter the output size, because im2col below is
        # called with self.pad; leaving it out makes the final reshape
        # inconsistent with the number of windows whenever pad > 0.
        # NOTE(review): im2col presumably zero-pads, so with pad > 0 a padded
        # zero can win the max over all-negative windows -- confirm if relevant.
        out_h = (H + 2*self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2*self.pad - self.pool_w) // self.stride + 1

        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        print('col', col)
        # Split the channels apart: one row per (window, channel) pair.
        col = col.reshape(-1, self.pool_h*self.pool_w)
        print('col', col)

        arg_max = np.argmax(col, axis=1)
        print('arg_max', arg_max)
        out = np.max(col, axis=1)
        print('out', out)
        # Rows are ordered (N, out_h, out_w, C); move channels back to axis 1.
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
        print('out', out)

        self.x = x
        self.arg_max = arg_max

        return out

    def backward(self, dout):
        """Propagate dout of shape (N, C, out_h, out_w) back to the input.

        Must be called after forward(). Returns the result of col2im for the
        cached input shape.
        """
        # Channels last, matching the row order used in forward().
        dout = dout.transpose(0, 2, 3, 1)

        pool_size = self.pool_h * self.pool_w
        # One row per (window, channel) pair; only the arg-max column of each
        # row receives the upstream gradient, all other entries stay zero.
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        print('dmax', dmax)
        dmax = dmax.reshape(dout.shape + (pool_size,))

        # Merge (N, out_h, out_w) into rows, giving the 2-D layout col2im expects.
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        print('dcol', dcol)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)

        return dx

### 채널이 1개인 경우 맥스 풀링 테스트

In [17]:
# Pooling walkthrough input: 1 image, 1 channel, 4x4 pixels -> shape (1, 1, 4, 4).
x = np.array([[[[ 7, 11, 13, 15],
                [ 3,  4,  2,  3],
                [ 1,  2, 15,  9],
                [ 1,  8,  3, 10]]]])
print(x)

[[[[ 7 11 13 15]
   [ 3  4  2  3]
   [ 1  2 15  9]
   [ 1  8  3 10]]]]


In [18]:
# 2x2 window with stride 2 (pad keeps its default of 0).
pool = MyPooling(2,2,2)
# forward() prints its intermediate arrays; being the last expression of the
# cell, the returned array is echoed as well.
pool.forward(x)

col [[ 7. 11.  3.  4.]
 [13. 15.  2.  3.]
 [ 1.  2.  1.  8.]
 [15.  9.  3. 10.]]
col [[ 7. 11.  3.  4.]
 [13. 15.  2.  3.]
 [ 1.  2.  1.  8.]
 [15.  9.  3. 10.]]
arg_max [1 1 3 0]
out [11. 15.  8. 15.]
out [[[[11. 15.]
   [ 8. 15.]]]]


array([[[[11., 15.],
         [ 8., 15.]]]])

In [19]:
# Upstream gradient with one value per pooled output, shape (1, 1, 2, 2).
dout = np.array([[[[1,2],
                   [3,4]]]])
# Replays the first step of MyPooling.backward by hand: move the channel axis
# last, (N, C, out_h, out_w) -> (N, out_h, out_w, C).
dout = dout.transpose(0, 2, 3, 1)
print(dout.shape)
print(dout.size)
print(dout.flatten())

(1, 2, 2, 1)
4
[1 2 3 4]


In [20]:
# Number of elements in one pooling window.
pool_size = pool.pool_h * pool.pool_w
# One row per pooled output, one column per position inside its window.
dmax = np.zeros((dout.size, pool_size))
# Put each upstream gradient into the column that held the maximum in
# forward(); every other entry stays zero.
dmax[np.arange(pool.arg_max.size), pool.arg_max.flatten()] = dout.flatten()
print(dmax)

[[0. 1. 0. 0.]
 [0. 2. 0. 0.]
 [0. 0. 0. 3.]
 [4. 0. 0. 0.]]


In [21]:
# Restore the (N, out_h, out_w, C) axes of dout and append the window axis,
# giving shape (1, 2, 2, 1, 4) here.
dmax = dmax.reshape(dout.shape + (pool_size,))
print(dmax.shape)
print(dmax)

(1, 2, 2, 1, 4)
[[[[[0. 1. 0. 0.]]

   [[0. 2. 0. 0.]]]


  [[[0. 0. 0. 3.]]

   [[4. 0. 0. 0.]]]]]


In [22]:
# Merge the first three axes (N, out_h, out_w) into rows; the remaining
# (C, pool_size) axes are flattened into the columns -> shape (4, 4) here.
dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
print(dcol.shape)
print(dcol)

(4, 4)
[[0. 1. 0. 0.]
 [0. 2. 0. 0.]
 [0. 0. 0. 3.]
 [4. 0. 0. 0.]]


In [23]:
# Fold the window gradients back into the (1, 1, 4, 4) input shape using a
# 2x2 window, stride 2 and pad 0; each gradient lands where its window's
# maximum was located.
dx = my_col2im(dcol, (1,1,4,4), 2,2,2,0)
print(dx)

[[[[0. 1. 0. 2.]
   [0. 0. 0. 0.]
   [0. 0. 4. 0.]
   [0. 3. 0. 0.]]]]


In [24]:
# Same upstream gradient as in the manual steps, this time passed through
# pool.backward() in one call; it prints its intermediate 'dmax' and 'dcol'.
dout = np.array([[[[1,2],
                   [3,4]]]])
dx = pool.backward(dout)
print(dx)

dmax [[0. 1. 0. 0.]
 [0. 2. 0. 0.]
 [0. 0. 0. 3.]
 [4. 0. 0. 0.]]
dcol [[0. 1. 0. 0.]
 [0. 2. 0. 0.]
 [0. 0. 0. 3.]
 [4. 0. 0. 0.]]
[[[[0. 1. 0. 2.]
   [0. 0. 0. 0.]
   [0. 0. 4. 0.]
   [0. 3. 0. 0.]]]]


### 채널이 2개인 경우 맥스 풀링 테스트

In [25]:
# Pooling input with two channels: 1 image, 2 channels, 4x4 pixels.
x = np.array([[[[ 7, 11, 13, 15],
                [ 3,  4,  2,  3],
                [ 1,  2, 15,  9],
                [ 1,  8,  3, 10]],
               [[ 5,  8,  6,  7],
                [10,  4, 11, 13],
                [ 8,  3, 10,  4],
                [ 1,  2,  5, 15]]
              ]])
print(x.shape)

(1, 2, 4, 4)


In [26]:
# Fresh 2x2 / stride-2 pooling layer for the two-channel input.
pool = MyPooling(2,2,2)
# forward() prints 'col' twice: first with both channels side by side in each
# row, then reshaped so that each row is one window of one channel.
out = pool.forward(x)
print(out.shape)

col [[ 7. 11.  3.  4.  5.  8. 10.  4.]
 [13. 15.  2.  3.  6.  7. 11. 13.]
 [ 1.  2.  1.  8.  8.  3.  1.  2.]
 [15.  9.  3. 10. 10.  4.  5. 15.]]
col [[ 7. 11.  3.  4.]
 [ 5.  8. 10.  4.]
 [13. 15.  2.  3.]
 [ 6.  7. 11. 13.]
 [ 1.  2.  1.  8.]
 [ 8.  3.  1.  2.]
 [15.  9.  3. 10.]
 [10.  4.  5. 15.]]
arg_max [1 2 1 3 3 0 0 3]
out [11. 10. 15. 13.  8.  8. 15. 15.]
out [[[[11. 15.]
   [ 8. 15.]]

  [[10. 13.]
   [ 8. 15.]]]]
(1, 2, 2, 2)


In [27]:
# Upstream gradient: 1 for every output of channel 0 and 2 for every output
# of channel 1, so the printed dx shows which channel each entry came from.
dout = np.array([[[[1,1],
                   [1,1]],
                  [[2,2],
                   [2,2]]]])
print(dout.shape)
dx = pool.backward(dout)
print(dx.shape)
print(dx)

(1, 2, 2, 2)
dmax [[0. 1. 0. 0.]
 [0. 0. 2. 0.]
 [0. 1. 0. 0.]
 [0. 0. 0. 2.]
 [0. 0. 0. 1.]
 [2. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 0. 0. 2.]]
dcol [[0. 1. 0. 0. 0. 0. 2. 0.]
 [0. 1. 0. 0. 0. 0. 0. 2.]
 [0. 0. 0. 1. 2. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 2.]]
(1, 2, 4, 4)
[[[[0. 1. 0. 1.]
   [0. 0. 0. 0.]
   [0. 0. 1. 0.]
   [0. 1. 0. 0.]]

  [[0. 0. 0. 0.]
   [2. 0. 0. 2.]
   [2. 0. 0. 0.]
   [0. 0. 0. 2.]]]]
