In [4]:
import sys, os
sys.path.append(os.pardir)
from common.util import im2col
import numpy as np

## im2col 구현

In [None]:
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold a batch of images into a 2-D matrix of flattened filter windows.

    Parameters
    ----------
    input_data : 4-D array laid out as (number of images, channels, height, width)
    filter_h : filter (window) height
    filter_w : filter (window) width
    stride : step between neighbouring window positions
    pad : number of zero rows/columns added on each side of height and width

    Returns
    -------
    col : 2-D float64 array of shape (N*out_h*out_w, C*filter_h*filter_w).
        Each row is one window position; inside a row the values are ordered
        channel first, then window row, then window column.
    """
    n_imgs, n_ch, height, width = input_data.shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # Only the two spatial axes are padded; batch and channel axes are untouched.
    pad_spec = [(0, 0), (0, 0), (pad, pad), (pad, pad)]
    padded = np.pad(input_data, pad_spec, 'constant')
    patches = np.zeros((n_imgs, n_ch, filter_h, filter_w, out_h, out_w))

    # For every offset (dy, dx) inside the window, copy in one strided slice
    # the pixel that sits at that offset for all window positions at once.
    for dy in range(filter_h):
        rows = slice(dy, dy + stride * out_h, stride)
        for dx in range(filter_w):
            cols = slice(dx, dx + stride * out_w, stride)
            patches[:, :, dy, dx, :, :] = padded[:, :, rows, cols]

    # (N, C, fh, fw, oh, ow) -> (N, oh, ow, C, fh, fw), then one row per window.
    per_window = patches.transpose(0, 4, 5, 1, 2, 3)
    return per_window.reshape(n_imgs * out_h * out_w, -1)


## im2col 시연

In [34]:
# One 3-channel 3x3 image unfolded with a 2x2 window (stride 1, no padding):
# 2x2 = 4 window positions (rows), 3*2*2 = 12 values per window (columns).
x1=np.random.rand(1,3,3,3)
col1=im2col(x1, 2, 2, stride=1, pad=0)
print(col1.shape)

(4, 12)


In [35]:
# Raw input, printed so the rows of col1 can be traced back to it.
print(x1)

[[[[0.04951028 0.91294885 0.86122585]
   [0.36767645 0.46916723 0.42027799]
   [0.27151267 0.09554375 0.76419891]]

  [[0.07368698 0.80338699 0.53470977]
   [0.87842972 0.58905587 0.94303385]
   [0.55509312 0.62493555 0.15252255]]

  [[0.73984215 0.69804968 0.09109066]
   [0.24297083 0.82421198 0.59627527]
   [0.07965097 0.00195352 0.38335063]]]]


In [36]:
# Each row is one 2x2 window position; within a row the 4 window values of
# channel 0 come first, then those of channel 1, then channel 2.
print(col1)

[[0.04951028 0.91294885 0.36767645 0.46916723 0.07368698 0.80338699
  0.87842972 0.58905587 0.73984215 0.69804968 0.24297083 0.82421198]
 [0.91294885 0.86122585 0.46916723 0.42027799 0.80338699 0.53470977
  0.58905587 0.94303385 0.69804968 0.09109066 0.82421198 0.59627527]
 [0.36767645 0.46916723 0.27151267 0.09554375 0.87842972 0.58905587
  0.55509312 0.62493555 0.24297083 0.82421198 0.07965097 0.00195352]
 [0.46916723 0.42027799 0.09554375 0.76419891 0.58905587 0.94303385
  0.62493555 0.15252255 0.82421198 0.59627527 0.00195352 0.38335063]]


In [39]:
# 7x7 image with a 5x5 window: 3x3 = 9 window positions, 3*5*5 = 75 values each.
x2=np.random.rand(1,3,7,7)
col2=im2col(x2, 5, 5, stride=1, pad=0)
print(col2.shape)

(9, 75)


In [40]:
# Same geometry with a batch of 10 images: 10 * 9 = 90 rows, still 75 columns.
x3=np.random.rand(10, 3, 7, 7)
col3=im2col(x3, 5, 5, stride=1, pad=0)
print(col3.shape)

(90, 75)


## Simple Convolution

In [None]:
class Convolution:
    """Convolution layer (forward pass only) computed as im2col + one matrix product.

    Attributes
    ----------
    W : filter weights, 4-D array of shape (FN, C, FH, FW)
    b : bias added to the (N*out_h*out_w, FN) product via NumPy broadcasting,
        so a scalar or a length-FN vector both work
    stride : step between neighbouring filter positions
    pad : padding added on each side of the height and width axes
    """

    def __init__(self, W, b, stride=1, pad=0):
        # The third parameter was previously misspelled `strid`, so
        # `self.stride = stride` either raised NameError or silently picked up
        # an unrelated global named `stride`.
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Convolve a batch of images with the stored filters.

        Parameters
        ----------
        x : 4-D array of shape (N, C, H, W)

        Returns
        -------
        out : 4-D array of shape (N, FN, out_h, out_w)
        """
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        # Same integer formula im2col uses, so the reshape below always
        # matches the number of rows im2col produced.
        out_h = (H + 2*self.pad - FH)//self.stride + 1
        out_w = (W + 2*self.pad - FW)//self.stride + 1

        # One row per window position, one column per flattened filter.
        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T
        out = np.dot(col, col_W) + self.b

        # Rows are ordered (image, out_y, out_x); unflatten them and move the
        # filter axis in front of the spatial axes.
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        return out
        

self.W.reshape(FN, -1).T 확인해보기

In [47]:
w=np.random.rand(2,3,2,2) # shape (FN, C, FH, FW) = (2, 3, 2, 2): 2 filters, 3 channels, 2x2 kernel
print(w)

[[[[0.78965879 0.88320332]
   [0.16793797 0.7653302 ]]

  [[0.61036554 0.40525144]
   [0.58634761 0.57793482]]

  [[0.19847316 0.05749034]
   [0.54917522 0.74860787]]]


 [[[0.35196569 0.92677374]
   [0.22004974 0.75369348]]

  [[0.4667236  0.2249829 ]
   [0.20668939 0.52194494]]

  [[0.31889386 0.62496452]
   [0.53885245 0.91287286]]]]


In [49]:
w.reshape(2,-1) # one row per filter (FN=2); each row holds that filter's C*FH*FW = 12 weights

array([[0.78965879, 0.88320332, 0.16793797, 0.7653302 , 0.61036554,
        0.40525144, 0.58634761, 0.57793482, 0.19847316, 0.05749034,
        0.54917522, 0.74860787],
       [0.35196569, 0.92677374, 0.22004974, 0.75369348, 0.4667236 ,
        0.2249829 , 0.20668939, 0.52194494, 0.31889386, 0.62496452,
        0.53885245, 0.91287286]])

In [53]:
# Transposed to (12, 2): each column is one flattened filter, so an im2col
# matrix with 12 columns can be right-multiplied by it.
w.reshape(2,-1).T

array([[0.78965879, 0.35196569],
       [0.88320332, 0.92677374],
       [0.16793797, 0.22004974],
       [0.7653302 , 0.75369348],
       [0.61036554, 0.4667236 ],
       [0.40525144, 0.2249829 ],
       [0.58634761, 0.20668939],
       [0.57793482, 0.52194494],
       [0.19847316, 0.31889386],
       [0.05749034, 0.62496452],
       [0.54917522, 0.53885245],
       [0.74860787, 0.91287286]])

In [52]:
# Keep the flattened, transposed filters for the matrix product in the next cell.
col_w=w.reshape(2,-1).T

np.dot(col, col_W)+self.b 형태 확인해보기

In [55]:
# Full convolution including the sum over channels; no bias is added and no
# pooling is applied. Result has one row per window position, one column per filter.
out=np.dot(col1, col_w)
out

array([[3.42969623, 3.55770853],
       [4.5222474 , 3.91669699],
       [2.42576949, 2.31669213],
       [3.02478181, 2.83398825]])

out.reshape(N, out_h, out_w, -1).transpose(0,3,1,2) 형태 확인해보기

In [58]:
# Recompute by hand the output size that Convolution.forward derives from the
# filter shape, the input shape, stride and padding.
FN, C, FH, FW = w.shape
N, C, H, W = x1.shape  # C is unpacked a second time; W here is the input width
pad=0
stride=1
out_h=int(1+(H+2*pad-FH)/stride)
out_w=int(1+(W+2*pad-FW)/stride)

In [61]:
# Sanity-check the unpacked dimensions and the computed output size.
print(FN, C, FH, FW, N, C, H, W, out_h, out_w)

2 3 2 2 1 3 3 3 2 2


In [59]:
# Rows of `out` are ordered (image, out_y, out_x): unflatten them, then move
# the filter axis in front of the spatial axes.
out.reshape(N, out_h, out_w, -1).transpose(0,3,1,2) # N, out_h, out_w, FN -> N, FN, out_h, out_w

array([[[[3.42969623, 4.5222474 ],
         [2.42576949, 3.02478181]],

        [[3.55770853, 3.91669699],
         [2.31669213, 2.83398825]]]])

### 데이터 개수가 2 이상일 때(N>=2)

In [62]:
# Batch of 2 images: 2 * 4 window positions = 8 rows, still 12 columns.
# Note this rebinds x1 and col1 from the single-image example.
x1=np.random.rand(2,3,3,3)
col1=im2col(x1, 2, 2, stride=1, pad=0)
print(col1.shape)

(8, 12)


In [67]:
# NOTE(review): this cell's execution count (67) is later than the next
# cell's (64), so the product displayed below was presumably computed with
# the earlier col_w - re-run top to bottom to confirm.
w=np.random.rand(2,3,2,2) # shape (FN, C, FH, FW) = (2, 3, 2, 2): 2 filters, 3 channels, 2x2 kernel
col_w=w.reshape(2,-1).T
print(col_w.shape)

(12, 2)


In [64]:
# Full convolution including the sum over channels; no bias is added and no
# pooling is applied. The 8 rows are 4 window positions for each of the 2 images.
out=np.dot(col1, col_w)
out

array([[3.03678754, 3.00309735],
       [3.64382296, 3.57877473],
       [3.22834844, 3.09499162],
       [3.73543431, 3.62902373],
       [2.28662063, 2.36306989],
       [3.62969014, 3.75955036],
       [3.06836769, 2.98538808],
       [3.88751337, 3.68985096]])

In [68]:
# Same hand computation of the output size as before, now with N = 2.
FN, C, FH, FW = w.shape
N, C, H, W = x1.shape  # C is unpacked a second time; W here is the input width
pad=0
stride=1
out_h=int(1+(H+2*pad-FH)/stride)
out_w=int(1+(W+2*pad-FW)/stride)

In [69]:
# Sanity-check the unpacked dimensions and the computed output size.
print(FN, C, FH, FW, N, C, H, W, out_h, out_w)

2 3 2 2 2 3 3 3 2 2


In [74]:
# The first 4 rows of `out` belong to image 0 and the last 4 to image 1, so
# the reshape splits them per image before the filter axis is moved forward.
result=out.reshape(N, out_h, out_w, -1).transpose(0,3,1,2) # N, out_h, out_w, FN -> N, FN, out_h, out_w
result

array([[[[3.03678754, 3.64382296],
         [3.22834844, 3.73543431]],

        [[3.00309735, 3.57877473],
         [3.09499162, 3.62902373]]],


       [[[2.28662063, 3.62969014],
         [3.06836769, 3.88751337]],

        [[2.36306989, 3.75955036],
         [2.98538808, 3.68985096]]]])

In [73]:
# Expected layout: (N, FN, out_h, out_w).
print(result.shape)

(2, 2, 2, 2)


## pooling 구현

In [27]:
class Pooling:
    """Max-pooling layer (forward pass only) built on im2col.

    Attributes
    ----------
    pool_h : pooling window height
    pool_w : pooling window width
    stride : step between neighbouring window positions
    pad : padding passed through to im2col for the height and width axes
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Take the maximum over every pooling window, independently per channel.

        Parameters
        ----------
        x : 4-D array of shape (N, C, H, W)

        Returns
        -------
        out : 4-D array of shape (N, C, out_h, out_w)
        """
        N, C, H, W = x.shape
        # Padding must be part of the output size because im2col is called
        # with self.pad; leaving it out made the final reshape fail for pad > 0.
        out_h = (H + 2*self.pad - self.pool_h)//self.stride + 1
        out_w = (W + 2*self.pad - self.pool_w)//self.stride + 1

        # (1) Unfold: im2col rows hold all channels of one window side by side;
        # re-splitting gives one row per (window position, channel).
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)

        # (2) Maximum of each window. Was `np.max(col, axis1)`, a NameError.
        out = np.max(col, axis=1)

        # (3) Reshape: values are ordered (image, out_y, out_x, channel).
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        return out

### 예시(데이터가 1개일 때)

In [82]:
# Pooling walk-through on one 3-channel 3x3 image with a 2x2 window:
# 4 window positions (rows), 3 channels * 4 values = 12 columns.
x=np.random.rand(1,3,3,3)
col=im2col(x, 2, 2, stride=1, pad=0)
print(col.shape)

(4, 12)


In [83]:
# Each row still mixes the three channels of one window position.
print(col)

[[0.17841589 0.13143481 0.35266579 0.26480141 0.65228741 0.50583538
  0.34078224 0.37232706 0.32246471 0.23775278 0.82333536 0.33368825]
 [0.13143481 0.47221253 0.26480141 0.59690561 0.50583538 0.82247719
  0.37232706 0.22901439 0.23775278 0.94785634 0.33368825 0.32448504]
 [0.35266579 0.26480141 0.47193073 0.52139824 0.34078224 0.37232706
  0.93128615 0.62424366 0.82333536 0.33368825 0.94436293 0.65419418]
 [0.26480141 0.59690561 0.52139824 0.80639367 0.37232706 0.22901439
  0.62424366 0.37660754 0.33368825 0.32448504 0.65419418 0.91487463]]


In [88]:
# NOTE(review): `w` is not used by any of the pooling steps shown in this
# notebook - likely a leftover from the convolution section.
w=np.random.rand(2,3,2,2) # shape (2, 3, 2, 2); unused below
pool_h=2
pool_w=2
stride=1
pad=0
# Split every im2col row into one row per channel: (4, 12) -> (12, 4).
col2=col.reshape(-1, pool_h*pool_w)
print(col2.shape)

(12, 4)


In [85]:
# Rows cycle through channel 0, 1, 2 of each window position in turn.
print(col2)

[[0.17841589 0.13143481 0.35266579 0.26480141]
 [0.65228741 0.50583538 0.34078224 0.37232706]
 [0.32246471 0.23775278 0.82333536 0.33368825]
 [0.13143481 0.47221253 0.26480141 0.59690561]
 [0.50583538 0.82247719 0.37232706 0.22901439]
 [0.23775278 0.94785634 0.33368825 0.32448504]
 [0.35266579 0.26480141 0.47193073 0.52139824]
 [0.34078224 0.37232706 0.93128615 0.62424366]
 [0.82333536 0.33368825 0.94436293 0.65419418]
 [0.26480141 0.59690561 0.52139824 0.80639367]
 [0.37232706 0.22901439 0.62424366 0.37660754]
 [0.33368825 0.32448504 0.65419418 0.91487463]]


In [87]:
# Max over each row = max of one pool window of one channel (12 values in total).
out=np.max(col2, axis=1)
print(out.shape)
print(out)

(12,)
[0.35266579 0.65228741 0.82333536 0.59690561 0.82247719 0.94785634
 0.52139824 0.93128615 0.94436293 0.80639367 0.62424366 0.91487463]


In [90]:
# Output size for the pooling window; padding is 0 in this example and does
# not appear in the formula.
N, C, H, W=x.shape
out_h=int(1+(H-pool_h)/stride)
out_w=int(1+(W-pool_w)/stride)

# Values are ordered (image, out_y, out_x, channel); move channel forward.
out.reshape(N, out_h, out_w, C).transpose(0,3,1,2)

array([[[[0.35266579, 0.59690561],
         [0.52139824, 0.80639367]],

        [[0.65228741, 0.82247719],
         [0.93128615, 0.62424366]],

        [[0.82333536, 0.94785634],
         [0.94436293, 0.91487463]]]])

### 예시(데이터가 2개 이상일 때)

In [92]:
# Same walk-through with a batch of 2 images: 2 * 4 window positions = 8 rows.
x=np.random.rand(2,3,3,3)
col=im2col(x, 2, 2, stride=1, pad=0)
print(col.shape)

(8, 12)


In [93]:
# The first 4 rows belong to image 0, the last 4 to image 1.
print(col)

[[0.92869297 0.16103981 0.95985401 0.08268901 0.93326146 0.02861156
  0.23230952 0.18765969 0.44231087 0.31210996 0.18018497 0.12906008]
 [0.16103981 0.70971971 0.08268901 0.34979556 0.02861156 0.93748953
  0.18765969 0.41440896 0.31210996 0.16967107 0.12906008 0.01136503]
 [0.95985401 0.08268901 0.36051413 0.49123755 0.23230952 0.18765969
  0.01893166 0.75847059 0.18018497 0.12906008 0.73052049 0.67632707]
 [0.08268901 0.34979556 0.49123755 0.33852637 0.18765969 0.41440896
  0.75847059 0.5909359  0.12906008 0.01136503 0.67632707 0.71105064]
 [0.86033696 0.15524344 0.65447834 0.54414173 0.91674629 0.33820164
  0.16121077 0.13588151 0.01897979 0.45067715 0.50486246 0.34081423]
 [0.15524344 0.12462701 0.54414173 0.18445113 0.33820164 0.09543932
  0.13588151 0.25504126 0.45067715 0.66698589 0.34081423 0.61396731]
 [0.65447834 0.54414173 0.76662866 0.54929467 0.16121077 0.13588151
  0.29391179 0.97403178 0.50486246 0.34081423 0.47391805 0.53326366]
 [0.54414173 0.18445113 0.54929467 0.1857

In [94]:
# NOTE(review): `w` is not used by any of the pooling steps shown in this
# notebook - likely a leftover from the convolution section.
w=np.random.rand(2,3,2,2) # shape (2, 3, 2, 2); unused below
pool_h=2
pool_w=2
stride=1
pad=0
# Split every im2col row into one row per channel: (8, 12) -> (24, 4).
col2=col.reshape(-1, pool_h*pool_w)
print(col2.shape)

(24, 4)


In [95]:
# Rows cycle through channel 0, 1, 2 of each window position in turn.
print(col2)

[[0.92869297 0.16103981 0.95985401 0.08268901]
 [0.93326146 0.02861156 0.23230952 0.18765969]
 [0.44231087 0.31210996 0.18018497 0.12906008]
 [0.16103981 0.70971971 0.08268901 0.34979556]
 [0.02861156 0.93748953 0.18765969 0.41440896]
 [0.31210996 0.16967107 0.12906008 0.01136503]
 [0.95985401 0.08268901 0.36051413 0.49123755]
 [0.23230952 0.18765969 0.01893166 0.75847059]
 [0.18018497 0.12906008 0.73052049 0.67632707]
 [0.08268901 0.34979556 0.49123755 0.33852637]
 [0.18765969 0.41440896 0.75847059 0.5909359 ]
 [0.12906008 0.01136503 0.67632707 0.71105064]
 [0.86033696 0.15524344 0.65447834 0.54414173]
 [0.91674629 0.33820164 0.16121077 0.13588151]
 [0.01897979 0.45067715 0.50486246 0.34081423]
 [0.15524344 0.12462701 0.54414173 0.18445113]
 [0.33820164 0.09543932 0.13588151 0.25504126]
 [0.45067715 0.66698589 0.34081423 0.61396731]
 [0.65447834 0.54414173 0.76662866 0.54929467]
 [0.16121077 0.13588151 0.29391179 0.97403178]
 [0.50486246 0.34081423 0.47391805 0.53326366]
 [0.54414173 

In [96]:
# Max over each row = max of one pool window of one channel (24 values in total).
out=np.max(col2, axis=1)
print(out.shape)
print(out)

(24,)
[0.95985401 0.93326146 0.44231087 0.70971971 0.93748953 0.31210996
 0.95985401 0.75847059 0.73052049 0.49123755 0.75847059 0.71105064
 0.86033696 0.91674629 0.50486246 0.54414173 0.33820164 0.66698589
 0.76662866 0.97403178 0.53326366 0.54929467 0.97403178 0.61396731]


In [97]:
# Output size for the pooling window; padding is 0 in this example and does
# not appear in the formula.
N, C, H, W=x.shape
out_h=int(1+(H-pool_h)/stride)
out_w=int(1+(W-pool_w)/stride)

# Values are ordered (image, out_y, out_x, channel); move channel forward.
out.reshape(N, out_h, out_w, C).transpose(0,3,1,2)

array([[[[0.95985401, 0.70971971],
         [0.95985401, 0.49123755]],

        [[0.93326146, 0.93748953],
         [0.75847059, 0.75847059]],

        [[0.44231087, 0.31210996],
         [0.73052049, 0.71105064]]],


       [[[0.86033696, 0.54414173],
         [0.76662866, 0.54929467]],

        [[0.91674629, 0.33820164],
         [0.97403178, 0.97403178]],

        [[0.50486246, 0.66698589],
         [0.53326366, 0.61396731]]]])