In [1]:
import numpy as np

## 【問題1】2次元畳み込み層の作成

### トイデータでスクラッチ

In [2]:
# トイデータ
X_toy = np.random.randint(1, 9, (2, 1, 4, 4)) # NCHW
X_toy

array([[[[5, 8, 2, 7],
         [5, 3, 7, 1],
         [7, 3, 2, 3],
         [5, 4, 2, 2]]],


       [[[4, 5, 8, 2],
         [3, 4, 4, 5],
         [4, 7, 5, 2],
         [1, 5, 3, 8]]]])

In [3]:
stride = 1
pad = 1

In [4]:
X = np.pad(X_toy, [(0, 0), (0, 0),  (pad, pad), (pad, pad)])
X.shape, X

((2, 1, 6, 6),
 array([[[[0, 0, 0, 0, 0, 0],
          [0, 5, 8, 2, 7, 0],
          [0, 5, 3, 7, 1, 0],
          [0, 7, 3, 2, 3, 0],
          [0, 5, 4, 2, 2, 0],
          [0, 0, 0, 0, 0, 0]]],
 
 
        [[[0, 0, 0, 0, 0, 0],
          [0, 4, 5, 8, 2, 0],
          [0, 3, 4, 4, 5, 0],
          [0, 4, 7, 5, 2, 0],
          [0, 1, 5, 3, 8, 0],
          [0, 0, 0, 0, 0, 0]]]]))

In [5]:
F = np.ones((2, 3, 3)) # 重みフィルタ（O, FH, FW）
B = np.array([1]) # Xのチャネル分
F.shape, B.shape, F, B

((2, 3, 3),
 (1,),
 array([[[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],
 
        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]]),
 array([1]))

In [6]:
# 型を取得
O, FH, FW = F.shape
N, C, H, W = X.shape
H_out = (H - FH) //  stride + 1
W_out = (W - FW) //  stride + 1

In [7]:
out = np.zeros((O, H_out, W_out))
out.shape, out

((2, 4, 4),
 array([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],
 
        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]))

#### toy_forward

In [8]:
# outの0行0列目のセル
out[0, 0, 0] += (X[0, 0, 0 : 0+FH, 0 : 0+FW] * F[0, :, :]).sum()
out

array([[[21.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]]])

In [9]:
# outの0行1列目のセル
out[0, 0, 1] += (X[0, 0, 0 : 0+FH, 1 : 1+FW] * F[0, :, :]).sum()
out

array([[[21., 30.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]]])

In [10]:
# outの0行2列目のセル
out[0, 0, 2] += (X[0, 0, 0 : 0+FH, 2 : 2+FW] * F[0, :, :]).sum()
out

array([[[21., 30., 28.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]]])

In [11]:
# outの0行3列目のセル
out[0, 0, 3] += (X[0, 0, 0 : 0+FH, 3 : 3+FW] * F[0, :, :]).sum()
out

array([[[21., 30., 28., 17.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]]])

In [12]:
# outの1行0列目のセル
out[0, 1, 0] += (X[0, 0, 1 : 1+FH, 0 : 0+FW] * F[0, :, :]).sum()
out

array([[[21., 30., 28., 17.],
        [31.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]]])

In [13]:
# outの1行1列目のセル
out[0, 1, 1] += (X[0, 0, 1 : 1+FH, 1 : 1+FW] * F[0, :, :]).sum()
out

array([[[21., 30., 28., 17.],
        [31., 42.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]]])

In [14]:
# 上の流れをループ処理化
out = np.zeros((O, H_out, W_out))

for h in range(H_out): 
    for w in range(W_out):
        out[0, h, w] += (X[0, 0, h : h+FH, w : w+FW] * F[0, :, :]).sum()
out[0, :, :] += B
print(out)

[[[22. 31. 29. 18.]
  [32. 43. 37. 23.]
  [28. 39. 28. 18.]
  [20. 24. 17. 10.]]

 [[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]]


In [15]:
# ↑をチャネル毎に（ここではC＝1なので変わらない）
out = np.zeros((O, H_out, W_out))

for c in range(C):
    for h in range(H_out): 
        for w in range(W_out):
            out[0, h, w] += (X[0, c, h : h+FH, w : w+FW] * F[0, :, :]).sum()
    out[0, :, :] += B
print(out)

[[[22. 31. 29. 18.]
  [32. 43. 37. 23.]
  [28. 39. 28. 18.]
  [20. 24. 17. 10.]]

 [[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]]


In [16]:
# ↑を入力チャネル毎に
out = np.zeros((O, H_out, W_out))

for n in range(N):
    for c in range(C):
        for h in range(H_out): 
            for w in range(W_out):
                out[0, h, w] += (X[n, c, h : h+FH, w : w+FW] * F[0, :, :]).sum()
        out[0, :, :] += B
print(out)

[[[39. 60. 58. 38.]
  [60. 88. 80. 50.]
  [53. 76. 72. 46.]
  [38. 50. 48. 29.]]

 [[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]]


In [17]:
# ↑を出力チャネル毎に
out1 = np.zeros((O, H_out, W_out))

for o in range(O):
    for n in range(N):
        for c in range(C):
            for h in range(H_out): 
                for w in range(W_out):
                    out1[o, h, w] += (X[n, c, h : h+FH, w : w+FW] * F[o, :, :]).sum()
            out1[o, :, :] += B
print(out1)

[[[39. 60. 58. 38.]
  [60. 88. 80. 50.]
  [53. 76. 72. 46.]
  [38. 50. 48. 29.]]

 [[39. 60. 58. 38.]
  [60. 88. 80. 50.]
  [53. 76. 72. 46.]
  [38. 50. 48. 29.]]]


In [18]:
# strideを変える

stride = 3
H_out = (H - FH) //  stride + 1
W_out = (W - FW) //  stride + 1

In [19]:
# ↑にstrideを追加

out = np.zeros((O, H_out, W_out))

for o in range(O):
    for n in range(N):
        for c in range(C):
            for h in range(H_out): 
                for w in range(W_out):
                    out[o, h, w] += (X[n, c, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW] * F[o, :, :]).sum()
            out[o, :, :] += B
print(out)

[[[39. 38.]
  [38. 29.]]

 [[39. 38.]
  [38. 29.]]]


#### toy_backward

In [20]:
# pad1, stride1の結果の誤差を使う
dA1 = out1.copy()
dA1.shape, dA1 # O, H_out, W_out

((2, 4, 4),
 array([[[39., 60., 58., 38.],
         [60., 88., 80., 50.],
         [53., 76., 72., 46.],
         [38., 50., 48., 29.]],
 
        [[39., 60., 58., 38.],
         [60., 88., 80., 50.],
         [53., 76., 72., 46.],
         [38., 50., 48., 29.]]]))

In [21]:
# delta_x

delta_x = np.zeros((X.shape))
delta_x.shape, delta_x # N, C, H, W

((2, 1, 6, 6),
 array([[[[0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0.]]],
 
 
        [[[0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0.]]]]))

In [22]:
delta_x = np.zeros((X.shape))

for h in range(H_out): 
    for w in range(W_out):
        delta_x[0, 0, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW] += dA1[0, h, w] * F[0, :, :]
print(delta_x)

[[[[39. 39. 39. 60. 60. 60.]
   [39. 39. 39. 60. 60. 60.]
   [39. 39. 39. 60. 60. 60.]
   [60. 60. 60. 88. 88. 88.]
   [60. 60. 60. 88. 88. 88.]
   [60. 60. 60. 88. 88. 88.]]]


 [[[ 0.  0.  0.  0.  0.  0.]
   [ 0.  0.  0.  0.  0.  0.]
   [ 0.  0.  0.  0.  0.  0.]
   [ 0.  0.  0.  0.  0.  0.]
   [ 0.  0.  0.  0.  0.  0.]
   [ 0.  0.  0.  0.  0.  0.]]]]


In [23]:
delta_x = np.zeros((X.shape))

for o in range(O):
    for n in range(N):
        for c in range(C):
            for h in range(H_out): 
                for w in range(W_out):
                    delta_x[n, c, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW] += dA1[o, h, w] * F[o, :, :]
print(delta_x)

[[[[ 78.  78.  78. 120. 120. 120.]
   [ 78.  78.  78. 120. 120. 120.]
   [ 78.  78.  78. 120. 120. 120.]
   [120. 120. 120. 176. 176. 176.]
   [120. 120. 120. 176. 176. 176.]
   [120. 120. 120. 176. 176. 176.]]]


 [[[ 78.  78.  78. 120. 120. 120.]
   [ 78.  78.  78. 120. 120. 120.]
   [ 78.  78.  78. 120. 120. 120.]
   [120. 120. 120. 176. 176. 176.]
   [120. 120. 120. 176. 176. 176.]
   [120. 120. 120. 176. 176. 176.]]]]


In [24]:
# paddingした分をリサイズし、元の入力サイズと同じにする
delta_x = delta_x[:, :, pad : -pad, pad : -pad]
delta_x.shape, delta_x

((2, 1, 4, 4),
 array([[[[ 78.,  78., 120., 120.],
          [ 78.,  78., 120., 120.],
          [120., 120., 176., 176.],
          [120., 120., 176., 176.]]],
 
 
        [[[ 78.,  78., 120., 120.],
          [ 78.,  78., 120., 120.],
          [120., 120., 176., 176.],
          [120., 120., 176., 176.]]]]))

In [25]:
# delta_b

delta_b = np.zeros((B.shape))

for o in range(O):
    for c in range(C):
        for h in range(H_out): 
            for w in range(W_out):
                delta_b[c] += dA1[o, h, w].sum()     
print(delta_b)

[494.]


In [26]:
# delta_f

delta_f = np.zeros((F.shape))

for o in range(O):
    for n in range(N):
        for c in range(C):
            for h in range(H_out): 
                for w in range(W_out):
                    delta_f[o, :, :] += dA1[o, h, w] * X[n, c, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW]
print(delta_f)                    

[[[ 616. 1100.  600.]
  [1040. 2131. 1047.]
  [ 660.  672.  273.]]

 [[ 616. 1100.  600.]
  [1040. 2131. 1047.]
  [ 660.  672.  273.]]]


### クラス化

In [27]:
class Conv2d():
    
    """
    ノード数n_nodes1からn_nodes2への全結合層
    Parameters
    ----------
    n_nodes1 : int
      前の層のノード数
    n_nodes2 : int
      後の層のノード数
    initializer : 初期化方法のインスタンス
    optimizer : 最適化手法のインスタンス
    """
    
    def __init__(self, F, lr = 0.001, stride=1, pad=1):
        
        #self.optimizer = optimizer
        
        #forward
        self.X = None
        
        self.N = None #入力サンプル数
        self.C = None # 入力チャネル数
        self.H = None #高さ方向の特徴量
        self.W = None #幅方向の特徴量
        
        self.O = None # 出力チャネル数
        self.FH = None #フィルタ高さ
        self.FW = None # フィルタ幅
        
        self.F = F
        self.B = np.array([0.]) # C
             
        self.pad = pad
        self.stride = stride
        
        
        # backward
        self.H_out = None
        self.W_out = None
        
        #self.dW = None
        #self.dB = None
        
        self.lr = lr
    
    
    def conv_forward(self, X):
        """
        フォワード
        
        Parameters
        ----------
        X : 次の形のndarray, shape (batch_size, n_nodes1)
            入力
            
        Returns
        ----------
        A : 次の形のndarray, shape (batch_size, n_nodes2)
            出力
        """
        # ストライド・パディング
        stride = self.stride
        pad = self.pad
               
        # 入力にパディング
        X = np.pad(X, [(0, 0), (0, 0),  (pad, pad), (pad, pad)])
        
        # 重みの型を取る
        O, FH, FW = self.F.shape
        
        self.O = O
        self.FH = FH
        self.FW = FW
        
        # 入力の型を取る
        N, C, H, W = X.shape
        
        self.N = N
        self.C = C
        self.H = H #pad後
        self.W = W #pad後
        
        self.X = X
        
        F = self.F
        B = self.B

        # 出力サイズ
        H_out = (H - FH) //  stride + 1
        W_out = (W - FW) //  stride + 1
        
        self.H_out = H_out #backward用
        self.W_out = W_out #backward用
        

        out = np.zeros((O, H_out, W_out))

        for o in range(O):
            for n in range(N):
                for c in range(C):
                    for h in range(H_out): 
                        for w in range(W_out):
                            out[o, h, w] += (X[n, c, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW] * F[o, :, :]).sum()
                    out[o, :, :] += B
    
        return out
    
    
    def conv_backward(self, dA):
        
        """
        バックワード
        
        Parameters
        ----------
        dA : 次の形のndarray, shape (batch_size, n_nodes2)
            後ろから流れてきた勾配
            
        Returns
        ----------
        dZ : 次の形のndarray, shape (batch_size, n_nodes1)
            前に流す勾配
            
        """
        
        N = self.N #入力サンプル数
        C = self.C # 入力チャネル数
        H = self.H #高さ方向の特徴量
        W = self.W #幅方向の特徴量
        
        O = self.O # 出力チャネル数
        FH = self.FH #フィルタ高さ
        FW = self.FW # フィルタ幅
        
        H_out = self.H_out
        W_out = self.W_out
        
        # ストライド・パディング
        stride = self.stride
        pad = self.pad
        
        
        # delta_x
        delta_x = np.zeros((self.X.shape))
        
        for o in range(O):
            for n in range(N):
                for c in range(C):
                    for h in range(H_out): 
                        for w in range(W_out):
                            delta_x[n, c, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW] += dA[o, h, w] * F[o, :, :]
        
        delta_x = delta_x[:, :, pad : -pad, pad : -pad]
        
    
        # delta_b
        delta_b = np.zeros((self.B.shape))

        for o in range(O):
            for c in range(C):
                for h in range(H_out): 
                    for w in range(W_out):
                        delta_b[c] += dA[o, h, w].sum()     
        
        
        # delta_f
        delta_f = np.zeros((self.F.shape))

        for o in range(O):
            for n in range(N):
                for c in range(C):
                    for h in range(H_out): 
                        for w in range(W_out):
                            delta_f[o, :, :] += dA[o, h, w] * X[n, c, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW]
 

        # optimizerに流す前にデータ型を揃える
        self.F = delta_f.astype(np.float64)
        self.B = delta_b.astype(np.float64)
        
        # 更新
        self.F -= self.lr * delta_f
        self.B -= self.lr * delta_b
        #self.optimizer.update(self)
        
        print("self.F" + str(self.F))
        print("self.B" + str(self.B))
        
        return delta_x

## 【問題2】小さな配列での2次元畳み込み層の実験

### forward

In [28]:
#CNN2 のフォワードを流す時の入力データ

#(1,1,4,4)
x = np.array([
                    [
                    [
                    [ 1, 2, 3, 4],
                    [ 5, 6, 7, 8],
                    [ 9, 10, 11, 12],
                    [13, 14, 15, 16]
                    ],
                    ],
                    ])

#(2,3,3)
w = np.array([
                    [
                    [ 0., 0., 0.],
                    [ 0., 1., 0.],
                    [ 0., -1., 0.],
                    ],
                    [
                    [ 0., 0., 0.],
                    [ 0., -1., 1.],
                    [ 0., 0., 0.]
                    ]
                    ])


In [29]:
x.shape, w.shape

((1, 1, 4, 4), (2, 3, 3))

In [30]:
c2d = Conv2d(w, stride=1, pad=0)

In [31]:
c2d.conv_forward(x)

array([[[-4., -4.],
        [-4., -4.]],

       [[ 1.,  1.],
        [ 1.,  1.]]])

In [32]:
# 出力
forward_output = np.array([
                                            [
                                            [-4,-4],
                                            [-4,-4],
                                            ],
                                            [
                                            [1,1],
                                            [1,1],
                                            ]
                                            ])
#(1,2,2,)

In [33]:
forward_output.shape

(2, 2, 2)

### backward

In [34]:
delta = np.array([
                         [
                         [-4,-4],
                         [10,11],
                         ],
                         [
                         [1,-7],
                         [1,-11],
                         ]
                         ])

In [35]:
back_output = np.array([
                                     [-5,4],
                                     [13,27],
                                     ])

In [36]:
delta.shape, delta

((2, 2, 2),
 array([[[ -4,  -4],
         [ 10,  11]],
 
        [[  1,  -7],
         [  1, -11]]]))

In [37]:
# padding=0の場合
c2d.conv_backward(delta)

self.F[[[  54.945  137.862  101.898]
  [  34.965   30.969   66.933]
  [  56.943   70.929   11.988]]

 [[ -54.945  -82.917  -13.986]
  [ -89.91   -78.921  -79.92 ]
  [-111.888  -41.958  -64.935]]]
self.B[-2.997]


array([], shape=(1, 1, 0, 0), dtype=float64)

## 【問題3】2次元畳み込み後の出力サイズ

In [38]:
def calc_kernel_2d(H, W):
    
    H_out = (H + 2*pad - FH) //  stride + 1
    W_out = (W + 2*pad - FW) //  stride + 1
    return H_out, W_out

## 【問題4】最大プーリング層の作成

### 4-1-1. トイデータによるforward

In [39]:
X_toy.shape, X_toy # N, C, H, W

((2, 1, 4, 4),
 array([[[[5, 8, 2, 7],
          [5, 3, 7, 1],
          [7, 3, 2, 3],
          [5, 4, 2, 2]]],
 
 
        [[[4, 5, 8, 2],
          [3, 4, 4, 5],
          [4, 7, 5, 2],
          [1, 5, 3, 8]]]]))

In [40]:
PFH, PFW = 2, 2 # Poolingフィルタサイズ
stride = 2 # stride幅はフィルタサイズと同一

N, C, H, W = X_toy.shape

In [41]:
# フィルターサイズ抽出

H_out, W_out = calc_kernel_2d(H, W)
H_out, W_out

(2, 2)

In [42]:
# 出力サイズ

out = np.zeros((2, 1, 2, 2)) # N, C, H_out, W_out
out

array([[[[0., 0.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.]]]])

In [43]:
# outの0行0列目のセル
out[0, 0, 0, 0] += X_toy[0, 0, 0 : 0+PFH, 0 : 0+PFW].max()
out

array([[[[8., 0.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.]]]])

In [44]:
# outの0行1列目のセル（Pool域はstride分飛ぶ）
out[0, 0, 0, 1] += X_toy[0, 0, 0 : 0+PFH, 2 : 2+PFW].max()
out

array([[[[8., 7.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.]]]])

In [45]:
# outの1行0列目のセル
out[0, 0, 1, 0] += X_toy[0, 0, 2 : 2+PFH, 0 : 0+PFW].max()
out

array([[[[8., 7.],
         [7., 0.]]],


       [[[0., 0.],
         [0., 0.]]]])

In [46]:
# outの1行1列目のセル
out[0, 0, 1, 1] += X_toy[0, 0, 2 : 2+PFH, 2 : 2+PFW].max()
out

array([[[[8., 7.],
         [7., 3.]]],


       [[[0., 0.],
         [0., 0.]]]])

In [47]:
out = np.zeros((2, 1, 2, 2)) # N, C, H_out, W_out

for n in range(N):
    for c in range(C):
        for h in range(H_out): 
            for w in range(W_out):
                out[n, c, h, w] += X_toy[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW].max()
                
out.shape, out

((2, 1, 2, 2),
 array([[[[8., 7.],
          [7., 3.]]],
 
 
        [[[5., 8.],
          [7., 8.]]]]))

### 4-1-2. トイデータによる最大値の位置情報埋め込み特徴マップの生成

In [48]:
#特徴マップの空箱を生成

out_map = np.zeros((X_toy.shape))
out_map

array([[[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]],


       [[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]])

In [49]:
# 特徴マップに最大値の位置情報を{0, 1}で残す

out_map[0, 0, 0 : 0+PFH, 0 : 0+PFW] += X_toy[0, 0, 0 : 0+PFH, 0 : 0+PFW].max()==X_toy[0, 0, 0 : 0+PFH, 0 : 0+PFW]
out_map

array([[[[0., 1., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]],


       [[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]])

In [50]:
out_map[0, 0, 0 : 0+PFH, 2 : 2+PFW] += X_toy[0, 0, 0 : 0+PFH, 2 : 2+PFW].max()==X_toy[0, 0, 0 : 0+PFH, 2 : 2+PFW]
out_map

array([[[[0., 1., 0., 1.],
         [0., 0., 1., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]],


       [[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]])

In [51]:
out_map[0, 0, 2 : 2+PFH, 0 : 0+PFW] += X_toy[0, 0, 2 : 2+PFH, 0 : 0+PFW].max()==X_toy[0, 0, 2 : 2+PFH, 0 : 0+PFW]
out_map

array([[[[0., 1., 0., 1.],
         [0., 0., 1., 0.],
         [1., 0., 0., 0.],
         [0., 0., 0., 0.]]],


       [[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]])

In [52]:
out_map[0, 0, 2 : 2+PFH, 2 : 2+PFW] += X_toy[0, 0, 2 : 2+PFH, 2 : 2+PFW].max()==X_toy[0, 0, 2 : 2+PFH, 2 : 2+PFW]
out_map

array([[[[0., 1., 0., 1.],
         [0., 0., 1., 0.],
         [1., 0., 0., 1.],
         [0., 0., 0., 0.]]],


       [[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]])

In [53]:
# 処理を統合

N = 2
C = 1
H_out = 2
W_out = 2

out = np.zeros((N, C, H_out, W_out))
out_map = np.zeros((X_toy.shape))

for n in range(N):
    for c in range(C):
        for h in range(H_out): 
            for w in range(W_out):
                out[n, c, h, w] += X_toy[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW].max()
                out_map[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW] += X_toy[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW].max()==X_toy[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW]
                
print(out)
print(out_map)

[[[[8. 7.]
   [7. 3.]]]


 [[[5. 8.]
   [7. 8.]]]]
[[[[0. 1. 0. 1.]
   [0. 0. 1. 0.]
   [1. 0. 0. 1.]
   [0. 0. 0. 0.]]]


 [[[0. 1. 1. 0.]
   [0. 0. 0. 0.]
   [0. 1. 0. 0.]
   [0. 0. 0. 1.]]]]


### 4-1-3. トイデータによるbackward

In [54]:
dA = out
dA

array([[[[8., 7.],
         [7., 3.]]],


       [[[5., 8.],
         [7., 8.]]]])

In [55]:
out_map

array([[[[0., 1., 0., 1.],
         [0., 0., 1., 0.],
         [1., 0., 0., 1.],
         [0., 0., 0., 0.]]],


       [[[0., 1., 1., 0.],
         [0., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 0., 1.]]]])

In [56]:
# pooling_backward の出力の箱を用意しておく
dZ = np.zeros((out_map.shape))
dZ

array([[[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]],


       [[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]])

In [57]:
# np.where で特徴マップの要素が１の部分を、誤差の値で置換する

dZ[0, 0, 0:0+PFH, 0:0+PFW] += np.where(out_map[0, 0, 0:0+PFH, 0:0+PFW]==1, out[0, 0, 0, 0], 0)
dZ

array([[[[0., 8., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]],


       [[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]])

In [58]:
dZ[0, 0, 0:0+PFH, 2:2+PFW] += np.where(out_map[0, 0, 0:0+PFH, 2:2+PFW]==1, out[0, 0, 0, 1], 0)
dZ

array([[[[0., 8., 0., 7.],
         [0., 0., 7., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]],


       [[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]])

In [59]:
dZ[0, 0, 2:2+PFH, 0:0+PFW] += np.where(out_map[0, 0, 2:2+PFH, 0:0+PFW]==1, out[0, 0, 1, 0], 0)
dZ

array([[[[0., 8., 0., 7.],
         [0., 0., 7., 0.],
         [7., 0., 0., 0.],
         [0., 0., 0., 0.]]],


       [[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]])

In [60]:
dZ[0, 0, 2:2+PFH, 2:2+PFW] += np.where(out_map[0, 0, 2:2+PFH, 2:2+PFW]==1, out[0, 0, 1, 1], 0)
dZ

array([[[[0., 8., 0., 7.],
         [0., 0., 7., 0.],
         [7., 0., 0., 3.],
         [0., 0., 0., 0.]]],


       [[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]])

In [61]:
# pooling_backward

dZ = np.zeros((out_map.shape))

for n in range(N):
    for c in range(C):
        for h in range(H_out):
            for w in range(W_out):
                dZ[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW] += np.where(out_map[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW]==1, out[n, c, h, w], 0)
                
dZ

array([[[[0., 8., 0., 7.],
         [0., 0., 7., 0.],
         [7., 0., 0., 3.],
         [0., 0., 0., 0.]]],


       [[[0., 5., 8., 0.],
         [0., 0., 0., 0.],
         [0., 7., 0., 0.],
         [0., 0., 0., 8.]]]])

### 4-2. max poolingのクラス化

In [62]:
class MaxPool2d():
    
    """
    ノード数n_nodes1からn_nodes2への全結合層
    Parameters
    ----------
    n_nodes1 : int
      前の層のノード数
    n_nodes2 : int
      後の層のノード数
    initializer : 初期化方法のインスタンス
    optimizer : 最適化手法のインスタンス
    """
    
    def __init__(self, stride, PFH, PFW):
        
        self.X = None
        
        self.N = None #入力サンプル数
        self.C = None # 入力チャネル数
        self.H = None #高さ方向の特徴量
        self.W = None #幅方向の特徴量
        
        self.O = None # 出力チャネル数
        self.PFH = PFH #フィルタ高さ
        self.PFW = PFW # フィルタ幅
        
        self.stride = stride
        
        self.H_out = None
        self.W_out = None
        
        self.out_map = None
        
        
    def calc_kernel_2d(H, W):
    
        H_out = (H + 2*pad - FH) //  stride + 1
        W_out = (W + 2*pad - FW) //  stride + 1
        return H_out, W_out

    
    def pool_forward(self, X):
        """
        フォワード
        
        Parameters
        ----------
        X : 次の形のndarray, shape (batch_size, n_nodes1)
            入力
            
        Returns
        ----------
        A : 次の形のndarray, shape (batch_size, n_nodes2)
            出力
        """
        # ストライド
        stride = self.stride
               
        # 入力の型
        N, C, H, W = X.shape
        
        self.N = N #コンストラクタに値を保存
        self.C = C
        self.H = H 
        self.W = W
        
        self.X = X
        
        # 出力サイズ
        H_out, W_out = calc_kernel_2d(H, W)
        
        self.H_out = H_out 
        self.W_out = W_out
        
        out = np.zeros((N, C, H_out, W_out))
        out_map = np.zeros((X_toy.shape))

        for n in range(N):
            for c in range(C):
                for h in range(H_out): 
                    for w in range(W_out):
                        out[n, c, h, w] += X_toy[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW].max()
                        out_map[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW] += X_toy[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW].max()==X_toy[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW]
        
        # コンストラクタに位置情報のマップを保存
        self.out_map = out_map 
        
        return out
    
    
    def pool_backward(self, dA):
        
        """
        バックワード
        
        Parameters
        ----------
        dA : 次の形のndarray, shape (batch_size, n_nodes2)
            後ろから流れてきた勾配
            
        Returns
        ----------
        dZ : 次の形のndarray, shape (batch_size, n_nodes1)
            前に流す勾配
            
        """
        
        N = self.N #入力サンプル数
        C = self.C # 入力チャネル数
        H = self.H #高さ方向の特徴量
        W = self.W #幅方向の特徴量
        
        #O = self.O # 出力チャネル数
        PFH = self.PFH #フィルタ高さ
        PFW = self.PFW # フィルタ幅
        
        H_out = self.H_out
        W_out = self.W_out
        
        stride = self.stride
        
        out_map = self.out_map
        
        dZ = np.zeros((out_map.shape))

        for n in range(N):
            for c in range(C):
                for h in range(H_out):
                    for w in range(W_out):
                        dZ[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW] += np.where(out_map[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW]==1, out[n, c, h, w], 0)
        
        return dZ

In [63]:
# MaxPool2d クラスのインスタンス生成

pool2d = MaxPool2d(stride=2, PFH=2, PFW=2)

In [64]:
# forward (strideが2, フィルタサイズが2×2、入力のサイズが４×４) 

out = pool2d.pool_forward(X_toy)
out

array([[[[8., 7.],
         [7., 3.]]],


       [[[5., 8.],
         [7., 8.]]]])

In [65]:
# backward

pool2d.pool_backward(out)

array([[[[0., 8., 0., 7.],
         [0., 0., 7., 0.],
         [7., 0., 0., 3.],
         [0., 0., 0., 0.]]],


       [[[0., 5., 8., 0.],
         [0., 0., 0., 0.],
         [0., 7., 0., 0.],
         [0., 0., 0., 8.]]]])

## 【問題6】平滑化

In [66]:
X_toy.shape, X_toy # N, C, H, W

((2, 1, 4, 4),
 array([[[[5, 8, 2, 7],
          [5, 3, 7, 1],
          [7, 3, 2, 3],
          [5, 4, 2, 2]]],
 
 
        [[[4, 5, 8, 2],
          [3, 4, 4, 5],
          [4, 7, 5, 2],
          [1, 5, 3, 8]]]]))

In [67]:
# backward用の型を保存

N, C, H, W = X_toy.shape

In [68]:
# forward

out = X_toy.reshape(2, -1) #N, C+H+W
out

array([[5, 8, 2, 7, 5, 3, 7, 1, 7, 3, 2, 3, 5, 4, 2, 2],
       [4, 5, 8, 2, 3, 4, 4, 5, 4, 7, 5, 2, 1, 5, 3, 8]])

In [69]:
# backward

dout = out.reshape(N, C, H, W)
dout

array([[[[5, 8, 2, 7],
         [5, 3, 7, 1],
         [7, 3, 2, 3],
         [5, 4, 2, 2]]],


       [[[4, 5, 8, 2],
         [3, 4, 4, 5],
         [4, 7, 5, 2],
         [1, 5, 3, 8]]]])

In [70]:
class Flatten():
    
    def __init__(self):
        
        self.N = None
        self.C = None
        self.H = None
        self.W = None

        
    def flat_forward(self, X):
        
        self.N, self.C, self.H, self.W = X_toy.shape
        out = X_toy.reshape(N, -1)
        return out
    
    
    def flat_backward(self, dA):
        
        dout = dA.reshape(self.N, self.C, self.H, self.W)
        return dout

In [71]:
flat = Flatten()

In [72]:
# forward

out = flat.flat_forward(X_toy)
out

array([[5, 8, 2, 7, 5, 3, 7, 1, 7, 3, 2, 3, 5, 4, 2, 2],
       [4, 5, 8, 2, 3, 4, 4, 5, 4, 7, 5, 2, 1, 5, 3, 8]])

In [73]:
# backward

flat.flat_backward(out)

array([[[[5, 8, 2, 7],
         [5, 3, 7, 1],
         [7, 3, 2, 3],
         [5, 4, 2, 2]]],


       [[[4, 5, 8, 2],
         [3, 4, 4, 5],
         [4, 7, 5, 2],
         [1, 5, 3, 8]]]])

## 【問題7】学習と推定

## Data & Util

In [74]:
#from keras.datasets import mnist
from tensorflow import keras

#(X_train, y_train), (X_test, y_test) = mnist.load_data()
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

In [75]:
X_train = X_train[:, np.newaxis, :, :] #NCHW
X_train.shape

(60000, 1, 28, 28)

In [76]:
# 前処理

#X_train = X_train.astype(np.float)
#X_test = X_test.astype(np.float)
#X_train /= 255
#X_test /= 255

In [77]:
# 8:2にtrain_test_split

from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)

In [78]:
class GetMiniBatch:
    
    """
    ミニバッチを取得するイテレータ
    
    Parameters
    ----------
    X : 次の形のndarray, shape (n_samples, n_features)
      訓練データ
    y : 次の形のndarray, shape (n_samples, 1)
      正解値
    batch_size : int
      バッチサイズ
    seed : int
      NumPyの乱数のシード
      
    """
    
    def __init__(self, X, y, batch_size = 20, seed=0):
        
        self.batch_size = batch_size
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self._X = X[shuffle_index]
        self._y = y[shuffle_index]
        self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)
        
        
    def __len__(self):
        return self._stop
    
    
    def __getitem__(self,item):
        p0 = item*self.batch_size
        p1 = item*self.batch_size + self.batch_size
        return self._X[p0:p1], self._y[p0:p1]
    
    
    def __iter__(self):
        self._counter = 0
        return self
    
    
    def __next__(self):
        
        if self._counter >= self._stop:
            raise StopIteration()
            
        p0 = self._counter*self.batch_size
        p1 = self._counter*self.batch_size + self.batch_size
        self._counter += 1
        return self._X[p0:p1], self._y[p0:p1]

## Layer

In [79]:
# 全結合層

class FC:
    
    """
    ノード数n_nodes1からn_nodes2への全結合層
    Parameters
    ----------
    n_nodes1 : int
      前の層のノード数
    n_nodes2 : int
      後の層のノード数
    initializer : 初期化方法のインスタンス
    optimizer : 最適化手法のインスタンス
    """
    
    def __init__(self, n_feature, n_output, initializer, optimizer):
        
        self.optimizer = optimizer
        
        self.W = initializer.W(n_feature, n_output)
        self.B = initializer.B(n_output)
       
        self.X = None
        self.dZ = None
        
    
    def forward(self, X):
        """
        フォワード
        
        Parameters
        ----------
        X : 次の形のndarray, shape (batch_size, n_nodes1)
            入力
            
        Returns
        ----------
        A : 次の形のndarray, shape (batch_size, n_nodes2)
            出力
        """
        
        self.X = X
        
        # アフィン変換
        A = X.dot(self.W) + self.B

        return A
    
    
    def backward(self, dA):
        
        """
        バックワード
        
        Parameters
        ----------
        dA : 次の形のndarray, shape (batch_size, n_nodes2)
            後ろから流れてきた勾配
            
        Returns
        ----------
        dZ : 次の形のndarray, shape (batch_size, n_nodes1)
            前に流す勾配
            
        """
        # 前のノードに流す勾配
        self.dZ = np.dot(dA, self.W.T)
        
        # バイアスに流す勾配
        self.dB = np.sum(dA, axis=0)
        
        # 重み行列に流す勾配
        self.dW = np.dot(self.X.T, dA)
        
        # 更新
        self.optimizer.update(self)
        
        return self.dZ

In [80]:
# 入出力チャネル数が同一の場合

class Conv2d():
    
    """
    ノード数n_nodes1からn_nodes2への全結合層
    Parameters
    ----------
    n_nodes1 : int
      前の層のノード数
    n_nodes2 : int
      後の層のノード数
    initializer : 初期化方法のインスタンス
    optimizer : 最適化手法のインスタンス
    """
    
    def __init__(self, initializer, optimizer, N, C, FH=3, FW=3, stride=1, pad=1, lr = 0.001):
        
        self.optimizer = optimizer
        
        #forward
        self.X = None
        
        self.N = None #入力サンプル数
        self.C = None # 入力チャネル数
        self.H = None #高さ方向の特徴量
        self.W = None #幅方向の特徴量
        
        self.O = None # 出力チャネル数
        self.FH = FH #フィルタ高さ
        self.FW = FW # フィルタ幅
        
        self.initializer = initializer 
        self.F = self.initializer.W(N, C, FH, FW)
        self.B = self.initializer.B(C)
        
        self.pad = pad
        self.stride = stride
        
        self.H_out = None
        self.W_out = None
        
        self.delta_f = None
        self.delta_x = None
        self.delta_b = None
        
        self.lr = lr
    
    
    def conv_forward(self, X):
        """
        フォワード
        
        Parameters
        ----------
        X : 次の形のndarray, shape (batch_size, n_nodes1)
            入力
            
        Returns
        ----------
        A : 次の形のndarray, shape (batch_size, n_nodes2)
            出力
        """
        # ストライド・パディング
        stride = self.stride
        pad = self.pad
               
        # 入力にパディング
        X = np.pad(X, [(0, 0), (0, 0),  (pad, pad), (pad, pad)])
        
        # 入力の型を取る
        N, C, H, W = X.shape
        
        self.N = N
        self.C = C
        self.H = H #pad後
        self.W = W #pad後
        
        self.X = X
        
        # 重みの型を取る
        N, C, FH, FW = self.F.shape
        
        F = self.F
        B = self.B

        # 出力サイズ
        H_out = (H - FH) //  stride + 1
        W_out = (W - FW) //  stride + 1
        
        self.H_out = H_out #backward用
        self.W_out = W_out #backward用
        
        out = np.zeros((N, C, H_out, W_out))

        for n in range(N):
            for c in range(C):
                for h in range(H_out): 
                    for w in range(W_out):
                        out[n, c, h, w] += (X[n, c, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW] * F[n, c, :, :]).sum()
                    out[n, c, :, :] += B
    
        return out
    
    
    def conv_backward(self, dA):
        
        """
        バックワード
        
        Parameters
        ----------
        dA : 次の形のndarray, shape (batch_size, n_nodes2)
            後ろから流れてきた勾配
            
        Returns
        ----------
        dZ : 次の形のndarray, shape (batch_size, n_nodes1)
            前に流す勾配
            
        """
        
        N = self.N #入力サンプル数
        C = self.C # 入力チャネル数
        H = self.H #高さ方向の特徴量
        W = self.W #幅方向の特徴量
        
        O = self.O # 出力チャネル数
        FH = self.FH #フィルタ高さ
        FW = self.FW # フィルタ幅
        
        H_out = self.H_out
        W_out = self.W_out
        
        # ストライド・パディング
        stride = self.stride
        pad = self.pad
        
        X = self.X
        
        # delta_x
        delta_x = np.zeros((self.X.shape))
        F = self.F
        
        for n in range(N):
            for c in range(C):
                for h in range(H_out): 
                    for w in range(W_out):
                        delta_x[n, c, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW] += dA[n, c, h, w] * F[n, c, :, :]
        
        delta_x = delta_x[:, :, pad : -pad, pad : -pad]
        self.delta_x = delta_x
        

        # delta_b
        delta_b = np.zeros((self.B.shape))

        for n in range(N):
            for c in range(C):
                for h in range(H_out): 
                    for w in range(W_out):
                        delta_b[c] += dA[n, c, h, w].sum()   
                        
        self.delta_b = delta_b

        
        # delta_f
        delta_f = np.zeros((self.F.shape))
        
        for n in range(N):
            for c in range(C):
                for h in range(H_out): 
                    for w in range(W_out):
                        delta_f[n, c, :, :] += dA[n, c, h, w] * X[n, c, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW]
                        
        self.delta_f = delta_f
                        
        
        # optimizerに流す前にデータ型を揃える
        self.delta_f = delta_f.astype(np.float64)
        self.delta_b = delta_b.astype(np.float64)
        
        # 更新
        self.F -= self.lr * delta_f
        self.B -= self.lr * delta_b
        #self.optimizer.update(self)
        
        #print("self.F" + str(self.F))
        #print("self.B" + str(self.B))
        
        return delta_x

In [81]:
# Max Pooling層

class MaxPool2d():
    
    """
    ノード数n_nodes1からn_nodes2への全結合層
    Parameters
    ----------
    n_nodes1 : int
      前の層のノード数
    n_nodes2 : int
      後の層のノード数
    initializer : 初期化方法のインスタンス
    optimizer : 最適化手法のインスタンス
    """
    
    def __init__(self, stride, PFH, PFW):
        
        self.X = None
        
        self.N = None #入力サンプル数
        self.C = None # 入力チャネル数
        self.H = None #高さ方向の特徴量
        self.W = None #幅方向の特徴量
        
        self.O = None # 出力チャネル数
        self.PFH = PFH #フィルタ高さ
        self.PFW = PFW # フィルタ幅
        
        self.stride = stride
        
        self.H_out = None
        self.W_out = None
        
        self.out_map = None
        
        
    def calc_kernel_2d(self, H, W):
    
        H_out = (H + 2*pad - FH) //  stride + 1
        W_out = (W + 2*pad - FW) //  stride + 1
        return H_out, W_out

    
    def pool_forward(self, X):
        """
        フォワード
        
        Parameters
        ----------
        X : 次の形のndarray, shape (batch_size, n_nodes1)
            入力
            
        Returns
        ----------
        A : 次の形のndarray, shape (batch_size, n_nodes2)
            出力
        """
        # ストライド
        stride = self.stride
               
        # 入力の型
        N, C, H, W = X.shape
        
        self.N = N #コンストラクタに値を保存
        self.C = C
        self.H = H 
        self.W = W
        
        self.X = X
        
        # 出力サイズ
        H_out, W_out = self.calc_kernel_2d(H, W)
        
        self.H_out = H_out 
        self.W_out = W_out
        
        out = np.zeros([N, C, H_out, W_out])
        out_map = np.zeros((X.shape))


        for n in range(N):
            for c in range(C):
                for h in range(H_out): 
                    for w in range(W_out):
                        out[n, c, h, w] += np.max(X[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW])
                        out_map[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW] += X[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW].max()==X[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW]
        
        # コンストラクタに位置情報のマップを保存
        self.out_map = out_map 
        
        return out
    
    
    def pool_backward(self, dA):
        
        """
        バックワード
        
        Parameters
        ----------
        dA : 次の形のndarray, shape (batch_size, n_nodes2)
            後ろから流れてきた勾配
            
        Returns
        ----------
        dZ : 次の形のndarray, shape (batch_size, n_nodes1)
            前に流す勾配
            
        """
        
        N = self.N #入力サンプル数
        C = self.C # 入力チャネル数
        H = self.H #高さ方向の特徴量
        W = self.W #幅方向の特徴量
        
        PFH = self.PFH #フィルタ高さ
        PFW = self.PFW # フィルタ幅
        
        H_out = self.H_out
        W_out = self.W_out
        
        stride = self.stride
        
        out_map = self.out_map
        
        dZ = np.zeros((out_map.shape))

        for n in range(N):
            for c in range(C):
                for h in range(H_out):
                    for w in range(W_out):
                        dZ[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW] += np.where(out_map[n, c, h*stride : h*stride+PFH, w*stride : w*stride+PFW]==1, dA[n, c, h, w], 0)
        
        return dZ

In [82]:
# 平滑化層

class Flatten():
    
    def __init__(self):
        
        self.N = None
        self.C = None
        self.H = None
        self.W = None

        
    def flat_forward(self, X):
        
        self.N, self.C, self.H, self.W = X.shape
        out = X.reshape(self.N, -1)
        return out
    
    
    def flat_backward(self, dA):
        
        dout = dA.reshape(self.N, self.C, self.H, self.W)
        return dout

## Activation

In [83]:
class Tanh:
         
    def forward(self, A):

        self.Z = (np.exp(A) - np.exp(-A)) / (np.exp(A) + np.exp(-A))
        return self.Z
    
    
    def backward(self, dZ):
        
        dA = dZ*(1 - self.Z)**2
        return dA

In [84]:
class Softmax:
    
    def forward(self, A):
        
        A = A - np.max(A, axis=1, keepdims=True)
        Z = np.exp(A) / np.sum(np.exp(A), axis=1, keepdims=True)
        return Z
        
        
    def backward(self, dZ, Y):
        
        batch_size = dZ.shape[0]
        dA = (dZ - Y) / batch_size
        
        loss = (-1)*np.sum(Y * np.log(dZ + 1e-7))  / batch_size
        
        
        return dA, loss

In [85]:
class ReLU:
    
    def forward(self, A):

        self.A = A
        Z = np.maximum(0, A)
        return Z
        
    
    def backward(self, dZ):
        
        dA = np.where(self.A>0, dZ, 0)
        return dA

## Initializer

In [86]:
class SimpleInitializer():
    
    """
    ガウス分布によるシンプルな初期化
    Parameters
    ----------
    sigma : float
      ガウス分布の標準偏差
    """
    
    def __init__(self, sigma=0.01):
        
        self.sigma = sigma
        
        
    def W(self, n_nodes1, n_nodes2):
        
        """
        重みの初期化
        
        Parameters
        ----------
        n_nodes1 : int
          前の層のノード数
        n_nodes2 : int
          後の層のノード数
          
        Returns
        ----------
        W :float
            重みの初期値
            
        """
        
        W = self.sigma * np.random.randn(n_nodes1, n_nodes2)
        
        return W
    
    
    def B(self, n_nodes2):
        
        """
        バイアスの初期化
        
        Parameters
        ----------
        n_nodes2 : int
          後の層のノード数
          
        Returns
        ----------
        B :float
            バイアスの初期値
            
        """
        
        B = self.sigma * np.random.randn(n_nodes2)
        
        return B

In [87]:
class ConvInitializer():
    
    """
    ガウス分布によるシンプルな初期化
    Parameters
    ----------
    sigma : float
      ガウス分布の標準偏差
    """
    
    def __init__(self, sigma=0.01):
        
        self.sigma = sigma
        
        
    def W(self, N, C, FH, FW):
        
        """
        重みの初期化
        
        Parameters
        ----------
        n_nodes1 : int
          前の層のノード数
        n_nodes2 : int
          後の層のノード数
          
        Returns
        ----------
        W :float
            重みの初期値
            
        """
        
        W = self.sigma * np.random.randn(N, C, FH, FW)
        
        return W
    
    
    def B(self, N):
        
        """
        バイアスの初期化
        
        Parameters
        ----------
        n_nodes2 : int
          後の層のノード数
          
        Returns
        ----------
        B :float
            バイアスの初期値
            
        """
        
        B = self.sigma * np.random.randn(N)
        
        return B

## Optimizer

In [88]:
class SGD():
    
    """
    確率的勾配降下法
    
    Parameters
    ----------
    lr : 学習率
    
    """
    
    def __init__(self, lr):
        
        self.lr = lr
        
        
    def update(self, layer):
        
        """
        ある層の重みやバイアスの更新
        
        Parameters
        ----------
        layer : 更新前の層のインスタンス
        
        """

        layer.W -= self.lr * layer.dW
        layer.B -= self.lr * layer.dB

## Trainer

In [89]:
class ScratchConvolutionalNeuralNetrowkClassifier():
    
    """
    任意の構成で学習と推定が行えるCNNクラス
    
    """
    
    def __init__(self, fc_initializer, conv_initializer, optimizer, activator1, activator2, lr = 0.01,  batch_size = 20, n_features = 49, n_output = 10, sigma=0.01, n_epochs=20, verbose = True):
        
        self.verbose = verbose
        
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.n_features = n_features
        self.n_output = n_output
        
        self.Conv1 = None
        self.Conv2 = None
        
        self.Pool1 = None
        self.Pool2 = None
        
        self.Flat = None
        self.FC = None
        
        self.fc_initializer = fc_initializer
        self.conv_initializer = conv_initializer
        
        self.optimizer = optimizer
        self.lr = lr
        
        self.activator1 = activator1
        self.activator2 = activator2   
        
        self.sigma = sigma
        
        self.loss = None
        self.val_loss = None
        
        self.loss_hist = []
        self.val_loss_hist = []
        
        
    # 訓練時に１エポック分の処理を行う関数
    def forward_and_backward(self, X, y):
        
        # one-hot変換
        Y = np.identity(10)[y]
        
        
        # サンプルコード2 : フォワード
        A1 = self.Conv1.conv_forward(X)
        #print("A1"+ str(A1.shape))
        
        Z1 = self.activation1.forward(A1)
        #print("Z1" + str(Z1.shape))
        
        P1 = self.Pool1.pool_forward(Z1)
        #print("P1" + str(P1.shape))
        
        A2 = self.Conv2.conv_forward(P1)
        #print("A2" + str(A2.shape))
        
        Z2 = self.activation2.forward(A2)
        #print("Z2"+str(Z2.shape))
        
        P2 = self.Pool2.pool_forward(Z2)
        #print("P2"+str(P2.shape))
        
        F1 = self.Flat.flat_forward(P2)
        #print("F1"+str(F1.shape))
        
        A3 = self.FC.forward(F1)
        #print("A3"+str(A3.shape))
        
        Z3 = self.activation3.forward(A3)        
        #print("Z3"+str(Z3.shape))
        
    
        # サンプルコード3 : バックワード
        dA3, self.loss = self.activation3.backward(Z3, Y) # 交差エントロピー誤差とソフトマックスを合わせている
        #print("dA3"+str(dA3.shape))
            
        dZ2 = self.FC.backward(dA3)
        #print("dZ2"+str(dZ2.shape))
        
        dF1 = self.Flat.flat_backward(dZ2)
        #print("dF1"+str(dF1.shape))
        
        dP2 = self.Pool2.pool_backward(dF1)
        #print("dP2"+str(dP2.shape))
        
        dA2 = self.activation2.backward(dP2)
        #print("dA2"+str(dA2.shape))
        
        dZ1 = self.Conv2.conv_backward(dA2)
        #print("dZ1"+str(dZ1.shape))
        
        dP1 = self.Pool1.pool_backward(dZ1)
        #print("dP1" + str(dP1.shape))
        
        dA1 = self.activation1.backward(dP1)
        #print("dA1"+str(dA1.shape))
        
        dZ0 = self.Conv1.conv_backward(dA1) # dZ0は使用しない
        #print("dZ0" + str(dZ0.shape))
    
    
    # このエポックで更新した重みを使って検証する関数
    def forward_with_loss(self, X, y):
        
        Y = np.identity(10)[y]
        
        # サンプルコード2 : フォワード
        A1 = self.Conv1.conv_forward(X)
        #print("A1"+ str(A1.shape))
        
        Z1 = self.activation1.forward(A1)
        #print("Z1" + str(Z1.shape))
        
        P1 = self.Pool1.pool_forward(Z1)
        #print("P1" + str(P1.shape))
        
        A2 = self.Conv2.conv_forward(P1)
        #print("A2" + str(A2.shape))
        
        Z2 = self.activation2.forward(A2)
        #print("Z2"+str(Z2.shape))
        
        P2 = self.Pool2.pool_forward(Z2)
        #print("P2"+str(P2.shape))
        
        F1 = self.Flat.flat_forward(P2)
        #print("F1"+str(F1.shape))
        
        A3 = self.FC.forward(F1)
        #print("A3"+str(A3.shape))
        
        Z3 = self.activation3.forward(A3)        
        #print("Z3"+str(Z3.shape))
        
        dA3, self.val_loss = self.activation3.backward(Z3, Y) 
        
        
    
    def fit(self, X, y, X_val=None, y_val=None):
        
        """
        self.sigma : ガウス分布の標準偏差
        self.lr : 学習率
        self.n_nodes1 : 1層目のノード数
        self.n_nodes2 : 2層目のノード数
        self.n_output : 出力層のノード数
        """
        #initializer, optimizer, lr = 0.001, FH=3, FW=3, stride=1, pad=1
        self.Conv1 = Conv2d(self.conv_initializer(self.sigma), self.optimizer(self.lr), N=20, C=1, FH=3, FW=3, stride=1, pad=1)
        self.activation1 = self.activator1
    
        self.Pool1 = MaxPool2d(stride=2, PFH=2, PFW=2)
        
        self.Conv2 = Conv2d(self.conv_initializer(self.sigma), self.optimizer(self.lr), N=20, C=1, FH=3, FW=3, stride=1, pad=1)
        self.activation2 = self.activator2
        
        self.Pool2 = MaxPool2d(stride=2, PFH=2, PFW=2)
        
        self.Flat = Flatten()
        
        self.FC = FC(self.n_features, self.n_output, self.fc_initializer(self.sigma), self.optimizer(self.lr))
        self.activation3 = Softmax()
        
        
        # 訓練データのミニバッチを生成
        get_mini_batch_train = GetMiniBatch(X, y, batch_size=self.batch_size)
        
        # 検証データのミニバッチを生成
        get_mini_batch_val = GetMiniBatch(X_val, y_val, batch_size=self.batch_size)
        
        
        for epoch in range(self.n_epochs):
            
            for mini_X_train, mini_y_train in get_mini_batch_train:
                
                # 順伝播・クロスエントロピー誤差・逆伝播
                self.forward_and_backward(mini_X_train, mini_y_train)
                
            self.loss_hist.append(self.loss)
            
            # 検証
            mini_X_val, mini_y_val = get_mini_batch_val[0]
            self.forward_with_loss(mini_X_val, mini_y_val)
            self.val_loss_hist.append(self.val_loss)
        
            
            if self.verbose:

                print('#'*25)
                print('### Epoch %i'%(epoch+1))
                print('#'*25)
                
                print("訓練データの損失 : {}".format(self.loss))
                print("検証データの損失 : {}".format(self.val_loss))
                
                print()
                
                
    
    def predict(self, X):
        
        preds = []
        
        for i in range(int(X.shape[0] // self.batch_size)):
            
            X_batch = X[i*self.batch_size : i*self.batch_size + self.batch_size]     
        
            # サンプルコード2 : フォワード
            A1 = self.Conv1.conv_forward(X_batch)
            #print("A1"+ str(A1.shape))

            Z1 = self.activation1.forward(A1)
            #print("Z1" + str(Z1.shape))

            P1 = self.Pool1.pool_forward(Z1)
            #print("P1" + str(P1.shape))

            A2 = self.Conv2.conv_forward(P1)
            #print("A2" + str(A2.shape))

            Z2 = self.activation2.forward(A2)
            #print("Z2"+str(Z2.shape))

            P2 = self.Pool2.pool_forward(Z2)
            #print("P2"+str(P2.shape))

            F1 = self.Flat.flat_forward(P2)
            #print("F1"+str(F1.shape))

            A3 = self.FC.forward(F1)
            #print("A3"+str(A3.shape))

            Z3 = self.activation3.forward(A3)        
            #print("Z3"+str(Z3.shape))

            pred = Z3.argmax(axis=1)
            preds.append(pred)
            
        out = np.array(preds)
        out = out.ravel()
        
        return out

In [90]:
cnn = ScratchConvolutionalNeuralNetrowkClassifier(fc_initializer=SimpleInitializer,
                                                                                conv_initializer = ConvInitializer,
                                                                                optimizer = SGD,
                                                                                lr=0.001, 
                                                                                sigma=0.01, 
                                                                                activator1=ReLU(), 
                                                                                activator2=ReLU(), 
                                                                                n_epochs=10,
                                                                                verbose = True
                                                                                )

In [91]:
%%time

cnn.fit(X_train, y_train, X_val, y_val)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)


KeyboardInterrupt: 

In [None]:
cnn.val_loss

0.0853051847172703

In [None]:
X_val.shape

(12000, 1, 28, 28)

In [None]:
X_test.shape

(10000, 28, 28)

In [None]:
X_test2 = X_test[:, np.newaxis, :, :]
X_test2.shape

(10000, 1, 28, 28)

In [None]:
y_pred = cnn.predict(X_test2)
len(y_pred), y_pred

(10000, array([7, 2, 1, ..., 4, 5, 6]))

In [None]:
len(y_test), y_test

(10000, array([7, 2, 1, ..., 4, 5, 6], dtype=uint8))

In [None]:
# Accuracy

from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
accuracy

0.9015

## LeNet

In [None]:
# LeNetの入力チャネルと出力チャネルの変化に対応

class ConvInitializer():
    
    """
    ガウス分布によるシンプルな初期化
    Parameters
    ----------
    sigma : float
      ガウス分布の標準偏差
    """
    
    def __init__(self, sigma=0.01):
        
        self.sigma = sigma
        
        
    def W(self, N, O, C, FH, FW):
        
        """
        重みの初期化
        
        Parameters
        ----------
        n_nodes1 : int
          前の層のノード数
        n_nodes2 : int
          後の層のノード数
          
        Returns
        ----------
        W :float
            重みの初期値
            
        """
        
        W = self.sigma * np.random.randn(N, O, C, FH, FW)
        
        return W
    
    
    def B(self, N, O):
        
        """
        バイアスの初期化
        
        Parameters
        ----------
        n_nodes2 : int
          後の層のノード数
          
        Returns
        ----------
        B :float
            バイアスの初期値
            
        """
        
        B = self.sigma * np.random.randn(N, O)
        
        return B

In [None]:
# LeNetの入力チャネルと出力チャネルの変化に対応

class Conv2d():
    
    """
    ノード数n_nodes1からn_nodes2への全結合層
    Parameters
    ----------
    n_nodes1 : int
      前の層のノード数
    n_nodes2 : int
      後の層のノード数
    initializer : 初期化方法のインスタンス
    optimizer : 最適化手法のインスタンス
    """
    
    def __init__(self, initializer, N, O, C, FH=3, FW=3, stride=1, pad=1, lr = 0.01):
        
        #self.optimizer = optimizer
        
        #forward
        self.X = None
        
        self.N = N
        self.O = O
        self.C = C
        self.H = None #高さ方向の特徴量
        self.W = None #幅方向の特徴量
        
        self.FH = FH #フィルタ高さ
        self.FW = FW # フィルタ幅
        
        self.initializer = initializer 
        self.F = self.initializer.W(N, O, C, FH, FW)
        self.B = self.initializer.B(N, O)
        
        self.pad = pad
        self.stride = stride
        
        self.H_out = None
        self.W_out = None
        
        # self.dW = None
        # self.dB = None
        self.delta_f = None
        self.delta_x = None
        self.delta_b = None
        
        self.lr = lr
    
    
    def conv_forward(self, X):
        """
        フォワード
        
        Parameters
        ----------
        X : 次の形のndarray, shape (batch_size, n_nodes1)
            入力
            
        Returns
        ----------
        A : 次の形のndarray, shape (batch_size, n_nodes2)
            出力
        """
        # ストライド・パディング
        stride = self.stride
        pad = self.pad
               
        # 入力にパディング
        X = np.pad(X, [(0, 0), (0, 0),  (pad, pad), (pad, pad)])
        
        # 入力の型を取る
        _, _, H, W = X.shape
        
        self.H = H #pad後
        self.W = W #pad後
        self.X = X
        
        O = self.O
        N = self.N
        
        FH = self.FH
        FW = self.FW
        
        F = self.F
        B = self.B

        # 出力サイズ
        H_out = (H - FH) //  stride + 1
        W_out = (W - FW) //  stride + 1
        
        self.H_out = H_out #backward用
        self.W_out = W_out #backward用
        
        out = np.zeros((N, O, H_out, W_out))
        
        for n in range(N):
            for o in range(O):
                for c in range(C):
                    for h in range(H_out): 
                        for w in range(W_out):
                            out[n, o, h, w] += (X[n, c, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW] * F[n, o, c, :, :]).sum()
                    out[n, :, :, :] += B[n, o]
    
        return out
    
    
    def conv_backward(self, dA):
        
        """
        バックワード
        
        Parameters
        ----------
        dA : 次の形のndarray, shape (batch_size, n_nodes2)
            後ろから流れてきた勾配
            
        Returns
        ----------
        dZ : 次の形のndarray, shape (batch_size, n_nodes1)
            前に流す勾配
            
        """
        
        N = self.N #入力サンプル数
        C = self.C # 入力チャネル数
        H = self.H #高さ方向の特徴量
        W = self.W #幅方向の特徴量
        
        FH = self.FH #フィルタ高さ
        FW = self.FW # フィルタ幅
        
        H_out = self.H_out
        W_out = self.W_out
        
        # ストライド・パディング
        stride = self.stride
        pad = self.pad
        
        X = self.X
        
        # delta_x
        delta_x = np.zeros((self.X.shape))
        F = self.F
        
    
        for n in range(N):
            for o in range(O):
                for c in range(C):
                    for h in range(H_out): 
                        for w in range(W_out):
                            delta_x[n, c, h*stride : h*stride+FH, w*stride : w*stride+FW] += dA[n, o, h, w] * F[n, o, c, :, :]
        
        
        if pad ==0:
            pass
        else:
            delta_x = delta_x[:, :, pad : -pad, pad : -pad]
        
        
        self.delta_x = delta_x
        

        # delta_b
        delta_b = np.zeros((self.B.shape))

        for n in range(N):
            for o in range(O):
                for h in range(H_out): 
                    for w in range(W_out):
                        delta_b[n, o] += dA[n, o, :, :].sum()   
                        
        self.delta_b = delta_b

        # delta_f
        delta_f = np.zeros((self.F.shape))
        
        
        for n in range(N):
            for o in range(O):
                for c in range(C):
                    for h in range(H_out): 
                        for w in range(W_out):
                            delta_f[n, o, c,  :, :] += dA[n, o, h, w] * X[n, c, (h*stride) : (h*stride)+FH, (w*stride) : (w*stride)+FW]
                                    
        self.delta_f = delta_f
                        
        # optimizerに流す前にデータ型を揃える
        self.delta_f = delta_f.astype(np.float64)
        self.delta_b = delta_b.astype(np.float64)
        
        # 更新
        self.F -= self.lr * delta_f
        self.B -= self.lr * delta_b
        #self.optimizer.update(self)
        
        #print("self.F" + str(self.F))
        #print("self.B" + str(self.B))
        
        return delta_x

In [None]:
class LeNet():
    
    """
    任意の構成で学習と推定が行えるCNNクラス
    
    """
    
    def __init__(self, fc_initializer, conv_initializer, optimizer, activator1, activator2, activator3, activator4, activator5, lr = 0.01,  batch_size = 20, n_features = 49, n_output = 10, sigma=0.01, n_epochs=20, verbose = True):
        
        self.verbose = verbose
        
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.n_features = n_features
        self.n_output = n_output
        
        self.Conv1 = None
        self.Conv2 = None
        
        self.Pool1 = None
        self.Pool2 = None
        
        self.Flat = None
        self.FC = None
        
        self.fc_initializer = fc_initializer
        self.conv_initializer = conv_initializer
        
        self.optimizer = optimizer
        self.lr = lr
        
        self.activator1 = activator1
        self.activator2 = activator2
        self.activator3 = activator3
        self.activator4 = activator4
        self.activator5 = activator5
        
        self.sigma = sigma
        
        self.loss = None
        self.val_loss = None
        
        self.loss_hist = []
        self.val_loss_hist = []
        
        
    # 訓練時に１エポック分の処理を行う関数
    def forward_and_backward(self, X, y):
        
        # one-hot変換
        Y = np.identity(10)[y]
        
        
        # サンプルコード2 : フォワード
        A1 = self.Conv1.conv_forward(X)
        #print("A1"+ str(A1.shape))
        
        Z1 = self.activation1.forward(A1)
        #print("Z1" + str(Z1.shape))
        
        P1 = self.Pool1.pool_forward(Z1)
        #print("P1" + str(P1.shape))
        
        A2 = self.Conv2.conv_forward(P1)
        #print("A2" + str(A2.shape))
        
        Z2 = self.activation2.forward(A2)
        #print("Z2"+str(Z2.shape))
        
        P2 = self.Pool2.pool_forward(Z2)
        #print("P2"+str(P2.shape))
        
        Z3 = self.activation3.forward(P2) #Tanh
        #print("Z3"+str(Z3.shape))
        
        F1 = self.Flat.flat_forward(Z3)
        #print("F1"+str(F1.shape))
        
        A3 = self.FC1.forward(F1)
        #print("A3"+str(A3.shape))
        
        Z4 = self.activation4.forward(A3) # ReLU
        #print("Z4"+str(Z4.shape))
        
        A4 = self.FC2.forward(Z4)
        #print("A4"+str(A4.shape))
        
        Z5 = self.activation5.forward(A4) # ReLU
        #print("Z5"+str(Z5.shape))
        
        A5 = self.FC3.forward(Z5)
        #print("A5"+str(A5.shape))
        
        Z6 = self.activation6.forward(A5) # Softmax  
        #print("Z6"+str(Z6.shape))
        
    
        # サンプルコード3 : バックワード
        dZ6, self.loss = self.activation6.backward(Z6, Y) # 交差エントロピー誤差とソフトマックスを合わせている
        #print("dZ6"+str(dZ6.shape))
            
        dA5 = self.FC3.backward(dZ6)
        #print("dA5"+str(dA5.shape))
        
        dZ5 = self.activation5.backward(dA5)
        #print("dZ5"+str(dZ5.shape))
        
        dA4 = self.FC2.backward(dZ5)
        #print("dA4"+str(A4.shape))
        
        dZ4 = self.activation4.backward(dA4)
        #print("dZ4"+str(dZ4.shape))
        
        dA3 = self.FC1.backward(dZ4)
        #print("dA3"+str(dA3.shape))
        
        dF1 = self.Flat.flat_backward(dA3)
        #print("dF1"+str(dF1.shape))
        
        dZ3 = self.activation3.backward(dF1)
        #print("dZ3"+str(dZ3.shape))
        
        dP2 = self.Pool2.pool_backward(dZ3)
        #print("dP2"+str(dP2.shape))
        
        dA2 = self.activation2.backward(dP2)
        #print("dA2"+str(dA2.shape))
        
        dZ1 = self.Conv2.conv_backward(dA2)
        #print("dZ1"+str(dZ1.shape))
        
        dP1 = self.Pool1.pool_backward(dZ1)
        #print("dP1" + str(dP1.shape))
        
        dA1 = self.activation1.backward(dP1)
        #print("dA1"+str(dA1.shape))
        
        dZ0 = self.Conv1.conv_backward(dA1) # dZ0は使用しない
        #print("dZ0" + str(dZ0.shape))
    
    
    # このエポックで更新した重みを使って検証する関数
    def forward_with_loss(self, X, y):
        
        # one-hot変換
        Y = np.identity(10)[y]
 
        
        # サンプルコード2 : フォワード
        A1 = self.Conv1.conv_forward(X)
        #print("A1"+ str(A1.shape))
        
        Z1 = self.activation1.forward(A1)
        #print("Z1" + str(Z1.shape))
        
        P1 = self.Pool1.pool_forward(Z1)
        #print("P1" + str(P1.shape))
        
        A2 = self.Conv2.conv_forward(P1)
        #print("A2" + str(A2.shape))
        
        Z2 = self.activation2.forward(A2)
        #print("Z2"+str(Z2.shape))
        
        P2 = self.Pool2.pool_forward(Z2)
        #print("P2"+str(P2.shape))
        
        Z3 = self.activation3.forward(P2) #Tanh
        #print("Z3"+str(Z3.shape))
        
        F1 = self.Flat.flat_forward(Z3)
        #print("F1"+str(F1.shape))
        
        A3 = self.FC1.forward(F1)
        #print("A3"+str(A3.shape))
        
        Z4 = self.activation4.forward(A3) # ReLU
        #print("Z4"+str(Z4.shape))
        
        A4 = self.FC2.forward(Z4)
        #print("A4"+str(A4.shape))
        
        Z5 = self.activation5.forward(A4) # ReLU
        #print("Z5"+str(Z5.shape))
        
        A5 = self.FC3.forward(Z5)
        #print("A5"+str(A5.shape))
        
        Z6 = self.activation6.forward(A5) # Softmax  
        #print("Z6"+str(Z6.shape))
    
        # サンプルコード3 : バックワード
        dZ6, self.val_loss = self.activation6.backward(Z6, Y) 
        
        
        
        
    
    def fit(self, X, y, X_val=None, y_val=None):
        
        """
        self.sigma : ガウス分布の標準偏差
        self.lr : 学習率
        self.n_nodes1 : 1層目のノード数
        self.n_nodes2 : 2層目のノード数
        self.n_output : 出力層のノード数
        """
        #initializer, optimizer, lr = 0.001, FH=3, FW=3, stride=1, pad=1
        self.Conv1 = Conv2d(self.conv_initializer(self.sigma), N=20, O = 6, C=1, FH=5, FW=5, stride=1, pad=2)
        self.activation1 = self.activator1
    
        self.Pool1 = MaxPool2d(stride=2, PFH=2, PFW=2)
        
        self.Conv2 = Conv2d(self.conv_initializer(self.sigma), N=20, O=16, C=6, FH=5, FW=5, stride=1, pad=0)
        self.activation2 = self.activator2
        
        self.Pool2 = MaxPool2d(stride=2, PFH=2, PFW=2)
        self.activation3 = self.activator3
        
        self.Flat = Flatten()
        
        self.FC1 = FC(400, 120, self.fc_initializer(self.sigma), self.optimizer(self.lr))
        self.activation4 = self.activator4
        
        self.FC2 = FC(120, 84, self.fc_initializer(self.sigma), self.optimizer(self.lr))
        self.activation5 = self.activator5
        
        self.FC3 = FC(84, self.n_output, self.fc_initializer(self.sigma), self.optimizer(self.lr))
        self.activation6 = Softmax()
        
        
        # 訓練データのミニバッチを生成
        get_mini_batch_train = GetMiniBatch(X, y, batch_size=self.batch_size)
        
        # 検証データのミニバッチを生成
        get_mini_batch_val = GetMiniBatch(X_val, y_val, batch_size=self.batch_size)
        
        
        for epoch in range(self.n_epochs):
            
            for mini_X_train, mini_y_train in get_mini_batch_train:
                
                # 順伝播・クロスエントロピー誤差・逆伝播
                self.forward_and_backward(mini_X_train, mini_y_train)
                
            self.loss_hist.append(self.loss)
            
            # 検証
            mini_X_val, mini_y_val = get_mini_batch_val[0]
            self.forward_with_loss(mini_X_val, mini_y_val)
            self.val_loss_hist.append(self.val_loss)
        
            
            if self.verbose:

                print('#'*25)
                print('### Epoch %i'%(epoch+1))
                print('#'*25)
                
                print("訓練データの損失 : {}".format(self.loss))
                print("検証データの損失 : {}".format(self.val_loss))
                
                print()
                
                
    
    def predict(self, X):
        
        A1 = self.Conv1.conv_forward(X)
        #print("A1"+ str(A1.shape))
        
        Z1 = self.activation1.forward(A1)
        #print("Z1" + str(Z1.shape))
        
        P1 = self.Pool1.pool_forward(Z1)
        #print("P1" + str(P1.shape))
        
        A2 = self.Conv2.conv_forward(P1)
        #print("A2" + str(A2.shape))
        
        Z2 = self.activation2.forward(A2)
        #print("Z2"+str(Z2.shape))
        
        P2 = self.Pool2.pool_forward(Z2)
        #print("P2"+str(P2.shape))
        
        Z3 = self.activation3.forward(P2) #Tanh
        #print("Z3"+str(Z3.shape))
        
        F1 = self.Flat.flat_forward(Z3)
        #print("F1"+str(F1.shape))
        
        A3 = self.FC.forward(F1)
        #print("A3"+str(A3.shape))
        
        Z4 = self.activation4.forward(A3) # ReLU
        #print("Z4"+str(Z4.shape))
        
        A4 = self.FC.forward(Z4)
        #print("A4"+str(A4.shape))
        
        Z5 = self.activation5.forward(A4) # Softmax  
        #print("Z5"+str(Z5.shape))
        
        pred = Z5.argmax(axis=1)
        return pred

In [None]:
lenet = LeNet(fc_initializer=SimpleInitializer,
                                                  conv_initializer = ConvInitializer,
                                                  optimizer = SGD,
                                                  lr=0.001, 
                                                  batch_size = 20,
                                                  sigma=0.01, 
                                                  activator1=ReLU(), 
                                                  activator2=ReLU(), 
                                                  activator3=Tanh(),
                                                  activator4=ReLU(),
                                                  activator5=ReLU(),
                                                  n_epochs=5,
                                                  verbose = True
                                                  )

In [None]:
%%time

lenet.fit(X_train, y_train, X_val, y_val)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


#########################
### Epoch 1
#########################
訓練データの損失 : 2.3006242937171324
検証データの損失 : 2.298204711704135

#########################
### Epoch 2
#########################
訓練データの損失 : 2.2998948739803278
検証データの損失 : 2.295290796778331

#########################
### Epoch 3
#########################
訓練データの損失 : 2.299357236339567
検証データの損失 : 2.293067741009478

#########################
### Epoch 4
#########################
訓練データの損失 : 2.298949031802921
検証データの損失 : 2.291367706604727

#########################
### Epoch 5
#########################
訓練データの損失 : 2.298630649609449
検証データの損失 : 2.290065434398339

CPU times: user 19h 53min 37s, sys: 19min 9s, total: 20h 12min 47s
Wall time: 20h 14min 27s
