# 深層学習スクラッチ　畳み込みNN２

スクラッチで2次元用畳み込みニューラルネットワークを実装した後、学習と検証を行なっていく。

In [1]:
#基本ライブラリ
import time
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline

In [2]:
#scikit-learnライブラリ
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import train_test_split

#データセット
#from sklearn.datasets import load_iris

In [3]:
#MNISTデータセット
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

#平滑化
#X_train_f = X_train.reshape(-1, 784)
#X_test_f = X_test.reshape(-1, 784)

#前処理
X_train_ff = X_train.astype(np.float)
X_test_ff = X_test.astype(np.float)
X_train_ff /= 255
X_test_ff /= 255

from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train[:, np.newaxis])
y_test_one_hot = enc.transform(y_test[:, np.newaxis])

Xt_train, Xt_val, yt_train, yt_val = train_test_split(X_train_ff, y_train_one_hot, test_size=0.2)
print(Xt_train.shape) # (48000, 784)
print(Xt_val.shape) # (12000, 784)
print(yt_train.shape)
print(yt_val.shape)

Using TensorFlow backend.


(48000, 28, 28)
(12000, 28, 28)
(48000, 10)
(12000, 10)


### データセットにチャネル次元を増やす

**reshapeとnewaxisの違い**

- newaxisを使って次元を追加すると、元の配列の各次元における要素数がわからなくても実行可能
- reshapeは１つだけわからないときは-1を使って自動調整できるが、-1を2回以上使うとエラーになる
- reshapeは次元追加に合わせて形状変更が可能

In [4]:
#チャネルの次元を増やす
Xt_train = Xt_train[:, np.newaxis, :, :]
Xt_val = Xt_val[:, np.newaxis, :, :]
yt_train = yt_train[:, np.newaxis, :]
yt_val  = yt_val[:, np.newaxis, :]

print(Xt_train.shape)
print(Xt_val.shape)
print(yt_train.shape)
print(yt_val.shape)

(48000, 1, 28, 28)
(12000, 1, 28, 28)
(48000, 1, 10)
(12000, 1, 10)


## 実装に必要な関数

### ２次元畳み込み後の出力サイズ

In [5]:
def Nw_out(Nw_in, P, F, S):
    return (Nw_in + 2*P - F) / S + 1

def Nh_out(Nh_in, P, F, S):
    return (Nh_in + 2*P - F) / S + 1

In [6]:
x = (4,4)
P = 0
S = 1
F = (3,3)

h = Nh_out(x[0], P, F[0], S)
w = Nw_out(x[1], P, F[1], S)
print(h, w)

2.0 2.0


### SGD、Initiarizer

In [7]:
#SGD for Conv
class SGD:
    """
    確率的勾配降下法
    Parameters
    ----------
    lr : 学習率
    """
    def __init__(self, lr=0.01):
        self.lr = lr
        
    def update(self, layer):
        """
        ある層の重みやバイアスの更新
        Parameters
        ----------
        layer : 更新前の層のインスタンス
        """
        layer.W -= self.lr*layer.LW
        layer.B -= self.lr*layer.LB


In [8]:
class OneInit():
    def __init__(self, sigma=0.01):
        self.sigma = sigma
        
    def W(self, hw, input_channel=1, output_channel=1 ):
        """
        重みの初期化
        Parameters
        ----------
        hw: フィルタの縦横サイズ（タプル）
        input_channel： チャネルの入力サイズ
        output_channel：チャネルの出力サイズ
        """
        W = np.ones((output_channel, input_channel, hw[0], hw[1]))

        return W
    
    def B(self, output_channel):
        """
        バイアスの初期化
        -------------
        output_channel：アウトプットチャネル数
        """
        B = np.ones((output_channel)).astype(float)
        
        return B

In [9]:
#ザビエル for Conv
class XavierlInit():
    def __init__(self, sigma=0.01):
        self.sigma = sigma
        
    def W(self, w, input_channel=1, output_channel=1 ):
        """
        重みの初期化
        Parameters
        ----------
        n_nodes1 : int
          前の層のノード数
        n_nodes2 : int
          後の層のノード数

        Returns
        ----------
        W :
        """
        xavier = 1 / np.sqrt(w)
        W = xavier * np.random.randn((output_channel, input_channel, w))

        return W
    
    def B(self, output_channel, output_h, output_w):
        """
        バイアスの初期化
        Parameters
        ----------
        n_nodes2 : int
          後の層のノード数

        Returns
        ----------
        B :
        """
        B = self.sigma * np.random.randn(output_channel, output_h, output_w) 
        
        return B


## for文を使った実装

In [10]:
class Conv2dFor():
   
    def __init__(self, initializer, optimizer, fsize, in_channel=1, out_channel=1, pad=0, stride=1, mode='constant'):
        self.optimizer = optimizer
        self.initializer = initializer
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.pad = pad
        self.fsize = fsize   #1次元
        self.stride = stride
        self.B = 0
        self.mode = mode
    
    def forward(self, X, init=False):

        if init:
            self.X = self.pad_init(X, self.pad, self.mode)
            self.W =  self.initializer.W(self.fsize, self.in_channel, self.out_channel)
            outputsize = (int(self.Nh_out(X.shape[1], self.pad, self.fsize[0], self.stride)),
                               int(self.Nw_out(X.shape[2], self.pad, self.fsize[1], self.stride)))
            self.B = self.initializer.B(self.out_channel)
            self.LW = np.zeros(self.W.shape)
            self.LZ = np.zeros(self.X.shape)
            self.A = np.zeros((self.out_channel, outputsize[0], outputsize[1]))
        
        for k in range(self.out_channel):    #フィルター数イテレーション
            for j in range(self.X.shape[0]):   #入力チャネルイテレーション
                for h in range(0, self.X.shape[1] - self.W.shape[2] + 1):       #フィルタ縦移動
                    for w in range(0, self.X.shape[2] - self.W.shape[3] + 1):  #フィルタ横移動
                        self.A[k, h, w] += np.sum(self.X[j, h: h+self.W.shape[2], w: w+self.W.shape[3]] * self.W[k, j])
            self.A[k] += self.B[k]
        
        return self.A
    
    def backward(self, LA):

        self.LB = np.sum(np.sum(LA, axis=1), axis=1)   #チャネルごと傾きをSum
        
        for k in range(self.out_channel): 
            for j in range(self.W.shape[1]):
                for h in range(LA.shape[1]):
                    for w in range(LA.shape[2]):
                        self.LW[k, j] += (self.X[j, h: h+self.W.shape[2], w: w+self.W.shape[3]] * LA[k, h, w])

        for k in range(self.out_channel):
            for j in range(self.X.shape[0]):
                for h in range(LA.shape[1]):
                    for w in range(LA.shape[2]):
                        self.LZ[j, h: h+self.W.shape[2], w: w+self.W.shape[3]] += LA[k, h, w]*self.W[k, j]
                    
        LZ = self.LZ
        
        # 更新
        self.optimizer.update(self)
        
        return LZ

    def Nw_out(self, Nw_in, P, F, S):
        return (Nw_in + 2*P - F) / S + 1

    def Nh_out(self, Nh_in, P, F, S):
        return (Nh_in + 2*P - F) / S + 1
    
    def pad_init(self, X, pad, mode='constant'):
        """
        パディングを追加する関数
        ----
        pad: スカラーもしくは配列
        mode: 補完する値
        """
        return np.pad(X, pad, mode)


In [11]:
test = Conv2dFor(OneInit(), SGD(), fsize=(3,3), in_channel=2, out_channel=3, pad=0, stride=1)

x = np.array([[[1, 2, 3, 4, 5], [2, 3, 4, 5, 6],[3, 4, 5, 6, 7],[4, 5, 6, 7, 8],[5, 6, 7, 8, 9]],
                     [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2],[3, 3, 3, 3, 3],[4, 4, 4, 4, 4],[5, 5, 5, 5, 5]]])


A = test.forward(x, init=True)
print("A:", A)
print("W", test.W)

A: [[[ 46.  55.  64.]
  [ 64.  73.  82.]
  [ 82.  91. 100.]]

 [[ 46.  55.  64.]
  [ 64.  73.  82.]
  [ 82.  91. 100.]]

 [[ 46.  55.  64.]
  [ 64.  73.  82.]
  [ 82.  91. 100.]]]
W [[[[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]

  [[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]]


 [[[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]

  [[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]]


 [[[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]

  [[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]]]


In [12]:
y = np.array([60])
LA = y - A
print(LA)

[[[ 14.   5.  -4.]
  [ -4. -13. -22.]
  [-22. -31. -40.]]

 [[ 14.   5.  -4.]
  [ -4. -13. -22.]
  [-22. -31. -40.]]

 [[ 14.   5.  -4.]
  [ -4. -13. -22.]
  [-22. -31. -40.]]]


In [13]:
LZ = test.backward(LA)
print("-------")
print("LZ:", LZ)
print("dW:", test.LW)
print("dB:",  test.LB)
print("NewB:", test.B)
print("NewW:", test.W)

-------
LZ: [[[  42.   57.   45.    3.  -12.]
  [  30.    6.  -72. -102.  -78.]
  [ -36. -153. -351. -315. -198.]
  [ -78. -210. -396. -318. -186.]
  [ -66. -159. -279. -213. -120.]]

 [[  42.   57.   45.    3.  -12.]
  [  30.    6.  -72. -102.  -78.]
  [ -36. -153. -351. -315. -198.]
  [ -78. -210. -396. -318. -186.]
  [ -66. -159. -279. -213. -120.]]]
dW: [[[[-513. -630. -747.]
   [-630. -747. -864.]
   [-747. -864. -981.]]

  [[-342. -342. -342.]
   [-459. -459. -459.]
   [-576. -576. -576.]]]


 [[[-513. -630. -747.]
   [-630. -747. -864.]
   [-747. -864. -981.]]

  [[-342. -342. -342.]
   [-459. -459. -459.]
   [-576. -576. -576.]]]


 [[[-513. -630. -747.]
   [-630. -747. -864.]
   [-747. -864. -981.]]

  [[-342. -342. -342.]
   [-459. -459. -459.]
   [-576. -576. -576.]]]]
dB: [-117. -117. -117.]
NewB: [2.17 2.17 2.17]
NewW: [[[[ 6.13  7.3   8.47]
   [ 7.3   8.47  9.64]
   [ 8.47  9.64 10.81]]

  [[ 4.42  4.42  4.42]
   [ 5.59  5.59  5.59]
   [ 6.76  6.76  6.76]]]


 [[[ 6.13  7

## im2colを使った実装

### 処理の流れ

In [14]:
x = np.array([[[1, 2, 3, 4, 5], [2, 3, 4, 5, 6],[3, 4, 5, 6, 7],[4, 5, 6, 7, 8],[5, 6, 7, 8, 9]],
                     [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2],[3, 3, 3, 3, 3],[4, 4, 4, 4, 4],[5, 5, 5, 5, 5]]])

FH = FW = 3
stride = 1
C, H, W = x.shape
N = 1

out_H = ((H - FH) // stride) + 1
out_W = ((W - FW) // stride) + 1
print("out_H=", out_H)
print("out_W=", out_W)

col = np.zeros([out_H * out_W * N, C * FH * FW])  

#colの縦は、AをFlattenしたサイズをサンプル数分用意したもの
#colの横は、フィルタをinputチャネルごとFlattenしたもの
col

out_H= 3
out_W= 3


array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.]])

In [15]:
for i in range(out_H):
    for j in range(out_W):
        patch = x[ : , i*stride : i*stride+FH, j*stride : j*stride+FW]    #横→縦の順にストライドしていく
        print(patch)
        print(patch.shape)
        print(np.reshape(patch, -1))
        col[i*out_W + j, : ] = np.reshape(patch, -1)   #0→１→２
col

[[[1 2 3]
  [2 3 4]
  [3 4 5]]

 [[1 1 1]
  [2 2 2]
  [3 3 3]]]
(2, 3, 3)
[1 2 3 2 3 4 3 4 5 1 1 1 2 2 2 3 3 3]
[[[2 3 4]
  [3 4 5]
  [4 5 6]]

 [[1 1 1]
  [2 2 2]
  [3 3 3]]]
(2, 3, 3)
[2 3 4 3 4 5 4 5 6 1 1 1 2 2 2 3 3 3]
[[[3 4 5]
  [4 5 6]
  [5 6 7]]

 [[1 1 1]
  [2 2 2]
  [3 3 3]]]
(2, 3, 3)
[3 4 5 4 5 6 5 6 7 1 1 1 2 2 2 3 3 3]
[[[2 3 4]
  [3 4 5]
  [4 5 6]]

 [[2 2 2]
  [3 3 3]
  [4 4 4]]]
(2, 3, 3)
[2 3 4 3 4 5 4 5 6 2 2 2 3 3 3 4 4 4]
[[[3 4 5]
  [4 5 6]
  [5 6 7]]

 [[2 2 2]
  [3 3 3]
  [4 4 4]]]
(2, 3, 3)
[3 4 5 4 5 6 5 6 7 2 2 2 3 3 3 4 4 4]
[[[4 5 6]
  [5 6 7]
  [6 7 8]]

 [[2 2 2]
  [3 3 3]
  [4 4 4]]]
(2, 3, 3)
[4 5 6 5 6 7 6 7 8 2 2 2 3 3 3 4 4 4]
[[[3 4 5]
  [4 5 6]
  [5 6 7]]

 [[3 3 3]
  [4 4 4]
  [5 5 5]]]
(2, 3, 3)
[3 4 5 4 5 6 5 6 7 3 3 3 4 4 4 5 5 5]
[[[4 5 6]
  [5 6 7]
  [6 7 8]]

 [[3 3 3]
  [4 4 4]
  [5 5 5]]]
(2, 3, 3)
[4 5 6 5 6 7 6 7 8 3 3 3 4 4 4 5 5 5]
[[[5 6 7]
  [6 7 8]
  [7 8 9]]

 [[3 3 3]
  [4 4 4]
  [5 5 5]]]
(2, 3, 3)
[5 6 7 6 7 8 7 8 9 3 3 3 4 4 4

array([[1., 2., 3., 2., 3., 4., 3., 4., 5., 1., 1., 1., 2., 2., 2., 3.,
        3., 3.],
       [2., 3., 4., 3., 4., 5., 4., 5., 6., 1., 1., 1., 2., 2., 2., 3.,
        3., 3.],
       [3., 4., 5., 4., 5., 6., 5., 6., 7., 1., 1., 1., 2., 2., 2., 3.,
        3., 3.],
       [2., 3., 4., 3., 4., 5., 4., 5., 6., 2., 2., 2., 3., 3., 3., 4.,
        4., 4.],
       [3., 4., 5., 4., 5., 6., 5., 6., 7., 2., 2., 2., 3., 3., 3., 4.,
        4., 4.],
       [4., 5., 6., 5., 6., 7., 6., 7., 8., 2., 2., 2., 3., 3., 3., 4.,
        4., 4.],
       [3., 4., 5., 4., 5., 6., 5., 6., 7., 3., 3., 3., 4., 4., 4., 5.,
        5., 5.],
       [4., 5., 6., 5., 6., 7., 6., 7., 8., 3., 3., 3., 4., 4., 4., 5.,
        5., 5.],
       [5., 6., 7., 6., 7., 8., 7., 8., 9., 3., 3., 3., 4., 4., 4., 5.,
        5., 5.]])

In [16]:
weight = np.array( [[[1, 1, 1],[1, 1, 1],[1, 1, 1]],[[1, 1, 1],[1, 1, 1],[1, 1, 1]]])
print(weight.shape)
weight_f = np.reshape(weight, -1)
print(weight_f.shape)
a = np.dot(col,weight_f)+1
print(a)

(2, 3, 3)
(18,)
[ 46.  55.  64.  64.  73.  82.  82.  91. 100.]


### im2col


### conv2d

In [21]:
class Conv2d():
   
    def __init__(self, initializer, optimizer, fsize, in_channel=1, out_channel=1, pad=0, mode='constant', stride=1):
        self.optimizer = optimizer
        self.initializer = initializer
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.pad = pad
        self.fsize = fsize   #1次元
        self.stride = stride
        self.B = 0
        self.mode = mode
    
    def forward(self, X, init=False):
        """
        X: (n_sumple, in_channel, height, width)の４次元配列
        """

        if init:
            self.X = self.pad_init(X, self.pad, self.mode)
            self.col = self.im2col(self.X, self.fsize[0], self.fsize[1], self.stride)
            self.W =  self.initializer.W(self.fsize, self.in_channel, self.out_channel).flatten().reshape(self.out_channel, -1)
            self.outputsize = (int(self.Nh_out(X.shape[2], self.pad, self.fsize[0], self.stride)),
                               int(self.Nw_out(X.shape[3], self.pad, self.fsize[1], self.stride)))
            self.B = self.initializer.B(self.out_channel)
            self.LW = np.zeros(self.W.shape)
            self.LZ = np.zeros(self.col.shape)
            self.A = np.zeros((self.out_channel, self.X.shape[0] * self.outputsize[0] * self.outputsize[1]))
        
        for c in range(self.out_channel):    #フィルター数イテレーション
            self.A[c] = np.dot(self.col, self.W[c]) + self.B[c]
        
        return self.A.reshape(self.X.shape[0], self.out_channel, self.outputsize[0], self.outputsize[1])
    
    def backward(self, LA):

        self.LB = np.sum(LA, axis=1)   #チャネルごと傾きをSum
        
        for c in range(self.out_channel): 
            self.LW[c] = np.sum(self.col * LA[c].reshape(-1,1), axis=0)

        for c in range(self.out_channel):
            self.LZ += np.tile(self.W[c],(9, 1)) * LA[c].reshape(-1,1)
                    
        LZ = self.LZ
        
        # 更新
        self.optimizer.update(self)
        
        return LZ

    def Nw_out(self, Nw_in, P, F, S):
        return (Nw_in + 2*P - F) / S + 1

    def Nh_out(self, Nh_in, P, F, S):
        return (Nh_in + 2*P - F) / S + 1
    
    def pad_init(self, X, pad, mode='constant'):
        """
        パディングを追加する関数
        ----
        pad: スカラーもしくは配列
        mode: 補完する値
        """
        return np.pad(X, pad, mode)
    
    def im2col(self, X, FH, FW, stride):
        """
        コンボリュージョンマップを作成する
        """
        N, C, H, W = X.shape
        out_H = ((H - FH) // stride) + 1
        out_W = ((W - FW) // stride) + 1
        col = np.zeros([out_H * out_W * N, C * FH * FW]) 
        
        for n in range(N):
            for i in range(out_H):
                for j in range(out_W):
                    patch = X[n, : , i*stride : i*stride+FH, j*stride : j*stride+FW]
                    col[i*out_W + j, : ] = np.reshape(patch, -1)
                    
        return col


In [22]:
test0 = Conv2d(OneInit(), SGD(), fsize=(3,3), in_channel=2, out_channel=3, pad=0, stride=1)

x = np.array([[[[1, 2, 3, 4, 5], [2, 3, 4, 5, 6],[3, 4, 5, 6, 7],[4, 5, 6, 7, 8],[5, 6, 7, 8, 9]],
                     [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2],[3, 3, 3, 3, 3],[4, 4, 4, 4, 4],[5, 5, 5, 5, 5]]]])


A0 = test0.forward(x, init=True)
print("A:", A0)
print("W", test0.W)

A: [[[[ 46.  55.  64.]
   [ 64.  73.  82.]
   [ 82.  91. 100.]]

  [[ 46.  55.  64.]
   [ 64.  73.  82.]
   [ 82.  91. 100.]]

  [[ 46.  55.  64.]
   [ 64.  73.  82.]
   [ 82.  91. 100.]]]]
W [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]


In [23]:
y = np.array([60])
LA0 = y - A0
print(LA0)

[[[[ 14.   5.  -4.]
   [ -4. -13. -22.]
   [-22. -31. -40.]]

  [[ 14.   5.  -4.]
   [ -4. -13. -22.]
   [-22. -31. -40.]]

  [[ 14.   5.  -4.]
   [ -4. -13. -22.]
   [-22. -31. -40.]]]]


In [24]:
LZ0 = test0.backward(LA0)
print("-------")
print("LZ:", LZ0)
print("dW:", test0.LW)
print("dB:",  test0.LB)
print("NewB:", test0.B)
print("NewW:", test0.W)

ValueError: operands could not be broadcast together with shapes (9,18) (27,1) 

## MaxPool

### for文

In [25]:
class MaxPool2D_for:
    def __init__(self, fsize, stride=1):
        self.fsize = fsize
        self.stride = stride
        pass
    
    def fit(self, X):
        """
        X: (チャネル数, hight, width)
        """
        self.A = np.zeros((X.shape[0],
            X.shape[1] - self.fsize[0] + 1,
            X.shape[2] - self.fsize[1] + 1))
        self.Aindex = np.copy(self.A)

        for i in range(X.shape[0]):
            for h in range(self.A.shape[0]):
                for w in range(self.A.shape[1]):
                    self.A[h, w] = np.max(X[i, h:h+self.fsize[0], w:w+self.fsize[1]])
                    self.Aindex[h, w] = np.argmax(X[i, h:h+self.fsize[0], w:w+self.fsize[1]])
        
        return self.A

### im2col

In [126]:
class MaxPool2D:
    def __init__(self, fsize, stride=1):
        self.fsize = fsize
        self.stride = stride
        self.mask = None
        pass
    
    def fit(self, X):
        """
        X: (サンプル数, チャネル数, hight, width)
        """
        self.col = self.im2col(X, 2, 2, 1)
        self.A = np.max(self.col, axis=2)
        self.Aindex = np.argmax(self.col, axis=2)
        
        self.A = self.A.reshape(X.shape[1], -1, self.out_H, self.out_W).transpose(1,0,2,3)
         
        return self.A
    
    def backward(self, X):
        """
        X: (サンプル数, チャネル数, hight, width)
        """
        X = X.transpose(1,0,2,3) #Aindexと同じ形に
        X = X.reshape(X.shape[0], -1)
        
        index = np.ndarray.flatten(self.Aindex)
        X = np.ndarray.flatten(X)
        LZ = np.zeros(self.col.shape)
        LZ = LZ.reshape(-1, LZ.shape[2])
        LZ[np.arange(index.size), index] = X
        
        return LZ


    def im2col(self, X, FH, FW, stride):
        """
        コンボリュージョンマップを作成する
        """
        N, C, H, W = X.shape
        self.out_H = ((H - FH) // stride) + 1
        self.out_W = ((W - FW) // stride) + 1
        col = np.zeros([C, self.out_H * self.out_W * N, FH * FW]) 
        
        for c in range(C):
            for n in range(N):
                for i in range(self.out_H):
                    for j in range(self.out_W):
                        patch = X[n, c , i*stride : i*stride+FH, j*stride : j*stride+FW]
                        col[c, i*self.out_W + j, : ] = np.reshape(patch, -1)
                    
        return col

In [127]:
maxpool = MaxPool2D(fsize=(2,2))
test2 = maxpool.fit(A0)
print(test2)
print(maxpool.Aindex)

[[[[ 73.  82.]
   [ 91. 100.]]

  [[ 73.  82.]
   [ 91. 100.]]

  [[ 73.  82.]
   [ 91. 100.]]]]
[[3 3 3 3]
 [3 3 3 3]
 [3 3 3 3]]


## 平滑化

flattenと同様の動作をする関数にnp.ravel関数があります。この関数は、flatten関数とは違い、コピーを作成しません。大きなデータで破壊的な変更をしても問題ない場合は、こちらの関数を使用することでパフォーマンスの向上が見込めます。

In [128]:
class Flatten():
    def __init__(self):
        pass
    
    def fit(self, X):
        """
        X: ３次元データを想定 (チャネル数, height, width)
        """
        self.form = X.shape
        flattend = X.reshape(X.shape[0], -1)
        
        return flattend
    
    def backward(self, X):
        reform = X.reshape(self.form)
        
        return reform
        
        

In [129]:
flat = Flatten()
test2_flat = flat.fit(test2)
print(test2_flat)
print(flat.form)
print(test2_flat.reshape(flat.form))

[[ 73.  82.  91. 100.  73.  82.  91. 100.  73.  82.  91. 100.]]
(1, 3, 2, 2)
[[[[ 73.  82.]
   [ 91. 100.]]

  [[ 73.  82.]
   [ 91. 100.]]

  [[ 73.  82.]
   [ 91. 100.]]]]


## 複数サンプルテスト


In [130]:
x = Xt_train[0:100]

tests = Conv2d(OneInit(), SGD(), fsize=(3,3), in_channel=1, out_channel=3, pad=0, stride=1)
As = tests.forward(x, init=True)
print(As.shape)

(100, 3, 26, 26)


In [131]:
maxpools = MaxPool2D(fsize=(2,2))
tests_m = maxpools.fit(As)
print(tests_m.shape)
print(maxpools.Aindex.shape)

(100, 3, 25, 25)
(3, 62500)


In [132]:
flat = Flatten()
tests_flat = flat.fit(tests_m)
print(tests_flat.shape)

(100, 1875)


In [133]:
#バックワード
back1 = flat.backward(tests_flat)
print(back1.shape)

(100, 3, 25, 25)


In [135]:
LZ = maxpools.backward(back1)


array([[1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       ...,
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [53]:
maxpools.Aindex.size

187500

In [36]:
for i in range(3):
    for j in range(62500):
        maxpools.LZ[i, j, maxpools.Aindex[i, j]] = atai[i, j]

In [37]:
maxpools.LZ[0,0:10]

array([[1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.]])