# 畳み込みニューラルネットワーク

### Convolution/Poolingレイヤの実装

In [2]:
import numpy as np

In [3]:
# Random array of shape (10, 1, 28, 28); the layer classes later in this
# notebook unpack 4-D inputs as (N, C, H, W).
x = np.random.rand(10,1,28,28)
x.shape


(10, 1, 28, 28)

In [13]:
# Small 4-D example: indexing the first axis of a (2, 2, 2, 3) array
# yields a (2, 2, 3) sub-array.
y = np.random.rand(2,2,2,3)
y[0]

array([[[0.7060744 , 0.91926254, 0.6726526 ],
        [0.06161747, 0.64811467, 0.74639766]],

       [[0.22792606, 0.75864532, 0.21596095],
        [0.28511664, 0.45102482, 0.75461466]]])

In [9]:
# First entry along axis 0 of x (a 3-D sub-array).
x[0]

array([[[4.20941670e-01, 1.11516217e-01, 2.51869407e-01, 6.11205881e-01,
         1.25727377e-01, 1.42771752e-01, 3.60279290e-01, 9.15966021e-01,
         9.94927269e-01, 1.91022795e-01, 6.68080067e-01, 4.28422139e-01,
         3.70627183e-01, 4.09742072e-01, 7.16127122e-01, 1.55298496e-01,
         3.67282189e-02, 8.90569090e-01, 5.25854473e-01, 8.49884103e-01,
         9.70199584e-01, 6.79570226e-01, 8.78270630e-01, 4.95851225e-01,
         8.95929668e-01, 3.18780976e-01, 8.81174977e-01, 2.19279143e-01],
        [9.19617965e-02, 4.27190487e-01, 5.16067578e-01, 7.47736497e-01,
         1.69674927e-02, 6.52084125e-01, 9.45248208e-01, 6.50876896e-01,
         8.70127078e-01, 6.12521605e-01, 3.48992634e-01, 3.08095590e-01,
         9.27988608e-01, 2.09074395e-01, 1.02554848e-01, 5.68100502e-01,
         3.25551174e-01, 8.70108697e-02, 3.07844623e-01, 8.23383124e-01,
         2.88234285e-01, 5.73702443e-02, 9.39779052e-01, 7.36109220e-02,
         3.10389373e-01, 8.65461313e-01, 4.4719652

In [14]:
# First entry along axes 0 and 1 of x (a 2-D sub-array).
x[0,0]

array([[4.20941670e-01, 1.11516217e-01, 2.51869407e-01, 6.11205881e-01,
        1.25727377e-01, 1.42771752e-01, 3.60279290e-01, 9.15966021e-01,
        9.94927269e-01, 1.91022795e-01, 6.68080067e-01, 4.28422139e-01,
        3.70627183e-01, 4.09742072e-01, 7.16127122e-01, 1.55298496e-01,
        3.67282189e-02, 8.90569090e-01, 5.25854473e-01, 8.49884103e-01,
        9.70199584e-01, 6.79570226e-01, 8.78270630e-01, 4.95851225e-01,
        8.95929668e-01, 3.18780976e-01, 8.81174977e-01, 2.19279143e-01],
       [9.19617965e-02, 4.27190487e-01, 5.16067578e-01, 7.47736497e-01,
        1.69674927e-02, 6.52084125e-01, 9.45248208e-01, 6.50876896e-01,
        8.70127078e-01, 6.12521605e-01, 3.48992634e-01, 3.08095590e-01,
        9.27988608e-01, 2.09074395e-01, 1.02554848e-01, 5.68100502e-01,
        3.25551174e-01, 8.70108697e-02, 3.07844623e-01, 8.23383124e-01,
        2.88234285e-01, 5.73702443e-02, 9.39779052e-01, 7.36109220e-02,
        3.10389373e-01, 8.65461313e-01, 4.47196521e-01, 1.30961

In [15]:
# im2col demo: unfold 4-D inputs into 2-D matrices and inspect their shapes.
import sys,os
sys.path.append(os.pardir)  # add the parent directory to the import path before importing `common`
from common.util import im2col

# Single input of shape (1, 3, 7, 7), 5x5 window, stride 1, no padding.
# Recorded output shape: (9, 75).
x1 = np.random.rand(1,3,7,7)
coll = im2col(x1,5,5,stride=1,pad=0)
print(coll.shape)

# Same window settings for a batch of 10 inputs.
# Recorded output shape: (90, 75).
x2 = np.random.rand(10,3,7,7)
col2 = im2col(x2,5,5,stride=1,pad=0)
print(col2.shape)

(9, 75)
(90, 75)


In [16]:
#Convolutionクラスの実装
class Convolution:
    """Convolution layer (forward pass only) built on ``im2col``.

    Attributes:
        W: Filter weights, unpacked as (FN, C, FH, FW).
        b: Bias added to the result of the matrix product.
        stride: Window step handed to ``im2col``.
        pad: Padding amount handed to ``im2col``.

    NOTE(review): this class defines no ``backward`` method and no
    ``dW``/``db`` attributes, although ``SimpleConvNet.gradient`` in this
    notebook calls ``backward`` on every layer and reads ``Conv1.dW`` and
    ``Conv1.db``.
    """

    def __init__(self, W, b, stride=1, pad=0):
        self.W, self.b = W, b
        self.stride, self.pad = stride, pad

    def forward(self, x):
        """Apply the filters to ``x`` and return the feature maps.

        Args:
            x: 4-D input, unpacked as (N, C, H, W).

        Returns:
            Array of shape (N, FN, out_h, out_w).
        """
        num_filters, _, kernel_h, kernel_w = self.W.shape
        batch, _, in_h, in_w = x.shape
        step, margin = self.stride, self.pad

        # Spatial size of the output feature map.
        rows = int(1 + (in_h + 2 * margin - kernel_h) / step)
        cols = int(1 + (in_w + 2 * margin - kernel_w) / step)

        # Unfold the input into a 2-D patch matrix and flatten each filter
        # into one column so the convolution becomes a single matrix product.
        patches = im2col(x, kernel_h, kernel_w, step, margin)
        kernels = self.W.reshape(num_filters, -1).T
        flat = np.dot(patches, kernels) + self.b

        # (N*out_h*out_w, FN) -> (N, out_h, out_w, FN) -> (N, FN, out_h, out_w)
        return flat.reshape(batch, rows, cols, -1).transpose(0, 3, 1, 2)

In [20]:
#Poolingレイヤの実装
class Pooling:
    """Max-pooling layer (forward pass only) built on ``im2col``.

    Attributes:
        pool_h: Height of the pooling window.
        pool_w: Width of the pooling window.
        stride: Window step handed to ``im2col``.
        pad: Padding amount handed to ``im2col``.

    NOTE(review): this class defines no ``backward`` method, although
    ``SimpleConvNet.gradient`` in this notebook calls ``backward`` on
    every layer.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Return the maximum of every pooling window.

        Args:
            x: 4-D input, unpacked as (N, C, H, W).

        Returns:
            Array of shape (N, C, out_h, out_w).
        """
        N, C, H, W = x.shape
        # The padding is passed to im2col below, so it must also enter the
        # output-size formula (same formula as Convolution.forward);
        # otherwise the final reshape does not match whenever pad > 0.
        # With pad == 0 this is identical to the unpadded formula.
        out_h = int(1 + (H + 2 * self.pad - self.pool_h) / self.stride)
        out_w = int(1 + (W + 2 * self.pad - self.pool_w) / self.stride)

        # NOTE(review): presumably im2col pads with zeros; if so, padded
        # windows over all-negative inputs yield 0 as their max -- confirm.
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        # One row per (position, channel) window.
        col = col.reshape(-1, self.pool_h * self.pool_w)

        out = np.max(col, axis=1)

        # (N*out_h*out_w*C,) -> (N, out_h, out_w, C) -> (N, C, out_h, out_w)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        return out
        

In [21]:
from collections import OrderedDict

In [22]:
#Reluレイヤの実装
class Relu:
    """ReLU activation layer.

    Attributes:
        mask: Boolean array, True where the last forward input was <= 0.
    """

    def __init__(self):
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with all non-positive entries set to 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Return the gradient with respect to the forward input.

        Entries where the forward input was <= 0 receive gradient 0.
        The result is a new array; ``dout`` itself is left untouched so
        the caller's gradient buffer is not silently modified.
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx

In [23]:
#Affineレイヤの実装
class Affine:
    """Fully connected layer computing ``np.dot(x, W) + b``.

    Inputs with more than two axes (for example the 4-D output of a
    pooling layer) are flattened to (N, features) in ``forward`` and the
    gradient is reshaped back to the input shape in ``backward``.
    Inputs with one or two axes are used unchanged.

    Attributes:
        W: Weight matrix.
        b: Bias vector.
        x: Input used for the matrix product in the last forward call
            (flattened when the original input had more than two axes).
        original_x_shape: Shape of the last forward input before flattening.
        dW: Gradient of the weights, set by ``backward``.
        db: Gradient of the bias, set by ``backward``.
    """

    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.original_x_shape = None
        self.dW = None
        self.db = None

    def forward(self, x):
        """Return ``np.dot(x, W) + b``, flattening tensor input first."""
        self.original_x_shape = x.shape
        if x.ndim > 2:
            # Collapse every axis after the first into one feature axis.
            x = x.reshape(x.shape[0], int(np.prod(x.shape[1:])))
        self.x = x
        out = np.dot(self.x, self.W) + self.b

        return out

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient w.r.t. the input.

        The returned array has the same shape as the input given to the
        preceding ``forward`` call.
        """
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        # Undo the flattening done in forward (no-op for 1-D/2-D input).
        if self.original_x_shape is not None:
            dx = dx.reshape(self.original_x_shape)

        return dx
        

In [25]:
def softmax(a):
    """Return the softmax of ``a`` computed along its last axis.

    A 1-D input is treated as one score vector; for a 2-D batch each row
    is normalized independently, so every row of the result sums to 1.

    Args:
        a: Array-like of scores.

    Returns:
        Array of the same shape as ``a`` holding the softmax values.
    """
    a = np.asarray(a)
    # A 0-d input has no last axis; reduce over everything in that case.
    axis = -1 if a.ndim > 0 else None
    # Subtracting the maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large scores.
    c = np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(a - c)
    sum_exp_a = np.sum(exp_a, axis=axis, keepdims=True)
    return exp_a / sum_exp_a

#交差エントロピー損失
def cross_entropy_error(y, t):
    """Return the cross-entropy loss averaged over the batch.

    Args:
        y: Predicted probabilities, shape (classes,) for one sample or
            (batch, classes) for a batch.
        t: Targets. Either the same number of elements as ``y``
            (one-hot / per-class weights) or one integer class index
            per sample.

    Returns:
        The summed cross entropy divided by the batch size. For a single
        1-D sample the batch size is 1, so the plain sum is returned.
    """
    y = np.asarray(y)
    t = np.asarray(t)
    if y.ndim == 1:
        # Treat a single sample as a batch of one.
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    batch_size = y.shape[0]
    delta = 1e-7  # keeps np.log away from log(0)

    if t.size == y.size:
        total = -np.sum(t.reshape(y.shape) * np.log(y + delta))
    else:
        # t holds class indices: pick the predicted probability of the
        # correct class for every sample.
        labels = t.reshape(batch_size)
        total = -np.sum(np.log(y[np.arange(batch_size), labels] + delta))

    # Average so the loss scale does not depend on the batch size.
    return total / batch_size



In [26]:
#Softmax-with-Lossレイヤの実装
class SoftmaxWithLoss:
    """Output layer combining softmax with the cross-entropy loss.

    Attributes:
        loss: Loss value returned by the last ``forward`` call.
        y: Softmax output of the last ``forward`` call.
        t: Targets given to the last ``forward`` call.
    """

    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None

    def forward(self, x, t):
        """Compute, store and return the loss for scores ``x`` and targets ``t``."""
        self.t = t
        self.y = softmax(x)
        # Store under ``loss`` (not a misspelled attribute) so the value
        # returned below is the one just computed rather than None.
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the scores.

        ``dout`` is accepted for interface symmetry with the other layers
        but is not used in the computation.
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:
            # Targets have one value per class (e.g. one-hot).
            dx = (self.y - self.t) / batch_size
        else:
            # Targets are class indices: subtract 1 at the correct class.
            # Without this branch ``self.y - self.t`` would either fail
            # or broadcast the labels across the class axis.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        return dx

### CNNの実装

In [27]:
class SimpleConvNet:
    """Small convolutional network.

    Layer order: Conv1 -> Relu1 -> Pool1 -> Affine1 -> Relu2 -> Affine2,
    followed by a SoftmaxWithLoss output layer.

    Args:
        input_dim: Input shape as (channels, height, width). Only
            ``input_dim[1]`` is used as the spatial size, so a square
            input is assumed.
        conv_param: Dict with keys "filter_num", "filter_size", "pad"
            and "stride" for the convolution layer.
        hidden_size: Number of units in the hidden affine layer.
        output_size: Number of output units.
        weight_init_std: Scale applied to the random initial weights.

    Attributes:
        params: Dict of parameters "W1", "b1", "W2", "b2", "W3", "b3".
        layers: OrderedDict of the layers in forward order.
        last_layer: The SoftmaxWithLoss output layer.

    NOTE(review): ``gradient`` calls ``backward`` on every layer and reads
    ``dW``/``db`` from Conv1, Affine1 and Affine2, so all layer classes
    must provide them. Affine1 receives Pool1's 4-D output, so the Affine
    layer must be able to handle non-2-D input.
    """

    def __init__(self,input_dim=(1,28,28),
                 conv_param={"filter_num":30,"filter_size":5,"pad":0,"stride":1},
                 hidden_size=100,output_size=10,weight_init_std=0.01):
        # conv_param is only read here, never mutated, so sharing the
        # default dict between instances is harmless.
        filter_num = conv_param["filter_num"]
        filter_size = conv_param["filter_size"]
        filter_pad = conv_param["pad"]
        filter_stride = conv_param["stride"]
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
        # Pool1 is 2x2 with stride 2, which halves each spatial side.
        pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))

        # Initialize the weight parameters.
        self.params = {}
        # randn (zero-mean normal), matching W2 and W3; np.random.rand
        # would make every filter weight positive.
        self.params["W1"] = weight_init_std * np.random.randn(filter_num,input_dim[0],
                                                              filter_size,filter_size)
        self.params["b1"] = np.zeros(filter_num)
        self.params["W2"] = weight_init_std * np.random.randn(pool_output_size,hidden_size)
        self.params["b2"] = np.zeros(hidden_size)
        self.params["W3"] = weight_init_std * np.random.randn(hidden_size,output_size)
        self.params["b3"] = np.zeros(output_size)

        # Build the layers.
        self.layers = OrderedDict()
        self.layers["Conv1"] = Convolution(self.params["W1"],
                                           self.params["b1"],
                                           filter_stride,
                                           filter_pad)
        self.layers["Relu1"] = Relu()
        self.layers["Pool1"] = Pooling(pool_h=2,pool_w=2,stride=2)
        self.layers["Affine1"] = Affine(self.params["W2"],self.params["b2"])
        self.layers["Relu2"] = Relu()
        self.layers["Affine2"] = Affine(self.params["W3"],self.params["b3"])

        self.last_layer = SoftmaxWithLoss()

    def predict(self,x):
        """Run ``x`` through every layer in order and return the scores."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self,x,t):
        """Return the loss of the network for input ``x`` and targets ``t``."""
        y = self.predict(x)
        # Use the attribute assigned in __init__ (``last_layer``); a
        # differently spelled name would raise AttributeError here.
        return self.last_layer.forward(y,t)

    def gradient(self,x,t):
        """Return the parameter gradients computed by backpropagation.

        Returns:
            Dict with keys "W1", "b1", "W2", "b2", "W3", "b3".
        """
        # Forward pass: stores the intermediate values the layers need.
        self.loss(x,t)

        # Backward pass: output layer first, then the layers in reverse.
        dout = 1
        dout = self.last_layer.backward(dout)

        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        grads["W1"] = self.layers["Conv1"].dW
        grads["b1"] = self.layers["Conv1"].db
        grads["W2"] = self.layers["Affine1"].dW
        grads["b2"] = self.layers["Affine1"].db
        grads["W3"] = self.layers["Affine2"].dW
        grads["b3"] = self.layers["Affine2"].db

        return grads

In [28]:
# coding: utf-8
# Training script: train a SimpleConvNet with the project's Trainer, save
# the learned parameters, and plot the per-epoch train/test accuracy.
import sys, os
sys.path.append(os.pardir)  # allow importing files from the parent directory
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
# NOTE(review): this import rebinds the name SimpleConvNet, so the class
# from the simple_convnet module is used below rather than any class of
# the same name defined earlier in the session.
from simple_convnet import SimpleConvNet
from common.trainer import Trainer

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

# Reduce the data if processing takes too long
#x_train, t_train = x_train[:5000], t_train[:5000]
#x_test, t_test = x_test[:1000], t_test[:1000]

max_epochs = 20

network = SimpleConvNet(input_dim=(1,28,28), 
                        conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)

trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=max_epochs, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()

# Save the parameters
network.save_params("params.pkl")
print("Saved Network Parameters!")

# Draw the graph
# NOTE(review): `markers` is defined but the plot calls below pass their
# marker symbols as literals, so this dict is currently unused.
markers = {'train': 'o', 'test': 's'}
# x axis: one point per epoch; assumes the accuracy lists hold max_epochs
# entries -- TODO confirm against Trainer.
x = np.arange(max_epochs)
plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2)
plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()


train loss:2.299541594303412
=== epoch:1, train acc:0.2, test acc:0.18 ===
train loss:2.297906325725518
train loss:2.294529462744023
train loss:2.289038575357394
train loss:2.279316988342388
train loss:2.2674971148215874
train loss:2.2619298868689266
train loss:2.2512911974321828
train loss:2.2179676551169267
train loss:2.192993388399168
train loss:2.1372087440416636
train loss:2.1394170686006975
train loss:2.0494828763303694
train loss:2.059387417643817
train loss:2.065578013096769
train loss:2.01613774792426
train loss:1.91279296740881
train loss:1.9009824994865965
train loss:1.876666114766724
train loss:1.7804612670909181
train loss:1.674089631532683
train loss:1.5328407769186765
train loss:1.5389739853061415
train loss:1.394734103117456
train loss:1.3869337520691287
train loss:1.2499890513443688
train loss:1.3350416335333386
train loss:1.0419565383797365
train loss:1.1531730002376197
train loss:1.0287139438063906
train loss:1.0766121447468615
train loss:0.9332653426282078
train los

KeyboardInterrupt: 