# Table of Contents
 <p><div class="lev1 toc-item"><a href="#逆伝搬(backward-propagation)" data-toc-modified-id="逆伝搬(backward-propagation)-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>逆伝搬(backward propagation)</a></div><div class="lev2 toc-item"><a href="#計算グラフ" data-toc-modified-id="計算グラフ-11"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>計算グラフ</a></div><div class="lev1 toc-item"><a href="#活性化関数レイヤ" data-toc-modified-id="活性化関数レイヤ-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>活性化関数レイヤ</a></div><div class="lev1 toc-item"><a href="#affine/softmaxレイヤ" data-toc-modified-id="affine/softmaxレイヤ-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>affine/softmaxレイヤ</a></div><div class="lev1 toc-item"><a href="#誤差逆伝搬法による学習" data-toc-modified-id="誤差逆伝搬法による学習-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>誤差逆伝搬法による学習</a></div>

# 逆伝搬(backward propagation)

数値微分は遅いので，それに代わる計算法として誤差逆伝搬法を用いる．これは，まず順伝搬計算をおこなってノードごとの値を記録しておき，出力側から入力側へ向かって，各ノードの局所的な微分を連鎖律にしたがって掛け合わせながら逆向きに伝搬させることで，それぞれのパラメータについての偏微分を求める方法である．

りんごとオレンジの買い物の例で，合計金額を各入力について微分したらどうなるかを実験するといいかも．


## 計算グラフ

In [17]:
class MulLayer:
    """Multiplication node of a computational graph.

    The forward pass caches both operands because the backward pass needs
    them: the gradient flowing to one input is the upstream gradient times
    the *other* input.
    """

    def __init__(self):
        # Operands of the most recent forward call; unset until then.
        self.x = self.y = None

    def forward(self, x, y):
        """Cache ``x`` and ``y`` and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        ``dx`` is ``dout`` times the cached ``y`` and ``dy`` is ``dout``
        times the cached ``x``.
        """
        return dout * self.y, dout * self.x



In [19]:
# Example inputs: apple is multiplied by apple_num, and that product by tax.
apple = 100
apple_num = 2
tax = 1.1

# One MulLayer instance per multiplication node of the graph.
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# forward: price = (apple * apple_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

print(price)

220.00000000000003


In [20]:
# backward: start with an upstream gradient of 1 at the final output and
# push it through the tax multiplication, the last layer of the forward pass.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)

print(dapple_price, dtax)

1.1 200


In [25]:
# Continue the backward pass through the apple multiplication, feeding it the
# gradient that arrived at apple_price.
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

In [26]:
# Show the gradients obtained by the backward pass for the three inputs.
print(dapple, dapple_num, dtax)

2.2 110.00000000000001 200


In [28]:
class AddLayer:
    """Addition node of a computational graph.

    Addition needs no cached state: the upstream gradient is handed to both
    inputs unchanged (multiplied by 1).
    """

    def __init__(self):
        """Nothing to initialise; the layer is stateless."""

    def forward(self, x, y):
        """Return the sum of ``x`` and ``y``."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``, each equal to ``dout * 1``."""
        return dout * 1, dout * 1
    

In [39]:
# Example inputs for the two-item graph: each item value is multiplied by its
# count, the two products are added, and the sum is multiplied by tax.
apple = 100
apple_num = 2
orange = 150
orange_num = 3
tax = 1.1

# layer: one instance per node of the graph
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# forward: price = (apple * apple_num + orange * orange_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(all_price, tax)

In [41]:
# Show every intermediate value of the forward pass, in evaluation order.
print(apple_price)
print(orange_price)
print(all_price)
print(price)

200
450
650
715.0000000000001


In [48]:
# backward: start with an upstream gradient of 1 and visit the layers in the
# reverse of the forward order (tax multiplication first, then the addition).
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)

In [51]:
# Show the gradients produced so far by the tax and addition layers.
print(dall_price)
print(dtax)
print(dapple_price)
print(dorange_price)

1.1
650
1.1
1.1


In [52]:
# Finish the backward pass through the two leaf multiplications; each one
# receives the gradient that the addition layer passed to its output.
dorange, dorange_num = mul_orange_layer.backward(dorange_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

# Gradients for the four leaf inputs.
print(dorange)
print(dorange_num)
print(dapple)
print(dapple_num)


3.3000000000000003
165.0
2.2
110.00000000000001


# 活性化関数レイヤ

# affine/softmaxレイヤ

# 誤差逆伝搬法による学習

In [1]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)

import numpy as np
from dataset.mnist import load_mnist
from ch05.two_layer_net import TwoLayerNet

# Load the data; the keyword flags ask the loader for normalized inputs and
# one-hot encoded labels.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Training hyperparameters.
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Loss is recorded every iteration, accuracies once per epoch.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations that make up one epoch. Floor division keeps this an
# int: with true division the value is a float, and `i % iter_per_epoch == 0`
# would only fire on schedule when train_size is an exact multiple of
# batch_size.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a random mini-batch of row indices and select those rows.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Gradient via backpropagation; the numerical-gradient alternative is
    # kept below for reference.
    #grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)

    # Gradient-descent update of every parameter, in place.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same mini-batch, evaluated after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch (including iteration 0), evaluate on the full sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

0.119533333333 0.126
0.906216666667 0.9084
0.92225 0.9236
0.937883333333 0.9369
0.94635 0.9439
0.95305 0.9502
0.958 0.9546
0.961733333333 0.9572
0.964266666667 0.9601
0.96805 0.9619
0.97035 0.9643
0.9727 0.9658
0.973166666667 0.9673
0.975433333333 0.9699
0.976816666667 0.9688
0.978183333333 0.9707
0.977733333333 0.9683
