# 誤差逆伝播法
- 重みパラメータの勾配の計算を効率良く行う方法

## 連鎖律
- 端的に言えば、合成関数の微分は、それを構成する各関数の微分の積で表せるという性質

In [10]:
# 乗算
class MulLayer:
    """Multiplication node of a computational graph.

    The forward pass caches both operands because the gradient with respect
    to each operand is the upstream gradient scaled by the *other* operand.
    """

    def __init__(self):
        # Operands of the most recent forward pass (None until forward runs).
        self.x, self.y = None, None

    def forward(self, x, y):
        """Cache ``x`` and ``y`` and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, d_out):
        """Return the tuple ``(d_x, d_y)`` for the upstream gradient ``d_out``.

        ``d_x`` is ``d_out * y`` and ``d_y`` is ``d_out * x``, using the
        operands cached by the last ``forward`` call.
        """
        return d_out * self.y, d_out * self.x

# 加算
class AddLayer:
    """Addition node of a computational graph.

    Addition needs no cached state: the upstream gradient is passed to both
    inputs unchanged.
    """

    def __init__(self):
        # Stateless layer; nothing to initialise.
        pass

    def forward(self, x, y):
        """Return the sum ``x + y``."""
        total = x + y
        return total

    def backward(self, d_out):
        """Return ``(d_out, d_out)``: the gradient for each of the two inputs."""
        return d_out, d_out


In [18]:
# ReLU
class ReLU:
    """Rectified linear unit layer operating on NumPy arrays.

    ``forward`` records which input elements were ``<= 0`` so that
    ``backward`` can block the gradient at exactly those positions.
    """

    def __init__(self):
        # Boolean mask of the positions where the last forward input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every non-positive element set to 0.

        The input array itself is left untouched.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, d_out):
        """Return the upstream gradient with masked positions zeroed.

        Works on a copy so the caller's ``d_out`` array is not modified;
        mutating it in place would corrupt the gradient for any caller that
        still holds a reference to it.
        """
        d_x = d_out.copy()
        d_x[self.mask] = 0
        return d_x

import numpy as np

# sigmoid
class Sigmoid:
    """Sigmoid activation layer.

    The forward output is cached because the derivative of the sigmoid can be
    written purely in terms of that output: ``out * (1 - out)``.
    """

    def __init__(self):
        # Output of the most recent forward pass (None until forward runs).
        self.out = None

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` and cache it for ``backward``."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, d_out):
        """Return ``d_out * (1 - out) * out`` using the cached output."""
        # Multiplication order is kept left-to-right so results match
        # bit-for-bit with the straightforward single-expression form.
        scaled = d_out * (1.0 - self.out)
        return scaled * self.out
\

import sys
sys.path.append('./deep-learning-from-scratch')
from common.functions import *

# Affine
class Affine:
    """Affine (fully connected) layer computing ``np.dot(x, W) + b``.

    The layer keeps references to the ``W`` and ``b`` objects it is given
    (no copies are made) and stores the parameter gradients computed by
    ``backward`` in ``d_W`` and ``d_b``.
    """

    def __init__(self, W, b):
        self.W, self.b = W, b
        # Input of the most recent forward pass; needed to compute d_W.
        self.x = None
        # Parameter gradients, filled in by backward().
        self.d_W = None
        self.d_b = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        # NOTE(review): presumably x is 2-D with the batch on axis 0, since
        # backward() sums d_out over axis 0 -- confirm against callers.
        self.x = x
        return np.dot(self.x, self.W) + self.b

    def backward(self, d_out):
        """Store ``d_W`` / ``d_b`` and return the gradient w.r.t. the input.

        ``d_W`` is ``np.dot(x.T, d_out)``, ``d_b`` is ``d_out`` summed over
        axis 0, and the returned value is ``np.dot(d_out, W.T)``.
        """
        self.d_W = np.dot(self.x.T, d_out)
        self.d_b = np.sum(d_out, axis=0)
        return np.dot(d_out, self.W.T)

# softmax with loss
class SoftmaxWithLoss:
    """Softmax activation combined with a cross-entropy loss.

    ``forward`` relies on ``softmax`` and ``cross_entropy_error`` being in
    scope (they are pulled in by the star import from ``common.functions``).
    """

    def __init__(self):
        self.loss = None  # loss value of the most recent forward pass
        self.y = None     # softmax output of the most recent forward pass
        self.t = None     # targets of the most recent forward pass

    def forward(self, x, t):
        """Store the targets and softmax output, then return the loss.

        NOTE(review): whether class-index targets are accepted here depends
        on ``cross_entropy_error`` -- confirm against its implementation.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, d_out=1):
        """Return the gradient of the loss with respect to the layer input.

        Supports both target formats:

        * one-hot targets (``t`` has as many elements as ``y``):
          ``(y - t) / batch_size``
        * class-index targets (one integer label per sample): 1 is
          subtracted from ``y`` at each sample's labelled class, then the
          result is divided by ``batch_size``.

        ``d_out`` is accepted for interface symmetry with the other layers
        but is not used.
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:
            # One-hot targets: element-wise difference.
            d_x = (self.y - self.t) / batch_size
        else:
            # Class-index targets: equivalent to subtracting the one-hot
            # encoding without materialising it. Work on a copy so the
            # cached softmax output stays intact.
            d_x = self.y.copy()
            d_x[np.arange(batch_size), self.t] -= 1
            d_x = d_x / batch_size
        return d_x



In [15]:
# 誤差逆伝播法を使った２層ニューラルネットワーク
import sys
sys.path.append('./deep-learning-from-scratch')

import numpy as np
from common.gradient import numerical_gradient
from collections import OrderedDict

class TwoLayerNet:
    """Two-layer network: Affine -> ReLU -> Affine, then SoftmaxWithLoss.

    Parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``. The Affine layers are handed those very array
    objects, so in-place updates of ``self.params[...]`` are seen by the
    layers as well.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters and layers.

        Weights are drawn from a standard normal distribution scaled by
        ``weight_init_std``; biases start at zero.
        """
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

        # Layers are kept in insertion order: predict() walks them forwards
        # and gradient() walks them backwards.
        self.layers = OrderedDict()
        self.layers["Affine1"] = Affine(self.params['W1'], self.params['b1'])
        self.layers["ReLU1"] = ReLU()
        self.layers["Affine2"] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer except the loss layer; return the scores."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the loss for inputs ``x`` and targets ``t``."""
        y = self.predict(x)
        # The loss layer must receive the network output, not the raw input.
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of samples whose predicted class matches ``t``.

        ``t`` may hold class indices (1-D) or one-hot rows (2-D); one-hot
        targets are converted to class indices before the comparison.
        """
        y = np.argmax(self.predict(x), axis=1)

        t = np.asarray(t)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        # Count the matches first, then normalise by the number of samples.
        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return the parameter gradients estimated by numerical differentiation.

        Delegates to the module-level ``numerical_gradient`` helper once per
        parameter array. The result is a dict with the same keys as
        ``self.params``.
        """
        def loss_W(W):
            # The argument is ignored: the loss is re-evaluated on (x, t).
            # NOTE(review): presumably the helper perturbs the parameter
            # array in place -- confirm against common.gradient.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ("W1", "b1", "W2", "b2")
        }

    def gradient(self, x, t):
        """Return the parameter gradients computed by backpropagation.

        Runs a forward pass so every layer caches what it needs, then feeds
        the gradient backwards from the loss layer through the layers in
        reverse order. The result is a dict with the same keys as
        ``self.params``.
        """
        # forward
        self.loss(x, t)

        # backward
        d_out = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            d_out = layer.backward(d_out)

        # Each Affine layer stored its gradients during backward().
        return {
            "W1": self.layers["Affine1"].d_W,
            "b1": self.layers["Affine1"].d_b,
            "W2": self.layers["Affine2"].d_W,
            "b2": self.layers["Affine2"].d_b,
        }


In [1]:
# ミニバッチの実装
import sys
sys.path.append('./deep-learning-from-scratch')
sys.path.append('./deep-learning-from-scratch/ch05')

import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet


# Load MNIST with normalisation and one-hot labels requested.
# NOTE(review): exact array shapes depend on load_mnist -- confirm there.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Hyperparameters
iters_num = 10_000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Loss after each parameter update, one entry per iteration.
train_loss_list = []

for i in range(iters_num):
    # Draw a random mini-batch of indices (np.random.choice samples with
    # replacement by default, so an index may repeat within a batch).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # Gradients via backpropagation.
    grad = network.gradient(x_batch, t_batch)

    # Plain SGD step, applied in place to each parameter.
    for key in network.params:
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)


ModuleNotFoundError: No module named 'dataset'