In [1]:
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist

In [6]:
from common.functions import cross_entropy_error, sigmoid, sigmoid_grad, softmax
from common.gradient import numerical_gradient

class TwoLayerNet:
    """Fully connected feed-forward classifier.

    Despite the name, the network has ``len(layerSize) - 1`` affine layers:
    every layer except the last is followed by the hidden activation ``h``
    and the last one by the output activation ``sigma``.

    Attributes:
        l: Number of affine layers.
        h: Hidden-layer activation function.
        sigma: Output-layer activation function.
        h_grad: Derivative of ``h``, evaluated on the pre-activation values.
        W: List of weight matrices; ``W[i]`` has shape
            ``(layerSize[i], layerSize[i + 1])``.
        B: List of bias vectors; ``B[i]`` has shape ``(layerSize[i + 1],)``.
    """

    def __init__(self, layerSize, weight_init_std=0.01, h=sigmoid,
                 sigma=softmax, h_grad=sigmoid_grad):
        """Create the weights and biases for every layer.

        Args:
            layerSize: Sequence of unit counts, input layer first and output
                layer last. Must contain at least two entries.
            weight_init_std: Scale applied to the standard-normal samples
                used as initial weights.
            h: Activation applied after every hidden layer.
            sigma: Activation applied after the output layer.
            h_grad: Derivative of ``h`` as a function of the pre-activation;
                used by ``gradient``. Must match ``h``.

        Raises:
            ValueError: If ``layerSize`` has fewer than two entries.
        """
        if len(layerSize) < 2:
            raise ValueError(
                "layerSize needs at least an input and an output size")

        self.l = len(layerSize) - 1
        self.h = h
        self.sigma = sigma
        self.h_grad = h_grad

        # Initialise weights with scaled Gaussian noise and biases with zeros.
        self.W = []
        self.B = []
        for n_in, n_out in zip(layerSize[:-1], layerSize[1:]):
            self.W.append(weight_init_std * np.random.randn(n_in, n_out))
            self.B.append(np.zeros(n_out))

    @staticmethod
    def calcLayer(A, w, b, h):
        """Return ``h(A . w + b)``, i.e. one affine layer plus activation."""
        return h(np.dot(A, w) + b)

    def predict(self, x):
        """Run a forward pass and return the output of ``sigma``.

        Args:
            x: Input data; its last axis must match the first layer size.
        """
        A = x
        # Hidden layers
        for i in range(self.l - 1):
            A = self.calcLayer(A, self.W[i], self.B[i], self.h)
        # Output layer
        return self.calcLayer(A, self.W[-1], self.B[-1], self.sigma)

    def loss(self, x, t):
        """Return ``cross_entropy_error`` of the prediction for ``x``.

        Args:
            x: Input data.
            t: Supervision (target) data.
        """
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` that are classified correctly.

        Args:
            x: Input batch with samples along axis 0.
            t: Targets with one row per sample; the class is taken as the
                arg-max along axis 1 (i.e. one-hot style targets).
        """
        y = np.argmax(self.predict(x), axis=1)
        t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Estimate the loss gradients with the imported numerical helper.

        Args:
            x: Input data.
            t: Supervision (target) data.

        Returns:
            A list ordered ``[dW0, dB0, dW1, dB1, ...]`` covering every
            layer, the same order that ``gradient`` returns.
        """
        # The helper perturbs the parameter array it is given in place, so
        # the loss closure can ignore its argument and just re-evaluate.
        loss_W = lambda W: self.loss(x, t)

        grads = []
        # Inside a method the bare name resolves to the module-level
        # ``numerical_gradient`` function, not to this method.
        for i in range(self.l):
            grads.append(numerical_gradient(loss_W, self.W[i]))
            grads.append(numerical_gradient(loss_W, self.B[i]))
        return grads

    def gradient(self, x, t):
        """Compute the loss gradients by back-propagation.

        The output error is taken as ``(y - t) / batch``, which is the
        gradient of a mean cross-entropy loss over a softmax output with
        one-hot targets -- NOTE(review): confirm this still holds if a
        different ``sigma`` or loss is used.

        Args:
            x: Input batch with samples along axis 0.
            t: Targets with the same shape as the network output.

        Returns:
            A list ordered ``[dW0, dB0, dW1, dB1, ...]``, the same order
            that ``numerical_gradient`` returns.
        """
        batch_num = x.shape[0]

        # Forward pass, remembering what each layer received and the
        # pre-activation of every hidden layer.
        layer_inputs = []
        pre_activations = []
        z = x
        for i in range(self.l - 1):
            layer_inputs.append(z)
            a = np.dot(z, self.W[i]) + self.B[i]
            pre_activations.append(a)
            z = self.h(a)
        layer_inputs.append(z)
        y = self.sigma(np.dot(z, self.W[-1]) + self.B[-1])

        # Backward pass, from the output layer down to the first layer.
        delta = (y - t) / batch_num
        grads = []
        for i in range(self.l - 1, -1, -1):
            # Bias first, then weight: the list is reversed at the end, so
            # this yields the W-then-B order within each layer.
            grads.append(np.sum(delta, axis=0))
            grads.append(np.dot(layer_inputs[i].T, delta))
            if i > 0:
                # Push the error through W[i] and the activation of the
                # hidden layer below; nothing lies below layer 0.
                dz = np.dot(delta, self.W[i].T)
                delta = self.h_grad(pre_activations[i - 1]) * dz
        return grads[::-1]