In [6]:
import os,sys
import numpy as np
from funcs import *
from collections import OrderedDict # 有序的字典


class TwoLayNet(object):
    
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # 初始化权重
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.random.randn(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.random.randn(output_size)

        # 生成层
        self.layers = OrderedDict()
        
        self.layers['Affine1'] = Affine(self.params['W1'],self.params['b1'])
        self.layers['Relu1'] = ReLu()
        
        self.layers['Affine2'] = Affine(self.params['W2'],self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()
        
    def predict(self, x):
        # 到第二层神经网络输出为止，比如 a1=x*W1+b1 z1=relu(a1) x=z1*W2+b2，之后返回x，后面不进行softmax的处理 
        for layer in self.layers.values():
            x = layer.forward(x)
        return x
    
    # x:输入数据 t:监督数据
    def loss(self, x, t):
        y = self.predict(x)
        # 在 lastLayer 层中 进行 softmax(y) 之后接着求损失函数 cross_entropy_error(softmax(y),t)
        return self.lastLayer.forward(y,t)
    
    # x:输入数据 t:监督数据
    def accuracy(self, x, t):
        # 根据初始输入数据求精度
        y=self.predict(x) # y是经过两层神经网络的输出，不需要经过softmax处理（如果要求损失函数，则需要经过softmax的处理，将输出和监督数据统一）

        y = np.argmax(y, axis=1) #y=[[2,10,4],[3,9,1]] --> y=[1,1]
        if t.ndim != 1: # 如果 t 是 one-hot 的形式
            t = np.argmax(t, axis=1) # t=[[0,1,0],[0,1,0]] --> t=[1,1]

        accu = np.sum(y==t) / x.shape[0]
        
        return accu
            
    # x:输入数据 t:监督数据
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x,t)

        grads={}
        
        # grads['Wn/bn'] 的 shape 和 Wn/bn 的 shape 一致 
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads
    
    def gradient(self, x, t):
        # 要想实现反向传播-backward，得先把正向-forward 走完
        # 虽然 self.loss 已经走完了正向，但是 self.gradient 比 self.loss 要先调用
        self.loss(x,t)

        # 下面走 backward
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db

        return grads

In [30]:
# 测试
net = TwoLayNet(2,3,4)
x = np.random.randn(3,2)
t = np.array([[0,1,0,0],[1,0,0,0],[0,0,0,1]])

for i in range(2000):
    grads = net.numerical_gradient(x, t)
    net.params['W1'] -= grads['W1']
    net.params['b1'] -= grads['b1']
    net.params['W2'] -= grads['W2']
    net.params['b2'] -= grads['b2']
    if not i%10: 
        print(net.accuracy(x, t))

0.3333333333333333
0.6666666666666666
0.6666666666666666
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
