In [160]:
import numpy as np
import sys
sys.path.append('./book_material')
from dataset.mnist import *
# Load the MNIST train/test splits as (inputs, targets) pairs.
# NOTE(review): presumably normalize=True scales pixel values and
# one_hot_label=True returns one-hot target rows -- confirm against
# dataset.mnist.load_mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label = True)

In [257]:
# w and b are parameters of the model itself, so they are handed to the
# constructor and never need to be set by hand afterwards; every other
# attribute is derived from the input x seen during forward/backward.
class Affine:
    """Fully connected layer computing ``x . w + b``.

    Attributes:
        w, b: weight matrix and bias, stored by reference (not copied).
        x: input cached by the most recent ``forward`` call.
        dw, db, dx: gradients written by the most recent ``backward`` call.
    """

    def __init__(self, w, b):
        self.w, self.b = w, b
        # Cache and gradient slots stay empty until forward/backward run.
        self.x = None
        self.dw = self.db = self.dx = None

    def forward(self, x):
        """Cache ``x`` (a 1-D input is promoted to one row) and return ``x . w + b``."""
        batch = x.reshape(1, -1) if x.ndim == 1 else x
        self.x = batch
        return np.dot(batch, self.w) + self.b

    def backward(self, d_out):
        """Store ``dw``, ``db`` and ``dx`` for upstream gradient ``d_out``; return ``dx``."""
        grad_input = np.dot(d_out, self.w.T)
        grad_weight = np.dot(self.x.T, d_out)
        grad_bias = np.sum(d_out, axis=0)  # bias is shared across rows, so sum over them
        self.dx, self.dw, self.db = grad_input, grad_weight, grad_bias
        return grad_input

class Relu:
    """Rectified linear unit: passes positive values, zeroes everything else.

    Attributes:
        mask: boolean array, True where the last forward input was > 0.
    """

    def __init__(self):
        self.mask = None

    def forward(self, x):
        """Return ``max(x, 0)`` element-wise and remember which entries were positive.

        Multiplying by the boolean mask would turn ``-inf`` into NaN
        (``False * -inf``); ``np.maximum`` clamps it to 0 instead while still
        letting a NaN input propagate. The zero is built in x's own dtype so
        the output dtype matches the input.
        """
        x = np.asarray(x)
        self.mask = x > 0
        return np.maximum(x, x.dtype.type(0))

    def backward(self, d_out):
        """Return ``d_out`` where the forward input was positive, 0 elsewhere.

        Selection (rather than multiplication by the mask) keeps a non-finite
        upstream gradient from leaking NaN through inactive units.
        """
        d_out = np.asarray(d_out)
        return np.where(self.mask, d_out, d_out.dtype.type(0))



class Softmaxwithloss:
    """Softmax activation combined with a cross-entropy loss.

    Targets may be given either as one-hot rows (same number of elements as
    the softmax output) or as a 1-D array of integer class indices.

    Attributes:
        x: max-shifted logits from the last ``forward`` call.
        y: softmax probabilities from the last ``forward`` call.
        t: most recently supplied targets (aligned to ``y`` when one-hot).
        dx: gradient produced by the last ``backward`` call.
        batch_size: number of rows in the last ``forward`` input.
    """

    def __init__(self):
        self.t = None
        self.x = None
        self.y = None
        self.dx = None
        self.batch_size = None

    def forward(self, x):
        """Return row-wise softmax of ``x``; a 1-D input is treated as one sample."""
        if x.ndim == 1:
            x = x.reshape(1, -1)
        # Subtract the row maximum so np.exp cannot overflow.
        self.x = x - np.max(x, axis=-1, keepdims=True)
        self.batch_size = x.shape[0]
        exp_x = np.exp(self.x)
        self.y = exp_x / np.sum(exp_x, axis=-1, keepdims=True)
        return self.y

    def _align_targets(self, t):
        """Reshape one-hot targets to the shape of ``self.y``; leave class indices as is."""
        t = np.asarray(t)
        # Same element count as the probabilities means one-hot encoding. This
        # also covers a single sample passed as a 1-D one-hot vector, which
        # would otherwise be mistaken for a vector of class indices.
        if t.size == self.y.size:
            t = t.reshape(self.y.shape)
        return t

    def loss(self, x, t):
        """Run ``forward(x)`` and return the mean cross-entropy against ``t``."""
        out = self.forward(x)
        t = self._align_targets(t)
        self.t = t
        if t.ndim != 1:
            total = np.sum(-t * np.log(out + 1e-7))
        else:
            picked = out[np.arange(self.batch_size), t]
            total = -np.sum(np.log(picked + 1e-7))
        # 1e-7 above guards against log(0).
        return total / self.batch_size

    def backward(self, d_out=1, t=None):
        """Return the gradient of the mean loss w.r.t. the logits, scaled by ``d_out``.

        Args:
            d_out: upstream gradient multiplier (1 for a terminal loss).
            t: targets; when omitted, the targets stored by the last
                ``loss``/``backward`` call are reused.

        Raises:
            RuntimeError: if ``forward`` has not been called yet.
            ValueError: if no targets are available.
        """
        if self.y is None:
            raise RuntimeError("forward() must be called before backward()")
        if t is not None:
            self.t = self._align_targets(t)
        if self.t is None:
            raise ValueError("targets are required: pass t or call loss() first")

        if self.t.ndim != 1:
            dx = (self.y - self.t) / self.batch_size
        else:
            # Class-index targets: subtract 1 only at each row's true class.
            dx = self.y.copy()
            dx[np.arange(self.batch_size), self.t] -= 1
            dx = dx / self.batch_size
        self.dx = dx * d_out
        return self.dx

class MultiLayersNetwork:
    """Feed-forward classifier: Affine+Relu hidden layers, then Affine+softmax.

    Attributes:
        params: dict mapping 'W<i>' / 'b<i>' to the arrays used by layer i.
        layers: insertion-ordered dict of layer objects, keyed 'affine<i>',
            'relu<i>' and finally 'Activation_function'.
        hidden_size_list: after construction this holds the FULL list of
            layer widths, i.e. [input_size, *hidden sizes, output_size].
    """

    def __init__(self, input_size, output_size, hidden_size_list=None):
        """Create the layers and randomly initialise their parameters.

        Args:
            input_size: number of input features.
            output_size: number of output classes.
            hidden_size_list: widths of the hidden layers; defaults to
                [100, 100, 100]. May be empty. The caller's sequence is
                never modified.
        """
        if hidden_size_list is None:
            hidden_size_list = [100, 100, 100]
        self.input_size = input_size
        self.output_size = output_size
        self.params = dict()
        self.sourcedata = None
        self.layers = dict()

        # Build a NEW list: inserting into the argument itself would corrupt
        # the caller's list and any second network built from it.
        layer_sizes = [input_size, *hidden_size_list, output_size]
        self.hidden_size_list = layer_sizes

        last = len(layer_sizes) - 2  # index of the final Affine layer
        for i, (fan_in, fan_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
            # Weights ~ N(0, 1/fan_in); biases start at zero.
            scale = np.sqrt(1.0 / fan_in)
            self.params['W' + str(i)] = np.random.randn(fan_in, fan_out) * scale
            self.params['b' + str(i)] = np.zeros(fan_out)
            # Each layer holds references to its own weight and bias arrays,
            # so in-place updates of self.params are seen by the layer.
            self.layers['affine' + str(i)] = Affine(self.params['W' + str(i)],
                                                    self.params['b' + str(i)])
            if i < last:
                # Every Affine layer except the last is followed by a Relu.
                self.layers['relu' + str(i)] = Relu()
            else:
                self.layers['Activation_function'] = Softmaxwithloss()

    def predict(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        inputs = x
        for func in self.layers.values():
            inputs = func.forward(inputs)
        return inputs

    def backward(self, t, d_out=1):
        """Back-propagate through the layers in reverse order.

        The layers must already hold the activations of a forward pass
        (see ``predict``). Only the final 'Activation_function' layer
        receives the targets ``t``.
        """
        for key, layer in reversed(list(self.layers.items())):
            if key == 'Activation_function':
                d_out = layer.backward(d_out, t=t)
            else:
                d_out = layer.backward(d_out)

    def gradient(self, t):
        """Back-propagate for targets ``t`` and return the parameter gradients.

        Call ``predict(x)`` first: the gradients are computed from the
        activations cached during that forward pass.

        Returns:
            dict with the same 'W<i>' / 'b<i>' keys as ``self.params``.
        """
        self.backward(d_out=1, t=t)
        grads = dict()
        for idx in range(len(self.hidden_size_list) - 1):
            affine = self.layers['affine' + str(idx)]
            grads['W' + str(idx)] = affine.dw
            grads['b' + str(idx)] = affine.db
        return grads

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose arg-max prediction matches ``t``.

        ``t`` may be one-hot (any non-1-D array) or a 1-D array of class indices.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        y = np.argmax(self.predict(x), axis=1)
        return np.sum(y == t) / y.shape[0]

In [261]:
# Build a network with layer widths 784 -> 100 -> 100 -> 10 and evaluate it on
# the test split before any training, as a baseline for freshly initialised weights.
mln=MultiLayersNetwork(input_size=784,output_size=10,hidden_size_list=[100,100])
mln.accuracy(x_test,t_test)

np.float64(0.0788)

In [262]:
# Full-batch gradient descent: 200 steps, each using the entire training set,
# with a fixed step size of 0.1.
for i in range(200):
    # Forward pass first, so every layer caches what its backward pass needs.
    mln.predict(x_train)
    grads=mln.gradient(t=t_train)
    for key in grads.keys():
        # In-place update: mln.params[key] is the very array object the Affine
        # layer holds, so the layer sees the new values without reassignment.
        mln.params[key] -=0.1*grads[key]

In [263]:
# Test-set accuracy after training, for comparison with the pre-training baseline.
mln.accuracy(x_test,t_test)

np.float64(0.9072)