In [108]:
import numpy as np
import sys

# Add the local 'book_material' directory to the module search path so that the
# `dataset.mnist` module imported on the next line can be resolved.
sys.path.append('book_material')
# NOTE(review): wildcard import — `load_mnist` is the only name from this module that
# the code visible in this notebook uses.
from dataset.mnist import *
# Load the train/test splits as (inputs, targets) pairs.
# NOTE(review): presumably normalize=True rescales pixel values and one_hot_label=True
# returns one-hot target rows — confirm against dataset/mnist.py.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label = True)

class Affine:
    """Fully connected layer computing ``x . w + b``.

    ``w`` and ``b`` are the model's own parameters, so they are supplied once
    through the constructor and never need to be passed again; every other
    attribute is derived from the input ``x`` seen in ``forward``.

    Attributes:
        w, b: the arrays handed to the constructor (stored by reference, not copied).
        x: input cached by the latest ``forward`` call.
        dw, db, dx: gradients produced by the latest ``backward`` call.
    """

    def __init__(self,w,b):
        # Parameters first, then the cache / gradient slots filled in later.
        self.w, self.b = w, b
        self.x = None
        self.dx = None
        self.dw = None
        self.db = None

    def forward(self,x):
        """Return ``x . w + b``; a 1-D ``x`` is promoted to a single-row batch."""
        if x.ndim == 1:
            # Same effect as reshape(1, -1): add a leading batch axis.
            x = x[np.newaxis, :]
        self.x = x
        return np.dot(x, self.w) + self.b

    def backward(self,d_out):
        """Store the gradients w.r.t. ``b``, ``w`` and ``x``; return the latter."""
        # The three gradients are independent of each other.
        self.db = np.sum(d_out, axis=0)
        self.dw = np.dot(self.x.T, d_out)
        self.dx = np.dot(d_out, self.w.T)
        return self.dx

class Relu:
    """Rectified linear unit: passes positive entries, zeroes the rest.

    ``mask`` holds the boolean ``x > 0`` array from the latest ``forward`` call
    and is reused by ``backward`` to gate the incoming gradient.
    """

    def __init__(self):
        self.mask = None

    def forward(self,x):
        """Return ``x`` with every entry that is not strictly positive zeroed."""
        self.mask = x > 0
        return self.mask * x

    def backward(self,d_out):
        """Let the gradient through only where the forward input was positive."""
        return self.mask * d_out



class Softmaxwithloss:
    """Softmax output layer combined with a cross-entropy loss.

    Targets may be given either as a one-hot / probability array with the same
    shape as the scores (``t.ndim != 1``) or as a 1-D array of integer class
    indices (``t.ndim == 1``).

    Attributes:
        x: max-shifted scores from the latest ``forward`` call.
        y: softmax probabilities from the latest ``forward`` call.
        t: targets from the latest ``loss`` / ``backward`` call.
        batch_size: number of rows in the latest ``forward`` input.
        dx: gradient from the latest ``backward`` call.
        w_rate, w: not used by any method of this class; kept so that code
            reading these attributes keeps working.
    """

    def __init__(self):
        self.t=None
        self.x=None
        self.y=None
        self.dx=None
        self.batch_size = None
        self.w_rate=0.1
        self.w=None

    def forward(self,x):
        """Return row-wise softmax probabilities; a 1-D ``x`` becomes one row."""
        if x.ndim==1:
            x = x.reshape(1,-1)
        # Subtract the row maximum before exponentiating to avoid overflow;
        # softmax is invariant to a per-row constant shift.
        self.x=x-np.max(x,axis=-1,keepdims=True)
        self.batch_size = x.shape[0]
        exp_x = np.exp(self.x)
        self.y=exp_x/np.sum(exp_x,axis=-1,keepdims=True)
        return self.y

    def loss(self,x,t):
        """Run ``forward(x)`` and return the mean cross-entropy against ``t``.

        ``t`` is remembered so that a following ``backward()`` can be called
        without repeating it.
        """
        out=self.forward(x)
        self.t=t
        # 1e-7 keeps log() finite when a probability underflows to 0.
        if t.ndim!=1:
            loss_rate=np.sum(-self.t*np.log(out+1e-7))/self.batch_size
        else:
            loss_rate=-np.sum(np.log(out[np.arange(len(t)),t]+1e-7))/self.batch_size
        return  loss_rate

    def backward(self,t=None):
        """Return the gradient of the mean loss w.r.t. the scores.

        Args:
            t: targets (same two formats as ``loss``). When omitted, the targets
                stored by the previous ``loss`` / ``backward`` call are reused.

        Raises:
            ValueError: if ``t`` is omitted and no targets have been stored yet.
        """
        if t is None:
            # Fall back to the stored targets instead of overwriting them with
            # None (which previously crashed on ``None.ndim``).
            t = self.t
        if t is None:
            raise ValueError("backward() needs targets: pass t or call loss(x, t) first")
        self.t=t
        if self.t.ndim!=1:
            dx = (self.y - self.t) / self.batch_size
        else:
            # Label form: subtract 1 at each row's true-class column, on a copy
            # so the cached probabilities stay intact.
            y_c=self.y.copy()
            y_c[np.arange(len(self.t)),self.t] -=1
            dx=y_c/self.batch_size

        self.dx=dx
        return self.dx

class dropout:
    """Dropout layer that rescales the kept units during training.

    In ``'train'`` mode each element is zeroed with probability ``rate`` and the
    survivors are divided by ``1 - rate``. In any other mode the layer is the
    identity.

    Args:
        rate: drop probability; must satisfy ``0 <= rate < 1`` (``rate == 1``
            would divide by zero).

    Attributes:
        mask: 0/1 keep mask from the latest ``forward`` call (all ones outside
            ``'train'`` mode).
        mode: ``'train'`` or ``'test'``.
    """

    def __init__(self, rate=0.1):
        self.rate = rate
        self.mask = None
        self.mode= 'train'
        # Element-wise factor applied in the latest forward pass; backward
        # reuses it so the gradient matches what forward actually computed.
        self._multiplier = None

    def forward(self, x):
        """Apply dropout to ``x`` (train mode) or return ``x`` unchanged."""
        if self.mode=='train':
            keep_prob = 1-self.rate
            self.mask = np.random.binomial(1,keep_prob,size=x.shape)
            self._multiplier = np.true_divide(self.mask,keep_prob)
            out = x*self._multiplier
        else:
            # Any mode other than 'train' is normalised to 'test'.
            self.mode = 'test'
            self.mask = np.ones(x.shape)
            self._multiplier = self.mask
            out=x
        return out

    def backward(self, d_out):
        """Return the gradient w.r.t. the forward input.

        Forward is an element-wise product of an m*n input with an m*n factor,
        so the backward pass is the same element-wise product applied to the
        m*n ``d_out``. The factor must be ``mask / (1 - rate)`` — the one used
        in forward — not the bare 0/1 mask, otherwise every training gradient
        is too small by a factor of ``1 - rate``.
        """
        return d_out * self._multiplier



class MultiLayersNetwork:
    """Fully connected classifier built from the layer classes in this file.

    Layers are stored in ``self.layers`` in execution order::

        affine0, dropout0, relu0, affine1, dropout1, relu1, ..., affineN, Activation_function

    where ``Activation_function`` is a ``Softmaxwithloss``. Weights and biases
    live in ``self.params`` under ``'W<i>'`` / ``'b<i>'``; each ``Affine`` layer
    holds a reference to the same arrays, so in-place updates of
    ``self.params[...]`` are seen by the layers.

    Args:
        input_size: number of input features.
        output_size: number of classes.
        rate: drop probability handed to every dropout layer.
        hidden_size_list: widths of the hidden layers (default ``[100, 100, 100]``).
            The sequence is copied, never modified.
        weight: numerator of the init variance; weights are drawn as
            ``randn * sqrt(weight / fan_in)`` (``weight=2`` is He initialisation).
    """

    def __init__(self, input_size, output_size,rate=0.1 ,hidden_size_list=None,weight=1):
        if hidden_size_list is None:
            hidden_size_list = [100, 100, 100]
        self.input_size = input_size
        self.output_size = output_size
        self.params=dict()
        self.sourcedata=None
        self.layers= dict()
        self.weight=weight
        self.w_dict=None
        self.rate=rate
        # Output of every layer recorded by the latest predict() call.
        self.input_detail={}

        # Build the layers.
        # A NEW list is created here; the previous insert()/append() on the
        # argument itself silently modified the caller's list.
        parameter_size_list=[input_size, *hidden_size_list, self.output_size]
        # Despite its name this holds ALL layer widths (input, hidden..., output);
        # gradient() relies on its length.
        self.hidden_size_list=parameter_size_list

        for i in range(len(parameter_size_list) - 1): # one iteration per Affine layer
            scale = np.sqrt(self.weight / parameter_size_list[i])
            self.params['W' + str(i)] = np.random.randn(parameter_size_list[i], parameter_size_list[i + 1]) * scale # Gaussian init
            self.params['b' + str(i)] = np.zeros(parameter_size_list[i + 1]) # biases start at 0
            self.layers['affine' + str(i)] = Affine(self.params['W' + str(i)], self.params['b' + str(i)])

            if i < len(parameter_size_list) - 2: # every Affine except the last gets dropout + Relu
                self.layers['dropout' + str(i)] = dropout(self.rate)
                self.layers['relu' + str(i)] = Relu()
            else: # the last Affine is followed by Softmaxwithloss
                self.layers['Activation_function'] = Softmaxwithloss()

    def set_dropout_mode(self,mode):
        """Set ``mode`` on every dropout layer.

        ``'train'`` is applied like any other value; it used to be ignored,
        which made it impossible to re-enable dropout after switching to
        ``'test'``.
        """
        for layer in self.layers.values():
            if isinstance(layer,dropout):
                layer.mode=mode

    def predict(self, x):
        """Run ``x`` through every layer, including the final softmax.

        Returns the softmax probabilities and records each layer's output in
        ``self.input_detail`` under the layer's key.
        """
        inputs=x
        for key,func in self.layers.items():
            inputs=func.forward(inputs)
            self.input_detail[key]=inputs
        return inputs

    def loss(self,x,t,weight_decay_lambda=0):
        """Forward pass returning cross-entropy plus the L2 penalty
        ``0.5 * weight_decay_lambda * sum(W**2)`` over all weight matrices."""
        w_decay=0
        for w_key in self.params.keys():
            if 'W' in w_key:
                w_decay += 0.5*weight_decay_lambda * np.sum(self.params[w_key]**2)

        inputs=x
        for key,func in self.layers.items():
            if key=='Activation_function':
                # The loss layer runs its own forward pass on the final scores.
                loss_value=func.loss(inputs,t)+w_decay
            else:
                inputs=func.forward(inputs)

        return loss_value

    def backward(self,t,d_out=1):
        """Back-propagate through the layers in reverse order.

        Uses the values cached by the latest forward pass; the initial
        ``d_out`` is replaced by the loss layer's gradient for targets ``t``.
        """
        for key in reversed(list(self.layers.keys())):
            if key=='Activation_function':
                d_out=self.layers[key].backward(t=t)
            else:
                d_out=self.layers[key].backward(d_out)

    def gradient(self,t,weight_decay_lambda=0):
        """Return ``{'W<i>': ..., 'b<i>': ...}`` gradients for targets ``t``.

        Must follow a forward pass (``loss`` or ``predict``) on the matching
        inputs. Pass the same ``weight_decay_lambda`` that was given to ``loss``
        so the L2 term ``lambda * W`` is included in the weight gradients.
        """
        self.backward(d_out=1,t=t)
        grads=dict()
        for idx in range(len(self.hidden_size_list)-1):
            grads['W'+str(idx)]=self.layers['affine'+str(idx)].dw+weight_decay_lambda*(self.params['W'+str(idx)])
            grads['b'+str(idx)]=self.layers['affine'+str(idx)].db
        return grads

    def accuracy(self,x,t):
        """Fraction of rows in ``x`` whose arg-max prediction equals the label.

        ``t`` may be one-hot (reduced with arg-max) or a 1-D array of class indices.
        """
        if t.ndim!=1: t=np.argmax(t,axis=1)
        y=np.argmax(self.predict(x),axis=1)
        return np.sum(y==t)/y.shape[0]


### Summary: when the training set is too small and the number of iterations too low,
### the network hardly overfits, so regularisation techniques do not help.
# NOTE(review): in this cell dropout is switched to 'test' mode before training and
# weight decay is 0.0, so no regularisation is actually active during the run —
# confirm that this is the intended baseline.
SEED = 0                    # fixes weight init and subset sampling for reproducible reruns
N_SUBSET = 10000            # number of training samples drawn for this experiment
N_ITERATIONS = 1000         # full-batch gradient-descent steps
LEARNING_RATE = 0.1
WEIGHT_DECAY_LAMBDA = 0.0   # L2 coefficient, forwarded to BOTH loss() and gradient()

np.random.seed(SEED)
mln7=MultiLayersNetwork(input_size=784,output_size=10,rate=0.1,hidden_size_list=[100,100,100,100,100,100,100,100,100],weight=2)

# Sample from however many rows were actually loaded instead of a hard-coded 60000.
# np.random.choice samples with replacement by default, so duplicates are possible.
mask=np.random.choice(x_train.shape[0],size=N_SUBSET)
x_mask=x_train[mask]
t_mask=t_train[mask]
loss_list=[]
mln7.set_dropout_mode('test')
for i in range(N_ITERATIONS):
    # loss() performs the forward pass whose cached activations gradient() back-propagates.
    loss_list.append(mln7.loss(x_mask,t_mask,weight_decay_lambda=WEIGHT_DECAY_LAMBDA))
    # Same lambda as in loss(), so the reported loss and the applied gradient stay consistent.
    grads=mln7.gradient(t=t_mask,weight_decay_lambda=WEIGHT_DECAY_LAMBDA)
    for key in grads.keys():
        # In-place update: the Affine layers reference these same parameter arrays.
        mln7.params[key] -=LEARNING_RATE*grads[key]
mln7.set_dropout_mode('test')
mln7.accuracy(x_test,t_test)

0.9434

In [107]:
# Accuracy on the sampled training subset (x_mask, t_mask) that the network was fitted on.
mln7.accuracy(x_mask,t_mask)

1.0

[[0. 0. 2. 0. 2. 2. 0. 0. 0. 2. 0. 0. 0. 0. 0. 2. 2. 0. 2. 0. 2. 0. 0. 0.
  2. 0. 0. 2. 2. 2. 2. 0. 2. 0. 0. 2. 0. 2. 0. 2. 2. 2. 2. 0. 0. 0. 2. 0.
  0. 0. 0. 2. 0. 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 2. 2. 0. 2. 2. 2.
  0. 2. 0. 0. 0. 0. 2. 0. 0. 2. 2. 0. 2. 2. 2. 2. 2. 2. 2. 2. 2. 0. 2. 0.
  2. 0. 0. 0.]]


In [99]:
# Inspect the output of layer 'affine6' as recorded in input_detail by the most recent predict() call.
mln7.input_detail['affine6']

array([[-0.18548159, -0.00616037,  0.08482984, ..., -0.12593171,
        -0.09261184,  0.21550057],
       [-0.32329175,  0.05640867, -0.27356264, ..., -0.54540084,
        -0.28913599,  0.72764212],
       [-0.0599832 ,  0.11419436, -0.23372228, ..., -0.21511901,
        -0.12375964,  0.25477153],
       ...,
       [-0.14460058,  0.0633298 , -0.09135575, ..., -0.21712407,
        -0.12749988,  0.26417222],
       [-0.08824096,  0.24601549, -0.27485167, ..., -0.29114132,
        -0.3333861 ,  0.4019106 ],
       [ 0.24132348,  0.69647077, -1.02724083, ..., -0.58827353,
        -0.62175684,  1.10057868]])