# mini-batch方式实现基于反向传播求解梯度的两层神经网络

In [1]:
#import sys, os
#sys.path.append(os.pardir)
import numpy as np
from deepcores.layers import *
from collections import OrderedDict
from models.TwoLayerNet import TwoLayerNet


from dataset.mnist import load_mnist

# Load the MNIST train/test splits (normalize and one-hot label options enabled).
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []
# Number of iterations that make up one epoch. Floor division keeps this an
# int: with true division a train_size that is not a multiple of batch_size
# gives a fractional value, and `i % iter_per_epoch == 0` then almost never
# holds, so the per-epoch accuracy report would be silently skipped.
iter_per_epoch = max(train_size // batch_size, 1)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # Draw a mini-batch: batch_size random indices from [0, train_size).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradients of the loss w.r.t. every parameter.
    grad = network.gradient(x_batch, t_batch)

    # Gradient-descent update, applied in place to each parameter array.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the mini-batch loss (evaluated after the update) for this step.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate accuracy on the full train and test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

train acc, test acc | 0.09871666666666666, 0.098
train acc, test acc | 0.8982666666666667, 0.902
train acc, test acc | 0.91965, 0.9225
train acc, test acc | 0.9331333333333334, 0.9317
train acc, test acc | 0.9410333333333334, 0.9376
train acc, test acc | 0.9484333333333334, 0.9473
train acc, test acc | 0.9537666666666667, 0.9505
train acc, test acc | 0.9583333333333334, 0.9554
train acc, test acc | 0.9621166666666666, 0.9594
train acc, test acc | 0.96595, 0.9616
train acc, test acc | 0.9680333333333333, 0.9637
train acc, test acc | 0.9710833333333333, 0.9652
train acc, test acc | 0.9719, 0.9658
train acc, test acc | 0.9733833333333334, 0.9679
train acc, test acc | 0.9753333333333334, 0.9688
train acc, test acc | 0.9764166666666667, 0.9696
train acc, test acc | 0.9768333333333333, 0.9702


In [2]:
import numpy as np
from deepcores.layers import *
from collections import OrderedDict
from models.TwoLayerNet_Numerical_Gradient import TwoLayerNet

#每经过一个epoch会输出当前的训练样本准确率和测试样本准确率
from dataset.mnist import load_mnist

# Load the MNIST train/test splits (normalize and one-hot label options enabled).
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []
# Number of iterations that make up one epoch. Floor division keeps this an
# int: with true division a train_size that is not a multiple of batch_size
# gives a fractional value, and `i % iter_per_epoch == 0` then almost never
# holds, so the per-epoch accuracy report would be silently skipped.
iter_per_epoch = max(train_size // batch_size, 1)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # Progress marker: print the current iteration index.
    print(f'序号:{i}')

    # Draw a mini-batch: batch_size random indices from [0, train_size).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradients by numerical differentiation.
    grad = network.numerical_gradient(x_batch, t_batch)
    # grad = network.gradient(x_batch, t_batch)  # fast version!

    # Gradient-descent update, applied in place to each parameter array.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the mini-batch loss (evaluated after the update) for this step.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate accuracy on the full train and test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))


序号:0
train acc, test acc | 0.10441666666666667, 0.1028
序号:1
序号:2
序号:3
序号:4
序号:5
序号:6
序号:7
序号:8
序号:9
序号:10
序号:11
序号:12
序号:13
序号:14
序号:15
序号:16
序号:17
序号:18
序号:19
序号:20
序号:21
序号:22
序号:23
序号:24
序号:25
序号:26
序号:27
序号:28
序号:29
序号:30
序号:31
序号:32
序号:33
序号:34
序号:35
序号:36
序号:37
序号:38
序号:39
序号:40
序号:41
序号:42
序号:43
序号:44
序号:45
序号:46
序号:47
序号:48
序号:49
序号:50
序号:51
序号:52
序号:53
序号:54
序号:55
序号:56
序号:57
序号:58
序号:59
序号:60
序号:61
序号:62
序号:63
序号:64
序号:65
序号:66
序号:67
序号:68
序号:69
序号:70
序号:71
序号:72
序号:73
序号:74
序号:75
序号:76
序号:77
序号:78
序号:79
序号:80
序号:81
序号:82
序号:83
序号:84


KeyboardInterrupt: 

In [8]:
# Small 2x2 example: averaging along axis 0 collapses the rows and yields
# one mean per column.
x = np.array([
    [1, 2],
    [3, 4],
])
np.mean(x, axis=0)

array([2., 3.])

In [None]:
class XXLayer:
    def __init__(self,[参数1，]):
        ...省略其他非通用代码
        self.w1 = None

    def forward(self,x):
        ...省略其他非通用代码
        out = f(x,w1)
        return out

    def backward(self,dout):
        ...省略其他非通用代码
        self.dw1 = dout*deltaf(x,w1)/deltaw1
        dx = dout*deltaf(x,w1)/deltax