In [1]:
import numpy as np
from dataset.mnist import load_mnist
from common.functions import softmax

# 交叉熵误差
$E=-\Sigma t_k log(y_K)$

In [2]:
def cross_entropy_error(y, t):
    delta = 1e-7 # 保护机制, 防止log0
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
        
    #这里reshape的作用是计算单个数据使得batch_size=1
    batch_size = y.shape[0]
    return -np.sum(t*np.log(y + delta))/batch_size

# mini-batch

使用训练数据进行学习，严格来说， 就是针对训练数据计算损失函数的值，找出使该值尽可能小的参数

如果以全部数据为对象 求损失函数的和，则计算过程需要花费较长的时间

我们从全部数据中选出一部分，作为全部数据的近似。神经网络的学习也是从训练数据中选出一批数据（称为 mini-batch, 小 批量），然后对每个 mini-batch 进行学习

In [3]:
(x_train, t_train), (x_test, t_test) = \
load_mnist(normalize=True, one_hot_label=True)

train_size = x_train.shape[0]
batch_size = 10
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]

# 梯度的计算


In [4]:
def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)
    
    for idx in range(x.size):
        tmp_val = x[idx]
        # x[idx]逐个求偏导
        #f(x+h)
        x[idx] = tmp_val + h
        fxh1 = f(x)
        #(fx-h)
        x[idx] = tmp_val - h
        fxh2 = f(x)
        
        grad[idx] = (fxh1-fxh2) / 2*h
        x[idx] = tmp_val
        
    return grad

In [5]:
def function_2(x):
    # x[0]**2 + x[1]**2
    return np.sum(x**2)

numerical_gradient(function_2, np.array([3.0,0.0]))

array([6.e-08, 0.e+00])

# 梯度下降

In [6]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    x = init_x
    
    for i in range(step_num):
        grad =  numerical_gradient(f, x)
        x -= lr * grad
        
    return x

In [7]:
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x)

array([-2.99999994,  3.99999992])

# 利用梯度下降更新参数

In [8]:
class simpleNet: 
    def __init__(self): 
        self.W = np.random.randn(2,3) # 用高斯分布进行初始化 
        
    def forward(self, x): 
        return np.dot(x, self.W) 
    
    def loss(self, x, t): 
        z = self.forward(x) 
        y = softmax(z) 
        loss = cross_entropy_error(y, t) 
        return loss

In [9]:
net = simpleNet()
print(net.W)
x = np.array([0.6, 0.9])
p = net.forward(x)
print(p)
t = np.array([0, 0, 1])
net.loss(x, t)

[[ 0.5000227   2.04419071  0.37229533]
 [-0.18373586 -1.35679891 -0.33304462]]
[ 0.13465134  0.0053954  -0.07636295]


1.2000016679819552