In [1]:
import sys, os
from collections import OrderedDict
import numpy as np

sys.path.append('/Users/derek/projects/deep-learning-from-scratch')
from dataset.mnist import load_mnist

from common.functions import softmax, cross_entropy_error

In [90]:
class SimpleNet:
    def __init__(self):
        self.W = np.random.randn(2, 3)
    
    def predict(self, x):
        return np.dot(x, self.W)
    
    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, t)
        
        return loss

In [91]:
net = SimpleNet()
net.W

array([[-0.03091546,  2.50336843,  1.43683195],
       [ 0.85491189,  0.90549628,  0.69665072]])

In [92]:
x = np.array([0.6, 0.9])
net.predict(x)

array([0.75087142, 2.31696771, 1.48908482])

In [93]:
t = np.array([0, 0, 1])
net.loss(x, t)

1.3261287608227716

Numerical gradient는 어차피 검증용이기 때문에 메모리를 고려하기 보다는, 수학적으로 명료한 함수를 쓰는 것이 좋음

제가 헷갈렸던 이유


책에서 아쉬운 점
1. function에서 입력값이 사용되지 않음
2. numerical gradient에서 side-effect

In [80]:
def loss_with_weight_normal(W):
    global x, t
    _net = SimpleNet()
    _net.W[:] = W
    return _net.loss(x, t)

In [66]:
net.W + 100

array([[ 97.70617988,  99.20253244,  98.71593398],
       [ 99.57122665, 100.06119336,  98.24629849]])

In [76]:
normal_function(net.W + 2)

[[-0.29382012  1.20253244  0.71593398]
 [ 1.57122665  2.06119336  0.24629849]]


2.2675248786603306

In [77]:
normal_function(net.W - 2)

[[-4.29382012 -2.79746756 -3.28406602]
 [-2.42877335 -1.93880664 -3.75370151]]


2.267524878660331

In [14]:
def loss_with_strange_normal(W):
    global x, t, net
    return net.loss(x, t)

In [126]:
def numerical_gradient1(f, x):
    h = 1e-4
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        delta = np.zeros_like(x)
        delta[idx] = delta[idx] + h

        fxh1 = f(x+delta)
        fxh2 = f(x-delta)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        it.iternext()
    return grad

In [133]:
def numerical_gradient2(f, x):
    h = 1e-4
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x) # f(x+h)
        
        x[idx] = tmp_val - h 
        fxh2 = f(x) # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        
        x[idx] = tmp_val
        it.iternext()
    return grad

In [134]:
numerical_gradient1(normal_function, net.W)

array([[ 0.07614099,  0.36455704, -0.44069803],
       [ 0.11421148,  0.54683557, -0.66104705]])

In [135]:
numerical_gradient1(strange_function, net.W)

array([[0., 0., 0.],
       [0., 0., 0.]])

In [136]:
numerical_gradient2(normal_function, net.W)

array([[ 0.07614099,  0.36455704, -0.44069803],
       [ 0.11421148,  0.54683557, -0.66104705]])

In [137]:
numerical_gradient2(strange_function, net.W)

array([[ 0.07614099,  0.36455704, -0.44069803],
       [ 0.11421148,  0.54683557, -0.66104705]])