# This notebook is used to implement gradient in neural network.

In [1]:
import numpy as np

In [2]:
def softmax(x):
    c = np.max(x)
    y = np.exp(x - c) / np.sum(np.exp(x-c))
    return y

In [3]:
def cross_entropy_loss(y, l):
    # y is the output of network
    # l is the one-hot label
    loss = -np.sum(l * np.log(y + np.finfo(float).eps))
    return loss

In [4]:
class SimpleNet:
    def __init__(self):
        self.W = np.random.randn(2, 3)
        
    def predict(self, x):
        return softmax(np.dot(x, self.W))
    
    def loss(self, x, l):
        y = self.predict(x)
        return cross_entropy_loss(y, l)

In [5]:
x = np.array([0.6, 0.9])
net = SimpleNet()
y = net.predict(x)

In [6]:
print(net)
print(y)

<__main__.SimpleNet object at 0x000001EDF5C1ED30>
[0.79752813 0.03642105 0.16605083]


In [7]:
print(np.sum(y))

1.0


In [8]:
l1 = np.array([1, 0, 0]).astype(np.float64)
print(l1)

[1. 0. 0.]


In [9]:
loss1 = net.loss(x, l1)

In [10]:
print(loss1)

0.22623817690397927


In [11]:
l2 = np.array([0, 0, 1])

In [12]:
loss2 = net.loss(x, l2)
print(loss2)

1.7954613490022633


In [14]:
def numerical_gradient(f, x):
    h = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    print(net)
    
    print(net.W)
    
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x) # f(x+h)
        
        x[idx] = tmp_val - h 
        fxh2 = f(x) # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        
        x[idx] = tmp_val # 还原值
        it.iternext()   
        
    return grad

In [17]:
def f(W):
    return net.loss(x, l2)

In [18]:
dW = numerical_gradient(f, net.W)

<__main__.SimpleNet object at 0x000001EDF5C1ED30>
[[ 0.0286076  -0.47319418  0.26585957]
 [ 2.16838283 -0.92644964  0.26656688]]
