In [2]:
import numpy as np

In [9]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, applied element-wise.

    Input: z, a scalar or array_like object;
    Output: y, the sigmoid of every element of ``z`` (same shape as ``z``;
        a NumPy scalar when ``z`` is a scalar).

    The value is computed from ``exp(-|z|)``, which never exceeds 1, so large
    negative inputs saturate to 0 instead of overflowing inside ``exp``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # For z >= 0 use 1 / (1 + e^-z); for z < 0 use the algebraically equal
    # e^z / (1 + e^z). Both branches are expressed through the same `decay`.
    y = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where always returns an array; unwrap the 0-d case so that scalar
    # input still produces a scalar result.
    return y if y.ndim else y[()]

In [7]:
def softmax(x):
    """Softmax along the last axis of ``x``.

    Input: x, a scalar or array_like object of scores;
    Output: y, same shape as ``x``; every slice along the last axis is
        non-negative and sums to 1.

    For a 1-D input this normalises the whole vector. For a 2-D input each
    row is normalised independently, so a batch of score vectors yields one
    distribution per row rather than a single distribution spread over the
    whole batch.
    """
    x = np.asarray(x)
    # A 0-d input has no last axis; reduce over everything in that case.
    axis = -1 if x.ndim > 0 else None
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps exp() from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_shifted = np.exp(shifted)
    y = exp_shifted / np.sum(exp_shifted, axis=axis, keepdims=True)
    return y

In [11]:
def cross_entropy_loss(y, l):
    """Cross-entropy between network output ``y`` and one-hot label ``l``.

    Input: y, the output of the network; l, the one-hot label;
    Output: loss, the negated sum of ``l * log(y + eps)`` over every element
        (a sum, not an average).
    """
    # Machine epsilon keeps log() finite when an entry of y is exactly 0.
    eps = np.finfo(float).eps
    log_y = np.log(y + eps)
    weighted = l * log_y
    return -np.sum(weighted)

In [3]:
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Input: f, a callable invoked as ``f(x)`` and returning a scalar;
           x, a NumPy array. It is perturbed in place one element at a time
           and every element is restored before moving on, so ``f`` observes
           the perturbation through any reference to the same array.
    Output: grad, an array created with ``np.zeros_like(x)`` holding
        ``(f(x + h) - f(x - h)) / (2 * h)`` for each element, with h = 1e-4.
    """
    step = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    for idx in np.ndindex(*x.shape):
        original = x[idx]

        x[idx] = float(original) + step
        f_plus = f(x)  # f(x+h)

        x[idx] = original - step
        f_minus = f(x)  # f(x-h)

        x[idx] = original  # restore the original value
        grad[idx] = (f_plus - f_minus) / (2 * step)

    return grad

In [15]:
class TwoLayerNet:
    """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

    All trainable arrays are stored in ``self.params`` under the keys
    'W1', 'b1', 'W2' and 'b2'.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameter arrays.

        Weights are drawn from a standard normal distribution scaled by
        ``weight_init_std``; biases start at zero.
        """
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass on ``x`` and return the softmax output."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y

    def loss(self, x, l):
        """Cross-entropy loss of the prediction for ``x`` against one-hot label ``l``."""
        y = self.predict(x)
        return cross_entropy_loss(y, l)

    def accuracy(self, x, l):
        """Fraction of rows of ``x`` whose predicted class matches the label.

        Both the prediction and ``l`` are reduced with ``argmax`` along
        axis 1, so ``x`` and ``l`` must be 2-D (one sample per row).
        """
        y = self.predict(x)
        p = np.argmax(y, axis=1)
        t = np.argmax(l, axis=1)
        # np.float64 replaces the `np.float` alias, which no longer exists
        # in NumPy 1.24 and later.
        accuracy = np.sum(p == t).astype(np.float64) / x.shape[0]
        return accuracy

    def numerical_gradient(self, x, l):
        """Numerically estimate the loss gradient for every parameter.

        Returns a dict with the same keys as ``self.params``.
        """
        # The lambda ignores its argument on purpose: the module-level
        # numerical_gradient perturbs the parameter array in place, and
        # self.loss reads that same array through self.params.
        loss_W = lambda W: self.loss(x, l)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])

        return grads

In [17]:
# Build a 784 -> 100 -> 10 network and report the shape of each parameter array.
net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print('Shape of W1 is {}'.format(np.shape(net.params['W1'])))
print('Shape of b1 is {}'.format(np.shape(net.params['b1'])))
print('Shape of W2 is {}'.format(np.shape(net.params['W2'])))
print('Shape of b2 is {}'.format(np.shape(net.params['b2'])))

Shape of W1 is (784, 100)
Shape of b1 is (100,)
Shape of W2 is (100, 10)
Shape of b2 is (10,)
