In [1]:
import numpy as np

In [2]:
def sigmoid(z):
    """Apply the logistic sigmoid 1 / (1 + exp(-z)) element-wise.

    Input: z, a scalar or NumPy array (it must support unary minus);
    Output: the sigmoid of z, with the same shape as z."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [3]:
def softmax(x):
    """Compute the softmax along the last axis of ``x``.

    For a 1-D input the whole vector is normalised to sum to 1. For a
    batch of shape (N, C) each of the N rows is normalised on its own,
    so every row sums to 1.

    Input: x, a scalar, sequence or NumPy array of scores;
    Output: a NumPy array with the same shape as x.
    """
    x = np.asarray(x)
    # A 0-d input has no last axis to reduce over, so fall back to a full reduction.
    axis = -1 if x.ndim > 0 else None
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis=axis, keepdims=True)

In [4]:
def cross_entropy_loss(y, l):
    """Return the cross-entropy between predictions ``y`` and labels ``l``.

    y is the output of the network and l is the one-hot label. The loss is
    summed over every entry of ``l * log(y)``; it is not averaged over the
    batch.
    """
    # Machine epsilon keeps np.log away from log(0) when a probability is zero.
    tiny = np.finfo(float).eps
    log_probs = np.log(y + tiny)
    return -np.sum(l * log_probs)

In [5]:
def numerical_gradient(f, x, h=1e-4):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Each entry of ``x`` is perturbed in place by ``+h`` and ``-h``, ``f`` is
    evaluated at both points, and the entry is then restored.

    Args:
        f: callable taking ``x`` and returning a scalar.
        x: floating-point NumPy array. It is modified temporarily in place,
            so ``f`` may read it through another reference to the same array.
        h: finite-difference step size (default 1e-4).

    Returns:
        Array shaped like ``x`` holding ``(f(x + h) - f(x - h)) / (2 * h)``
        for every entry.

    Raises:
        TypeError: if ``x`` is not a floating-point array; the ``+/- h``
            perturbation would be truncated when written back.
    """
    if x.dtype.kind != 'f':
        raise TypeError(
            'numerical_gradient needs a floating-point array, got dtype {}'.format(x.dtype)
        )

    grad = np.zeros_like(x)

    for idx in np.ndindex(x.shape):
        original = x[idx]
        try:
            x[idx] = float(original) + h
            f_plus = f(x)  # f(x + h)

            x[idx] = float(original) - h
            f_minus = f(x)  # f(x - h)
        finally:
            # Restore the original value even if f raises or the run is
            # interrupted, so the caller's array is never left perturbed.
            x[idx] = original

        grad[idx] = (f_plus - f_minus) / (2 * h)

    return grad

In [6]:
class TwoLayerNet:
    """Fully connected network: affine -> sigmoid -> affine -> softmax.

    Parameters live in ``self.params`` under the keys 'W1', 'b1', 'W2'
    and 'b2'.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameter arrays.

        Weights are standard-normal samples scaled by ``weight_init_std``;
        biases start at zero.
        """
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y

    def loss(self, x, l):
        """Return the cross-entropy loss of the prediction for ``x`` against labels ``l``."""
        y = self.predict(x)
        return cross_entropy_loss(y, l)

    def accuracy(self, x, l):
        """Return the fraction of rows whose predicted class matches the label.

        ``x`` is a 2-D batch and ``l`` the matching 2-D label array; the
        class of each row is taken as the argmax along axis 1.
        """
        y = self.predict(x)
        p = np.argmax(y, axis=1)
        t = np.argmax(l, axis=1)
        # The builtin float replaces the np.float alias, which was removed
        # from NumPy.
        accuracy = np.sum(p == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, l):
        """Return finite-difference gradients of the loss for every parameter.

        The result is a dict with the same keys as ``self.params``.
        """
        # The argument W is ignored: the module-level numerical_gradient
        # perturbs the parameter array in place, and self.loss reads that
        # same array through self.params.
        loss_W = lambda W: self.loss(x, l)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])

        return grads

In [7]:
# Instantiate a 784-100-10 network and report the shape of each parameter array.
net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print('\n'.join(
    template.format(net.params[key].shape)
    for template, key in (
        ('Shape of W1 is {}', 'W1'),
        ('Shape of b1 is {}', 'b1'),
        ('Shape of W2 is {}', 'W2'),
        ('Shape of b2 is {}', 'b2'),
    )
))

Shape of W1 is (784, 100)
Shape of b1 is (100,)
Shape of W2 is (100, 10)
Shape of b2 is (10,)


In [8]:
from dataset.mnist import load_mnist

In [9]:
# Unpack the (inputs, labels) pairs for the training and test splits.
# NOTE(review): presumably normalize=True scales pixel values and
# one_hot_label=True yields one-hot label rows -- confirm against dataset.mnist.
(x_train, l_train), (x_test, l_test) = load_mnist(
    normalize=True,
    one_hot_label=True,
)

In [15]:
# Collects the loss value recorded after each training iteration.
train_loss_list = list()

In [16]:
# Training hyperparameters: number of loop iterations, mini-batch size and
# gradient-descent step size.
items_num, batch_size, learning_rate = 1, 100, 0.1

# Number of samples available in the training set.
train_size = x_train.shape[0]

# The network that the training loop updates.
network = TwoLayerNet(
    input_size=784,
    hidden_size=100,
    output_size=10,
)

In [19]:
# Run items_num iterations of mini-batch gradient descent on `network`.
for i in range(items_num):
    # Draw batch_size sample indices from [0, train_size). np.random.choice
    # samples with replacement by default, so an index may repeat in a batch.
    batch_mask = np.random.choice(train_size, batch_size)
    # Debug output: the sampled indices.
    print(batch_mask)
    x_batch = x_train[batch_mask]
    l_batch = l_train[batch_mask]
    
    # Finite-difference gradients: the loss is evaluated twice for every
    # single parameter entry, so this step is very slow.
    grad = network.numerical_gradient(x_batch, l_batch)
    
    # Gradient-descent step; each parameter array is updated in place.
    for k in ('W1', 'b1', 'W2', 'b2'):
        network.params[k] -= learning_rate * grad[k]
        
    # Loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, l_batch)
    # Debug output: the full prediction array for the batch, then the loss.
    print('predict: {}'.format(network.predict(x_batch)))
    print('loss={}'.format(loss))
    train_loss_list.append(loss)

[18622 37008 40905 40379 54827 27808  5033 48320 52612 29744 53803  4832
 57255 44394  1249 38557 55426  3312  9111 32648 52957 14273  4237 35218
 27083 25230 36365 54442 41031 52032 21891 41911 28250  3480  3758 59478
 42561 40902  3586 13429  3032 10584 34640 10481 57419 40444 32262 17459
 12419 32166 11730  5604 55974  2260 39249 59555 18959 44360 59618 28504
 39806 15962 39015 41075 39371 27317 48518 27483    49   884 43548 53451
 44279 24484 16240  4582  1512 45103   199 42447 38957 42941 53165 49621
 47672 34347 50499 18934 10197 17576 35634 30156 11551  4834 12046  3288
  7125 26707 12090 47642]


KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt