# 导入库

In [1]:
import numpy as np
from scipy import ndimage
import matplotlib.pyplot as plt
from load_data import *
%matplotlib inline

# 载入数据

In [2]:
# Load the three dataset splits.
# NOTE(review): load_data_wrapper is not defined in this notebook; presumably
# it is provided by `from load_data import *` -- confirm.
# The training cell calls len() on training_data, shuffles it in place, slices
# it into batches and iterates it as (x, y) pairs; test_data is also iterated
# as (x, y) pairs. validation_data is not used by the cells shown here.
training_data, validation_data, test_data = load_data_wrapper()

# 训练参数

In [3]:
# Seed NumPy's global RNG so that the random weight initialisation and the
# per-epoch shuffling give the same result on every "Restart & Run All".
SEED = 0
np.random.seed(SEED)

n_epoch = 30          # number of passes over the training set
learning_rate = 0.1   # base step size; AdaGrad rescales it per parameter
batch_size = 10       # samples per mini-batch

# AdaGrad state.
# ESP is the small positive starting value of every accumulator; it keeps the
# division by sqrt(accumulator) in the update step away from zero.
ESP = 0.000001

# Running sums of squared mini-batch gradients, one per parameter tensor.
# They start as scalars and become arrays (same shape as the parameter) after
# the first update, via broadcasting.
lr_b2 = ESP
lr_w2 = ESP
lr_b3 = ESP
lr_w3 = ESP



# 网络结构

In [4]:
# Layer sizes of the network: input -> one hidden layer -> output.
# NOTE(review): 784 presumably corresponds to 28x28-pixel images and 10 to the
# ten digit classes (MNIST) -- confirm against load_data.
n_node_input = 784    # length of each input vector x
n_node_hidden = 100   # number of hidden units
n_node_output = 10    # number of output units

# 权重与偏置

In [5]:
# Every weight and bias is drawn from a standard normal distribution.
# Biases have shape (n, 1) so they add onto the np.dot(W, a) products.

# Layer 2 (input -> hidden)
W2 = np.random.standard_normal(size=(n_node_hidden, n_node_input))
b2 = np.random.standard_normal(size=(n_node_hidden, 1))

# Layer 3 (hidden -> output)
W3 = np.random.standard_normal(size=(n_node_output, n_node_hidden))
b3 = np.random.standard_normal(size=(n_node_output, 1))

# 激活函数

In [6]:
def sigmoid(z):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-z)).

    Computed in a numerically stable form: the naive expression overflows in
    ``np.exp(-z)`` (emitting a RuntimeWarning) for large negative ``z``.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar or ndarray
        Values in [0, 1] with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in [0, 1], so it can never overflow.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return out[()]

def sigmoid_prime(z):
    """Derivative of the sigmoid function, sigmoid(z) * (1 - sigmoid(z)).

    Parameters
    ----------
    z : scalar or ndarray
        Pre-activation value(s).

    Returns
    -------
    Same shape as ``z``.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    s = sigmoid(z)
    return s * (1 - s)

# 开始训练

In [None]:
### Training
# NOTE: this cell is stateful. It updates W2/b2/W3/b3 in place and keeps adding
# to the AdaGrad accumulators lr_b2/lr_w2/lr_b3/lr_w3, so running it again
# continues from the previous run. Re-run the parameter and weight cells first
# to start from scratch.
test_errors = []        # per-epoch fraction of misclassified test samples
training_errors = []    # per-epoch fraction of misclassified training samples
n = len(training_data)
n_test = len(test_data)


def _feed_forward(x):
    """Propagate one input through the network using the current weights.

    Returns (z2, a2, z3, a3): pre-activations and activations of the hidden
    layer (2) and the output layer (3).
    """
    z2 = np.dot(W2, x) + b2
    a2 = sigmoid(z2)
    z3 = np.dot(W3, a2) + b3
    a3 = sigmoid(z3)
    return z2, a2, z3, a3


for j in range(n_epoch):

    ## Stochastic Gradient Descent: visit the samples in a new order each epoch
    np.random.shuffle(training_data)

    # for each batch
    sum_of_training_error = 0
    for k in range(0, n, batch_size):
        batch = training_data[k:k+batch_size]
        # The last batch is shorter when n is not a multiple of batch_size,
        # so average over the samples actually present.
        n_in_batch = len(batch)

        # gradient sums over the samples in this batch
        sum_gradient_b3 = 0
        sum_gradient_b2 = 0
        sum_gradient_W3 = 0
        sum_gradient_W2 = 0

        # for each sample
        for x, y in batch:
            ## Feed forward
            a1 = x
            z2, a2, z3, a3 = _feed_forward(a1)

            ## Backpropagation

            # Step 1: Error at the output layer [Quadratic Cost]
            delta_3 = (a3-y)*sigmoid_prime(z3)
            # Step 2: Error relationship between two adjacent layers
            delta_2 = sigmoid_prime(z2)*np.dot(W3.transpose(), delta_3)
            # Step 3: Gradient of C in terms of bias
            gradient_b3 = delta_3
            gradient_b2 = delta_2
            # Step 4: Gradient of C in terms of weight
            gradient_W3 = np.dot(delta_3, a2.transpose())
            gradient_W2 = np.dot(delta_2, a1.transpose())

            # accumulate gradients
            sum_gradient_b3 += gradient_b3
            sum_gradient_b2 += gradient_b2
            sum_gradient_W3 += gradient_W3
            sum_gradient_W2 += gradient_W2

            ## Training Error (training labels are compared via argmax)
            sum_of_training_error += int(np.argmax(a3) != np.argmax(y))

        # mean gradient of the batch
        mean_gradient_b3 = sum_gradient_b3 / n_in_batch
        mean_gradient_b2 = sum_gradient_b2 / n_in_batch
        mean_gradient_W3 = sum_gradient_W3 / n_in_batch
        mean_gradient_W2 = sum_gradient_W2 / n_in_batch

        # AdaGrad: accumulate the squared mean gradients ...
        lr_b3 = lr_b3 + mean_gradient_b3 ** 2
        lr_w3 = lr_w3 + mean_gradient_W3 ** 2
        lr_b2 = lr_b2 + mean_gradient_b2 ** 2
        lr_w2 = lr_w2 + mean_gradient_W2 ** 2

        # ... and divide the base learning rate element-wise by their root.
        b3 -= learning_rate / np.sqrt(lr_b3) * mean_gradient_b3
        b2 -= learning_rate / np.sqrt(lr_b2) * mean_gradient_b2
        W3 -= learning_rate / np.sqrt(lr_w3) * mean_gradient_W3
        W2 -= learning_rate / np.sqrt(lr_w2) * mean_gradient_W2

    # Report Training Error
    print("[TRAIN_ERROR] Epoch %02d: %5d / %05d" % (j, sum_of_training_error, n))
    # np.float was removed in NumPy 1.24; the builtin float is equivalent here.
    training_errors.append(float(sum_of_training_error) / n)

    ### Test
    sum_of_test_error = 0
    for x, y in test_data:
        ## Feed forward
        a1 = x
        z2, a2, z3, a3 = _feed_forward(a1)

        ## Test Error
        # in test data, label info is a number not one-hot vector as in training data
        sum_of_test_error += int(np.argmax(a3) != y)

    # Report Test Error
    print("[ TEST_ERROR] Epoch %02d: %5d / %05d" % (j, sum_of_test_error, n_test))

    test_errors.append(float(sum_of_test_error)/n_test)
print("done!")

[TRAIN_ERROR] Epoch 00: 14841 / 50000
[ TEST_ERROR] Epoch 00:  2465 / 10000
[TRAIN_ERROR] Epoch 01: 11787 / 50000
[ TEST_ERROR] Epoch 01:  2385 / 10000
[TRAIN_ERROR] Epoch 02:  9434 / 50000
[ TEST_ERROR] Epoch 02:  1461 / 10000
[TRAIN_ERROR] Epoch 03:  6777 / 50000
[ TEST_ERROR] Epoch 03:  1415 / 10000
[TRAIN_ERROR] Epoch 04:  6563 / 50000
[ TEST_ERROR] Epoch 04:  1405 / 10000
[TRAIN_ERROR] Epoch 05:  6409 / 50000
[ TEST_ERROR] Epoch 05:  1399 / 10000
[TRAIN_ERROR] Epoch 06:  6297 / 50000
[ TEST_ERROR] Epoch 06:  1386 / 10000
[TRAIN_ERROR] Epoch 07:  6221 / 50000
[ TEST_ERROR] Epoch 07:  1373 / 10000
[TRAIN_ERROR] Epoch 08:  6167 / 50000
[ TEST_ERROR] Epoch 08:  1367 / 10000
[TRAIN_ERROR] Epoch 09:  6114 / 50000
[ TEST_ERROR] Epoch 09:  1362 / 10000
[TRAIN_ERROR] Epoch 10:  6064 / 50000
[ TEST_ERROR] Epoch 10:  1363 / 10000
[TRAIN_ERROR] Epoch 11:  6015 / 50000
[ TEST_ERROR] Epoch 11:  1354 / 10000
[TRAIN_ERROR] Epoch 12:  5984 / 50000
[ TEST_ERROR] Epoch 12:  1357 / 10000
[TRAIN_ERROR