In [249]:
import pickle
import gzip
import numpy as np

In [250]:
# Open the gzip-compressed MNIST pickle for binary reading; the handle is
# consumed by the pickle.load call in the next cell.
f = gzip.open(filename="./data/mnist.pkl.gz", mode="rb")

In [251]:
# Unpickle the three dataset splits (train, validation, test) from the open archive.
# NOTE(security): pickle.load can execute arbitrary code while loading; only use
# dataset files obtained from a trusted source.
# encoding="bytes" makes 8-bit strings pickled by Python 2 load as bytes objects.
try:
    train_tuple_data, valid_tuple_data, test_tuple_data = pickle.load(f, encoding="bytes")
finally:
    # Release the gzip handle whether or not unpickling succeeded; no later cell
    # shown in this notebook reads from it.
    f.close()

In [252]:
# Split the training tuple: element 0 is kept as the data, element 1 as the labels.
train_data, train_label = train_tuple_data[0], train_tuple_data[1]

In [253]:
# One-hot encode the training labels: row i is all zeros except for a 1 in
# column train_label[i]. The result is a single (num_samples, num_classes)
# float array built with one vectorised assignment, so every per-batch slice
# is already an array rather than a Python list of 10-element vectors.
num_classes = 10  # same as the size of the network's output layer
one_hot_train_label = np.zeros((len(train_label), num_classes))
one_hot_train_label[np.arange(len(train_label)), train_label] = 1

In [254]:
# Split the validation tuple: element 0 is kept as the data, element 1 as the labels.
valid_data, valid_label = valid_tuple_data[0], valid_tuple_data[1]

In [255]:
# Split the test tuple: element 0 is kept as the data, element 1 as the labels.
test_data, test_label = test_tuple_data[0], test_tuple_data[1]

In [271]:
# Define the network: layer sizes plus randomly initialised parameters.
SEED = 0  # fixed seed so a fresh "Restart & Run All" starts from the same parameters
rng = np.random.default_rng(SEED)

net = [784, 40, 10]  # units per layer: input, hidden, output
num_layers = len(net)
# weights[k] has shape (net[k], net[k + 1]), so np.matmul(activations, weights[k])
# maps the activations of layer k onto layer k + 1.
weights = [rng.standard_normal((n_in, n_out)) for n_in, n_out in zip(net[:-1], net[1:])]
# bias[k] has shape (net[k + 1],): one offset per unit of each non-input layer.
bias = [rng.standard_normal(n_out) for n_out in net[1:]]

In [272]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise to ``x``."""
    exp_neg_x = np.exp(-x)
    denominator = 1 + exp_neg_x
    return 1.0 / denominator

def sigmoid_derivative(x):
    """Derivative of the logistic function at ``x``: ``sigmoid(x) * (1 - sigmoid(x))``.

    The sigmoid is evaluated once and reused for both factors, so the
    exponential is not computed twice per call.
    """
    s = sigmoid(x)
    return s * (1 - s)

In [273]:
def cost_derivative(output_activations, y):
    """Return the difference ``output_activations - y``.

    ``back_prop`` multiplies this by the sigmoid slope to obtain the
    output-layer error.
    """
    error = output_activations - y
    return error

In [274]:
def forward(batch_data):
    """Run ``batch_data`` through every layer and return the final activations.

    Uses the module-level ``weights`` and ``bias`` lists: each layer applies
    ``sigmoid(np.matmul(activation, w) + b)`` to the previous layer's output.
    """
    activation = batch_data
    for layer_w, layer_b in zip(weights, bias):
        activation = sigmoid(np.matmul(activation, layer_w) + layer_b)
    return activation

In [275]:
def back_prop(batch_data, batch_label):
    """Back-propagate one mini-batch through the network.

    Reads the module-level ``weights`` and ``bias`` lists; it does not modify them.

    Args:
        batch_data: inputs of the batch, one sample per row.
        batch_label: target outputs aligned with ``batch_data`` (one-hot rows
            in this notebook).

    Returns:
        ``(nabla_w, nabla_b)``, two lists ordered like ``weights`` and ``bias``.
        ``nabla_w[k]`` is the gradient for ``weights[k]``, already summed over
        the batch. ``nabla_b[k]`` holds the per-sample deltas of that layer
        (one row per sample); the caller sums them over axis 0 to get the
        bias gradient.
    """
    # Forward pass, remembering every pre-activation z and every activation.
    zs = []
    activations = [batch_data]
    for w, b in zip(weights, bias):
        z = np.matmul(activations[-1], w) + b
        zs.append(z)
        activations.append(sigmoid(z))

    # One gradient slot per parameter array; every slot is filled below.
    nabla_w = [None] * len(weights)
    nabla_b = [None] * len(bias)

    # Error at the output layer.
    delta = cost_derivative(activations[-1], batch_label) * sigmoid_derivative(zs[-1])
    nabla_b[-1] = delta
    nabla_w[-1] = np.matmul(activations[-2].transpose(), delta)

    # Walk backwards through the remaining layers. The layer count is derived
    # from the parameter list itself so the function does not depend on a
    # separately maintained global.
    layer_count = len(weights) + 1
    for l in range(2, layer_count):
        sp = sigmoid_derivative(zs[-l])
        # Pull the error back through the weights of the following layer, then
        # scale it by the local sigmoid slope.
        delta = np.matmul(delta, weights[-l + 1].transpose()) * sp
        nabla_b[-l] = delta
        nabla_w[-l] = np.matmul(activations[-l - 1].transpose(), delta)
    return nabla_w, nabla_b

In [277]:
# Hyper-parameters, set once instead of being re-assigned inside the loops.
epoch_num = 30
batch_size = 10
eta = 3.0  # learning rate

# Ceiling division: a final, shorter batch is included when the number of
# samples is not a multiple of batch_size, so no trailing samples are skipped.
batch_num = -(-len(train_data) // batch_size)

for epoch in range(epoch_num):
    # Mini-batch gradient descent over the training set in its stored order.
    for batch in range(batch_num):
        begin = batch * batch_size
        end = begin + batch_size
        batch_data = train_data[begin:end]
        batch_label = one_hot_train_label[begin:end]

        nalba_w, nalba_b = back_prop(batch_data, batch_label)

        # Update w and b with the batch-averaged gradient. back_prop returns
        # weight gradients already summed over the batch and per-sample bias
        # deltas, hence the explicit sum over axis 0 for the bias. Averaging
        # uses the real batch length so a shorter last batch is scaled correctly.
        samples_in_batch = len(batch_data)
        weights = [w - (eta * nw / samples_in_batch) for w, nw in zip(weights, nalba_w)]
        bias = [b - (eta * np.sum(nb, axis=0) / samples_in_batch) for b, nb in zip(bias, nalba_b)]

    # Accuracy after this epoch: the predicted digit is the index of the
    # largest output activation.
    # NOTE(review): progress is measured on the test split every epoch while
    # valid_data is unused in the cells shown — confirm this is intended.
    x = forward(test_data)
    y = np.argmax(x, axis=1)
    correct_num = np.sum(y == test_label)
    print("epoch:{0}\tcorret:{1}/{2}".format(epoch, correct_num, len(test_data)))

epoch:0	 corret:9085/10000
epoch:1	 corret:9254/10000
epoch:2	 corret:9338/10000
epoch:3	 corret:9387/10000
epoch:4	 corret:9430/10000
epoch:5	 corret:9428/10000
epoch:6	 corret:9466/10000
epoch:7	 corret:9481/10000
epoch:8	 corret:9476/10000
epoch:9	 corret:9477/10000
epoch:10	 corret:9484/10000
epoch:11	 corret:9507/10000
epoch:12	 corret:9503/10000
epoch:13	 corret:9522/10000
epoch:14	 corret:9485/10000
epoch:15	 corret:9511/10000
epoch:16	 corret:9499/10000
epoch:17	 corret:9515/10000
epoch:18	 corret:9515/10000
epoch:19	 corret:9530/10000
epoch:20	 corret:9519/10000
epoch:21	 corret:9531/10000
epoch:22	 corret:9520/10000
epoch:23	 corret:9532/10000
epoch:24	 corret:9527/10000
epoch:25	 corret:9539/10000
epoch:26	 corret:9532/10000
epoch:27	 corret:9521/10000
epoch:28	 corret:9531/10000
epoch:29	 corret:9531/10000
