In [1]:
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

# sigmoid求导
def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))

In [8]:
import numpy as np
import random


class Network(object):
    """
    sizes列表:定义每一层神经元的个数
    偏置和权重参数: 用N(0,1)初始化
    第一层是输入层, 一般不设置偏置参数
    """

    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]#[[30*1],[10*1]]
        self.weights = [np.random.randn(y, x) #[[784*30],[10*30]]
                        for x, y in zip(sizes[:-1], sizes[1:])]


    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """
        input: training_data – 元组(x, y)的列表
        epochs:迭代遍数
        mini_batch_size:小batch大小
        eta:学习率/步长
        test_data:可选
        output: 无(因为参数已封装为类的成员函数)
        """
        training_data = list(training_data)
        n = len(training_data)

        if test_data:
            test_data = list(test_data)
            n_test = len(test_data)

        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print('Epoch {}:{}/{}'.format(j+1, self.evaluate(test_data), n_test))
            else:
                print("Epoch {} complete".format(j+1))

    def update_mini_batch(self, mini_batch, eta):
        """
        input:
        mini_batch:元组(x, y)的列表
        eta:学习率/步长
        output: 无(因为参数已封装为类的成员函数)
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w - (eta/len(mini_batch)) * nw for w,
                        nw in zip(self.weights, nabla_w)]
        self.biases = [b - (eta/len(mini_batch)) * nb for b,
                       nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """
        input: 
        x –特征向量
        y – 类别标签向量
        output: 样本(x, y)的梯度
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        activation = x  #784*1
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):#[[(30*1),(30*784)],[(10*1),(10*30)]]
            z = np.dot(w, activation) + b #30*1
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)  #[[30*1],[10*1]]
        delta=np.dot(activations[-1]-y,sigmoid_prime(zs[-1])) #10*1
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta,activations[-2].transpose())  #10*1 1*30
#shapes (10,10) and (1,30) not aligned: 10 (dim 1) != 1 (dim 0)
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta,activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def cost_derivative(self, output_activations, y):
        """
        输入: 
        output_activations:NN最后一层的输出
        y:类别标签
        输出: (𝒂 − 𝒚𝑖)
        """
        return (output_activations-y)

    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a
    
    def evaluate(self, test_data):  # 预测出每个测试样本的类别,然后根据标签统计出正确个数。
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]   #y为标量
        return sum(int(x == y) for (x, y) in test_results)

In [9]:
import mnist_loader
training_data,validation_data,test_data=mnist_loader.load_data_wrapper()
training_data=list(training_data)
net=Network([784,30,10])
net.SGD(training_data,30,10,3.0,test_data=test_data)

Epoch 0:905/10000
Epoch 1:842/10000
Epoch 2:1023/10000
Epoch 3:1056/10000
Epoch 4:902/10000
Epoch 5:1306/10000
Epoch 6:1018/10000
Epoch 7:611/10000
Epoch 8:1086/10000
Epoch 9:908/10000
Epoch 10:946/10000
Epoch 11:1072/10000
Epoch 12:1068/10000
Epoch 13:1013/10000
Epoch 14:1019/10000
Epoch 15:1113/10000
Epoch 16:919/10000
Epoch 17:976/10000
Epoch 18:1103/10000
Epoch 19:1087/10000
Epoch 20:1025/10000
Epoch 21:1013/10000
Epoch 22:1103/10000
Epoch 23:983/10000
Epoch 24:992/10000
Epoch 25:1057/10000
Epoch 26:1108/10000
Epoch 27:829/10000
Epoch 28:1218/10000
Epoch 29:1322/10000


In [None]:
sizes=[784,30,10]
biases = [np.random.randn(y, 1) for y in sizes[1:]]#[[30*1],[10*1]
weights = [np.random.randn(y, x) #[[784*30],[30*10]]
                        for x, y in zip(sizes[:-1], sizes[1:])]

In [None]:
xx=np.array()