In [10]:
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

    Accepts a scalar or a NumPy array and returns a value of the same shape.

    The value is computed as exp(-log(1 + exp(-z))) through ``np.logaddexp``,
    which is mathematically identical to the textbook formula but never
    evaluates exp() on a large positive argument. Very negative inputs
    therefore yield 0.0 without an overflow RuntimeWarning.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)), evaluated stably.
    return np.exp(-np.logaddexp(0, -z))


def sigmoid_prime(z):
    """Derivative of the sigmoid: sigmoid(z) * (1 - sigmoid(z)), element-wise."""
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(z)
    return s*(1-s)

In [15]:
import numpy as np
import random


class Network(object):
    """Fully connected feed-forward network with sigmoid units, trained by mini-batch SGD.

    ``sizes`` lists the number of neurons in each layer, input layer first.
    Biases and weights are initialised from the standard normal distribution
    N(0, 1). The input layer has no bias vector, so ``biases`` and ``weights``
    each hold ``len(sizes) - 1`` arrays.
    """

    def __init__(self, sizes):
        """Create randomly initialised parameters for the given layer sizes.

        sizes: sequence of layer widths, e.g. [784, 30, 10].
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One (y, 1) bias column per non-input layer, e.g. [(30, 1), (10, 1)].
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # One (y, x) matrix per pair of adjacent layers, e.g. [(30, 784), (10, 30)].
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        training_data: iterable of (x, y) tuples.
        epochs: number of passes over the training data.
        mini_batch_size: number of samples per mini-batch (must be >= 1).
        eta: learning rate / step size.
        test_data: optional iterable of (x, y) tuples; when given, the number
            of correctly classified test samples is printed after every epoch.

        Returns nothing: the learned parameters are stored on the instance.
        Raises ValueError if ``mini_batch_size`` is smaller than 1.
        """
        if mini_batch_size < 1:
            # A negative step would make range() below yield no batches and
            # silently skip training; zero would fail with an obscure message.
            raise ValueError("mini_batch_size must be at least 1")

        # Materialise the data so it can be shuffled in place and measured.
        training_data = list(training_data)
        n = len(training_data)

        if test_data:
            test_data = list(test_data)
            n_test = len(test_data)

        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print('Epoch {}:Accuracy on test data is {} / {}'.format(j+1,
                      self.evaluate(test_data),n_test))
            else:
                print("Epoch {} complete".format(j+1))
        print("The eta is:{}  mini-batch size is:{}".format(eta,mini_batch_size))

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step computed from a single mini-batch.

        mini_batch: list of (x, y) tuples.
        eta: learning rate / step size.

        The per-sample gradients from ``backprop`` are summed and the
        parameters are moved by ``eta / len(mini_batch)`` times that sum.
        An empty mini-batch leaves the parameters untouched.
        Returns nothing: the parameters are updated on the instance.
        """
        if len(mini_batch) == 0:
            # Nothing to average over; avoids a division by zero below.
            return

        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        step = eta/len(mini_batch)
        self.weights = [w - step * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Compute the gradient of the cost for a single sample (x, y).

        x: feature vector; the layer arithmetic below expects a column of
            shape (sizes[0], 1).
        y: target vector with sizes[-1] entries; it is reshaped to a
            (sizes[-1], 1) column, so a flat sequence is accepted as well.

        Returns (nabla_b, nabla_w): two lists of arrays shaped like
        ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass: keep every weighted input z and every activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Output-layer error. The target is shaped from the network's own
        # output width rather than a fixed 10, so any layer layout works.
        y = np.asarray(y).reshape(self.sizes[-1], 1)
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # Backward pass: l counts layers from the end (l=2 is the second-last).
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def cost_derivative(self, output_activations, y):
        """Return (output_activations - y).

        output_activations: output of the network's last layer.
        y: target vector for the same sample.
        """
        return (output_activations-y)

    def feedforward(self, a):
        """Return the network's output for input ``a`` by applying every layer in turn."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a

    def evaluate(self, test_data):
        """Return how many test samples are classified correctly.

        test_data: iterable of (x, y) pairs where y is a scalar class label.
        The predicted class is the index of the largest output activation.
        """
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

In [14]:
import mnist_loader

# Experiment: 784-100-10 network, 100 epochs, mini-batches of 10, learning rate 3.0.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
# Materialise the training set once as a list (SGD also calls list() on its input).
training_data = list(training_data)
net = Network([784, 100, 10])
net.SGD(training_data, epochs=100, mini_batch_size=10, eta=3.0,
        test_data=test_data)

Epoch 1:Accuracy on test data is 7222 / 10000
Epoch 2:Accuracy on test data is 9269 / 10000
Epoch 3:Accuracy on test data is 9412 / 10000
Epoch 4:Accuracy on test data is 9448 / 10000
Epoch 5:Accuracy on test data is 9503 / 10000
Epoch 6:Accuracy on test data is 9518 / 10000
Epoch 7:Accuracy on test data is 9544 / 10000
Epoch 8:Accuracy on test data is 9564 / 10000
Epoch 9:Accuracy on test data is 9577 / 10000
Epoch 10:Accuracy on test data is 9590 / 10000
Epoch 11:Accuracy on test data is 9594 / 10000
Epoch 12:Accuracy on test data is 9590 / 10000
Epoch 13:Accuracy on test data is 9622 / 10000
Epoch 14:Accuracy on test data is 9627 / 10000
Epoch 15:Accuracy on test data is 9623 / 10000
Epoch 16:Accuracy on test data is 9616 / 10000
Epoch 17:Accuracy on test data is 9628 / 10000
Epoch 18:Accuracy on test data is 9648 / 10000
Epoch 19:Accuracy on test data is 9625 / 10000
Epoch 20:Accuracy on test data is 9635 / 10000
Epoch 21:Accuracy on test data is 9662 / 10000
Epoch 22:Accuracy on t

In [None]:
import mnist_loader

# Experiment: 784-30-10 network, 30 epochs, mini-batches of 10, learning rate 3.0.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
# Materialise the training set once as a list (SGD also calls list() on its input).
training_data = list(training_data)
net = Network([784, 30, 10])
net.SGD(training_data, epochs=30, mini_batch_size=10, eta=3.0,
        test_data=test_data)

In [12]:
import mnist_loader

# Experiment: 784-30-10 network, 30 epochs, mini-batches of 10, learning rate 0.1.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
# Materialise the training set once as a list (SGD also calls list() on its input).
training_data = list(training_data)
net = Network([784, 30, 10])
net.SGD(training_data, epochs=30, mini_batch_size=10, eta=0.1,
        test_data=test_data)

Epoch 1:Accuracy on test data is 5359 / 10000
Epoch 2:Accuracy on test data is 6158 / 10000
Epoch 3:Accuracy on test data is 6519 / 10000
Epoch 4:Accuracy on test data is 6723 / 10000
Epoch 5:Accuracy on test data is 6880 / 10000
Epoch 6:Accuracy on test data is 6982 / 10000
Epoch 7:Accuracy on test data is 7049 / 10000
Epoch 8:Accuracy on test data is 7197 / 10000
Epoch 9:Accuracy on test data is 7615 / 10000
Epoch 10:Accuracy on test data is 7854 / 10000
Epoch 11:Accuracy on test data is 7939 / 10000
Epoch 12:Accuracy on test data is 7987 / 10000
Epoch 13:Accuracy on test data is 8022 / 10000
Epoch 14:Accuracy on test data is 8042 / 10000
Epoch 15:Accuracy on test data is 8060 / 10000
Epoch 16:Accuracy on test data is 8077 / 10000
Epoch 17:Accuracy on test data is 8134 / 10000
Epoch 18:Accuracy on test data is 8710 / 10000
Epoch 19:Accuracy on test data is 8869 / 10000
Epoch 20:Accuracy on test data is 8904 / 10000
Epoch 21:Accuracy on test data is 8938 / 10000
Epoch 22:Accuracy on t

In [13]:
import mnist_loader

# Experiment: 784-30-10 network, 30 epochs, mini-batches of 10, learning rate 0.001.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
# Materialise the training set once as a list (SGD also calls list() on its input).
training_data = list(training_data)
net = Network([784, 30, 10])
net.SGD(training_data, epochs=30, mini_batch_size=10, eta=0.001,
        test_data=test_data)

Epoch 1:Accuracy on test data is 1164 / 10000
Epoch 2:Accuracy on test data is 1339 / 10000
Epoch 3:Accuracy on test data is 1494 / 10000
Epoch 4:Accuracy on test data is 1650 / 10000
Epoch 5:Accuracy on test data is 1779 / 10000
Epoch 6:Accuracy on test data is 1881 / 10000
Epoch 7:Accuracy on test data is 1951 / 10000
Epoch 8:Accuracy on test data is 2055 / 10000
Epoch 9:Accuracy on test data is 2115 / 10000
Epoch 10:Accuracy on test data is 2195 / 10000
Epoch 11:Accuracy on test data is 2265 / 10000
Epoch 12:Accuracy on test data is 2308 / 10000
Epoch 13:Accuracy on test data is 2356 / 10000
Epoch 14:Accuracy on test data is 2391 / 10000
Epoch 15:Accuracy on test data is 2437 / 10000
Epoch 16:Accuracy on test data is 2492 / 10000
Epoch 17:Accuracy on test data is 2528 / 10000
Epoch 18:Accuracy on test data is 2563 / 10000
Epoch 19:Accuracy on test data is 2613 / 10000
Epoch 20:Accuracy on test data is 2658 / 10000
Epoch 21:Accuracy on test data is 2711 / 10000
Epoch 22:Accuracy on t

In [17]:
import mnist_loader

# Experiment: 784-30-10 network, 30 epochs, mini-batches of 10, learning rate 100.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
# Materialise the training set once as a list (SGD also calls list() on its input).
training_data = list(training_data)
net = Network([784, 30, 10])
net.SGD(training_data, epochs=30, mini_batch_size=10, eta=100,
        test_data=test_data)

Epoch 1:Accuracy on test data is 1201 / 10000
Epoch 2:Accuracy on test data is 1208 / 10000
Epoch 3:Accuracy on test data is 1214 / 10000
Epoch 4:Accuracy on test data is 1215 / 10000
Epoch 5:Accuracy on test data is 1215 / 10000
Epoch 6:Accuracy on test data is 1215 / 10000
Epoch 7:Accuracy on test data is 1216 / 10000
Epoch 8:Accuracy on test data is 1216 / 10000
Epoch 9:Accuracy on test data is 1215 / 10000
Epoch 10:Accuracy on test data is 1212 / 10000
Epoch 11:Accuracy on test data is 1211 / 10000
Epoch 12:Accuracy on test data is 1206 / 10000
Epoch 13:Accuracy on test data is 1205 / 10000
Epoch 14:Accuracy on test data is 1203 / 10000
Epoch 15:Accuracy on test data is 1198 / 10000
Epoch 16:Accuracy on test data is 1198 / 10000
Epoch 17:Accuracy on test data is 1190 / 10000
Epoch 18:Accuracy on test data is 1186 / 10000
Epoch 19:Accuracy on test data is 1179 / 10000
Epoch 20:Accuracy on test data is 1170 / 10000
Epoch 21:Accuracy on test data is 1158 / 10000
Epoch 22:Accuracy on t