In [1]:
import numpy as np

In [10]:
class Network(object):
    """Fully connected feedforward neural network trained with mini-batch SGD.

    Every layer after the input layer applies the sigmoid activation.
    Gradients are computed by backpropagation; the output-layer error uses
    ``cost_derivate``, which returns ``output_activations - y`` (the
    derivative of the quadratic cost ``0.5 * ||a - y||^2``).
    """

    def __init__(self, sizes):
        """Create a network with randomly initialised parameters.

        Args:
            sizes: sequence with the number of neurons in each layer,
                input layer first (e.g. ``[2, 3, 1]``).
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # Every layer except the input layer gets a (y, 1) bias column.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # Adjacent layers with x and y neurons are joined by a (y, x) matrix.
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    @staticmethod
    def sigmoid(z):
        """Sigmoid activation function, applied element-wise."""
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def sigmoid_prime(z):
        """Derivative of the sigmoid function, applied element-wise."""
        s = Network.sigmoid(z)
        return s * (1 - s)

    def feedforward(self, a):
        """Return the network output for the input column vector ``a``."""
        for b, w in zip(self.biases, self.weights):
            a = self.sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size,
            eta, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: list of ``(x, y)`` tuples; it is shuffled in
                place at the start of every epoch.
            epochs: number of passes over ``training_data``.
            mini_batch_size: number of samples per mini-batch (must be > 0).
            eta: learning rate.
            test_data: optional list of ``(x, y)`` tuples. When given (and
                non-empty), the number of correct predictions is printed
                after each epoch.

        Raises:
            ValueError: if ``mini_batch_size`` is not positive.
        """
        if mini_batch_size <= 0:
            raise ValueError('mini_batch_size must be a positive integer')

        if test_data:
            n_test = len(test_data)

        n = len(training_data)
        for j in range(epochs):
            np.random.shuffle(training_data)
            mini_batches = [
                training_data[k:k + mini_batch_size]
                for k in range(0, n, mini_batch_size)]

            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)

            if test_data:
                print('Epoch {0}: {1} / {2}'.format(
                    j, self.evaluate(test_data), n_test))
            else:
                print('Epoch {0} complete'.format(j))

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step using a single mini-batch.

        The gradients of all samples are accumulated first; weights and
        biases are then updated once with the averaged gradient.

        Args:
            mini_batch: list of ``(x, y)`` tuples.
            eta: learning rate.
        """
        if not mini_batch:
            # Nothing to learn from; also avoids dividing by zero below.
            return

        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            # Gradients of the cost w.r.t. biases and weights for one sample.
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        # Update only after the whole batch has been accumulated, so every
        # sample's gradient is taken at the same parameter values.
        step = eta / len(mini_batch)
        self.weights = [w - step * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the cost gradient for one sample.

        Both elements are lists of arrays with the same shapes as
        ``self.biases`` and ``self.weights`` respectively.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass.
        activation = x
        activations = [x]  # all activations, stored layer by layer
        zs = []  # all weighted-input (z) vectors, stored layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)

        # Backward pass, starting from the output layer.
        delta = (self.cost_derivate(activations[-1], y)
                 * self.sigmoid_prime(zs[-1]))
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Layers are indexed from the end: l = 2 is the second-to-last layer.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = self.sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return how many samples in ``test_data`` are classified correctly.

        The predicted class is the index of the largest output activation.
        Each target ``y`` may be either a scalar class index or a vector
        (e.g. one-hot), in which case its argmax is used as the class.
        """
        correct = 0
        for x, y in test_data:
            predicted = np.argmax(self.feedforward(x))
            expected = np.argmax(y) if np.ndim(y) > 0 else y
            correct += int(predicted == expected)
        return correct

    def cost_derivate(self, output_activations, y):
        """Return the derivative of the cost w.r.t. the output activations."""
        return output_activations - y

In [12]:
import mnist_loader

In [15]:
# Load the MNIST splits through the project-local mnist_loader module.
# NOTE: the recorded run of this cell failed with FileNotFoundError because
# '../data/mnist.pkl.gz' could not be found relative to the working directory.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

FileNotFoundError: [Errno 2] No such file or directory: '../data/mnist.pkl.gz'