# 实现网络

In [5]:
import numpy as np

## 初始化 Network

In [26]:
## 初始化 Network 对象

class Network(object):
    """Holds the layer sizes and randomly initialised parameters of a network."""

    def __init__(self, sizes):
        """Create the biases and weights for the given layer sizes.

        ``sizes`` lists the number of neurons per layer; for example
        ``Network([2, 3, 1])`` has 2 neurons in the first layer, 3 in the
        second and 1 in the last. All parameters are drawn with
        ``np.random.randn`` (Gaussian with mean 0 and standard deviation 1).
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One (neurons, 1) bias column per layer, skipping the input layer.
        self.biases = []
        for neurons in sizes[1:]:
            self.biases.append(np.random.randn(neurons, 1))
        # One (n_out, n_in) weight matrix per pair of adjacent layers.
        self.weights = []
        for n_in, n_out in zip(sizes[:-1], sizes[1:]):
            self.weights.append(np.random.randn(n_out, n_in))

In [7]:
# Example layer sizes used by the demo cells below: 2, 3 and 1 neurons.
sizes = ([2, 3, 1])

In [11]:
# Pair every layer size with the size of the layer that follows it.
zip(sizes[:-1],sizes[1:])

[(2, 3), (3, 1)]

In [6]:
# One (y, 1) column of random biases for every layer after the first.
[np.random.randn(y, 1) for y in sizes[1:]]

[array([[-1.84567346],
        [-0.04157113],
        [ 0.51547234]]), array([[ 2.39312491]])]

In [8]:
# One (y, x) random weight matrix per adjacent layer pair (x neurons -> y neurons).
[np.random.randn(y, x) for x,y in zip(sizes[:-1], sizes[1:])]

[array([[-2.5891381 , -0.68558773],
        [ 0.68784812, -0.86417404],
        [ 0.27833512, -0.10772027]]),
 array([[ 0.68225848, -1.16444573,  0.2073934 ]])]

In [12]:
# A single 3x1 array of random samples, as used for one layer's biases.
np.random.randn(3,1)

array([[ 0.81884185],
       [ 0.09944421],
       [-0.76981553]])

## sigmoid

In [13]:
# 定义 sigmoid 函数

def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) of ``z``.

    ``z`` may be a scalar or a numpy array; ``np.exp`` is applied elementwise.
    """
    exp_neg_z = np.exp(-z)
    denominator = 1.0 + exp_neg_z
    return 1.0 / denominator

## feedforward

In [None]:
# 对 Network 类添加一个 feedforward 方法，对于网络给定一个输入 a，返回对应的输出
# 对每一层应用方程 a' = σ(wa + b)，其中 σ 为 sigmoid 函数

def feedforward(self, a):
    """Return the network's output for the input ``a``.

    Walks through the layers in order and replaces the running activation
    with ``sigmoid(w . activation + b)`` using that layer's weights and biases.
    """
    activation = a
    for bias, weight in zip(self.biases, self.weights):
        weighted_input = np.dot(weight, activation) + bias
        activation = sigmoid(weighted_input)
    return activation

## SGD

In [19]:
# 随机梯度下降 SGD

def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
    """Train the network with mini-batch stochastic gradient descent.

    Args:
        self: the network object; it must provide ``update_mini_batch`` and,
            when ``test_data`` is given, ``evaluate``.
        training_data: list of ``(x, y)`` tuples. It is shuffled in place at
            the start of every epoch.
        epochs: number of passes over the training data.
        mini_batch_size: number of samples per mini-batch; the last batch of
            an epoch may be smaller.
        eta: learning rate.
        test_data: optional evaluation data. When given (and non-empty) the
            network is evaluated after every epoch and the score is printed;
            otherwise only a completion message is printed.
    """
    # Imported locally because the file only imports numpy at module level.
    import random

    if test_data:
        n_test = len(test_data)
    n = len(training_data)
    # range() and single-argument print(...) behave the same on Python 2 and 3.
    for j in range(epochs):
        random.shuffle(training_data)
        mini_batches = [training_data[k:k + mini_batch_size]
                        for k in range(0, n, mini_batch_size)]
        for mini_batch in mini_batches:
            self.update_mini_batch(mini_batch, eta)
        if test_data:
            print("Epoch {0} : {1} / {2}".format(j, self.evaluate(test_data), n_test))
        else:
            print("Epoch {0} complete".format(j))

In [17]:
# Demo of the epoch loop used in SGD: xrange(5) yields 0 through 4 (Python 2 syntax).
for j in xrange(5):
    print j

0
1
2
3
4


In [18]:
# Start offsets produced by stepping from 0 to 100 in strides of 20,
# the same stepping pattern SGD uses to slice mini-batches.
range(0, 100,20)

[0, 20, 40, 60, 80]

## update wb

In [25]:
# 对于每一个 mini_batch 应用一次梯度下降，根据单次梯度下降迭代更新网络的权重和偏置
# self.update_mini_batch(mini_batch, eta)

def update_mini_batch(self, mini_batch, eta):
    """Apply one gradient-descent step computed from a single mini-batch.

    Args:
        self: the network object; it must expose ``biases``, ``weights`` and
            a ``backprob(x, y)`` method returning ``(nabla_b, nabla_w)``.
        mini_batch: sequence of ``(x, y)`` training pairs.
        eta: learning rate.

    The per-sample gradients returned by ``self.backprob`` are summed over the
    whole mini-batch, and then ``self.weights`` / ``self.biases`` are replaced
    once using the averaged gradient scaled by ``eta``. An empty mini-batch
    leaves the parameters untouched.
    """
    batch_size = len(mini_batch)
    if batch_size == 0:
        # Nothing to average over; also avoids dividing by zero below.
        return
    # Gradient accumulators, one zero array per bias vector / weight matrix.
    nabla_b = [np.zeros(b.shape) for b in self.biases]
    nabla_w = [np.zeros(w.shape) for w in self.weights]
    for x, y in mini_batch:
        # Backpropagation gives the cost gradient for this single sample.
        delta_nabla_b, delta_nabla_w = self.backprob(x, y)
        nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
        nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
    # Update once, after the full mini-batch has been accumulated. float()
    # keeps the division exact on Python 2 even when eta is an int.
    step = eta / float(batch_size)
    self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]
    self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]

In [23]:
# Build example bias vectors for the demo layer sizes and display them.
biases = [np.random.randn(y, 1) for y in sizes[1:]]
biases

[array([[-1.02855548],
        [-0.43575453],
        [ 2.13699738]]), array([[ 0.48355206]])]

In [24]:
# Zero arrays with the same shapes as the biases, as used for gradient accumulators.
[np.zeros(b.shape) for b in biases]

[array([[ 0.],
        [ 0.],
        [ 0.]]), array([[ 0.]])]

## 完整代码

In [27]:
import numpy as np

## 初始化 Network 对象

class Network(object):
    """Feedforward neural network trained with mini-batch gradient descent.

    The activation helpers ``sigmoid`` and ``sigmoid_prime`` are module-level
    functions defined elsewhere in this file.
    """

    def __init__(self, sizes):
        """Create randomly initialised biases and weights.

        Args:
            sizes: number of neurons per layer, e.g. ``Network([2, 3, 1])``
                has 2 neurons in the first layer, 3 in the second and 1 in
                the last.

        All parameters are drawn with ``np.random.randn`` (Gaussian with
        mean 0 and standard deviation 1).
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One (y, 1) bias column per layer, excluding the input layer.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # One (y, x) weight matrix per pair of adjacent layers (x -> y neurons).
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the network output for input ``a``.

        Applies ``a' = sigmoid(w . a + b)`` layer by layer.
        """
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: list of ``(x, y)`` tuples. It is shuffled in place
                at the start of every epoch.
            epochs: number of passes over the training data.
            mini_batch_size: number of samples per mini-batch; the last batch
                of an epoch may be smaller.
            eta: learning rate.
            test_data: optional evaluation data. When given (and non-empty)
                the network is evaluated after every epoch and the score is
                printed; otherwise only a completion message is printed.
        """
        # Imported locally because the file only imports numpy at module level.
        import random

        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        # range() and single-argument print(...) behave the same on
        # Python 2 and Python 3.
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k + mini_batch_size]
                            for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0} : {1} / {2}".format(j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step computed from a single mini-batch.

        Args:
            mini_batch: sequence of ``(x, y)`` training pairs.
            eta: learning rate.

        The per-sample gradients from ``backprob`` are summed over the whole
        mini-batch, then the weights and biases are replaced once using the
        averaged gradient scaled by ``eta``. An empty mini-batch leaves the
        parameters untouched.
        """
        batch_size = len(mini_batch)
        if batch_size == 0:
            # Nothing to average over; also avoids dividing by zero below.
            return
        # Gradient accumulators, one zero array per bias vector / weight matrix.
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            # Backpropagation gives the cost gradient for this single sample.
            delta_nabla_b, delta_nabla_w = self.backprob(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # Update once, after the full mini-batch has been accumulated.
        # float() keeps the division exact on Python 2 even when eta is an int.
        step = eta / float(batch_size)
        self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]

    def backprob(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the cost gradient for one sample.

        ``nabla_b`` and ``nabla_w`` are layer-by-layer lists of numpy arrays
        with the same shapes as ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Forward pass, remembering every weighted input and activation.
        activation = x
        activations = [x]  # all activations, layer by layer
        zs = []  # all weighted-input (z) vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # Backward pass: start from the error at the output layer.
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Here l counts layers from the end: l = 1 is the last layer (handled
        # above), l = 2 is the second-last layer, and so on. This numbering
        # takes advantage of Python's negative list indices.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            # Propagate the error back through the weights of the next layer.
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return the number of test inputs the network classifies correctly.

        The network's answer is taken to be the index of the output neuron
        with the highest activation, which is compared against ``y`` for
        each ``(x, y)`` pair in ``test_data``.
        """
        test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """Return the partial derivatives of the cost w.r.t. the output activations."""
        return (output_activations - y)

# Miscellaneous functions
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) of ``z``.

    ``z`` may be a scalar or a numpy array; ``np.exp`` is applied elementwise.
    """
    exp_neg_z = np.exp(-z)
    denominator = 1.0 + exp_neg_z
    return 1.0 / denominator

def sigmoid_prime(z):
    """Return the derivative of the sigmoid function evaluated at ``z``.

    Uses the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)).
    """
    activation = sigmoid(z)
    complement = 1 - activation
    return activation * complement