In [1]:
import random
import json
import sys
import numpy as np

# 定义神经网络结构


class QuadraticCost(object):
    @staticmethod
    def fn(a, y):
        return 0.5 * np.linalg.norm(a-y) ** 2

    @staticmethod
    def delta(z, a, y):
        return np.dot((a-y),sigmoid_prime(z))
        


class CrossEntropyCost(object):
    '''
    a = np.array([[np.nan, np.inf],
                  [-np.nan, -np.inf]])
    np.nan_to_sum(a)

    array([[0.00000000e+000, 1.79769313e+308],
           [0.00000000e+000, -1.79769313e+308]])
    '''
    @staticmethod
    def fn(a, y):
        y=np.array(y)
        return np.sum(np.nan_to_num(-y * np.log(a) - (1-y) * np.log(1-a)))

    @staticmethod
    def delta(z, a, y):
        y=np.array(y).reshape(10,1)
        return (a-y)


class Network(object):
    def __init__(self, sizes, cost=CrossEntropyCost):
        # 网络层数
        self.num_layers = len(sizes)
        # 每层神经元的个数
        self.sizes = sizes
        # 初始化每层的偏置和权重
        self.default_weight_initializer()
        # 损失函数
        self.cost = cost

    def default_weight_initializer(self):
        # 初始化每层的偏置
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        # 初始化每层的权重
        self.weights = [np.random.randn(y, x)/np.sqrt(x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def large_weight_initializer(self):
        # 初始化每层的偏置
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        # 初始化每层的权重
        self.weights = [np.random.randn(y, x) for x, y in zip(
            self.sizes[:-1], self.sizes[1:])]

    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    # 梯度下降
    def SGD(self, training_data, epochs, mini_batch_size, eta, lmbda=0.0, test_data=None):
        if test_data:
            n_test = len(list(test_data))
        # 训练数据总个数
        n = len(training_data)

        # 开始训练，循环每一个epochs
        for j in range(epochs):  
            # 洗牌 打乱训练数据
            random.shuffle(training_data)

            # mini_batch
            mini_batches = [training_data[k:k+mini_batch_size]
                            for k in range(0, n, mini_batch_size)]

            # 训练mini_batch
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta, lmbda, n)

            print("Epoch {0} complete".format(j))

            cost = self.total_cost(training_data, lmbda)
#             print("Cost on training data: {}".format(cost))
            accuracy = self.accuracy(training_data, convert=True)
            print("Accuracy on training data: {} / {}".format(accuracy, n))

            if test_data:
                cost = self.total_cost(test_data, lmbda, convert=True)
#                 print("Cost on test data: {}".format(cost))
                accuracy = self.accuracy(test_data)
                print("Accuracy on test data: {} / {}".format(accuracy, len(test_data)))

    def update_mini_batch(self, mini_batch, eta, lmbda, n):
        # 保存每层偏导
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # 训练一个mini_batch
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.update(x, y)

            # 保存一次训练网络中每层的偏导
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        # 更新权重和偏置 Wn+1 = Wn - eta * nw
        self.weights = [(1-eta*(lmbda/n))*w - (eta / len(mini_batch))
                        * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (eta / len(mini_batch)) * nb for b,
                       nb in zip(self.biases, nabla_b)]

    # 前向传播
    def update(self, x, y):
        # 保存每层偏导
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        activation = x  # 保存的输入（训练数据）

        # 保存每一层的激励值a=sigmoid(z)
        activations = [x]

        # 保存每一层的z=wx+b
        zs = []
        # 前向传播
        for b, w in zip(self.biases, self.weights):
            # 计算每层的z
            z = np.dot(w, activation) + b

            # 保存每层的z
            zs.append(z)

            # 计算每层的a
            activation = sigmoid(z)

            # 保存每一层的a
            activations.append(activation)
        # 反向更新
        # 计算最后一层的误差
        delta = (self.cost).delta(zs[-1], activations[-1], y)

        # 最后一层权重和偏置的导数
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # 倒数第二层一直到第一层 权重和偏置的导数
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)

            # 当前层的误差
            delta = np.dot(self.weights[-l+1].T, delta) * sp

            # 当前层的偏置和权重的导数
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].T)

        return (nabla_b, nabla_w)

    def accuracy(self, data, convert=False):
        if convert:
            # 如果是训练集数据，得到的结果是非0即1的one-hot编码方式
            results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                       for (x, y) in data]
        else:
            # 如果是测试集数据，得到的结果就是0-9之间的数字分类
            results = [(np.argmax(self.feedforward(x)), y) for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)

    def total_cost(self, data, lmbda, convert=False):
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)
            if convert:
                # convert为真，表示是测试数据集，将y转换为一个one-hot编码的十维向量
                y = mnist_loader.vectorized_result(y)
            cost += self.cost.fn(a, y) / len(data)
        cost += 0.5*(lmbda/len(data)) * \
            sum(np.linalg.norm(w)**2 for w in self.weights)
        return cost

    def cost_derivative(self, output_activation, y):
        return (output_activation - y)

    # 保存模型
    def save(self, filename):
        data = {"sizes": self.sizes,
                "weights": [w.tolist() for w in self.weights],
                "biases": [b.tolist() for b in self.biases],
                "cost": str(self.cost.__name__)
                }
        f = open(filename, "w")
        json.dump(data, f)  # json把字典类型转换成字符串
        f.close()

# 加载模型


def load(filename):
    f = open(filename, "r")
    data = json.load(f)
    f.close()
    cost = getattr(sys.modules[__name__], data["cost"])
    net = Network(data["sizes"], cost=cost)
    net.weights = [np.array(w) for w in data["weights"]]
    net.biases = [np.array(b) for b in data["biases"]]
    return net

# sigmoid激励函数


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def sigmoid_prime(z):
    return sigmoid(z) * (1-sigmoid(z))




In [6]:
import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
training_data = list(training_data)
test_data=list(test_data)
# 28*28=784个像素， 可以定义30个神经元， 共有10种分类
net = Network([784, 100, 10])
net.SGD(training_data, 50, 10, 4, test_data=test_data)

Epoch 0 complete
Accuracy on training data: 43816 / 50000
Accuracy on test data: 8792 / 10000
Epoch 1 complete
Accuracy on training data: 44646 / 50000
Accuracy on test data: 8944 / 10000
Epoch 2 complete
Accuracy on training data: 44864 / 50000
Accuracy on test data: 9002 / 10000
Epoch 3 complete
Accuracy on training data: 45288 / 50000
Accuracy on test data: 9077 / 10000
Epoch 4 complete
Accuracy on training data: 45416 / 50000
Accuracy on test data: 9077 / 10000
Epoch 5 complete
Accuracy on training data: 45112 / 50000
Accuracy on test data: 9053 / 10000
Epoch 6 complete
Accuracy on training data: 46313 / 50000
Accuracy on test data: 9247 / 10000
Epoch 7 complete
Accuracy on training data: 46529 / 50000
Accuracy on test data: 9282 / 10000
Epoch 8 complete
Accuracy on training data: 46292 / 50000
Accuracy on test data: 9230 / 10000
Epoch 9 complete
Accuracy on training data: 46153 / 50000
Accuracy on test data: 9211 / 10000
Epoch 10 complete
Accuracy on training data: 46777 / 50000
A

  return 1.0 / (1.0 + np.exp(-z))


Epoch 12 complete
Accuracy on training data: 46537 / 50000
Accuracy on test data: 9269 / 10000
Epoch 13 complete
Accuracy on training data: 46478 / 50000
Accuracy on test data: 9267 / 10000
Epoch 14 complete
Accuracy on training data: 46882 / 50000
Accuracy on test data: 9330 / 10000
Epoch 15 complete
Accuracy on training data: 46963 / 50000
Accuracy on test data: 9348 / 10000
Epoch 16 complete
Accuracy on training data: 46807 / 50000
Accuracy on test data: 9332 / 10000
Epoch 17 complete
Accuracy on training data: 47178 / 50000
Accuracy on test data: 9399 / 10000
Epoch 18 complete
Accuracy on training data: 47106 / 50000
Accuracy on test data: 9365 / 10000
Epoch 19 complete
Accuracy on training data: 46953 / 50000
Accuracy on test data: 9324 / 10000
Epoch 20 complete
Accuracy on training data: 47133 / 50000
Accuracy on test data: 9376 / 10000
Epoch 21 complete
Accuracy on training data: 47293 / 50000
Accuracy on test data: 9396 / 10000
Epoch 22 complete
Accuracy on training data: 47243

In [None]:
import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
training_data = list(training_data)
test_data=list(test_data)
# 28*28=784个像素， 可以定义30个神经元， 共有10种分类
net = Network([784, 100, 10])
net.SGD(training_data, 150, 10, 3, test_data=test_data)

Epoch 0 complete
Accuracy on training data: 45515 / 50000
Accuracy on test data: 9095 / 10000
Epoch 1 complete
Accuracy on training data: 46021 / 50000
Accuracy on test data: 9180 / 10000
Epoch 2 complete
Accuracy on training data: 46977 / 50000
Accuracy on test data: 9314 / 10000
Epoch 3 complete
Accuracy on training data: 47323 / 50000
Accuracy on test data: 9399 / 10000
Epoch 4 complete
Accuracy on training data: 47423 / 50000
Accuracy on test data: 9436 / 10000
Epoch 5 complete
Accuracy on training data: 47639 / 50000
Accuracy on test data: 9420 / 10000
Epoch 6 complete
Accuracy on training data: 47890 / 50000
Accuracy on test data: 9507 / 10000
Epoch 7 complete
Accuracy on training data: 47800 / 50000
Accuracy on test data: 9470 / 10000
Epoch 8 complete
Accuracy on training data: 47836 / 50000
Accuracy on test data: 9476 / 10000
Epoch 9 complete
Accuracy on training data: 48029 / 50000
Accuracy on test data: 9493 / 10000
Epoch 10 complete
Accuracy on training data: 48067 / 50000
A

  return np.sum(np.nan_to_num(-y * np.log(a) - (1-y) * np.log(1-a)))
  return np.sum(np.nan_to_num(-y * np.log(a) - (1-y) * np.log(1-a)))


Accuracy on training data: 49211 / 50000
Accuracy on test data: 9629 / 10000
Epoch 49 complete
Accuracy on training data: 49134 / 50000
Accuracy on test data: 9607 / 10000
Epoch 50 complete
Accuracy on training data: 49187 / 50000
Accuracy on test data: 9629 / 10000
Epoch 51 complete
Accuracy on training data: 49231 / 50000
Accuracy on test data: 9631 / 10000
Epoch 52 complete
Accuracy on training data: 49246 / 50000
Accuracy on test data: 9630 / 10000
Epoch 53 complete
Accuracy on training data: 49262 / 50000
Accuracy on test data: 9653 / 10000
Epoch 54 complete
Accuracy on training data: 49281 / 50000
Accuracy on test data: 9618 / 10000
Epoch 55 complete
Accuracy on training data: 49294 / 50000
Accuracy on test data: 9643 / 10000
Epoch 56 complete
Accuracy on training data: 49303 / 50000
Accuracy on test data: 9641 / 10000
Epoch 57 complete
Accuracy on training data: 49347 / 50000
Accuracy on test data: 9637 / 10000
Epoch 58 complete
Accuracy on training data: 49419 / 50000
Accuracy 

  return 1.0 / (1.0 + np.exp(-z))


Accuracy on training data: 49684 / 50000
Accuracy on test data: 9649 / 10000
Epoch 111 complete
Accuracy on training data: 49682 / 50000
Accuracy on test data: 9638 / 10000
Epoch 112 complete
Accuracy on training data: 49617 / 50000
Accuracy on test data: 9643 / 10000
Epoch 113 complete
Accuracy on training data: 49735 / 50000
Accuracy on test data: 9656 / 10000
Epoch 114 complete
Accuracy on training data: 49719 / 50000
Accuracy on test data: 9639 / 10000
Epoch 115 complete
Accuracy on training data: 49572 / 50000
Accuracy on test data: 9655 / 10000
Epoch 116 complete
Accuracy on training data: 49694 / 50000
Accuracy on test data: 9666 / 10000
Epoch 117 complete
Accuracy on training data: 49732 / 50000
Accuracy on test data: 9654 / 10000
Epoch 118 complete
Accuracy on training data: 49761 / 50000
Accuracy on test data: 9648 / 10000
Epoch 119 complete
Accuracy on training data: 49763 / 50000
Accuracy on test data: 9662 / 10000
Epoch 120 complete
Accuracy on training data: 49815 / 50000

In [None]:
import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
training_data = list(training_data)
test_data=list(test_data)
# 28*28=784个像素， 可以定义30个神经元， 共有10种分类
net = Network([784, 100, 10])
net.SGD(training_data, 100, 20, 4, test_data=test_data)

In [1]:
def relu(z):
    if z<0:
        return 0
    else:
        return z
    
def reludiff(z):
    return 1