In [28]:
import json
import sys
import random
import numpy as np

def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)) of ``z``.

    ``z`` is passed straight to ``np.exp``, so a scalar or a NumPy array
    both work; for an array the function is applied element-wise.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator
    
def sigmoid_prime(z):
    """Return the derivative of the sigmoid function evaluated at ``z``.

    Uses the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)).
    """
    activation = sigmoid(z)
    return activation * (1 - activation)

def vectorized_result(j, num_classes=10):
    """Return a one-hot column vector with 1.0 at index ``j``.

    Args:
        j: Index of the entry to set to 1.0.
        num_classes: Length of the vector. Defaults to 10, which is the
            size this function always produced before the parameter existed.

    Returns:
        A ``(num_classes, 1)`` NumPy array of zeros with ``e[j] == 1.0``.
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e

def load(filename):
    """Load a ``Network`` from the JSON file ``filename``.

    The file is expected to hold a dict with the layer sizes, the
    ``"weights"`` and ``"biases"`` as nested lists, and ``"cost"`` as the
    name of a cost class defined in this module.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        A ``Network`` whose weights and biases are the arrays stored in
        the file.

    Raises:
        KeyError: If the file has neither a ``"sizes"`` nor a ``"size"`` key,
            or lacks ``"cost"``, ``"weights"`` or ``"biases"``.
        AttributeError: If the stored cost name is not defined in this module.
    """
    # The context manager closes the file even when json.load raises.
    with open(filename, "r") as f:
        data = json.load(f)
    # Accept both spellings of the layer-size key: "sizes" is what this
    # function originally read, "size" is what existing files may contain.
    sizes = data["sizes"] if "sizes" in data else data["size"]
    # The cost is stored by class name; resolve it among this module's names.
    cost = getattr(sys.modules[__name__], data["cost"])
    net = Network(sizes, cost=cost)
    net.weights = [np.array(w) for w in data["weights"]]
    net.biases = [np.array(b) for b in data["biases"]]
    return net

class CrossEntropyCost(object):
    """Cross-entropy cost and its output-layer error term."""

    @staticmethod
    def fn(a, y):
        """Return the cross-entropy cost of output ``a`` against target ``y``.

        ``np.nan_to_num`` replaces any NaN produced by the logarithms
        (e.g. ``0 * log(0)``) before the terms are summed.
        """
        target_term = -y * np.log(a)
        complement_term = (1 - y) * np.log(1 - a)
        return np.sum(np.nan_to_num(target_term - complement_term))

    @staticmethod
    def delta(z, a, y):
        """Return the output-layer error ``a - y``.

        ``z`` is not used; it is accepted so that the signature matches
        the other cost class's ``delta``.
        """
        error = a - y
        return error
    
class QuadraticCost(object):
    """Quadratic (squared-error) cost and its output-layer error term."""

    @staticmethod
    def fn(a, y):
        """Return half the squared norm of ``a - y``."""
        residual = a - y
        return 0.5 * np.linalg.norm(residual) ** 2

    @staticmethod
    def delta(z, a, y):
        """Return the output-layer error ``(a - y) * sigmoid_prime(z)``."""
        residual = a - y
        return residual * sigmoid_prime(z)

class Network(object):
    """Feedforward neural network with sigmoid layers, trained by mini-batch SGD.

    Attributes:
        num_layers: Number of layers, i.e. ``len(size)``.
        size: List with the number of neurons in each layer.
        biases: One ``(y, 1)`` array per non-input layer.
        weights: One ``(y, x)`` array per pair of consecutive layers, where
            ``x`` is the size of the earlier layer and ``y`` of the later one.
        cost: Cost class providing static ``fn(a, y)`` and ``delta(z, a, y)``.
    """

    def __init__(self, size, cost=CrossEntropyCost):
        """Create a network with layer sizes ``size`` and the given cost class.

        Weights and biases are drawn by ``default_weight_initializer``.
        """
        self.num_layers = len(size)
        self.size = size
        self.default_weight_initializer()
        self.cost = cost

    def default_weight_initializer(self):
        """Draw Gaussian biases and Gaussian weights scaled by 1/sqrt(fan-in).

        Dividing by ``sqrt(x)`` (``x`` = number of inputs to the neuron) gives
        each weight a standard deviation of ``1/sqrt(x)``. Dividing by ``x**2``
        instead would make the weights orders of magnitude too small.
        """
        self.biases = [np.random.randn(y, 1) for y in self.size[1:]]
        self.weights = [np.random.randn(y, x) / np.sqrt(x)
                        for x, y in zip(self.size[:-1], self.size[1:])]

    def large_weight_initializer(self):
        """Draw biases and weights from an unscaled standard Gaussian."""
        self.biases = [np.random.randn(y, 1) for y in self.size[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(self.size[:-1], self.size[1:])]

    # Feedforward: compute the network output.
    def feedforward(self, a):
        """Return the output of the network for input activation ``a``."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    # Stochastic gradient descent.
    def SGD(self, training_data, epochs, mini_batch_size, eta, lmbda=0.0, evaluation_data=None,
            monitor_evaluation_cost=False,
            monitor_evaluation_accuracy=False,
            monitor_training_cost=False,
            monitor_training_accuracy=False):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: Mutable sequence of ``(x, y)`` pairs. It is shuffled
                in place at the start of every epoch.
            epochs: Number of passes over ``training_data``.
            mini_batch_size: Number of examples per mini-batch.
            eta: Learning rate.
            lmbda: L2 regularization parameter.
            evaluation_data: Optional sequence of ``(x, y)`` pairs used by the
                evaluation monitors.
            monitor_evaluation_cost: Record and print the cost on
                ``evaluation_data`` after each epoch.
            monitor_evaluation_accuracy: Record and print the accuracy on
                ``evaluation_data`` after each epoch.
            monitor_training_cost: Record and print the cost on
                ``training_data`` after each epoch.
            monitor_training_accuracy: Record and print the accuracy on
                ``training_data`` after each epoch.

        Returns:
            A tuple ``(evaluation_cost, evaluation_accuracy, training_cost,
            training_accuracy)`` of per-epoch lists; a list stays empty when
            its monitor flag is off.

        Raises:
            ValueError: If an evaluation monitor is requested without
                ``evaluation_data``.
        """
        if (monitor_evaluation_cost or monitor_evaluation_accuracy) and not evaluation_data:
            raise ValueError(
                "evaluation_data is required when monitoring evaluation cost or accuracy")
        n_data = len(evaluation_data) if evaluation_data else 0
        n = len(training_data)
        evaluation_cost, evaluation_accuracy = [], []
        training_cost, training_accuracy = [], []
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k + mini_batch_size]
                            for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta, lmbda, n)
            print("Epoch %s training complete" % j)
            if monitor_training_cost:
                cost = self.total_cost(training_data, lmbda)
                training_cost.append(cost)
                print("Cost on training data:{}".format(cost))
            if monitor_training_accuracy:
                # convert=True: training targets are compared via argmax.
                accuracy = self.accuracy(training_data, convert=True)
                training_accuracy.append(accuracy)
                print("Accuracy on training data:{}/{}".format(accuracy, n))
            if monitor_evaluation_cost:
                # convert=True: evaluation labels are turned into one-hot vectors.
                cost = self.total_cost(evaluation_data, lmbda, convert=True)
                evaluation_cost.append(cost)
                print("Cost on evaluation data:{}".format(cost))
            if monitor_evaluation_accuracy:
                accuracy = self.accuracy(evaluation_data)
                evaluation_accuracy.append(accuracy)
                # Reported only when requested, reusing the value just computed
                # instead of running a second pass over the evaluation data.
                print("Accuracy on evaluation data:{}/{}".format(accuracy, n_data))
        return evaluation_cost, evaluation_accuracy, training_cost, training_accuracy

    # Update the weights and biases.
    def update_mini_batch(self, mini_batch, eta, lmbda, n):
        """Apply one gradient-descent step computed from ``mini_batch``.

        Args:
            mini_batch: Sequence of ``(x, y)`` pairs.
            eta: Learning rate.
            lmbda: L2 regularization parameter.
            n: Total size of the training set, used to scale the weight decay.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # Weights get L2 weight decay (factor 1 - eta*lmbda/n); biases do not.
        self.weights = [(1 - eta * (lmbda / n)) * w - (eta / len(mini_batch)) * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (eta / len(mini_batch)) * nb
                       for b, nb in zip(self.biases, nabla_b)]

    # Backpropagation: compute the partial derivatives.
    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the cost gradient for one example.

        Both are lists of arrays with the same shapes as ``self.biases`` and
        ``self.weights`` respectively.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Forward pass, keeping every weighted input z and every activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass, starting from the output-layer error.
        delta = self.cost.delta(zs[-1], activations[-1], y)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # l counts layers from the end: l = 2 is the second-to-last layer.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def accuracy(self, data, convert=False):
        """Return how many inputs in ``data`` the network classifies correctly.

        The prediction is the index of the largest output activation. With
        ``convert=True`` the target ``y`` is reduced with ``np.argmax`` before
        comparing; otherwise ``y`` is compared to the prediction directly.
        """
        if convert:
            results = [(np.argmax(self.feedforward(x)), np.argmax(y)) for (x, y) in data]
        else:
            results = [(np.argmax(self.feedforward(x)), y) for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)

    def total_cost(self, data, lmbda, convert=False):
        """Return the mean cost over ``data`` plus the L2 regularization term.

        With ``convert=True`` each target ``y`` is first passed through
        ``vectorized_result``.
        """
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)
            if convert:
                y = vectorized_result(y)
            cost += self.cost.fn(a, y) / len(data)
        cost += 0.5 * (lmbda / len(data)) * sum(np.linalg.norm(w) ** 2 for w in self.weights)
        return cost

    def save(self, filename):
        """Write the layer sizes, weights, biases and cost name to ``filename`` as JSON."""
        # The layer sizes go under "sizes", the key that load() reads.
        data = {"sizes": self.size,
                "weights": [w.tolist() for w in self.weights],
                "biases": [b.tolist() for b in self.biases],
                "cost": str(self.cost.__name__)}
        # The context manager closes the file even when json.dump raises.
        with open(filename, "w") as f:
            json.dump(data, f)

In [29]:
# Train a 784-30-10 cross-entropy network on the data returned by
# mnist_loader.load_data_wrapper(), monitoring all four metrics.
import mnist_loader

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
net = Network([784, 30, 10], cost=CrossEntropyCost)
# Network.__init__ already calls this initializer, so this re-draws the
# initial weights and biases.
net.default_weight_initializer()
net.SGD(
    training_data,
    epochs=40,
    mini_batch_size=20,
    eta=0.5,
    lmbda=5.0,
    evaluation_data=validation_data,
    monitor_evaluation_cost=True,
    monitor_evaluation_accuracy=True,
    monitor_training_cost=True,
    monitor_training_accuracy=True,
)

Compilation is falling back to object mode WITH looplifting enabled because Function "SGD" failed type inference due to: non-precise type pyobject
[1] During: typing of argument at <ipython-input-28-e6f8df7915f7> (71)

File "<ipython-input-28-e6f8df7915f7>", line 71:
    def SGD(self, training_data, epochs, mini_batch_size, eta,lmbda=0.0,evaluation_data=None,
        <source elided>
           monitor_training_accuracy=False):
        if evaluation_data:
        ^

  @jit
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "SGD" failed type inference due to: cannot determine Numba type of <class 'numba.dispatcher.LiftedLoop'>

File "<ipython-input-28-e6f8df7915f7>", line 76:
    def SGD(self, training_data, epochs, mini_batch_size, eta,lmbda=0.0,evaluation_data=None,
        <source elided>
        training_cost,training_accuracy=[],[]
        for j in range(epoch

Epoch 0 training complete
Cost on training data:0.5155043796797054
Accuracy on training data:46533/50000
Cost on evaluation data:0.6873582399226027
Accuracy on evaluation data:9334/10000
Epoch 1 training complete
Cost on training data:0.4566267632364201
Accuracy on training data:47351/50000
Cost on evaluation data:0.7242354653833223
Accuracy on evaluation data:9491/10000
Epoch 2 training complete
Cost on training data:0.3998343302033394
Accuracy on training data:47849/50000
Cost on evaluation data:0.7301243991180546
Accuracy on evaluation data:9526/10000
Epoch 3 training complete
Cost on training data:0.37861174575337325
Accuracy on training data:48041/50000
Cost on evaluation data:0.7511766670512814
Accuracy on evaluation data:9569/10000
Epoch 4 training complete
Cost on training data:0.3808318292089895
Accuracy on training data:48041/50000
Cost on evaluation data:0.7790740638224632
Accuracy on evaluation data:9577/10000
Epoch 5 training complete
Cost on training data:0.37312400852263

([0.6873582399226027,
  0.7242354653833223,
  0.7301243991180546,
  0.7511766670512814,
  0.7790740638224632,
  0.796588719748411,
  0.8118598281584495,
  0.8028949634356523,
  0.8186410448739486,
  0.8255695756007033,
  0.8454994174573471,
  0.8506034033551548,
  0.8534158096602934,
  0.8536274573288727,
  0.843584260102657,
  0.8461722421967239,
  0.8496135548080523,
  0.8532711950648573,
  0.865478112401948,
  0.8480131758509442,
  0.8546025326949147,
  0.8466196653967679,
  0.8547288053769835,
  0.8714842432740684,
  0.8689363296273603,
  0.8598680095719096,
  0.8610831380111155,
  0.8552224266977068,
  0.85931008667645,
  0.8598718306676199,
  0.8624871260813629,
  0.8716448819813394,
  0.8895975758328205,
  0.8634132279659643,
  0.8644774633960464,
  0.8611361510797564,
  0.8796777966980664,
  0.8699277623996714,
  0.8745753156092229,
  0.8750294578546004],
 [9334,
  9491,
  9526,
  9569,
  9577,
  9577,
  9579,
  9612,
  9622,
  9623,
  9601,
  9585,
  9609,
  9615,
  9649,
  96