In [22]:
import numpy as np

class DeepNet(object):
    """Fully connected feedforward network with sigmoid activations.

    Each layer computes ``a_next = sigma(W @ a + b)``. Training uses
    mini-batch gradient descent, with gradients obtained by backpropagation
    of the quadratic loss ``0.5 * ||a_out - y||^2``.

    Attributes:
        num_layers: number of layers, including the input layer.
        num_neuron: the ``sizes`` list passed to the constructor.
        bs: list of bias column vectors, one ``(r, 1)`` array per layer after
            the input layer.
        Ws: list of weight matrices, one ``(r, c)`` array per layer after the
            input layer, where ``c`` is the size of the previous layer.
    """

    def __init__(self, sizes):
        """Create a network whose k-th layer has ``sizes[k]`` neurons.

        Args:
            sizes: sequence of layer sizes, input layer first.
        """
        # Number of layers in the network
        self.num_layers = len(sizes)
        # Number of neurons in each layer
        self.num_neuron = sizes
        # Biases are drawn uniformly from [0, 1) (np.random.rand), one column
        # vector per non-input layer.
        self.bs = [np.random.rand(r, 1) for r in sizes[1:]]
        # Weights are drawn uniformly from [0, 1); Ws[k] maps layer k to layer k + 1.
        self.Ws = [np.random.rand(r, c) for r, c in zip(sizes[1:], sizes[:-1])]

    @staticmethod
    def sigma(z):
        """Sigmoid activation, applied element-wise.

        Declared static so that both ``DeepNet.sigma(z)`` and
        ``instance.sigma(z)`` work.

        Args:
            z: weighted input (scalar or array).

        Returns:
            ``1 / (1 + exp(-z))``, same shape as ``z``.
        """
        return 1.0 / (1 + np.exp(-z))

    def sigma_rate(self, z):
        """Derivative of the sigmoid evaluated at the weighted input ``z``.

        Args:
            z: weighted input (scalar or array).

        Returns:
            ``sigma(z) * (1 - sigma(z))``, same shape as ``z``.
        """
        s = self.sigma(z)
        return s * (1 - s)

    def feedforward(self, a):
        """Propagate an activation through every layer of the network.

        Args:
            a: input activation; assumed to be an ``(n_in, 1)`` column vector
               so that it broadcasts with the bias vectors.

        Returns:
            Activation of the last layer.
        """
        for b, W in zip(self.bs, self.Ws):
            # Weighted input followed by the sigmoid non-linearity
            a = self.sigma(np.dot(W, a) + b)
        return a

    def backpropagation(self, x, y):
        """Compute the gradient of the quadratic loss for one sample.

        Args:
            x: input column vector (activation of the first layer).
            y: target output, same shape as the last layer's activation.

        Returns:
            Tuple ``(gd_bs, gd_Ws)``: lists with the same shapes as
            ``self.bs`` and ``self.Ws`` holding the loss gradient with
            respect to each bias vector and weight matrix.
        """
        gd_bs = [np.zeros(b.shape) for b in self.bs]
        gd_Ws = [np.zeros(W.shape) for W in self.Ws]

        # Forward pass, remembering every weighted input and activation
        a = x               # current activation (starts at the input layer)
        activations = [x]   # activations of all layers, first to last
        zs = []             # weighted inputs of layers 2..last

        for b, W in zip(self.bs, self.Ws):
            z = np.dot(W, a) + b
            zs.append(z)
            a = self.sigma(z)
            activations.append(a)

        # Backward pass: error of the output layer first
        delta = (activations[-1] - y) * self.sigma_rate(zs[-1])
        gd_bs[-1] = delta
        gd_Ws[-1] = np.dot(delta, activations[-2].transpose())

        # Walk backwards; k counts layers from the end (k = 2 is second to last)
        for k in range(2, self.num_layers):
            # The derivative must be taken at this layer's own weighted input
            s = self.sigma_rate(zs[-k])
            # Pull the error back through the weights of the following layer
            delta = np.dot(self.Ws[-k + 1].transpose(), delta) * s
            gd_bs[-k] = delta
            gd_Ws[-k] = np.dot(delta, activations[-k - 1].transpose())

        return (gd_bs, gd_Ws)

    def evaluate(self, test):
        """Count correct predictions on labelled test data.

        The prediction for a sample is the index of the largest output
        activation.

        Args:
            test: iterable of ``(x, y)`` pairs where ``y`` is the expected
                  class index.

        Returns:
            Number of samples whose predicted index equals ``y``.
        """
        results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test]
        return sum(int(y0 == y1) for (y0, y1) in results)

    def update_para(self, mini_batch, eta):
        """Apply one gradient-descent step computed from a mini-batch.

        Update rule, layer by layer::

            new W = W - (eta / m) * sum of dLoss/dW over the batch
            new b = b - (eta / m) * sum of dLoss/db over the batch

        where ``m = len(mini_batch)``.

        Args:
            mini_batch: sized collection of ``(x, y)`` training samples.
            eta: learning rate.

        Returns:
            None. ``self.bs`` and ``self.Ws`` are replaced by the updated
            parameters. An empty mini-batch leaves them unchanged.
        """
        # Nothing to learn from, and it would otherwise divide by zero below
        if len(mini_batch) == 0:
            return

        gd_bs = [np.zeros(b.shape) for b in self.bs]
        gd_Ws = [np.zeros(W.shape) for W in self.Ws]
        for x, y in mini_batch:
            # Per-sample gradients
            dt_bs, dt_Ws = self.backpropagation(x, y)
            # Accumulate them over the batch
            gd_bs = [gd_b + dt_b for gd_b, dt_b in zip(gd_bs, dt_bs)]
            gd_Ws = [gd_W + dt_W for gd_W, dt_W in zip(gd_Ws, dt_Ws)]

        # Learning rate averaged over the batch size
        step = eta / len(mini_batch)
        self.bs = [b - step * gd_b for b, gd_b in zip(self.bs, gd_bs)]
        self.Ws = [W - step * gd_W for W, gd_W in zip(self.Ws, gd_Ws)]
        
        
    
    
        


In [19]:
# Build a 5-layer network: 4 inputs, then layers of 5, 4 and 3 neurons, and 1 output neuron
net = DeepNet([4,5,4,3,1])

In [20]:
# Inspect the bias vectors: one (r, 1) column vector for each layer after the input layer
net.bs

[array([[0.82003933],
        [0.78953694],
        [0.04211883],
        [0.22198753],
        [0.93319019]]),
 array([[0.51021025],
        [0.06248863],
        [0.66052186],
        [0.55094786]]),
 array([[0.8835669 ],
        [0.78567719],
        [0.29393298]]),
 array([[0.5719593]])]

In [21]:
# Inspect the weight matrices: one (r, c) matrix per layer, c being the previous layer's size
net.Ws

[array([[0.08113222, 0.88901792, 0.08917793, 0.52732948],
        [0.57057364, 0.89866055, 0.8649722 , 0.07670039],
        [0.47706599, 0.89996795, 0.36966847, 0.01682053],
        [0.94455757, 0.19365979, 0.38129643, 0.28483987],
        [0.52039522, 0.11143282, 0.15919477, 0.25961113]]),
 array([[0.13246825, 0.62444603, 0.0316855 , 0.35444947, 0.75866716],
        [0.63630608, 0.59366771, 0.34180139, 0.97517483, 0.71122407],
        [0.84166072, 0.69149115, 0.61316299, 0.28953297, 0.69593004],
        [0.23414129, 0.11782254, 0.40530646, 0.56770224, 0.73024333]]),
 array([[0.85214822, 0.3157396 , 0.211461  , 0.46292401],
        [0.32080462, 0.82897685, 0.64763325, 0.3779733 ],
        [0.93957912, 0.55148697, 0.12178442, 0.84041606]]),
 array([[0.69774941, 0.19504526, 0.18102969]])]