In [1]:
import numpy as np
import matplotlib.pyplot as plt
import numpy.random as random
from numpy.linalg import pinv
%matplotlib inline

## 1. Implement a fixed network to solve the XOR operation, where the total number of neurons is 3 and the number of layers is 2. Use the batch gradient descent for the optimization.

In [28]:
class Layer(object):
    """
    Layer contains an array of neurons
    """
    
    def __init__(self, *args):
        self.weights = args[0]
        self.bias = args[1]
        """ gd: for gradient descent """
        self.bp2_gd = []
        self.bp3_gd = []
        self.bp4_gd = []
        self.a_gd = []
        
    def __call__(self, x):
        """ 
        Calling on the layer will calculate the output a of the activation function 
        and z, the intermediate calculation that sums the weights and bias
        
        x: list of inputs
        """
        self.z = self.basis_function(self.weights, self.bias, x)
        self.a = self.activation_function(self.sigmoid, self.z)
        return (self.z, self.a)
        
    def basis_function(self, w, b, x):
        z = w.T.dot(x) + b
        return z
    
    def sigmoid(self, a):
        return 1 / (1 + np.exp(-a))
    
    def activation_function(self, fun, z):
        return fun(z)

In [40]:
class Network(object):
    """A Network contains a list of layers, and functions to do feedforward and backpropagation"""
    
    def __init__(self, *args):
        self.layers = []
        
        """
        a list of number of neurons in each layer
        i.e. [2, 2, 1] means that there are 2 neurons in the input layer and 2 in the first and 1 in the second layer
        """
        self.neuron_nums = args[0]
        # x: features to be trained
        self.x = args[1]
        # N: number of training data
        self.N = args[2]
        # t: result for comparison
        self.t = args[3]
        """ gd: for gradient descent"""
        self.bp1_gd = []
        
    def initialize_layers(self, neuron_nums, N):
        for i in range(len(neuron_nums) - 1): 
            self.initialize_layer(N, neuron_nums[i], neuron_nums[i+1])
    
    def initialize_layer(self, N, prev, curr, mu=0):
        """ 
        Initializes weights and bias for current layer
        N: number of training data
        prev: number of neurons in previous layer
        curr: number of neurons in current layer

        mu = 0
        sigma = 1 / sqrt(N) in order to avoid network saturation
        """
        mu = 0
        sigma = 1 / np.sqrt(N)

        W = np.zeros((curr, prev))
        b = np.zeros((curr, 1))
        for c in range(curr):
            b[c] = random.normal(mu, sigma)
            for p in range(prev):
                W[c][p] = random.normal(mu, sigma)

        layer = Layer(W.T, b)
        self.layers.append(layer)
    
    def feed_forward(self, layers, x, counter):
        """
        Start off with the first layer, where the input is x and the counter is 0. 
        Then we increase the counter and move to the next layer. 
        The output from the previous layer will become the input for the next layer.

        layers: list of layers in the network
        x: input to the layer
        counter: keep track of which layer we are in
        """
        layer = layers[counter] # get the current layer we are in
        z, a = layer(x) # calculate the value of a and z
        
        # checks whether we the number of z's is the same as the number of neurons in the layer
        assert z.shape == (self.neuron_nums[counter+1], 1)
        assert a.shape == (self.neuron_nums[counter+1], 1)
        
        layer.z = z # save the current value of z in the layer
        layer.a = a # save the current value of a in the layer
        layer.a_gd.append(a)
        if (counter==len(layers)-1):
            return # if we have reached the last layer, stop!
        else:
            self.feed_forward(layers, layer.a, counter+1) # else keep feeding the result forward
    
    def back_prop1(self, layers):
        """ 
        Calculates the first error to be backpropagated (from the last layer)
        """
        last_layer = layers[len(layers) - 1]
        a = last_layer.a
        z = last_layer.z
        sigmoid = last_layer.sigmoid
        self.bp1 = (a - self.t) * (sigmoid(z) * (1 - sigmoid(z)))
        self.bp1_gd.append(self.bp1)
        
    def back_prop2(self, layers, counter, bp1):
        """ 
        Calculates the error to be backpropagated between layers 
        """
        if (counter<0):
            return
        current_layer = layers[counter]
        next_layer = layers[counter+1]
        sigmoid = current_layer.sigmoid
        z = current_layer.z
        w = next_layer.weights.reshape(self.neuron_nums[counter+2], self.neuron_nums[counter+1])
        assert w.shape == (self.neuron_nums[counter+2], self.neuron_nums[counter+1])
        
        current_layer.bp2 = np.multiply(w.T.dot(bp1), (np.multiply(sigmoid(z), (1 - sigmoid(z)))))
        current_layer.bp2_gd.append(current_layer.bp2)
        print(current_layer.bp2_gd)
        self.back_prop2(layers, counter-1, current_layer.bp2)
    
    def back_prop3(self, layers, counter):
        if (counter<0):
            return
        
        current_layer = layers[counter]
        if (counter==0):
            a = self.x
        else:
            prev_layer = layers[counter-1]
            a = prev_layer.a
        
        if (counter == len(layers) - 1): # if it is the last layer, get bp1
            bp2 = self.bp1
        else: 
            bp2 = current_layer.bp2
        
        current_layer.bp3 = bp2.dot(a.T)
        current_layer.bp3_gd.append(current_layer.bp3)
        self.back_prop3(layers, counter-1)
        
    def back_prop4(self, layers, counter):
        if (counter < 0):
            return
        current_layer = layers[counter]
        if (counter == len(layers) - 1):
            current_layer.bp4 = self.bp1
            current_layer.bp4_gd.append(current_layer.bp4)
        else:
            current_layer.bp4 = current_layer.bp2
            current_layer.bp4_gd.append(current_layer.bp4)
        self.back_prop4(layers, counter-1)
    
    def gradient_descent(self, layers, counter, alpha):
        """
        alpha refers to the rate of learning
        """
        if (counter < 0):
            return
        
        current_layer = layers[counter]
        if (counter==0):
            a = self.x
        else:
            prev_layer = layers[counter-1]
            a = prev_layer.a
            
        if (counter == len(layers) - 1): # if it is the last layer, get bp1
            bp2 = self.bp1
        else: 
            bp2 = current_layer.bp2
    
        w_old = current_layer.weights
        b_old = current_layer.bias
        current_layer.weights = w_old - (alpha / self.N) * np.sum(bp2_gd.dot(a.T))
        current_layer.bias = b_old - alpha * bp2
        
        self.gradient_descent(layers, counter-1, alpha)

### Running the Network

In [41]:
x = np.asmatrix([[0, 0], [1, 0], [0, 1], [1, 1]])
t = np.asarray([0, 1, 1, 0]).reshape((4, 1))
# x = np.zeros((2, 1)) # the input
# t = 0 # the result
N = len(x)
neuron_nums = [2, 2, 1]
for i in range(N):
    network = Network(neuron_nums, x[i].T, N, t[i])
    network.initialize_layers(network.neuron_nums, network.N)
    network.feed_forward(network.layers, x[i].T, 0)
    network.back_prop1(network.layers)
    network.back_prop2(network.layers, len(network.layers)-2, network.bp1)
    network.back_prop3(network.layers, len(network.layers)-1)
    network.back_prop4(network.layers, len(network.layers)-1)
#     network.gradient_descent(network.layers, len(network.layers)-1, 20)
#     network.feed_forward(network.layers, x[i].T, 0)
# network.layers[1].bias
len(network.layers[0].bp2_gd)

[matrix([[  2.87299804e-05],
        [  3.30236290e-03]])]
[matrix([[-0.01225961],
        [ 0.02762571]])]
[matrix([[ 0.00083415],
        [-0.00436362]])]
[matrix([[-0.00332215],
        [ 0.00960377]])]


1

[[ 0.0057662 ]
 [ 0.05290287]]


ValueError: shapes (2,1) and (2,1) not aligned: 1 (dim 1) != 2 (dim 0)

In [426]:
network.neuron_nums[1]

2

In [190]:
last_layer = network.layers[1]
prev_layer = network.layers[0]
sigmoid = prev_layer.sigmoid
z = prev_layer.z
last_layer.weights.dot(network.bp1) * (sigmoid(z) * (1 - sigmoid(z)))

array([[ 0.02519494],
       [ 0.05052484],
       [-0.03420019]])

In [126]:
last_layer = network.layers[1]
prev_layer = network.layers[0]
z = prev_layer.z
sigmoid = prev_layer.sigmoid

np.sum(network.bp1.dot(last_layer.weights).T.dot(sigmoid(z).T.dot(1 - sigmoid(z))))

0.05548232938898897

### Testing the output

In [88]:
print(network.layers[0].z)

[[ 0.61965343]
 [ 0.05668075]]


In [6]:


N = 4
layer1 = initialize_layer(N, 2, 2) # 2 input neurons from input layer, 2 current neurons
layer2 = initialize_layer(N, 2, 1) # 2 input neurons from layer 1, 1 current neuron

layers = []
layers.append(layer1)
layers.append(layer2)
x = np.zeros((2, 1)) # x1 = 0, x2 = 0

def feed_forward(N, x, layer1, layer2):
    z1, a1 = layer1(x)
    z2, a2 = layer2(a1)
    return z1, a1, z2, a2

z1, a1, z2, a2 = feed_forward(N)
a2

NameError: name 'self' is not defined