In [1]:
import numpy as np

In [3]:
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated element-wise.

    Args:
        x: A scalar, a NumPy array, or any array-like (e.g. a list) of numbers.

    Returns:
        The sigmoid of ``x`` with the same shape as ``x``; every value lies
        in the closed interval [0, 1].
    """
    # Convert first: ``-1 * some_list`` is list repetition in Python and
    # yields ``[]``, which would silently produce an empty result.
    x = np.asarray(x)
    # 1 / (1 + e^-x) == exp(-log(1 + e^-x)) == exp(-logaddexp(0, -x)).
    # logaddexp never forms e^-x directly, so large negative inputs do not
    # overflow (and do not emit a RuntimeWarning) on the way to 0.
    return np.exp(-np.logaddexp(0, -1 * x))

def relu(x):
    """Rectified linear unit: ``max(0, x)``, evaluated element-wise.

    Args:
        x: A scalar, a NumPy array, or an array-like of numbers.

    Returns:
        ``x`` with every negative entry replaced by 0, same shape as ``x``.
    """
    return np.maximum(0, x)

In [287]:
class NeuralNetwork:
    """Fully connected feed-forward network with a sigmoid on every non-input layer.

    Training is plain per-sample gradient descent: each training sample is
    pushed forward, back-propagated, and applied to the parameters
    immediately.

    Attributes:
        numLayers: Total number of layers, input and output included.
        layerDims: Number of units in each layer, indexed by layer.
        weights: List of ``numLayers - 1`` matrices; ``weights[i]`` has shape
            ``(layerDims[i + 1], layerDims[i])``.
        biases: List of ``numLayers - 1`` vectors; ``biases[i]`` has shape
            ``(layerDims[i + 1],)``.
    """

    # numLayers is number of layers in Neural Network, including the input layer and output layer
    # there are (numLayers - 2) hidden layers
    # we need (numLayers - 1) weights and biases
    def __init__(self, numLayers, layerDims, seed=None):
        """Create the network and randomly initialize its parameters.

        Args:
            numLayers: Total number of layers (input + hidden + output).
            layerDims: Sequence giving the size of each layer.
            seed: Optional seed. When given, parameters are drawn from a
                private ``np.random.RandomState(seed)`` so that two networks
                built with the same seed start identical. When ``None``
                (default) NumPy's global generator is used, as before.
        """
        self.numLayers = numLayers
        self.layerDims = layerDims

        rng = np.random if seed is None else np.random.RandomState(seed)

        self.weights = []
        self.biases = []
        for i in range(1, numLayers):
            fanOut, fanIn = self.layerDims[i], self.layerDims[i - 1]
            # rand() samples uniformly from [0, 1); rescale to [-1, 1)
            self.weights.append(2 * rng.rand(fanOut, fanIn) - 1)
            self.biases.append(2 * rng.rand(fanOut) - 1)

    # forward pass algorithm
    def forwardPass(self, input):
        """Propagate one input vector through the network.

        Args:
            input: Input-layer values (length ``layerDims[0]``).

        Returns:
            A list of ``numLayers`` entries: ``input`` itself followed by the
            sigmoid activations of each subsequent layer.
        """
        layerValues = [input]
        for i in range(1, self.numLayers):
            z = np.dot(self.weights[i - 1], layerValues[i - 1]) + self.biases[i - 1]
            layerValues.append(sigmoid(z))
        return layerValues

    def backPropagate(self, layers, expected):
        """Compute parameter gradients for a single sample.

        The output delta is ``(a - y) * a * (1 - a)``, i.e. the gradient of
        ``0.5 * ||a - y||^2`` taken through the sigmoid; deltas are then
        pushed back through the transposed weight matrices.

        Args:
            layers: Per-layer values as returned by ``forwardPass``.
            expected: Target output vector for this sample.

        Returns:
            ``(weightJacobians, biasGradients)``: two lists ordered like
            ``self.weights`` / ``self.biases``, each entry matching the shape
            of the parameter it belongs to.
        """
        output = np.asarray(layers[self.numLayers - 1])
        delta = (output - np.asarray(expected)) * output * (1 - output)

        # Collected from the last layer backwards, reversed before returning.
        weightJacobians = []
        biasGradients = []
        for l in range(self.numLayers - 1, 0, -1):
            previous = np.asarray(layers[l - 1])
            biasGradients.append(delta)
            weightJacobians.append(np.outer(delta, previous))

            # The input layer has no parameters, so no delta is needed for it.
            # Skipping it also avoids evaluating the sigmoid derivative on raw
            # input values (which failed outright for plain-list inputs).
            if l > 1:
                delta = np.dot(self.weights[l - 1].T, delta) * previous * (1 - previous)

        weightJacobians.reverse()
        biasGradients.reverse()
        return weightJacobians, biasGradients

    def gradientDescent(self, learningRate, weightJacobians, biasGradients):
        """Apply one gradient-descent step to the parameters, in place.

        Args:
            learningRate: Step size multiplying every gradient.
            weightJacobians: Gradients for ``self.weights``, in the same order.
            biasGradients: Gradients for ``self.biases``, in the same order.
        """
        for i in range(len(weightJacobians)):
            self.weights[i] -= (learningRate * weightJacobians[i])
            self.biases[i] -= (learningRate * biasGradients[i])

    def train(self, trainingData, epochs, learningRate):
        """Train with per-sample gradient descent.

        Args:
            trainingData: Sequence of samples; ``sample[0]`` is the input
                vector and ``sample[1]`` the expected output vector.
            epochs: Number of full passes over ``trainingData``.
            learningRate: Step size passed to ``gradientDescent``.
        """
        epoch = 0
        while epoch < epochs:
            for sample in trainingData:
                currInput = sample[0]
                currExpected = sample[1]
                currLayers = self.forwardPass(currInput)
                weightJacobians, biasGradients = self.backPropagate(currLayers, currExpected)
                # Parameters are updated after every sample, not once per epoch.
                self.gradientDescent(learningRate, weightJacobians, biasGradients)
            epoch += 1

    def predict(self, input):
        """Run the network on one input and return a label plus the raw output.

        Args:
            input: Input-layer values (length ``layerDims[0]``).

        Returns:
            ``(label, output)`` where ``output`` is the output-layer
            activation vector. With several output units ``label`` is the
            index of the largest activation. With a single output unit the
            argmax would always be 0, so ``label`` is instead 1 when the
            activation is at least 0.5 and 0 otherwise.
        """
        output = self.forwardPass(input)[-1]
        if np.size(output) == 1:
            return int(np.ravel(output)[0] >= 0.5), output
        return np.argmax(output), output

    def mse_error(self, output, expectedOutput):
        """Squared-error between an output vector and its target.

        Note: despite the name, the value is the *sum* of squared
        differences (``diff . diff``); it is not divided by the number of
        components.

        Args:
            output: Network output vector.
            expectedOutput: Target vector of the same length.

        Returns:
            The dot product of ``output - expectedOutput`` with itself.
        """
        diff = output - expectedOutput
        return np.dot(diff, diff)


In [288]:
# 3 layers in total (input, one hidden, output) with 2, 2 and 1 units respectively
nn = NeuralNetwork(3, [2, 2, 1])

In [289]:
nn.weights, nn.biases

([array([[ 0.24294923, -0.27688315],
         [-0.15745964, -0.44786474]]), array([[ 0.94919197, -0.71820465]])],
 [array([0.79671019, 0.77468696]), array([-0.56564414])])

In [290]:
# XOR truth table: each sample is an (input vector, expected output vector) pair
p1 = (np.array([0, 0]), np.array([0]))
p2 = (np.array([0, 1]), np.array([1]))
p3 = (np.array([1, 0]), np.array([1]))
p4 = (np.array([1, 1]), np.array([0]))
trainingData = [p1, p2, p3, p4]



In [291]:
# train(trainingData, epochs, learningRate): 10000 passes over the four samples with step size 0.2
nn.train(trainingData, 10000, 0.2)

In [295]:
# predict returns a (label, output-layer activations) tuple; for XOR, input [1, 1] should give an activation near 0
nn.predict(np.array([1,1]))

(0, array([0.04023206]))

In [185]:
# NOTE(review): the output recorded below is a list of three arrays for a 1-element input. That does not
# match the predict defined in this notebook (it returns a tuple) nor the [2, 2, 1] network built above,
# and the execution count is lower than the class cell's — presumably stale output from an earlier
# version of the class; confirm, then re-run or remove this cell.
nn.predict(np.array([0]))

[array([0]), array([0.41886156]), array([0.53206977])]

In [172]:
exp = p1[1]
j, b = nn.backPropagate(layers, exp)


L gradient:  [-3.9070304]
L value: 2
Lth layer:  [-0.9070304]
Lth bias gradient:  [6.75812551]
Lth weight jacobian:  [[2.01292044]]
L - 1 layer gradient:  [-4.36576436]
L value: 1
Lth layer:  [0.29785189]
Lth bias gradient:  [-0.91303911]
Lth weight jacobian:  [[-0.]]
L - 1 layer gradient:  [0.38019906]


In [130]:
layerVals = nn.forwardPass(p1[0])
layerVals

[array([0]), array([0.29740094]), array([0.47304873])]

In [131]:
exp = p1[1]
j, b = nn.backPropagate(layerVals, exp)
j, b

L value: 2
Lth layer:  [0.47304873]
Lth bias gradient:  [-0.11791857]
Lth weight jacobian:  [[-0.03506909]]
L - 1 layer gradient:  [-0.0379522]
L value: 1
Lth layer:  [0.29740094]
Lth bias gradient:  [-0.00793025]
Lth weight jacobian:  [[-0.]]
L - 1 layer gradient:  [-0.00720378]


([array([[-0.]]), array([[-0.03506909]])],
 [array([-0.00793025]), array([-0.11791857])])

In [133]:
nn.predict(np.array([0]))

[array([0]), array([0.29740094]), array([0.47304873])]

In [79]:
layers = nn.forwardPass(p1[0])
exp = p1[1]
weightJacob, biasGrad = nn.backPropagate(layers, exp)
weightJacob, biasGrad

L value: 2
Lth layer:  [1.75922244]
Lth bias gradient:  [-2.34968988]
Lth weight jacobian:  [[-3.08849888]]
L - 1 layer gradient:  [-1.47509353]
L value: 1
Lth layer:  [1.3144283]
Lth bias gradient:  [0.60964651]
Lth weight jacobian:  [[0.]]
L - 1 layer gradient:  [0.38681782]


([array([[0.]]), array([[-3.08849888]])],
 [array([0.60964651]), array([-2.34968988])])

In [80]:
nn.gradientDescent(0.1, weightJacob, biasGrad)

In [None]:
      currLayer = layerValues[self.numLayers - 1]
        biasGradients = []
        layerGradients = []
        weightJacobians = []
        layerGradients.insert(0, currLayer - expectedOutput)
        print("L gradient: ", layerGradients[0])
        

        numWeights = self.numLayers - 1
        
        for l in range(numWeights, 0, -1):
            print("L value: " + str(l))
            # layer l
            currLayer = layerValues[l]
            print("Lth layer: ", currLayer)
            # gradient of b_l wrt cost
            biasGradient = np.reshape(layerGradients[0] * currLayer * (1 - currLayer), self.biases[l - 1].shape)
            print("Lth bias gradient: ", biasGradient)
            # jacobian of W_l wrt cost
            weightJacobian = np.reshape(np.dot(biasGradient.T, layerValues[l - 1]), self.weights[l - 1].shape)
            print("Lth weight jacobian: ", weightJacobian)
            # lth weight matrix
            W_l = self.weights[l - 1]
            # gradient of layer l-1 wrt cost
            layerGradient = np.dot(W_1.T, layerGradients[0]) * currLayer * (1 - currLayer)
            print("L - 1 layer gradient: ", layerGradient)
            
            biasGradients.insert(0, biasGradient)
            weightJacobians.insert(0, weightJacobian)
            layerGradients.insert(0, layerGradient)
            
        return weightJacobians, biasGradients