In [3]:
import numpy as np

np.random.seed(0)

#set toy data set
x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

In [4]:
#initialize weights and biases --> very basic
def initializeParameters(inputSize, hiddenSize, outputSize):
    np.random.seed(0)
    #random.randn returns random samples from the standard distrubition
    #0-1 i believe; assigning random weights to the notes (ask kenny)
    w1 = np.random.rand(hiddenSize, inputSize)*0.01 #making the weights weigh less? not sure why
    print(w1)
    print(x)
    #returns an array of zeros (a vector, NOTE THAT THE WHOLE THING 
    #IS IN BRACKETS TO DECLARE THE SIZE OF THE VECTOR)
    b1 = np.zeros((hiddenSize, 1))
    w2 = np.random.rand(outputSize, hiddenSize)*0.01
    b2 = np.zeros((outputSize, 1))
    #dictionary holding all the values; need keys to access
    parameters = {"w1": w1, "b1": b1, "w2": w2, "b2": b2}
    return parameters

parameters = initializeParameters(2,3,1)

[[0.00548814 0.00715189]
 [0.00602763 0.00544883]
 [0.00423655 0.00645894]]
[[0 0]
 [0 1]
 [1 0]
 [1 1]]


In [5]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [6]:
def forwardFeed(x, parameters):
    #retrieve parameters
    w1 = parameters["w1"]
    b1 = parameters["b1"]
    w2 = parameters["w2"]
    b2 = parameters["b2"]

    #activation of the hidden layer
    z1 = np.dot(w1, x.T) + b1 #.T is the transpose of the matrix
    a1 = sigmoid(z1)

    #activation of the output layer
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)
    cache = {"z1": z1, "a1": a1, "z2": z2, "a2": a2} #intermediate values for backprop

    return a2, cache

a2, cache = forwardFeed(x, parameters)

In [7]:
#define loss function - binary cross-entropy loss function

def binaryCrossEntrropyLoss(a2, y): #y is the true label (actual truth; what we want to model)
    m = y.shape[0]                  #a2 is the last output of the network
    loss = -1*(1/m)*np.sum(y*np.log(a2)+(1-y)*np.log(1-a2))
    return loss

In [8]:
#back prop
def backProp(parameters, cache, x, y):
    m = y.shape[0]

    # retrieve the intermediate values
    z1 = cache["z1"]
    a1 = cache["a1"]
    z2 = cache["z2"]
    a2 = cache["a2"]
    
    # derivative of loss w.r.t a2
    dA2 = -(y/a2) + ((1-y)/(1-a2))

    # derivative of the activation function of the output layer
    dZ2 = dA2 * (a2*(1-a2))
    
    #derivative of the weights and biases of the output layer
    dW2 = (1/m)*np.dot(dZ2, a1.T)
    dB2 = (1/m)*np.sum(dZ2, axis = 1, keepdims=True)
    # .sum(array (elements to sum), axis that sum is performed, keepdims keep the output the same size as the input)

    # derivative of activation of hidden layer
    dA1 = np.dot(parameters["w2"].T, dZ2)
    dZ1 = dA1*(a1*(1-a1))

    #derivative of weights and biases of the hidden layer
    dW1 = (1/m)*np.dot(dZ1, x)
    dB1 = (1/m)*np.sum(dZ1, axis=1, keepdims=True)
    gradients = {"dW1": dW1, "dB1": dB1, "dW2": dW2, "dB2": dB2}

    return gradients

In [9]:
# implement backprop; improve and update parameters
def updateParam(parameters, gradients, learningRate):
    #get gradients (derivatives)
    dW1 = gradients["dW1"]
    dB1 = gradients["dB1"]
    dW2 = gradients["dW2"]
    dB2 = gradients["dB2"]

    #retrieve weights and biases
    w1 = parameters["w1"]
    b1 = parameters["b1"]
    w2 = parameters["w2"]
    b2 = parameters["b2"]

    #update weights and biases
    # *note: this is the general weight update equation
    w1 = w1 - learningRate*dW1
    b1 = b1 - learningRate*dB1
    w2 = w2 - learningRate*dW2
    b2 = b2 - learningRate*dB2
    
    parameters = {"w1": w1, "b1": b1, "w2": w2, "b2": b2}
    
    return parameters

In [13]:
#training the neural network
def train(x, y, hiddenLayerSize, iterations, learningRate):
    #initialize weights and biases
    parameters = initializeParameters(x.shape[1], hiddenLayerSize, 1)
    for i in range(iterations):
        a2, cache = forwardFeed(x, parameters)

        #compute loss
        loss = binaryCrossEntrropyLoss(a2, y)

        #back prop
        gradients = backProp(parameters, cache, x, y)
        #update params
        parameters = updateParam(parameters, gradients, learningRate)
        
        if i%1000 == 0:
            print(f"iteration{i}: loss = {loss}")
    return parameters

parameters = train(x, y.T, hiddenLayerSize=4, iterations=10000, learningRate=0.1)

[[0.00548814 0.00715189]
 [0.00602763 0.00544883]
 [0.00423655 0.00645894]
 [0.00437587 0.00891773]]
[[0 0]
 [0 1]
 [1 0]
 [1 1]]
iteration0: loss = 2.7726782167210433
iteration1000: loss = 2.7725887221668755
iteration2000: loss = 2.7725887221667747
iteration3000: loss = 2.772588722166674
iteration4000: loss = 2.7725887221665726
iteration5000: loss = 2.7725887221664713
iteration6000: loss = 2.7725887221663696
iteration7000: loss = 2.772588722166267
iteration8000: loss = 2.7725887221661654
iteration9000: loss = 2.7725887221660632


In [14]:
# predict the labels for new data
def predict(x, parameters):
    A2, test = forwardFeed(x, parameters)
    predictions = (A2 > 0.5).astype(int)
    return predictions   

In [17]:
predictions = predict(x, parameters)
print(predictions)

[[0 1 0 1]]
