<h3>Full credit goes to this blog post <a href='https://victorzhou.com/blog/intro-to-neural-networks/'>here</a> by Victor Zhou.</h3>

<h1>This notebook builds a neural network from scratch with 3 inputs, one hidden layer of 2 neurons, and 1 output neuron.</h1>

In [129]:
import numpy as np

In [130]:
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + e^(-z)), applied element-wise.

    Accepts a Python/NumPy scalar or a NumPy array; an array input yields an
    array of the same shape.
    """
    exp_neg_z = np.exp(-z)
    return 1 / (1 + exp_neg_z)

def sigmoid_derivative(z):
    """Derivative of the sigmoid with respect to its input, evaluated at ``z``.

    Uses the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)).
    """
    activation = sigmoid(z)  # evaluate once, reuse for both factors
    return activation * (1 - activation)

def mse_loss(y_true, y_pred):
    """Mean squared error between targets and predictions.

    Args:
        y_true: Array-like of target values.
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        The mean of the squared element-wise differences.
    """
    # Coerce to arrays so plain lists/tuples work too, not only ndarrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return ((y_true - y_pred) ** 2).mean()

class NeuralNetwork():
    """
    A tiny feed-forward network written out by hand.

    Architecture: 3 inputs -> 2 hidden sigmoid neurons (h1, h2) -> 1 sigmoid
    output neuron.

    Parameters (all scalars):
        w1, w2, w3, b1 : weights and bias of hidden neuron h1
        w4, w5, w6, b2 : weights and bias of hidden neuron h2
        w7, w8, b3     : weights and bias of the output neuron

    All of the derivatives are written out as explicitly as possible.
    """
    def __init__(self, seed=None):
        """Initialise every weight and bias with a standard-normal draw.

        Args:
            seed: Optional seed (anything accepted by ``np.random.default_rng``)
                for reproducible initial weights. When ``None`` (the default)
                the values are drawn from NumPy's global random state, so a
                prior ``np.random.seed(...)`` call is still honoured.
        """
        rng = np.random if seed is None else np.random.default_rng(seed)

        # Draw order (w1..w8, then b1..b3) is kept fixed so seeded runs are stable.
        self.w1 = rng.normal()
        self.w2 = rng.normal()
        self.w3 = rng.normal()
        self.w4 = rng.normal()
        self.w5 = rng.normal()
        self.w6 = rng.normal()
        self.w7 = rng.normal()
        self.w8 = rng.normal()

        self.b1 = rng.normal()
        self.b2 = rng.normal()
        self.b3 = rng.normal()

    def _forward(self, x1, x2, x3):
        """Forward pass for feature values x1, x2, x3 (scalars or equal-length arrays).

        Returns the tuple (z1, h1, z2, h2, z3, output) of pre-activations and
        activations so that ``train`` can reuse them for back-propagation.
        """
        z1 = self.w1 * x1 + self.w2 * x2 + self.w3 * x3 + self.b1
        h1 = sigmoid(z1)

        z2 = self.w4 * x1 + self.w5 * x2 + self.w6 * x3 + self.b2
        h2 = sigmoid(z2)

        z3 = self.w7 * h1 + self.w8 * h2 + self.b3
        output = sigmoid(z3)
        return z1, h1, z2, h2, z3, output

    def feed_forward(self, x):
        """Compute the network output for one sample or a batch of samples.

        Args:
            x: Array-like whose last axis holds the 3 input features, e.g. a
                batch of shape (n_samples, 3) or a single sample of shape (3,).

        Returns:
            The output-neuron activation: one value per sample for a batch,
            a scalar for a single sample.
        """
        x = np.asarray(x)
        # Index the last axis so both (3,) and (n, 3) inputs are handled.
        return self._forward(x[..., 0], x[..., 1], x[..., 2])[-1]

    def train(self, data, all_y_trues, epochs, alpha, log_every=100):
        """Fit the network with per-sample (stochastic) gradient descent.

        The loss minimised for each sample is the squared error
        (y_true - output) ** 2.

        Args:
            data: Array-like of samples, one row of 3 features per sample.
            all_y_trues: Array-like of targets, one per row of ``data``.
            epochs: Number of passes over the whole dataset.
            alpha: Learning rate (step size of each update).
            log_every: Print the mean squared error over ``data`` every this
                many epochs; pass 0 or ``None`` to disable printing.

        Raises:
            ValueError: If ``data`` and ``all_y_trues`` differ in length.
        """
        data = np.asarray(data)
        all_y_trues = np.asarray(all_y_trues)
        if len(data) != len(all_y_trues):
            raise ValueError(
                "data and all_y_trues must have the same length, got %d and %d"
                % (len(data), len(all_y_trues))
            )

        for epoch in range(epochs):
            for x, y_true in zip(data, all_y_trues):
                z1, h1, z2, h2, z3, output = self._forward(x[0], x[1], x[2])

                # dL/d(output) for L = (y_true - output) ** 2
                d_L_ypred = -2 * (y_true - output)

                # Output neuron: z3 = w7*h1 + w8*h2 + b3, output = sigmoid(z3)
                d_ypred_d_z3 = sigmoid_derivative(z3)
                d_L_d_z3 = d_L_ypred * d_ypred_d_z3  # shared by every gradient below
                d_z3_d_w7 = h1
                d_z3_d_w8 = h2
                d_L_dw7 = d_L_d_z3 * d_z3_d_w7
                d_L_dw8 = d_L_d_z3 * d_z3_d_w8
                d_L_db3 = d_L_d_z3

                # Hidden neuron 1: z1 = w1*x0 + w2*x1 + w3*x2 + b1, h1 = sigmoid(z1)
                d_z3_d_h1 = self.w7
                d_h1_d_z1 = sigmoid_derivative(z1)
                d_L_d_z1 = d_L_d_z3 * d_z3_d_h1 * d_h1_d_z1
                d_L_dw1 = d_L_d_z1 * x[0]
                d_L_dw2 = d_L_d_z1 * x[1]
                d_L_dw3 = d_L_d_z1 * x[2]
                d_L_db1 = d_L_d_z1

                # Hidden neuron 2: z2 = w4*x0 + w5*x1 + w6*x2 + b2, h2 = sigmoid(z2)
                d_z3_d_h2 = self.w8
                d_h2_d_z2 = sigmoid_derivative(z2)
                d_L_d_z2 = d_L_d_z3 * d_z3_d_h2 * d_h2_d_z2
                d_L_dw4 = d_L_d_z2 * x[0]
                d_L_dw5 = d_L_d_z2 * x[1]
                d_L_dw6 = d_L_d_z2 * x[2]
                d_L_db2 = d_L_d_z2

                # Apply the updates only after every gradient has been computed,
                # so all gradients are based on the pre-update weights.
                self.w1 -= alpha * d_L_dw1
                self.w2 -= alpha * d_L_dw2
                self.w3 -= alpha * d_L_dw3
                self.b1 -= alpha * d_L_db1

                self.w4 -= alpha * d_L_dw4
                self.w5 -= alpha * d_L_dw5
                self.w6 -= alpha * d_L_dw6
                self.b2 -= alpha * d_L_db2

                self.w7 -= alpha * d_L_dw7
                self.w8 -= alpha * d_L_dw8
                self.b3 -= alpha * d_L_db3

            if log_every and epoch % log_every == 0:
                y_pred = self.feed_forward(data)
                loss = mse_loss(all_y_trues, y_pred)
                print("Epoch %d loss: %.3f" % (epoch, loss))

<h3><center>We are going to use the Neural Network on this table of values</center></h3>

|$X_{1}$|$X_{2}$|$X_{3}$|Y|
|-|-|-|-|
|1|0|0|0|
|1|1|0|1|
|1|0|1|1|
|1|1|1|0|

In [126]:
# Tunable settings for this run.
SEED = 42             # fixes the random initial weights so the run is repeatable
EPOCHS = 2500         # passes over the four samples
LEARNING_RATE = 0.1   # alpha used for every gradient step

# NeuralNetwork draws its initial weights from NumPy's global random state.
# NOTE(review): convergence depends on the initial weights; if the loss stalls,
# try another SEED. The output recorded below predates the seed, so re-run to
# refresh it.
np.random.seed(SEED)

# One row per sample: (x1, x2, x3). x1 is 1 in every row.
X = np.array([
    [1,0,0],
    [1,1,0],
    [1,0,1],
    [1,1,1]
])
# Targets: 1 exactly when x2 and x3 differ.
Y = np.array([0,1,1,0])
NN = NeuralNetwork()
NN.train(X,Y,EPOCHS,LEARNING_RATE)

Epoch 0 loss: 0.439
Epoch 100 loss: 0.249
Epoch 200 loss: 0.246
Epoch 300 loss: 0.244
Epoch 400 loss: 0.241
Epoch 500 loss: 0.237
Epoch 600 loss: 0.231
Epoch 700 loss: 0.224
Epoch 800 loss: 0.217
Epoch 900 loss: 0.208
Epoch 1000 loss: 0.199
Epoch 1100 loss: 0.188
Epoch 1200 loss: 0.171
Epoch 1300 loss: 0.146
Epoch 1400 loss: 0.118
Epoch 1500 loss: 0.091
Epoch 1600 loss: 0.070
Epoch 1700 loss: 0.056
Epoch 1800 loss: 0.045
Epoch 1900 loss: 0.037
Epoch 2000 loss: 0.031
Epoch 2100 loss: 0.027
Epoch 2200 loss: 0.024
Epoch 2300 loss: 0.021
Epoch 2400 loss: 0.019


In [128]:
# Network output for every row of X after training (one value per sample).
predictions = NN.feed_forward(X)
predictions

array([0.13964953, 0.88084825, 0.87978237, 0.13729175])

<h3>It does a pretty good job for a bare-minimum neural network. With more iterations or a different value of alpha, you could achieve even better results.</h3>