In [1]:
import numpy as np

In [6]:
def sigmoid(x):
    """
    Logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise.

    Evaluated as ``exp(-logaddexp(0, -x))``. This is algebraically the same
    expression, but it never calls ``exp`` on a large positive argument, so
    very negative inputs yield 0.0 without an overflow warning.

    Parameters
    ----------
    x : scalar or array-like of numbers

    Returns
    -------
    NumPy float or ndarray of the same shape as ``x``.
    """
    # log(1 + exp(-x)) == logaddexp(0, -x); NumPy evaluates this stably.
    return np.exp(-np.logaddexp(0, np.negative(x)))

def sigmoid_derivative(x):
    """
    Derivative of the logistic sigmoid at ``x``.

    Uses the identity ``s'(x) = s(x) * (1 - s(x))`` where ``s`` is ``sigmoid``.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

def mse_loss(y_true, y_pred):
    """
    Mean squared error between targets and predictions.

    Both arguments are converted with ``np.asarray`` first, so plain lists
    and scalars are accepted in addition to NumPy arrays.

    Parameters
    ----------
    y_true : array-like of target values
    y_pred : array-like of predicted values, broadcastable against ``y_true``

    Returns
    -------
    NumPy float: the mean of the element-wise squared differences.
    """
    residual = np.asarray(y_true) - np.asarray(y_pred)
    return np.mean(residual ** 2)

class Perceptron:
    """
    A simple 2 input Perceptron written as explicitly as possible.

    The model is one hidden sigmoid neuron (weights ``w1``, ``w2``, bias
    ``b1``) feeding one sigmoid output neuron (weight ``w3``, bias ``b2``).
    """
    def __init__(self):
        """Draw each of the five parameters from a standard normal distribution."""
        self.w1 = np.random.normal()
        self.w2 = np.random.normal()
        self.w3 = np.random.normal()

        self.b1 = np.random.normal()
        self.b2 = np.random.normal()

    def _forward(self, x):
        """Run the forward pass and return ``(z1, h1, z2, output)``."""
        # Indexing the last axis lets a single sample of shape (2,) and a
        # batch of shape (n, 2) share the same code path.
        z1 = self.w1 * x[..., 0] + self.w2 * x[..., 1] + self.b1
        h1 = sigmoid(z1)

        z2 = self.w3 * h1 + self.b2
        output = sigmoid(z2)
        return z1, h1, z2, output

    def feed_forward(self, x):
        """
        Predict the network output.

        Parameters
        ----------
        x : array-like, either one sample of shape (2,) or a batch of shape (n, 2)

        Returns
        -------
        A scalar for a single sample, or an array of length n for a batch.
        """
        return self._forward(np.asarray(x))[3]

    def train(self, x_data, y_data, alpha, epochs, log_every=100):
        """
        Fit the parameters with per-sample gradient descent on the squared error.

        Parameters
        ----------
        x_data : array-like of shape (n, 2), one row per sample
        y_data : array-like of length n, one target per row of ``x_data``
        alpha : learning rate applied to every parameter update
        epochs : number of full passes over the data
        log_every : print the MSE over the whole data set every this many
            epochs (starting at epoch 0); a falsy value disables printing

        Raises
        ------
        ValueError
            If ``x_data`` and ``y_data`` do not have the same number of samples.
        """
        # Convert once so list inputs also work in the batched loss evaluation.
        x_data = np.asarray(x_data)
        y_data = np.asarray(y_data)
        if len(x_data) != len(y_data):
            # zip() below would otherwise silently drop the unmatched samples.
            raise ValueError(
                "x_data and y_data must have the same length, got %d and %d"
                % (len(x_data), len(y_data))
            )

        for epoch in range(epochs):
            for x, y in zip(x_data, y_data):
                z1, h1, z2, y_pred = self._forward(x)

                # Loss for one sample is (y - y_pred)^2.
                d_L_d_ypred = -2 * (y - y_pred)

                # Output neuron
                d_ypred_d_z2 = sigmoid_derivative(z2)
                d_L_d_z2 = d_L_d_ypred * d_ypred_d_z2
                d_L_d_w3 = d_L_d_z2 * h1
                d_L_d_b2 = d_L_d_z2

                # Hidden neuron: chain through w3 (read before it is updated).
                d_z2_d_h1 = self.w3
                d_h1_d_z1 = sigmoid_derivative(z1)
                d_L_d_z1 = d_L_d_z2 * d_z2_d_h1 * d_h1_d_z1
                d_L_d_w1 = d_L_d_z1 * x[0]
                d_L_d_w2 = d_L_d_z1 * x[1]
                d_L_d_b1 = d_L_d_z1

                # All gradients are computed above, so the updates below
                # cannot influence one another within a step.
                self.w1 -= alpha * d_L_d_w1
                self.w2 -= alpha * d_L_d_w2
                self.w3 -= alpha * d_L_d_w3
                self.b1 -= alpha * d_L_d_b1
                self.b2 -= alpha * d_L_d_b2
            if log_every and epoch % log_every == 0:
                loss = mse_loss(y_data, self.feed_forward(x_data))
                print("Epoch %d loss: %.3f" % (epoch, loss))

<h2><center>Trying to solve a logic OR gate</center></h2>

In [7]:
# Truth table of a two-input OR gate: one input pair per row of X,
# with the matching target at the same position in Y.
X = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])
Y = np.array([0, 1, 1, 1])

# Train a freshly initialised network on the four samples.
NN = Perceptron()
NN.train(X, Y, alpha=0.3, epochs=1000)

Epoch 0 loss: 0.210
Epoch 100 loss: 0.075
Epoch 200 loss: 0.021
Epoch 300 loss: 0.010
Epoch 400 loss: 0.006
Epoch 500 loss: 0.004
Epoch 600 loss: 0.003
Epoch 700 loss: 0.003
Epoch 800 loss: 0.002
Epoch 900 loss: 0.002


In [9]:
# Show the trained network's output for each of the four rows of X.
NN.feed_forward(X)

array([0.06314576, 0.96466155, 0.96453308, 0.98411097])

<h2><center>Trying to solve a logic AND gate</center></h2>

In [10]:
# Truth table of a two-input AND gate: one input pair per row of X,
# with the matching target at the same position in Y.
X = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])
Y = np.array([0, 0, 0, 1])

# Train a freshly initialised network on the four samples.
NN = Perceptron()
NN.train(X, Y, alpha=0.3, epochs=1000)

Epoch 0 loss: 0.345
Epoch 100 loss: 0.181
Epoch 200 loss: 0.134
Epoch 300 loss: 0.055
Epoch 400 loss: 0.021
Epoch 500 loss: 0.011
Epoch 600 loss: 0.007
Epoch 700 loss: 0.005
Epoch 800 loss: 0.004
Epoch 900 loss: 0.003


In [11]:
# Show the trained network's output for each of the four rows of X.
NN.feed_forward(X)

array([0.00965879, 0.04609439, 0.04633098, 0.92233412])