### Creating a neural network

In [2]:
import numpy as np

Creating a single neuron to simulate an AND gate

In [3]:
"""Sigmoid can be used as an activation function: The activation function is used 
    to turn an unbounded input into an output that has a nice, predictable form."""

def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + e^(-x)); numpy arrays are handled element-wise."""
    exp_neg_x = np.exp(-x)
    return 1 / (1 + exp_neg_x)

# def andGate(z):
#     if(z>0):
#         return 1
#     else:
#         return 0

class Neuron:
    """One artificial neuron: a weighted sum of its inputs plus a bias, passed through sigmoid."""

    def __init__(self, weights, bias):
        self.weights, self.bias = weights, bias

    def feedforward(self, inputs):
        """Return sigmoid(weights . inputs + bias) for a single input vector."""
        pre_activation = np.dot(self.weights, inputs) + self.bias  # dot product, then shift by the bias
        return sigmoid(pre_activation)

# Alternative AND-gate inputs (use together with the commented-out weights/bias below):
# inputs1 = np.array([0,1])
# inputs2 = np.array([1,1])
# Active example: a single input vector x = [2, 3].
inputs1 = np.array([2,3]) 

# AND-gate parameters: with weights [1, 1] and bias -1 the weighted sum is positive
# only when both inputs are 1.
# weights = np.array([1,1])
# bias = -1
# Active parameters: w = [0, 1], b = 0, so the neuron computes sigmoid(0*2 + 1*3 + 0) = sigmoid(3).
# NOTE: `weights`, `bias` and `inputs1` are notebook-level names reused by later cells.
weights = np.array([0,1])
bias = 0

n = Neuron(weights,bias)
# print("AND of 0 and 1")
print(n.feedforward(inputs1))
# print("\nAND of 1 and 1")
# print(n.feedforward(inputs2))


0.9525741268224334


Creating a neural network by combining more neurons

In [4]:
class ourNN:
    """A NN with a hidden layer of two neurons (h1, h2) feeding one output neuron (o1).

    Every neuron is built from the notebook-level `weights` and `bias`.
    """
    def __init__(self):
        # Three independent Neuron objects, created in the order h1, h2, o1.
        self.h1, self.h2, self.o1 = (Neuron(weights, bias) for _ in range(3))

    def feedforward(self, x):
        """Feed x to both hidden neurons, then feed their two outputs to o1."""
        hidden_outputs = np.array([self.h1.feedforward(x), self.h2.feedforward(x)])
        return self.o1.feedforward(hidden_outputs)

# Build the three-neuron network (each neuron uses the notebook-level `weights` and `bias`)
# and run the same example input through it.
NN = ourNN()
# print("AND of 0 and 1")
print(NN.feedforward(inputs1))
# The second AND-gate case below needs `inputs2`, which is only defined in commented-out code above.
# print("\nAND of 1 and 1")
# print(NN.feedforward(inputs2))

0.7216325609518421


Now we create a loss function, mean squared error (MSE), to measure how well the network fits the training data (the data used in the tutorial is for predicting gender from height and weight).

In [5]:
def mse_loss(y_true, y_pred):
  """Mean squared error between two numpy arrays of the same length."""
  residual = y_true - y_pred
  return (residual ** 2).mean()

# Sanity check: every prediction is off by exactly 1, so each squared error is 1
# and the mean is 1.0.
y_true = np.array([1, 1, 1, 1])
y_pred = np.array([0, 0, 0, 0])

# Bare last expression so the notebook displays the returned loss.
mse_loss(y_true, y_pred)

1.0

Now we have to try to minimize the Loss of the neural network by adjusting the weights and biases
### Backpropagation:
We do this based on the equation derived: 
#### dL/dw1 = dL/dy_pred * dy_pred/dh1 * dh1/dw1 
(d is partial derivative here)

h1 = output of hidden-layer neuron h1

w1 = first weight of hidden-layer neuron h1

We're finding how much the loss L changes when w1 (the first weight of the hidden-layer neuron) changes, working backwards through the final output (y_pred) and h1

#### Stochastic gradient descent is next used to vary the weights
All we’re doing is subtracting η*∂L/∂w1 from w1, where η is the learning rate.

If the above quantity is +ve, w1 will decrease, which makes L decrease

If the above quantity is -ve, w1 will increase, which makes L decrease

If we do this for every weight and bias in the network, the loss will slowly decrease and our network will improve.

In [14]:
def deriv_sigmoid(x):
    """Derivative of sigmoid, using f'(x) = f(x) * (1 - f(x))."""
    activation = sigmoid(x)
    return activation * (1 - activation)

class finalNN():
    '''A tiny feed-forward network trained by per-sample gradient descent.

    Layout:
        - 2 inputs, x[0] and x[1] (weight and height in the notebook's example)
        - a hidden layer with 2 sigmoid neurons:
              h1 = sigmoid(w1 * x[0] + w2 * x[1] + b1)
              h2 = sigmoid(w3 * x[0] + w4 * x[1] + b2)
        - an output layer with 1 sigmoid neuron:
              o1 = sigmoid(w5 * h1 + w6 * h2 + b3)

    Relies on the notebook-level helpers sigmoid, deriv_sigmoid and mse_loss.
    '''

    def __init__(self, seed=None):
        '''Initialise all six weights and three biases with np.random-style normal() draws.

        Args:
            seed: optional seed for a private np.random.RandomState, which makes the
                initial parameters reproducible without touching NumPy's global
                random state. With the default (None) the global np.random state is
                used, exactly as before.
        '''
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Draw order (w1..w6, then b1..b3) is preserved so that code which seeds the
        # global generator keeps getting the same initial parameters.
        # Weights
        self.w1 = rng.normal()
        self.w2 = rng.normal()
        self.w3 = rng.normal()
        self.w4 = rng.normal()
        self.w5 = rng.normal()
        self.w6 = rng.normal()

        # Biases
        self.b1 = rng.normal()
        self.b2 = rng.normal()
        self.b3 = rng.normal()

    def _forward(self, x):
        '''Forward pass for one sample that also exposes the pre-activation sums.

        Returns:
            The tuple (sum_h1, h1, sum_h2, h2, sum_o1, o1).
        '''
        sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
        h1 = sigmoid(sum_h1)

        sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
        h2 = sigmoid(sum_h2)

        sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
        o1 = sigmoid(sum_o1)
        return sum_h1, h1, sum_h2, h2, sum_o1, o1

    def feedforward(self, x):
        '''Return the network output o1 for a single sample x = [x0, x1].'''
        return self._forward(x)[-1]

    def train(self, data, y_actual, epochs=1000, learn_rate=0.1, log_every=10):
        '''Fit the network with stochastic gradient descent on the squared error.

        Parameters are updated after every individual sample, using the chain rule
        dL/dw = dL/dy_pred * dy_pred/dh * dh/dw.

        Args:
            data: 2-D array with one sample [x0, x1] per row.
            y_actual: targets, one per row of data.
            epochs: number of passes over the whole dataset (default 1000).
            learn_rate: step size applied to every gradient (default 0.1).
            log_every: print the loss over the full dataset every this many epochs
                (default 10); 0 or None disables printing.

        Returns:
            None. The weights and biases are updated in place.
        '''
        for epoch in range(epochs):
            for x, y_true in zip(data, y_actual):
                # --- Forward pass (the intermediate sums are needed for the gradients)
                sum_h1, h1, sum_h2, h2, sum_o1, y_pred = self._forward(x)

                # --- Partial derivatives. Naming: d_L_d_w1 means "partial L / partial w1".
                # Per-sample loss is (y_true - y_pred)^2.
                d_L_d_ypred = -2 * (y_true - y_pred)

                # Each sigmoid slope is needed several times; compute it once per neuron.
                slope_o1 = deriv_sigmoid(sum_o1)
                slope_h1 = deriv_sigmoid(sum_h1)
                slope_h2 = deriv_sigmoid(sum_h2)

                # Neuron o1
                d_ypred_d_w5 = h1 * slope_o1
                d_ypred_d_w6 = h2 * slope_o1
                d_ypred_d_b3 = slope_o1

                # These use the output weights from BEFORE this sample's update.
                d_ypred_d_h1 = self.w5 * slope_o1
                d_ypred_d_h2 = self.w6 * slope_o1

                # Neuron h1
                d_h1_d_w1 = x[0] * slope_h1
                d_h1_d_w2 = x[1] * slope_h1
                d_h1_d_b1 = slope_h1

                # Neuron h2
                d_h2_d_w3 = x[0] * slope_h2
                d_h2_d_w4 = x[1] * slope_h2
                d_h2_d_b2 = slope_h2

                # --- Update weights and biases
                # Neuron h1
                self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
                self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
                self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1

                # Neuron h2
                self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3
                self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
                self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2

                # Neuron o1
                self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5
                self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6
                self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3

            # --- Report the loss over the whole dataset every `log_every` epochs
            if log_every and epoch % log_every == 0:
                y_preds = np.apply_along_axis(self.feedforward, 1, data)
                loss = mse_loss(y_actual, y_preds)
                print("Epoch %d loss: %.3f" % (epoch, loss))


In [12]:
# Seed NumPy's global generator so finalNN's randomly drawn initial weights -- and
# therefore the loss log and the predictions that follow -- are identical on every
# Restart & Run All.
np.random.seed(0)

# Define dataset: one row per person, columns are [weight, height].
data = np.array([
  [-2, -1],  # Alice
  [25, 6],   # Bob
  [17, 4],   # Charlie
  [-15, -6], # Diana
])
# Target labels, one per row of `data`: 1 = female, 0 = male.
all_y_trues = np.array([
  1, # Alice
  0, # Bob
  0, # Charlie
  1, # Diana
])

# Train our neural network!
network = finalNN()
network.train(data, all_y_trues)

Epoch 0 loss: 0.156
Epoch 10 loss: 0.093
Epoch 20 loss: 0.068
Epoch 30 loss: 0.053
Epoch 40 loss: 0.043
Epoch 50 loss: 0.036
Epoch 60 loss: 0.030
Epoch 70 loss: 0.026
Epoch 80 loss: 0.023
Epoch 90 loss: 0.020
Epoch 100 loss: 0.018
Epoch 110 loss: 0.017
Epoch 120 loss: 0.015
Epoch 130 loss: 0.014
Epoch 140 loss: 0.013
Epoch 150 loss: 0.012
Epoch 160 loss: 0.011
Epoch 170 loss: 0.010
Epoch 180 loss: 0.010
Epoch 190 loss: 0.009
Epoch 200 loss: 0.009
Epoch 210 loss: 0.008
Epoch 220 loss: 0.008
Epoch 230 loss: 0.007
Epoch 240 loss: 0.007
Epoch 250 loss: 0.007
Epoch 260 loss: 0.007
Epoch 270 loss: 0.006
Epoch 280 loss: 0.006
Epoch 290 loss: 0.006
Epoch 300 loss: 0.006
Epoch 310 loss: 0.005
Epoch 320 loss: 0.005
Epoch 330 loss: 0.005
Epoch 340 loss: 0.005
Epoch 350 loss: 0.005
Epoch 360 loss: 0.005
Epoch 370 loss: 0.004
Epoch 380 loss: 0.004
Epoch 390 loss: 0.004
Epoch 400 loss: 0.004
Epoch 410 loss: 0.004
Epoch 420 loss: 0.004
Epoch 430 loss: 0.004
Epoch 440 loss: 0.004
Epoch 450 loss: 0.004

In [13]:
# Make some predictions for two people the network was not trained on.
# Features are [weight, height]; judging by the numbers in the trailing comments they
# look shifted (weight - 135 lb, height - 66 in) -- NOTE(review): confirm the offsets.
emily = np.array([-7, -3]) # 128 pounds, 63 inches
frank = np.array([20, 2])  # 155 pounds, 68 inches
# Output near 1 -> F, output near 0 -> M. The exact values depend on the network's
# randomly drawn initial weights.
print("Emily: %.3f" % network.feedforward(emily)) # close to 1 - F
print("Frank: %.3f" % network.feedforward(frank)) # close to 0 - M

Emily: 0.967
Frank: 0.039
