## This will be a simple, bare-bones feedforward neural network for learning purposes.

Tutorial source:https://victorzhou.com/blog/intro-to-neural-networks/?fbclid=IwAR1nCgd2a1GzX1LvdrT_0cHn3Bl3xFJF7VUrmQw3j9CTMOwjfVGef52phBE

In [1]:
# Let's define our functions (sigmoid activation function, neuron, and feedforward)
import numpy as np

def sigmoid(x):
    # Activation function: f(x) = 1 / (1 + e^(-x))
    return 1 / (1 + np.exp(-x))

class Neuron:
    def __init__(self, weights, bias):
        self.weights = weights
        self.bias = bias
        
    def feedforward(self, inputs):
        # Weight inputs, add bias, then use the activation function
        total = np.dot(self.weights, inputs) + self.bias
        return sigmoid(total)

In [2]:
# Let's test with some sample numbers

weights = np.array([0,1])
bias = 4
n = Neuron(weights, bias)

x = np.array([2, 3])
print(n.feedforward(x))

0.9990889488055994


In [4]:
 # Now let's define our Neural Network class
class OurNeuralNetwork:
    """
    A neural network with
        - two inputs
        - a hidden layer with two neurons (h1, h2)
        - an output layer with one neuron (o1)
    Each neuron has the same weights and biase
        - w = [0, 1]
        - b = 0
    """
    def __init__(self):
        weights = np.array([0, 1])
        bias = 0
        
        self.h1 = Neuron(weights, bias)
        self.h2 = Neuron(weights, bias)
        self.o1 = Neuron(weights, bias)
        
    def feedforward(self, x):
        out_h1 = self.h1.feedforward(x)
        out_h2 = self.h2.feedforward(x)
        
        # Inputs for o1 are the outputs from h1 and h2
        out_o1 = self.o1.feedforward(np.array([out_h1, out_h2]))
        
        return out_o1

In [5]:
network = OurNeuralNetwork()
x = np.array([2, 3])
print(network.feedforward(x))

0.7216325609518421


## Now let's train our neural network to predict someone's gender given their height and weight

We have the following measurements:
- Alice, 133, 65, F
- Bob, 160, 72, M
- Charlie, 152, 70, M
- Diana, 120, 60, F

Let's represent Male with 0 and Female with 1 and shift the data to make it easier to use. Normally you will shift by the mean.

**Note:** With a larger dataset, we should read in our data, then write a function to make this conversion and possibly make an ID column instead of using names.

- Alice, -2, -1, 1
- Bob, 25, 6, 0
- Charlie, 17, 4, 0
- Diana, -15, -6, 1

Before we train our network, let's define how to quantify how good it is. We will use the mean squared error (MSE) for our loss. It finds the difference between the correct answer and our model's predicted answer, squares that number, sums the squares, then divides by the number of samples.


In [6]:
# Define MSE (loss)
def mse_loss(y_true, y_pred):
    # y_true and y_pred are numpy arrays of the same length.
    return((y_true - y_pred) **2).mean()

y_true = np.array([1, 0, 0, 1]) 
y_pred = np.array([0, 0, 0, 0]) # This array would occur if the model predicted all males


print(mse_loss(y_true, y_pred))

0.5


Our training process will use stochastic gradient descent (we only operate on one sample at a time), which will tell us how to change our weights and biases to minimize loss.
The process
* 1 Choose one sample from dataset
* 2 Calculate all the partial derivates of loss with respect to weights or biases.
* 3 Use the update equation to update each weight and bias.
* 4 Go back to step 1.

In [20]:
import numpy as np

def sigmoid(x):
    # Sigmoid activation function: f(x) = 1 / (1 + e^(-x))
    return 1 / (1 + np.exp(-x))

def deriv_sigmoid(x):
    # Derivative of sigmoid: f'(x) = f(x) * (1 - f(x))
    fx = sigmoid(x)
    return fx * (1 - fx)

class OurNeuralNetwork:
    def __init__(self):
        # Weights
        self.w1 = np.random.normal()
        self.w2 = np.random.normal()
        self.w3 = np.random.normal()
        self.w4 = np.random.normal()
        self.w5 = np.random.normal()
        self.w6 = np.random.normal()

        # Biases
        self.b1 = np.random.normal()
        self.b2 = np.random.normal()
        self.b3 = np.random.normal()

    def feedforward(self, x):
        # x is a numpy array with 2 elements.
        h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.b1)
        h2 = sigmoid(self.w3 * x[0] + self.w4* x[1] + self.b2)
        o1 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.b3)
        return o1

    def train(self, data, all_y_trues):
        """
        - data is a (n x 2) numpy array, n = # of samples in the dataset.
        - all_y_trues is a numpy array with n elements.
          Elements in all_y_trues correspond to those in data.
          """
        learn_rate = 0.1
        epochs = 1000 # number of times to loop through the entire dataset

        for epoch in range(epochs):
            for x, y_true in zip(data, all_y_trues):
                # ---Do a feedforward (we'll need these values later)
                sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b2
                h1 = sigmoid(sum_h1)

                sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
                h2 = sigmoid(sum_h2)

                sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
                o1 = sigmoid(sum_o1)
                y_pred = o1

                # --- Calculate partial derivatives
                # --- naming represents "partial L / partial w1"
                d_L_d_ypred = -2 * (y_true - y_pred)

                # Neuron o1
                d_ypred_d_w5 = h1 * deriv_sigmoid(sum_o1)
                d_ypred_d_w6 = h2 * deriv_sigmoid(sum_o1)
                d_ypred_d_b3 = deriv_sigmoid(sum_o1)

                d_ypred_d_h1 = self.w5 * deriv_sigmoid(sum_o1)
                d_ypred_d_h2 = self.w6 * deriv_sigmoid(sum_o1)

                # Neuron h1
                d_h1_d_w1 = x[0] * deriv_sigmoid(sum_h1)
                d_h1_d_w2 = x[1] * deriv_sigmoid(sum_h1)
                d_h1_d_b1 = deriv_sigmoid(sum_h1)

                # Neuron h2
                d_h2_d_w3 = x[0] * deriv_sigmoid(sum_h2)
                d_h2_d_w4 = x[1] * deriv_sigmoid(sum_h2)
                d_h2_d_b2 = deriv_sigmoid(sum_h2)

                # --- Update weights and biases
                # Neuron h1
                self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
                self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
                self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1

                # Neuron h2
                self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3
                self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
                self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2

                # Neuron o1
                self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5
                self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6
                self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3

                # --- Calculate total loss at the end of each epoch
                if epoch % 10 == 0:
                    y_preds = np.apply_along_axis(self.feedforward, 1, data)
                    loss = mse_loss(all_y_trues, y_preds)
                    print("Epoch %d loss: %.3f" % (epoch, loss))         
            

In [21]:
# Define dataset
data = np.array([
    [-2, -1], # Alice
    [25, 6], # Bob
    [17, 4], # Charlie
    [-15, -6], # Diana
])
all_y_trues = np.array([
    1,
    0,
    0,
    1
])

# Train our neural network!
network = OurNeuralNetwork()
network.train(data, all_y_trues)
    

Epoch 0 loss: 0.526
Epoch 0 loss: 0.526
Epoch 0 loss: 0.525
Epoch 0 loss: 0.522
Epoch 10 loss: 0.422
Epoch 10 loss: 0.422
Epoch 10 loss: 0.422
Epoch 10 loss: 0.415
Epoch 20 loss: 0.298
Epoch 20 loss: 0.298
Epoch 20 loss: 0.297
Epoch 20 loss: 0.292
Epoch 30 loss: 0.223
Epoch 30 loss: 0.222
Epoch 30 loss: 0.221
Epoch 30 loss: 0.219
Epoch 40 loss: 0.175
Epoch 40 loss: 0.173
Epoch 40 loss: 0.172
Epoch 40 loss: 0.171
Epoch 50 loss: 0.138
Epoch 50 loss: 0.137
Epoch 50 loss: 0.136
Epoch 50 loss: 0.136
Epoch 60 loss: 0.111
Epoch 60 loss: 0.110
Epoch 60 loss: 0.109
Epoch 60 loss: 0.109
Epoch 70 loss: 0.090
Epoch 70 loss: 0.089
Epoch 70 loss: 0.089
Epoch 70 loss: 0.088
Epoch 80 loss: 0.074
Epoch 80 loss: 0.074
Epoch 80 loss: 0.073
Epoch 80 loss: 0.073
Epoch 90 loss: 0.062
Epoch 90 loss: 0.062
Epoch 90 loss: 0.062
Epoch 90 loss: 0.062
Epoch 100 loss: 0.053
Epoch 100 loss: 0.053
Epoch 100 loss: 0.053
Epoch 100 loss: 0.053
Epoch 110 loss: 0.046
Epoch 110 loss: 0.046
Epoch 110 loss: 0.046
Epoch 110 

## Let's make some predictions!

In [22]:
emily = np.array([-7, -3]) # 128 pounds, 63 inches
frank = np.array([20, 2]) # 155 pounds, 68 inches
print("Emily: %.3f" % network.feedforward(emily))
print("Frank: %.3f" % network.feedforward(frank))

Emily: 0.967
Frank: 0.057
