In [0]:
import numpy as np

def sigmoid(x):
  # Our activation function: f(x) = 1 / (1 + e^(-x))
  return 1 / (1 + np.exp(-x))

class Neuron:
  def __init__(self, weights, bias):
    self.weights = weights
    self.bias = bias

  def feedforward(self, inputs):
    # Weight inputs, add bias, then use the activation function
    total = np.dot(self.weights, inputs) + self.bias
    return sigmoid(total)


In [4]:
class OurNeuralNetwork:
  '''
  A neural network with:
    - 2 inputs
    - a hidden layer with 2 neurons (h1, h2)
    - an output layer with 1 neuron (o1)
  Each neuron has the same weights and bias:
    - w = [0, 1]
    - b = 0
  '''
  def __init__(self):
    weights = np.array([0, 1])
    bias = 0

    # The Neuron class here is from the previous section
    self.h1 = Neuron(weights, bias)
    self.h2 = Neuron(weights, bias)
    self.o1 = Neuron(weights, bias)

  def feedforward(self, x):
    out_h1 = self.h1.feedforward(x)
    out_h2 = self.h2.feedforward(x)

    # The inputs for o1 are the outputs from h1 and h2
    out_o1 = self.o1.feedforward(np.array([out_h1, out_h2]))

    return out_o1

network = OurNeuralNetwork()
x = np.array([2, 3])
print(network.feedforward(x)) # 0.7216325609518421

0.7216325609518421


Relu activation function


In [0]:
def relu(x):
  return max(0, x)

def deriv_relu(x):
  k  = relu(x)
  if(k > 0):
    return 1
  return 0

Define a small dataset

In [0]:
# Define dataset
data = np.array([
  [-2, -1],  # Alice
  [25, 6],   # Bob
  [17, 4],   # Charlie
  [-15, -6], # Diana
])
all_y_trues = np.array([
  1, # Alice
  0, # Bob
  0, # Charlie
  1, # Diana
])


Sigmoid and mse

In [0]:
def sigmoid(x):
  # Sigmoid activation function: f(x) = 1 / (1 + e^(-x))
  return 1 / (1 + np.exp(-x))

def deriv_sigmoid(x):
  # Derivative of sigmoid: f'(x) = f(x) * (1 - f(x))
  fx = sigmoid(x)
  return fx * (1 - fx)

def mse_loss(y_true, y_pred):
  # y_true and y_pred are numpy arrays of the same length.
  return ((y_true - y_pred) ** 2).mean()

Default neural network

In [0]:
import numpy as np



class OurNeuralNetwork:
  '''
  A neural network with:
    - 2 inputs
    - a hidden layer with 2 neurons (h1, h2)
    - an output layer with 1 neuron (o1)
  '''
  def __init__(self):
    # Weights
    self.w1 = np.random.normal()
    self.w2 = np.random.normal()
    self.w3 = np.random.normal()
    self.w4 = np.random.normal()
    self.w5 = np.random.normal()
    self.w6 = np.random.normal()

    # Biases
    self.b1 = np.random.normal()
    self.b2 = np.random.normal()
    self.b3 = np.random.normal()

  def feedforward(self, x):
    # x is a numpy array with 2 elements.
    h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.b1)
    h2 = sigmoid(self.w3 * x[0] + self.w4 * x[1] + self.b2)
    o1 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.b3)
    return o1

  def train(self, data, all_y_trues):
    '''
    - data is a (n x 2) numpy array, n = # of samples in the dataset.
    - all_y_trues is a numpy array with n elements.
      Elements in all_y_trues correspond to those in data.
    '''
    learn_rate = 0.1
    epochs = 1000 # number of times to loop through the entire dataset

    for epoch in range(epochs):
      for x, y_true in zip(data, all_y_trues):
        # --- Do a feedforward (we'll need these values later)
        sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
        h1 = sigmoid(sum_h1)

        sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
        h2 = sigmoid(sum_h2)

        sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
        o1 = sigmoid(sum_o1)
        y_pred = o1

        # --- Calculate partial derivatives.
        # --- Naming: d_L_d_w1 represents "partial L / partial w1"
        d_L_d_ypred = -2 * (y_true - y_pred)

        # Neuron o1
        d_ypred_d_w5 = h1 * deriv_sigmoid(sum_o1)
        d_ypred_d_w6 = h2 * deriv_sigmoid(sum_o1)
        d_ypred_d_b3 = deriv_sigmoid(sum_o1)

        d_ypred_d_h1 = self.w5 * deriv_sigmoid(sum_o1)
        d_ypred_d_h2 = self.w6 * deriv_sigmoid(sum_o1)

        # Neuron h1
        d_h1_d_w1 = x[0] * deriv_sigmoid(sum_h1)
        d_h1_d_w2 = x[1] * deriv_sigmoid(sum_h1)
        d_h1_d_b1 = deriv_sigmoid(sum_h1)

        # Neuron h2
        d_h2_d_w3 = x[0] * deriv_sigmoid(sum_h2)
        d_h2_d_w4 = x[1] * deriv_sigmoid(sum_h2)
        d_h2_d_b2 = deriv_sigmoid(sum_h2)

        # --- Update weights and biases
        # Neuron h1
        self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
        self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
        self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1

        # Neuron h2
        self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3
        self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
        self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2

        # Neuron o1
        self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5
        self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6
        self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3

      # --- Calculate total loss at the end of each epoch
      if epoch % 10 == 0:
        y_preds = np.apply_along_axis(self.feedforward, 1, data)
        loss = mse_loss(all_y_trues, y_preds)
        print("Epoch %d loss: %.3f" % (epoch, loss))



In [0]:
# Train our neural network!
network = OurNeuralNetwork()
network.train(data, all_y_trues)
emily = np.array([-7, -3]) # 128 pounds, 63 inches
frank = np.array([20, 2])  # 155 pounds, 68 inches
claudia = np.array([-12, 2])
jon = np.array([10, 7])
print("Emily: %.3f" % network.feedforward(emily)) # 0.951 - F
print("Frank: %.3f" % network.feedforward(frank)) # 0.039 - M
print("Claudi: %.3f" % network.feedforward(claudia)) # 0.951 - F
print("Jon: %.3f" % network.feedforward(jon)) # 0.039 - M

Neural network with relu activation function for the hidden layer

In [0]:
class OurNeuralNetwork2:
  '''
    The relu activation can be used only inside the network (in the hidden layers). 
    Considering that the output should be a value in [0, 1] that represents the probability of the person
    being a woman, relu output is inapropriate. I used relu for the hidden layer and sigmoid for the output layer

  '''
  def __init__(self):
    # Weights
    self.w1 = np.random.normal()
    self.w2 = np.random.normal()
    self.w3 = np.random.normal()
    self.w4 = np.random.normal()
    self.w5 = np.random.normal()
    self.w6 = np.random.normal()

    # Biases
    self.b1 = np.random.normal()
    self.b2 = np.random.normal()
    self.b3 = np.random.normal()

  def feedforward(self, x):
    # x is a numpy array with 2 elements.
    h1 = relu(self.w1 * x[0] + self.w2 * x[1] + self.b1)
    h2 = relu(self.w3 * x[0] + self.w4 * x[1] + self.b2)
    o1 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.b3)
    return o1

  def train(self, data, all_y_trues):
    learn_rate = 0.10
    epochs = 500 # number of times to loop through the entire dataset

    for epoch in range(epochs):
      for x, y_true in zip(data, all_y_trues):
        # --- Do a feedforward (we'll need these values later)
        sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
        h1 =relu(sum_h1)

        sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
        h2 =relu(sum_h2)

        sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
        o1 =sigmoid(sum_o1)
        y_pred = o1

        # --- Calculate partial derivatives.
        # --- Naming: d_L_d_w1 represents "partial L / partial w1"
        d_L_d_ypred = -2 * (y_true - y_pred)

        # Neuron o1
        d_ypred_d_w5 = h1 * deriv_sigmoid(sum_o1)
        d_ypred_d_w6 = h2 * deriv_sigmoid(sum_o1)
        d_ypred_d_b3 = deriv_sigmoid(sum_o1)

        d_ypred_d_h1 = self.w5 * deriv_relu(sum_o1)
        d_ypred_d_h2 = self.w6 * deriv_relu(sum_o1)

        # Neuron h1
        d_h1_d_w1 = x[0] * deriv_relu(sum_h1)
        d_h1_d_w2 = x[1] * deriv_relu(sum_h1)
        d_h1_d_b1 = deriv_relu(sum_h1)

        # Neuron h2
        d_h2_d_w3 = x[0] * deriv_relu(sum_h2)
        d_h2_d_w4 = x[1] * deriv_relu(sum_h2)
        d_h2_d_b2 = deriv_relu(sum_h2)

        # --- Update weights and biases
        # Neuron h1
        self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
        self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
        self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1

        # Neuron h2
        self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3
        self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
        self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2

        # Neuron o1
        self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5
        self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6
        self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3

      # --- Calculate total loss at the end of each epoch
      if epoch % 10 == 0:
        y_preds = np.apply_along_axis(self.feedforward, 1, data)
        #print(y_preds)
        #print(all_y_trues)
        loss = mse_loss(all_y_trues, y_preds)
        #print(str(self.w1) + " " + str(self.w2) + " " + str(self.w3) + " " + str(self.w4)+ " " + str(self.w5)+ " " + str(self.w6)+ " ")
        print("Epoch %d loss: %.3f" % (epoch, loss))

In [161]:
network2 = OurNeuralNetwork2()
network2.train(data, all_y_trues)
print("Emily: %.3f" % network2.feedforward(emily)) # 0.951 - F
print("Frank: %.3f" % network2.feedforward(frank)) # 0.039 - M
print("Claudi: %.3f" % network2.feedforward(claudia)) # 0.951 - F
print("Jon: %.3f" % network2.feedforward(jon)) # 0.039 - M

Epoch 0 loss: 0.395
Epoch 10 loss: 0.357
Epoch 20 loss: 0.297
Epoch 30 loss: 0.217
Epoch 40 loss: 0.140
Epoch 50 loss: 0.087
Epoch 60 loss: 0.057
Epoch 70 loss: 0.040
Epoch 80 loss: 0.030
Epoch 90 loss: 0.023
Epoch 100 loss: 0.019
Epoch 110 loss: 0.016
Epoch 120 loss: 0.013
Epoch 130 loss: 0.012
Epoch 140 loss: 0.010
Epoch 150 loss: 0.009
Epoch 160 loss: 0.008
Epoch 170 loss: 0.008
Epoch 180 loss: 0.007
Epoch 190 loss: 0.006
Epoch 200 loss: 0.006
Epoch 210 loss: 0.005
Epoch 220 loss: 0.005
Epoch 230 loss: 0.005
Epoch 240 loss: 0.005
Epoch 250 loss: 0.004
Epoch 260 loss: 0.004
Epoch 270 loss: 0.004
Epoch 280 loss: 0.004
Epoch 290 loss: 0.003
Epoch 300 loss: 0.003
Epoch 310 loss: 0.003
Epoch 320 loss: 0.003
Epoch 330 loss: 0.003
Epoch 340 loss: 0.003
Epoch 350 loss: 0.003
Epoch 360 loss: 0.003
Epoch 370 loss: 0.003
Epoch 380 loss: 0.002
Epoch 390 loss: 0.002
Epoch 400 loss: 0.002
Epoch 410 loss: 0.002
Epoch 420 loss: 0.002
Epoch 430 loss: 0.002
Epoch 440 loss: 0.002
Epoch 450 loss: 0.002

Add one more neuron in the hidden layer

In [0]:
class OurNeuralNetwork3:
  '''
  This neural network has an extra neuron in the hidden layer
  '''
  def __init__(self):
    # Weights
    self.w1 = np.random.normal()
    self.w2 = np.random.normal()
    self.w3 = np.random.normal()
    self.w4 = np.random.normal()
    self.w5 = np.random.normal()
    self.w6 = np.random.normal()
    self.w7 = np.random.normal()
    self.w8 = np.random.normal()
    self.w9 = np.random.normal()

    # Biases
    self.b1 = np.random.normal()
    self.b2 = np.random.normal()
    self.b3 = np.random.normal()
    self.b4 = np.random.normal()

  def feedforward(self, x):
    # x is a numpy array with 2 elements.
    h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.b1)
    h2 = sigmoid(self.w3 * x[0] + self.w4 * x[1] + self.b2)
    h3 = sigmoid(self.w7 * x[0] + self.w8 * x[1] + self.b4)
    o1 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.w9 * h3 + self.b3)
    return o1

  def train(self, data, all_y_trues):
    '''
    - data is a (n x 2) numpy array, n = # of samples in the dataset.
    - all_y_trues is a numpy array with n elements.
      Elements in all_y_trues correspond to those in data.
    '''
    learn_rate = 0.001
    epochs = 1000 # number of times to loop through the entire dataset

    for epoch in range(epochs):
      for x, y_true in zip(data, all_y_trues):
        # --- Do a feedforward (we'll need these values later)
        sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
        h1 =sigmoid(sum_h1)

        sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
        h2 =sigmoid(sum_h2)

        sum_h3 = self.w7 * x[0] + self.w8 * x[1] + self.b4
        h3 =sigmoid(sum_h3)

        sum_o1 = self.w5 * h1 + self.w6 * h2 + self.w8 * h3 + self.b3
        o1 =sigmoid(sum_o1)
        y_pred = o1

        # --- Calculate partial derivatives.
        # --- Naming: d_L_d_w1 represents "partial L / partial w1"
        d_L_d_ypred = -2 * (y_true - y_pred)

        # Neuron o1
        d_ypred_d_w5 = h1 * deriv_sigmoid(sum_o1)
        d_ypred_d_w6 = h2 * deriv_sigmoid(sum_o1)
        d_ypred_d_w9 = h3 * deriv_sigmoid(sum_o1)
        d_ypred_d_b3 = deriv_sigmoid(sum_o1)

        d_ypred_d_h1 = self.w5 * deriv_sigmoid(sum_o1)
        d_ypred_d_h2 = self.w6 * deriv_sigmoid(sum_o1)
        d_ypred_d_h3 = self.w9 * deriv_sigmoid(sum_o1)

        # Neuron h1
        d_h1_d_w1 = x[0] * deriv_sigmoid(sum_h1)
        d_h1_d_w2 = x[1] * deriv_sigmoid(sum_h1)
        d_h1_d_b1 = deriv_sigmoid(sum_h1)

        # Neuron h2
        d_h2_d_w3 = x[0] * deriv_sigmoid(sum_h2)
        d_h2_d_w4 = x[1] * deriv_sigmoid(sum_h2)
        d_h2_d_b2 = deriv_sigmoid(sum_h2)


        # Neuron h3
        d_h3_d_w7 = x[0] * deriv_sigmoid(sum_h3)
        d_h3_d_w8 = x[1] * deriv_sigmoid(sum_h3)
        d_h3_d_b4 = deriv_sigmoid(sum_h3)

        # --- Update weights and biases
        # Neuron h1
        self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
        self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
        self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1

        # Neuron h2
        self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3
        self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
        self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2

        # Neuron h3
        self.w7 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_w7
        self.w8 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_w8
        self.b4 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_b4

        # Neuron o1
        self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5
        self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6
        self.w9 -= learn_rate * d_L_d_ypred * d_ypred_d_w9
        self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3

      # --- Calculate total loss at the end of each epoch
      if epoch % 10 == 0:
        y_preds = np.apply_along_axis(self.feedforward, 1, data)
        loss = mse_loss(all_y_trues, y_preds)
        print("Epoch %d loss: %.3f" % (epoch, loss))
    

In [163]:
network3 = OurNeuralNetwork3()
network3.train(data, all_y_trues)
print("Emily: %.3f" % network3.feedforward(emily)) # 0.951 - F
print("Frank: %.3f" % network3.feedforward(frank)) # 0.039 - M
print("Claudi: %.3f" % network3.feedforward(claudia)) # 0.951 - F
print("Jon: %.3f" % network3.feedforward(jon)) # 0.039 - M

Epoch 0 loss: 0.782
Epoch 10 loss: 0.780
Epoch 20 loss: 0.779
Epoch 30 loss: 0.778
Epoch 40 loss: 0.776
Epoch 50 loss: 0.775
Epoch 60 loss: 0.773
Epoch 70 loss: 0.772
Epoch 80 loss: 0.771
Epoch 90 loss: 0.769
Epoch 100 loss: 0.768
Epoch 110 loss: 0.767
Epoch 120 loss: 0.765
Epoch 130 loss: 0.764
Epoch 140 loss: 0.762
Epoch 150 loss: 0.761
Epoch 160 loss: 0.760
Epoch 170 loss: 0.758
Epoch 180 loss: 0.757
Epoch 190 loss: 0.756
Epoch 200 loss: 0.754
Epoch 210 loss: 0.753
Epoch 220 loss: 0.751
Epoch 230 loss: 0.750
Epoch 240 loss: 0.748
Epoch 250 loss: 0.747
Epoch 260 loss: 0.746
Epoch 270 loss: 0.744
Epoch 280 loss: 0.743
Epoch 290 loss: 0.741
Epoch 300 loss: 0.740
Epoch 310 loss: 0.738
Epoch 320 loss: 0.737
Epoch 330 loss: 0.735
Epoch 340 loss: 0.734
Epoch 350 loss: 0.733
Epoch 360 loss: 0.731
Epoch 370 loss: 0.730
Epoch 380 loss: 0.728
Epoch 390 loss: 0.727
Epoch 400 loss: 0.725
Epoch 410 loss: 0.724
Epoch 420 loss: 0.722
Epoch 430 loss: 0.721
Epoch 440 loss: 0.719
Epoch 450 loss: 0.718

Add an extra hidden layer. For the begining, I used the same model as above, writing the functions for each neuron.

In [0]:
class OurNeuralNetwork41:
  def __init__(self):
    # Weights
    self.w1 = np.random.normal()
    self.w2 = np.random.normal()
    self.w3 = np.random.normal()
    self.w4 = np.random.normal()
    self.w5 = np.random.normal()
    self.w6 = np.random.normal()
    self.w7 = np.random.normal()
    self.w8 = np.random.normal()
    self.w9 = np.random.normal()
    self.w10 = np.random.normal()

    # Biases
    self.b1 = np.random.normal()
    self.b2 = np.random.normal()
    self.b3 = np.random.normal()
    self.b4 = np.random.normal()
    self.b5 = np.random.normal()

  def feedforward(self, x):
    # x is a numpy array with 2 elements.
    h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.b1)
    h2 = sigmoid(self.w3 * x[0] + self.w4 * x[1] + self.b2)
    h3 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.b3)
    h4 = sigmoid(self.w7 * h1 + self.w8 * h2 + self.b4)
    o1 = sigmoid(self.w9 * h3 + self.w10 * h4 + self.b5)
    return o1

  def train(self, data, all_y_trues):
    
    learn_rate = 0.1
    epochs = 1000 # number of times to loop through the entire dataset

    for epoch in range(epochs):
      for x, y_true in zip(data, all_y_trues):
        # --- Do a feedforward (we'll need these values later)
        sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
        h1 = sigmoid(sum_h1)

        sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
        h2 = sigmoid(sum_h2)

        sum_h3 = self.w5 * h1 + self.w6 * h2 + self.b3
        h3 = sigmoid(sum_h3)

        sum_h4 = self.w7 * h1 + self.w8 * h2 + self.b4
        h4 = sigmoid(sum_h4)

        sum_o1 = self.w9 * h3 + self.w10 * h4 + self.b5
        o1 = sigmoid(sum_o1)
        y_pred = o1

        # --- Calculate partial derivatives.
        # --- Naming: d_L_d_w1 represents "partial L / partial w1"
        d_L_d_ypred = -2 * (y_true - y_pred)

        # Neuron o1
        d_ypred_d_w9 = h3 * deriv_sigmoid(sum_o1)
        d_ypred_d_w10 = h4 * deriv_sigmoid(sum_o1)
        d_ypred_d_b5 = deriv_sigmoid(sum_o1)
        d_ypred_d_h3 = self.w9 * deriv_sigmoid(sum_o1)
        d_ypred_d_h4 = self.w10 * deriv_sigmoid(sum_o1)

        #neuron h3
        d_h3_d_w5 = h1 * deriv_sigmoid(sum_h3)
        d_h3_d_w6 = h2 * deriv_sigmoid(sum_h3)
        d_h3_d_h1 = self.w5 * deriv_sigmoid(sum_h3)
        d_h3_d_h2 = self.w6 * deriv_sigmoid(sum_h3)
        d_h3_d_b3 = deriv_sigmoid(sum_h3)

        #neuron h4
        d_h4_d_w7 = h1 * deriv_sigmoid(sum_h4)
        d_h4_d_w8 = h2 * deriv_sigmoid(sum_h4)
        d_h4_d_h1 = self.w7 * deriv_sigmoid(sum_h4)
        d_h4_d_h2 = self.w8 * deriv_sigmoid(sum_h4)
        d_h4_d_b4 = deriv_sigmoid(sum_h4)
        
        # Neuron h1
        d_h1_d_w1 = x[0] * deriv_sigmoid(sum_h1)
        d_h1_d_w2 = x[1] * deriv_sigmoid(sum_h1)
        d_h1_d_b1 = deriv_sigmoid(sum_h1)

        # Neuron h2
        d_h2_d_w3 = x[0] * deriv_sigmoid(sum_h2)
        d_h2_d_w4 = x[1] * deriv_sigmoid(sum_h2)
        d_h2_d_b2 = deriv_sigmoid(sum_h2)

        # --- Update weights and biases
        # Neuron h1
        self.w1 -= learn_rate * d_L_d_ypred * (d_ypred_d_h3 * d_h3_d_h1 +  d_ypred_d_h4 * d_h4_d_h1) * d_h1_d_w1 
        self.w2 -= learn_rate * d_L_d_ypred *(d_ypred_d_h3 * d_h3_d_h1 +  d_ypred_d_h4 * d_h4_d_h1) * d_h1_d_w2
        self.b1 -= learn_rate * d_L_d_ypred * (d_ypred_d_h3 * d_h3_d_h1 +  d_ypred_d_h4 * d_h4_d_h1) * d_h1_d_b1
        # Neuron h2
        self.w3 -= learn_rate * d_L_d_ypred * (d_ypred_d_h3 * d_h3_d_h2 +  d_ypred_d_h4 * d_h4_d_h2) * d_h2_d_w3 
        self.w4 -= learn_rate * d_L_d_ypred *  (d_ypred_d_h3 * d_h3_d_h2 +  d_ypred_d_h4 * d_h4_d_h2) * d_h2_d_w4
        self.b2 -= learn_rate * d_L_d_ypred *  (d_ypred_d_h3 * d_h3_d_h2 +  d_ypred_d_h4 * d_h4_d_h2) * d_h2_d_b2

        # Neuron h3
        self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_w5
        self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_w6
        self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_b3

        # Neuron h4
        self.w7 -= learn_rate * d_L_d_ypred * d_ypred_d_h4 * d_h4_d_w7
        self.w8 -= learn_rate * d_L_d_ypred * d_ypred_d_h4 * d_h4_d_w8
        self.b4 -= learn_rate * d_L_d_ypred * d_ypred_d_h4 * d_h4_d_b4

        # Neuron o1
        self.w9 -= learn_rate * d_L_d_ypred * d_ypred_d_w9
        self.w10 -= learn_rate * d_L_d_ypred * d_ypred_d_w10
        self.b5 -= learn_rate * d_L_d_ypred * d_ypred_d_b5

      # --- Calculate total loss at the end of each epoch
      if epoch % 10 == 0:
        y_preds = np.apply_along_axis(self.feedforward, 1, data)
        loss = mse_loss(all_y_trues, y_preds)
        print("Epoch %d loss: %.3f" % (epoch, loss))

In [165]:
network41 = OurNeuralNetwork41()
network41.train(data, all_y_trues)
emily = np.array([-7, -3]) # 128 pounds, 63 inches
frank = np.array([20, 2])  # 155 pounds, 68 inches
claudia = np.array([-12, 2])
jon = np.array([10, 7])
print("Emily: %.3f" % network41.feedforward(emily)) # 0.951 - F
print("Frank: %.3f" % network41.feedforward(frank)) # 0.039 - M
print("Claudi: %.3f" % network41.feedforward(claudia)) # 0.951 - F
print("Jon: %.3f" % network41
      .feedforward(jon)) # 0.039 - M


Epoch 0 loss: 0.386
Epoch 10 loss: 0.336
Epoch 20 loss: 0.285
Epoch 30 loss: 0.257
Epoch 40 loss: 0.246
Epoch 50 loss: 0.240
Epoch 60 loss: 0.234
Epoch 70 loss: 0.227
Epoch 80 loss: 0.219
Epoch 90 loss: 0.208
Epoch 100 loss: 0.195
Epoch 110 loss: 0.181
Epoch 120 loss: 0.165
Epoch 130 loss: 0.149
Epoch 140 loss: 0.133
Epoch 150 loss: 0.117
Epoch 160 loss: 0.103
Epoch 170 loss: 0.090
Epoch 180 loss: 0.079
Epoch 190 loss: 0.069
Epoch 200 loss: 0.061
Epoch 210 loss: 0.054
Epoch 220 loss: 0.048
Epoch 230 loss: 0.043
Epoch 240 loss: 0.038
Epoch 250 loss: 0.034
Epoch 260 loss: 0.031
Epoch 270 loss: 0.028
Epoch 280 loss: 0.026
Epoch 290 loss: 0.024
Epoch 300 loss: 0.022
Epoch 310 loss: 0.021
Epoch 320 loss: 0.019
Epoch 330 loss: 0.018
Epoch 340 loss: 0.017
Epoch 350 loss: 0.016
Epoch 360 loss: 0.015
Epoch 370 loss: 0.014
Epoch 380 loss: 0.013
Epoch 390 loss: 0.012
Epoch 400 loss: 0.012
Epoch 410 loss: 0.011
Epoch 420 loss: 0.011
Epoch 430 loss: 0.010
Epoch 440 loss: 0.010
Epoch 450 loss: 0.009

I tried to implement the neural network with matrix.

In [0]:
class OurNeuralNetwork4:
  '''
  This neural network has 2 hidden layers
  '''
  def __init__(self):
    # Weights
    self.w1 = np.random.rand(2, 2)
    self.w2 = np.random.rand(2, 2)
    self.w3 = np.random.rand(1, 2)
    
    # Biases
    self.b1 = np.random.rand(2, 1)
    self.b2 = np.random.rand(2, 1)
    self.b3 = np.random.normal()

  def feedforward(self, x):
    # x is a numpy array with 2 elements.
    x = x.reshape(2,1)
    l1 = sigmoid(np.dot(self.w1 , x) + self.b1)
    l2 = sigmoid(np.dot(self.w2 , l1) + self.b2)
    o1 = sigmoid(np.dot(self.w3 , l2) + self.b3)
    o1 = o1[0][0]

    return o1
  def train(self, data, all_y_trues):
    learn_rate = 0.1
    epochs = 1000 # number of times to loop through the entire dataset
    for epoch in range(epochs):
      for x, y_true in zip(data, all_y_trues):
        x = x.reshape(2,1)
        sum_l1 = np.dot(self.w1 , x) + self.b1
        l1 = sigmoid(sum_l1)
        sum_l2 = np.dot(self.w2 , l1) + self.b2
        l2 = sigmoid(sum_l2)
        sum_o1 = np.dot(self.w3 , l2) + self.b3
        o1 = sigmoid(sum_o1)
        y_pred = o1[0][0]
        d_L_d_ypred = -2 * (y_true - y_pred)
        d_ypred_d_w3 = np.dot(l2 , deriv_sigmoid(sum_o1))
        d_ypred_d_b3 = deriv_sigmoid(sum_o1)
        d_ypred_d_l2 = self.w3 * deriv_sigmoid(sum_o1)

        d_l2_d_w2 = np.dot(deriv_sigmoid(sum_l2), l1.transpose())
        d_l2_d_l1 = self.w2 * deriv_sigmoid(sum_l2)
        d_l2_d_b2 = deriv_sigmoid(sum_l2)
        
        d_l1_d_w1 = np.dot(deriv_sigmoid(sum_l1), x.transpose())
        d_l1_d_b1 = deriv_sigmoid(sum_l1)
    
        self.w3  -= (learn_rate * d_L_d_ypred * d_ypred_d_w3).transpose();
        self.b3  -= learn_rate * d_L_d_ypred * d_ypred_d_b3;
        self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_l2.transpose() * d_l2_d_w2
        self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_l2.transpose() * d_l2_d_b2
        self.w1 -= learn_rate * d_L_d_ypred * np.dot(d_ypred_d_l2 , d_l2_d_l1) * d_l1_d_w1
        self.b1 -= learn_rate * d_L_d_ypred * np.dot(d_ypred_d_l2 , d_l2_d_l1).transpose() * d_l1_d_b1

        '''if epoch % 10 == 0:
          y_preds = np.apply_along_axis(self.feedforward, 1, data)
          loss = mse_loss(all_y_trues, y_preds)
          print("Epoch %d loss: %.3f" % (epoch, loss))'''



In [167]:
network4 = OurNeuralNetwork4()
network4.train(data, all_y_trues)
emily = np.array([-7, -3]) # 128 pounds, 63 inches
frank = np.array([20, 2])  # 155 pounds, 68 inches
claudia = np.array([-12, 2])
jon = np.array([10, 7])
print("Emily: %.3f" % network4.feedforward(emily)) # 0.951 - F
print("Frank: %.3f" % network4.feedforward(frank)) # 0.039 - M
print("Claudi: %.3f" % network4.feedforward(claudia)) # 0.951 - F
print("Jon: %.3f" % network41
      .feedforward(jon)) # 0.039 - M

Emily: 0.945
Frank: 0.064
Claudi: 0.945
Jon: 0.051


Generate dataset. Height and weight are generated as distributions. Labels are generated considering height and weight. If a person is above the average, that means he is probably a male, and if not, is probably a female.

In [0]:
height= np.round( np.random.normal(1.75, 0.20, 100), 2)
weight= np.round( np.random.normal(60.32, 15, 100), 2)
np_people=np.column_stack((height, weight))
label = np.where(np.logical_and(height < 1.75, weight < 60) , 1, 0)

In [172]:

network4 = OurNeuralNetwork4()
network4.train(np_people[0:80], label[:80])
y_preds = np.apply_along_axis(network4.feedforward, 1, np_people[80:])
print("loss")
loss = mse_loss(label[80:], y_preds)
print(loss)
y_pred = np.where(y_preds < 0.5, 0, 1)
print("nr predictii gresite")
print(np.count_nonzero(y_pred==label[80:]))


loss
0.16975212928336433
nr predictii gresite
16


Added the minibatch.
Before recalculationg the weights, the NN takes a random number between 0 and the (length of the training - no of the batch size).
Batch size is 20% of total number 

In [0]:
class OurNeuralNetwork5:
  def __init__(self):
    # Weights
    self.w1 = np.random.normal()
    self.w2 = np.random.normal()
    self.w3 = np.random.normal()
    self.w4 = np.random.normal()
    self.w5 = np.random.normal()
    self.w6 = np.random.normal()

    # Biases
    self.b1 = np.random.normal()
    self.b2 = np.random.normal()
    self.b3 = np.random.normal()

  def feedforward(self, x):
    # x is a numpy array with 2 elements.
    h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.b1)
    h2 = sigmoid(self.w3 * x[0] + self.w4 * x[1] + self.b2)
    o1 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.b3)
    return o1

  def train(self, my_data, my_all_y_trues):
    '''
    - data is a (n x 2) numpy array, n = # of samples in the dataset.
    - all_y_trues is a numpy array with n elements.
      Elements in all_y_trues correspond to those in data.
    '''
    learn_rate = 0.1
    epochs = 1000 # number of times to loop through the entire dataset
    data_len = my_data.shape[0]
    compute_nb = int(0.2 * data_len)
    for epoch in range(epochs):
      mini_batch_data = np.random.randint(0, data_len - compute_nb)
      data = my_data[mini_batch_data: mini_batch_data + compute_nb]
      all_y_trues = my_all_y_trues[mini_batch_data: mini_batch_data + compute_nb]
      for x, y_true in zip(data, all_y_trues):
        # --- Do a feedforward (we'll need these values later)
        sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
        h1 = sigmoid(sum_h1)

        sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
        h2 = sigmoid(sum_h2)

        sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
        o1 = sigmoid(sum_o1)
        y_pred = o1

        # --- Calculate partial derivatives.
        # --- Naming: d_L_d_w1 represents "partial L / partial w1"
        d_L_d_ypred = -2 * (y_true - y_pred)

        # Neuron o1
        d_ypred_d_w5 = h1 * deriv_sigmoid(sum_o1)
        d_ypred_d_w6 = h2 * deriv_sigmoid(sum_o1)
        d_ypred_d_b3 = deriv_sigmoid(sum_o1)

        d_ypred_d_h1 = self.w5 * deriv_sigmoid(sum_o1)
        d_ypred_d_h2 = self.w6 * deriv_sigmoid(sum_o1)

        # Neuron h1
        d_h1_d_w1 = x[0] * deriv_sigmoid(sum_h1)
        d_h1_d_w2 = x[1] * deriv_sigmoid(sum_h1)
        d_h1_d_b1 = deriv_sigmoid(sum_h1)

        # Neuron h2
        d_h2_d_w3 = x[0] * deriv_sigmoid(sum_h2)
        d_h2_d_w4 = x[1] * deriv_sigmoid(sum_h2)
        d_h2_d_b2 = deriv_sigmoid(sum_h2)

        # --- Update weights and biases
        # Neuron h1
        self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
        self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
        self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1

        # Neuron h2
        self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3
        self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
        self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2

        # Neuron o1
        self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5
        self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6
        self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3

      # --- Calculate total loss at the end of each epoch
      if epoch % 10 == 0:
        y_preds = np.apply_along_axis(self.feedforward, 1, data)
        loss = mse_loss(all_y_trues, y_preds)
        print("Epoch %d loss: %.3f" % (epoch, loss))

In [0]:

'''print(np_people)
print(label)'''
network5 = OurNeuralNetwork5()
network5.train(np_people[0:80], label[:80])
y_preds = np.apply_along_axis(network5.feedforward, 1, np_people[80:])
print("loss")
loss = mse_loss(label[80:], y_preds)
print(loss)
y_pred = np.where(y_preds < 0.5, 0, 1)
print("nr predictii gresite")
print(np.count_nonzero(y_pred==label[80:]))
