In [983]:
import numpy as np

In [984]:
class Layer():
  def __init__(self, input_size, output_size):
    self.W = np.zeros((input_size, output_size))
    self.b = np.zeros(output_size)

layer = Layer(4, 3)
layer.W

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [985]:
class Layer():
  def __init__(self, input_size, output_size):
    self.W = np.zeros((input_size, output_size))
    self.b = np.zeros(output_size)
  
  def forward(self, input):
    return np.dot(self.W.T, input) + self.b

layer = Layer(4, 3)
layer.W = np.array([[1, 2, 3], [3, 4, 5], [5, 6, 7], [7, 8, 9]])
layer.b = np.ones((3, 1))
input = np.array([[1, 2, 3, 4]]).T
input, layer.forward(input)

(array([[1],
        [2],
        [3],
        [4]]),
 array([[51.],
        [61.],
        [71.]]))

In [986]:
layer = Layer(4, 3)
layer.W = np.array([[1, 2, 3], [3, 4, 5], [5, 6, 7], [7, 8, 9]])
layer.b = np.array([2, 2, 2])

input = np.array([1, 2, 3, 4])
input, layer.forward(input)

(array([1, 2, 3, 4]), array([52, 62, 72]))

In [987]:
# switch to row vectors
class Layer():
  def __init__(self, input_size, output_size):
    self.W = np.zeros((input_size, output_size))
    self.b = np.zeros(output_size)
  
  def forward(self, input):
    return np.dot(input, self.W) + self.b

layer = Layer(4, 3)
layer.W = np.array([[1, 2, 3], [3, 4, 5], [5, 6, 7], [7, 8, 9]])
layer.b = np.ones((1,3))

input = np.array([[1, 2, 3, 4]])
input, layer.forward(input)

(array([[1, 2, 3, 4]]), array([[51., 61., 71.]]))

In [988]:
layer = Layer(4, 3)
layer.W = np.array([[1, 2, 3], [3, 4, 5], [5, 6, 7], [7, 8, 9]])
layer.b = np.array([2, 2, 2])

input = np.array([1, 2, 3, 4])
input, layer.forward(input)

(array([1, 2, 3, 4]), array([52, 62, 72]))

In [989]:
class NeuralNetwork():
  def __init__(self):
    self.layers = []

  def add(self, layer):
    self.layers.append(layer)

  def forward(self, input):
    for layer in self.layers:
      # output of current layer becomes input of next layer
      input = layer.forward(input)
    return input

nn = NeuralNetwork()
nn.add(Layer(5, 3))
nn.add(Layer(3, 1))

# set weights to 1 instead of 0
for layer in nn.layers:
  layer.W += 1

input = np.array([1,2,3,4,5])
nn.forward(input)


array([45.])

In [990]:
nn = NeuralNetwork()
nn.add(Layer(5, 3))
nn.add(Layer(3, 1))

age = 20
weight = 200
some_medical_number = 0.5
some_medical_number2 = 114
some_medical_number3 = -7.12

input = np.array([age, weight, some_medical_number, some_medical_number2, some_medical_number3])
prediction = nn.forward(input)
prediction

array([0.])

In [991]:
nn = NeuralNetwork()
nn.add(Layer(5, 3))
nn.add(Layer(3, 1))

for layer in nn.layers:
  layer.W += 1

sample_1 = np.random.randn(5)
sample_2 = np.random.randn(5)
sample_3 = np.random.randn(5)
samples = np.array([sample_1, sample_2, sample_3])

samples, nn.forward(samples)

(array([[-0.75057531, -0.50684212, -0.48841224,  0.92122166,  1.09457649],
        [-3.50109046,  0.09410201, -0.52806599, -1.05141354, -0.70532309],
        [-0.07247148, -1.29050858,  0.21669748,  1.33593511,  0.84481254]]),
 array([[  0.80990542],
        [-17.07537319],
        [  3.10339523]]))

In [992]:
class MSE():
  def loss(self, Y_hat, Y):
    return np.mean(np.square(Y - Y_hat))

predictions = np.array([0.5, 2.4, 2.9])
true_values = np.array([1, 2, 3])

error = MSE()
error.loss(predictions, true_values)

0.13999999999999999

In [993]:
class MSE():
  @staticmethod
  def loss(Y_hat, Y):
    return np.mean(np.square(Y - Y_hat))

  @staticmethod
  def loss_derivative(Y_hat, Y):
    n = Y_hat.size
    return 2 * (Y_hat - Y) /n

predictions = np.array([[0.5, 2.4], [3.1, 10]])
true_values = np.array([[1, 2], [3, 4]])

error = MSE
print(error.loss(predictions, true_values))
print(error.loss_derivative(predictions, true_values))

9.105
[[-0.25  0.2 ]
 [ 0.05  3.  ]]


In [994]:
class Layer():
  def __init__(self, input_size, output_size):
    self.W = np.zeros((input_size, output_size))
    self.b = np.zeros(output_size)
  
  def forward(self, input):
    self.X = input
    return np.dot(input, self.W) + self.b

  def backward(self, dL_dY):
    dL_dX = np.dot(dL_dY, self.W.T)

    dL_dW_transpose = np.dot(self.X.T, dL_dY) 
    dL_dB = dL_dY
    print('Weights:\n', dL_dW_transpose)
    print((self.X.T, dL_dY, dL_dX))
    print('Biases:\n', dL_dB)

    return dL_dX

nn = NeuralNetwork()
layer1 = Layer(5, 3)
layer2 = Layer(3, 2)
nn.add(layer1)
nn.add(layer2)

input = np.array([[1, 1, 1, 1, 1]])
prediction = nn.forward(input)
true_value = np.array([[1, 1]])

input, prediction, true_value

(array([[1, 1, 1, 1, 1]]), array([[0., 0.]]), array([[1, 1]]))

In [995]:
loss = MSE.loss(prediction, true_value)
dL_dYhat = MSE.loss_derivative(prediction, true_value)

loss, dL_dYhat

(1.0, array([[-1., -1.]]))

In [996]:
dL_dZ = layer2.backward(dL_dYhat)
dL_dZ

Weights:
 [[0. 0.]
 [0. 0.]
 [0. 0.]]
(array([[0.],
       [0.],
       [0.]]), array([[-1., -1.]]), array([[0., 0., 0.]]))
Biases:
 [[-1. -1.]]


array([[0., 0., 0.]])

In [997]:
layer1.backward(dL_dZ)

Weights:
 [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
(array([[1],
       [1],
       [1],
       [1],
       [1]]), array([[0., 0., 0.]]), array([[0., 0., 0., 0., 0.]]))
Biases:
 [[0. 0. 0.]]


array([[0., 0., 0., 0., 0.]])

In [998]:
class Layer():
  def __init__(self, input_size, output_size):
    # initialize weights and biases to 0
    self.W = np.zeros((input_size, output_size))
    self.b = np.zeros((1, output_size))
  
  def forward(self, input):
    self.X = input # store input to use it during backprogation
    return np.dot(input, self.W) + self.b

  def backward(self, dL_dY, learning_rate):
    # input derivative to be used by the previous layer
    dL_dX = np.dot(dL_dY, self.W.T)

    # weights and biases derivatives
    dL_dW_transpose = np.dot(self.X.T, dL_dY) 
    dL_dB = dL_dY

    # update weights and biases
    self.W -= learning_rate *  dL_dW_transpose
    self.b -= learning_rate * dL_dB

    return dL_dX

class NeuralNetwork():
  def __init__(self, learning_rate=0.01):
    self.layers = []
    self.learning_rate = learning_rate

  def add(self, layer):
    self.layers.append(layer)

  def forward(self, input):
    for layer in self.layers:
      # output of current layer becomes input of next layer
      input = layer.forward(input)
    return input
  
  def backward(self, dL_dY):
    for layer in reversed(self.layers):
      # output derivative of current layer becomes input derivative of next layer
      dL_dY = layer.backward(dL_dY, self.learning_rate)
    return dL_dY 

nn = NeuralNetwork()
nn.add(Layer(5, 3))
nn.add(Layer(3, 2))

input = np.array([1, 1, 1, 1, 1])
predictions = nn.forward(input)
true_values = np.array([1, 1])
print(predictions, true_values, MSE.loss_derivative(predictions, true_values))
nn.backward(MSE.loss_derivative(predictions, true_values))

[[0. 0.]] [1 1] [[-1. -1.]]


ValueError: shapes (5,) and (1,3) not aligned: 5 (dim 0) != 1 (dim 0)

In [999]:
class Layer():
  def __init__(self, input_size, output_size):
    # initialize weights and biases
    self.W = np.ones((input_size, output_size))
    self.b = np.zeros((1, output_size))
  
  def forward(self, input):
    self.X = input # store input to use it during backprogation
    return np.dot(input, self.W) + self.b

  def backward(self, dL_dY, learning_rate):
    # input derivative to be used by the previous layer
    dL_dX = np.dot(dL_dY, self.W.T)

    # weights and biases derivatives
    dL_dW_transpose = np.dot(self.X.T, dL_dY) 
    dL_dB = dL_dY

    # update weights and biases
    self.W -= learning_rate *  dL_dW_transpose
    self.b -= learning_rate * dL_dB

    return dL_dX

class NeuralNetwork():
  def __init__(self, error):
    self.layers = []
    self.error = error

  def add(self, layer):
    self.layers.append(layer)

  def forward(self, input):
    for layer in self.layers:
      # output of current layer becomes input of next layer
      input = layer.forward(input)
    return input
  
  def backward(self, dL_dY, learning_rate):
    for layer in reversed(self.layers):
      # input derivative of current layer becomes output derivative of previous layer
      dL_dY = layer.backward(dL_dY, learning_rate)
    return dL_dY 

  def train(self, X, Y, iterations=100, learning_rate=0.01):
    for i in range(iterations):
      Y_hat = self.forward(X)
      dL_dY_hat = self.error.loss_derivative(Y_hat, Y)
      self.backward(dL_dY_hat, learning_rate)
      
      
nn = NeuralNetwork(error=MSE)
nn.add(Layer(5, 3))
nn.add(Layer(3, 2))

X = np.array([[1, 1, 1, 1, 1]])
Y = np.array([[1, 1]])
nn.train(X, Y, iterations=5)

for layer in nn.layers:
  print(layer.W, layer.b)

X, nn.forward(X), Y

[[0.69844916 0.69844916 0.69844916]
 [0.69844916 0.69844916 0.69844916]
 [0.69844916 0.69844916 0.69844916]
 [0.69844916 0.69844916 0.69844916]
 [0.69844916 0.69844916 0.69844916]] [[-0.30155084 -0.30155084 -0.30155084]]
[[0.15949004 0.15949004]
 [0.15949004 0.15949004]
 [0.15949004 0.15949004]] [[-0.18297055 -0.18297055]]


(array([[1, 1, 1, 1, 1]]), array([[1.34368168, 1.34368168]]), array([[1, 1]]))

In [1000]:
class Layer():
  def __init__(self, input_size, output_size):
    # initialize weights and biases
    self.W = np.ones((input_size, output_size))
    self.b = np.zeros((1, output_size))
  
  def forward(self, input):
    self.X = input # store input to use it during backprogation
    return np.dot(input, self.W) + self.b

  def backward(self, dL_dY, learning_rate):
    # input derivative to be used by the previous layer
    dL_dX = np.dot(dL_dY, self.W.T)

    # weights and biases derivatives
    dL_dW_transpose = np.dot(self.X.T, dL_dY) 
    dL_dB = dL_dY

    # update weights and biases
    self.W -= learning_rate *  dL_dW_transpose
    self.b -= learning_rate * np.sum(dL_dB, axis=0, keepdims=True)

    return dL_dX

class NeuralNetwork():
  def __init__(self, error):
    self.layers = []
    self.error = error

  def add(self, layer):
    self.layers.append(layer)

  def forward(self, input):
    for layer in self.layers:
      # output of current layer becomes input of next layer
      input = layer.forward(input)
    return input
  
  def backward(self, dL_dY, learning_rate):
    for layer in reversed(self.layers):
      # input derivative of current layer becomes output derivative of previous layer
      dL_dY = layer.backward(dL_dY, learning_rate)
    return dL_dY 

  def train(self, X, Y, iterations=100, learning_rate=0.01):
    for i in range(iterations):
      Y_hat = self.forward(X)
      dL_dY_hat = self.error.loss_derivative(Y_hat, Y)
      self.backward(dL_dY_hat, learning_rate)
      
      
nn = NeuralNetwork(error=MSE)
nn.add(Layer(5, 3))
nn.add(Layer(3, 2))

X = np.array([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2]])
Y = np.array([[1, 2], [2, 4]])
nn.train(X, Y, iterations=100)

for layer in nn.layers:
  print(layer.W, layer.b)

X, nn.forward(X), Y

[[-0.13059845 -0.13059845 -0.13059845]
 [-0.13059845 -0.13059845 -0.13059845]
 [-0.13059845 -0.13059845 -0.13059845]
 [-0.13059845 -0.13059845 -0.13059845]
 [-0.13059845 -0.13059845 -0.13059845]] [[-0.5729722 -0.5729722 -0.5729722]]
[[-0.37990553 -0.63897373]
 [-0.37990553 -0.63897373]
 [-0.37990553 -0.63897373]] [[-0.2271764  0.1221751]]


(array([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2]]),
 array([[1.1700756 , 2.47225234],
        [1.91430169, 3.72398702]]),
 array([[1, 2],
        [2, 4]]))

In [1001]:
nn = NeuralNetwork(error=MSE)
nn.add(Layer(5, 3))
nn.add(Layer(3, 2))

X = np.array([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [5, 5, 5, 5, 5]])
Y = np.array([[1, 2], [2, 4], [5, 10]])
nn.train(X, Y, iterations=10, learning_rate=0.001)

for layer in nn.layers:
  print(layer.W, layer.b)

X, nn.forward(X), Y

[[0.7046685 0.7046685 0.7046685]
 [0.7046685 0.7046685 0.7046685]
 [0.7046685 0.7046685 0.7046685]
 [0.7046685 0.7046685 0.7046685]
 [0.7046685 0.7046685 0.7046685]] [[-0.07866017 -0.07866017 -0.07866017]]
[[0.09930589 0.19351691]
 [0.09930589 0.19351691]
 [0.09930589 0.19351691]] [[-0.05208275 -0.04607867]]


(array([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2],
        [5, 5, 5, 5, 5]]),
 array([[ 0.97414901,  1.95373418],
        [ 2.02381502,  3.99921325],
        [ 5.17281304, 10.13565048]]),
 array([[ 1,  2],
        [ 2,  4],
        [ 5, 10]]))

In [1002]:
XOR_input = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
XOR_output = np.array([[0], [1], [1], [0]])

nn = NeuralNetwork(MSE)
nn.add(Layer(2, 2))
nn.add(Layer(2, 1))

nn.train(XOR_input, XOR_output)

for layer in nn.layers:
  print(layer.W, layer.b)

XOR_input, nn.forward(XOR_input), XOR_output

[[0.73609391 0.73609391]
 [0.73609391 0.73609391]] [[-0.10073698 -0.10073698]]
[[0.31283932]
 [0.31283932]] [[-0.02498902]]


(array([[0, 0],
        [0, 1],
        [1, 0],
        [1, 1]]),
 array([[-0.088018  ],
        [ 0.37254023],
        [ 0.37254023],
        [ 0.83309847]]),
 array([[0],
        [1],
        [1],
        [0]]))

In [1003]:
class ReLU():
  def forward(self, input):
    self.X = input
    return np.maximum(0, input)
  
  def backward(self, dL_dY, learning_rate=0):
    dY_dX = 1 * (self.X > 0)
    # print(('Test', dL_dY))
    # print(( dY_dX, self.X))
    return dL_dY * dY_dX

input = np.array([-1, 0, 0.2, 1])
activation = ReLU()
activation.forward(input)
activation.backward(np.ones(4))

array([0., 0., 1., 1.])

In [1633]:
class Layer():
  def __init__(self, input_size, output_size):
    # initialize weights and biases
    self.W = np.random.rand(input_size, output_size) - .5
    # self.W = np.ones((input_size, output_size))
    # self.b = (np.random.rand(1, output_size) - .5)*100
    self.b = np.zeros((1, output_size))
  
  def forward(self, input):
    self.X = input # store input to use it during backprogation
    return np.dot(input, self.W) + self.b

  def backward(self, dL_dY, learning_rate):
    # input derivative to be used by the previous layer
    dL_dX = np.dot(dL_dY, self.W.T)

    # weights and biases derivatives
    dL_dW_transpose = np.dot(self.X.T, dL_dY) 
    dL_dB = dL_dY

    # update weights and biases
    self.W -= learning_rate *  dL_dW_transpose
    self.b -= learning_rate * np.sum(dL_dB, axis=0, keepdims=True)

    return dL_dX

XOR_input = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
XOR_output = np.array([[0], [1], [1], [0]])

nn = NeuralNetwork(MSE)
nn.add(Layer(2, 2))
nn.add(ReLU())
nn.add(Layer(2, 1))

In [1626]:
nn.train(XOR_input, XOR_output, iterations=6000 ,learning_rate=0.001)

print(nn.forward(XOR_input))

for layer in nn.layers:
  try:
    print((layer.W, layer.b))
  except:
    pass

[[0.48172703]
 [0.57250466]
 [0.6112832 ]
 [0.32930414]]
(array([[0.27462132, 0.47141235],
       [0.19242211, 0.47139885]]), array([[ 0.16181266, -0.47152328]]))
(array([[ 0.47176298],
       [-0.79093198]]), array([[0.40538981]]))


In [1635]:
layer1 = Layer(2, 2)
layer1.W = np.array([[-0.81781753,  0.71323677], [ 0.48803631, -0.71286155]])
layer2 = Layer(2, 1)
layer2.W = np.array([[2.04849235], [1.40170791]])

nn = NeuralNetwork(MSE)
nn.add(layer1)
nn.add(ReLU())
nn.add(layer2)

print(nn.forward(XOR_input))

# make sure code works as expected
nn.train(XOR_input, XOR_output, iterations=10000)
print(nn.forward(XOR_input)) 
print(nn.error.loss(nn.forward(XOR_input), XOR_output))

[[0.00000000e+00]
 [9.99738648e-01]
 [9.99749622e-01]
 [5.25948842e-04]]
[[5.94526151e-15]
 [1.00000000e+00]
 [1.00000000e+00]
 [6.10090960e-15]]
4.1896998398519884e-29


In [1654]:
attempts = 0
while True:
  attempts += 1
  nn = NeuralNetwork(MSE)
  nn.add(Layer(2,2))
  nn.add(ReLU())
  nn.add(Layer(2, 1))

  nn.train(XOR_input, XOR_output, iterations=1000)
  loss = nn.error.loss(nn.forward(XOR_input), XOR_output)
  if loss < 0.1:
    break

In [1655]:
for layer in nn.layers:
  try:
    print((layer.W, layer.b))
  except:
    pass
  
nn.forward(XOR_input), loss, attempts

(array([[-0.73481199,  0.74329449],
       [ 0.73471697, -0.74351637]]), array([[0.00022715, 0.00017662]]))
(array([[1.07380845],
       [1.05957544]]), array([[0.11986323]]))


(array([[0.12029428],
        [0.90905243],
        [0.90762695],
        [0.1200051 ]]),
 0.01141904476365414,
 29)