## Goal:
* To understand how deep learning works on a deep level
* To understand how to make code more efficient using vectorization
* To implement a neural network from scratch and understand all the math along the way

chain rule: $\frac{d}{dx}\, f(g(x)) = f'(g(x)) \cdot g'(x)$

In [1]:
# Units per layer: n[0] is the input layer, the last entry is the output layer.
n = [2, 3, 3, 1]

print("layer 0 / input layer size", n[0])
# The remaining layers share one message template.
print("\n".join(f"layer {k} size {n[k]}" for k in range(1, len(n))))

layer 0 / input layer size 2
layer 1 size 3
layer 2 size 3
layer 3 size 1


In [2]:
import numpy as np

In [3]:
# Layer k maps n[k-1] inputs to n[k] units: W_k is (n[k], n[k-1]), b_k is (n[k], 1).
# All weights are drawn first (layer 1 to 3), then all biases (layer 1 to 3).
W1, W2, W3 = (np.random.randn(n[k], n[k - 1]) for k in range(1, len(n)))
b1, b2, b3 = (np.random.randn(n[k], 1) for k in range(1, len(n)))

In [4]:
# Report the shape of every parameter: the three weight matrices, then the three biases.
print("\n".join(
    f"{kind} for layer {k} shape: {param.shape}"
    for kind, params in (("Weights", (W1, W2, W3)), ("bias", (b1, b2, b3)))
    for k, param in enumerate(params, start=1)
))

Weights for layer 1 shape: (3, 2)
Weights for layer 2 shape: (3, 3)
Weights for layer 3 shape: (1, 3)
bias for layer 1 shape: (3, 1)
bias for layer 2 shape: (3, 1)
bias for layer 3 shape: (1, 1)


In [5]:
# Peek at the layer-1 weights; no seed is set in the cells above, so the values change on every run.
W1

array([[-0.06262043, -1.48846792],
       [ 0.61068388, -0.9170953 ],
       [ 0.41309267,  1.24040929]])

In [6]:
# Training inputs: one row per sample, two features per row.
X = np.array([
    (150, 70), (254, 73), (312, 68), (120, 60), (154, 61),
    (212, 65), (216, 67), (145, 67), (184, 64), (130, 69),
])

print(X.shape)

(10, 2)


In [7]:
# Put the samples in columns so the whole batch can go through `W @ A0` in one product.
A0 = X.transpose()
print(A0.shape)

(2, 10)


In [8]:
# One 0/1 label per training sample.
y = np.array([0, 1, 1, 0, 0, 1, 1, 0, 1, 0])
m = len(y)  # number of samples (10)

# Lay the labels out as (n[3], m) so they line up with the output layer's activations.
Y = y.reshape((n[3], m))
Y.shape

(1, 10)

In [9]:
def sigmoid(arr):
    """Element-wise logistic function 1 / (1 + e^(-x)).

    Evaluated in a numerically stable form. The naive expression calls
    np.exp(-x), which overflows to inf (and emits a RuntimeWarning) once x
    drops below roughly -709. Here only exp(-|x|) is computed, which lies in
    [0, 1] for every input and therefore cannot overflow.

    Args:
        arr: scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as `arr` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(arr)
    decay = np.exp(-np.abs(x))  # always <= 1
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically the same function
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    return out[()]  # unwraps a 0-d result to a scalar; n-d arrays pass through unchanged

In [10]:
# Spot-check the squashing behaviour: -40 maps to ~0 and 100 maps to 1.0.
sigmoid(np.array([1, 2, 3, -40, 100]))

array([7.31058579e-01, 8.80797078e-01, 9.52574127e-01, 4.24835426e-18,
       1.00000000e+00])

In [11]:
# Forward pass for the whole batch at once; every sample is one column of A0.
# m is the number of training samples (A0 has shape (2, 10)).
m = 10
# layer 1: (n[1], n[0]) @ (n[0], m) -> (n[1], m); b1 is (n[1], 1) and broadcasts over the m columns
Z1 = W1 @ A0 + b1 # @ is the matrix-multiplication operator

assert Z1.shape == (n[1], m) # sanity check: one column of pre-activations per sample
A1 = sigmoid(Z1)

# layer 2: consumes the layer-1 activations, (n[2], n[1]) @ (n[1], m) -> (n[2], m)
Z2 = W2 @ A1 + b2
assert Z2.shape == (n[2], m)
A2 = sigmoid(Z2)

# layer 3 (output): (n[3], n[2]) @ (n[2], m) -> (n[3], m); A3 holds the network's predictions
Z3 = W3 @ A2 + b3
assert Z3.shape == (n[3], m)
A3 = sigmoid(Z3)

In [12]:
# The output-layer activations are the predictions; show their shape, then the values.
y_hat = A3
print(y_hat.shape)
print(y_hat)

(1, 10)
[[0.90827825 0.90827825 0.90827825 0.90827825 0.90827825 0.90827825
  0.90827825 0.90827825 0.90827825 0.90827825]]


### organized version:

In [15]:
import numpy as np

# 1. network architecture
n = [2, 3, 3, 1]  # units per layer, input layer first
L = 3  # number of weight layers (index of the output layer in n)

# 2. weights and biases
# Layer k maps n[k-1] inputs to n[k] units: W_k is (n[k], n[k-1]), b_k is (n[k], 1).
# All weights are drawn first (layer 1 to L), then all biases (layer 1 to L).
W1, W2, W3 = (np.random.randn(n[k], n[k - 1]) for k in range(1, L + 1))
b1, b2, b3 = (np.random.randn(n[k], 1) for k in range(1, L + 1))

# 3. training data and labels
def prepare_data():
    """Assemble the toy training set in column-per-sample layout.

    Reads the module-level `n` and `L` to size the label array.

    Returns:
        tuple: (A0, Y). A0 is the (2, 10) transpose of the sample matrix,
        i.e. one column per sample. Y holds the ten 0/1 labels reshaped to
        (n[L], 10).
    """
    samples = np.array([
        (150, 70), (254, 73), (312, 68), (120, 60), (154, 61),
        (212, 65), (216, 67), (145, 67), (184, 64), (130, 69),
    ])
    labels = np.array([0, 1, 1, 0, 0, 1, 1, 0, 1, 0])
    sample_count = samples.shape[0]

    return samples.T, labels.reshape(n[L], sample_count)

# 4. activation function
def sigmoid(arr):
    """Element-wise logistic function 1 / (1 + e^(-x)).

    Evaluated in a numerically stable form. The naive expression calls
    np.exp(-x), which overflows to inf (and emits a RuntimeWarning) once x
    drops below roughly -709. Here only exp(-|x|) is computed, which lies in
    [0, 1] for every input and therefore cannot overflow.

    Args:
        arr: scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as `arr` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(arr)
    decay = np.exp(-np.abs(x))  # always <= 1
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically the same function
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    return out[()]  # unwraps a 0-d result to a scalar; n-d arrays pass through unchanged

def feed_forward(A0):
    """Run one forward pass through the three-layer network.

    Uses the module-level parameters W1..W3 and b1..b3 and the module-level
    `sigmoid` activation.

    Args:
        A0: input activations with one sample per column.

    Returns:
        The layer-3 activations, i.e. the network's predictions (y_hat).
    """
    activation = A0
    # Each layer applies the same step: affine transform, then sigmoid.
    for weights, bias in ((W1, b1), (W2, b2), (W3, b3)):
        activation = sigmoid(weights @ activation + bias)
    return activation

# Build the training batch, run one forward pass with the untrained (random)
# parameters, and print the resulting predictions.
# NOTE(review): the recorded output is the same number for all 10 samples.
# Presumably the unscaled inputs (values from 60 to 312) saturate the first
# sigmoid layer; consider standardising the features first. TODO confirm.
A0, Y = prepare_data()
y_hat = feed_forward(A0)
print(y_hat)

[[0.11668854 0.11668854 0.11668854 0.11668854 0.11668854 0.11668854
  0.11668854 0.11668854 0.11668854 0.11668854]]


In [20]:
""" Cost function examples:
    Mean squared error
    Root mean squared error
    Mean absolute error....
"""
# I'll use the binary cross-entropy loss: L(y_hat, y) = -(y * log(y_hat) + (1 - y) * log(1 - y_hat))

hey


In [21]:
def cost(y_hat, y, eps=1e-15):
    """Binary cross-entropy cost, averaged over every prediction.

    Per-element loss: -(y * log(y_hat) + (1 - y) * log(1 - y_hat)).

    Args:
        y_hat: 2-D array of predicted probabilities (one sample per column).
        y: array of 0/1 labels, broadcastable against `y_hat`.
        eps: predictions are clipped to [eps, 1 - eps] before taking logs.

    Returns:
        The mean loss over all elements of `y_hat`, as a scalar.
    """
    # np.log(0) is -inf and 0 * -inf is nan, so a single prediction that has
    # saturated to exactly 0 or 1 would turn the whole cost into nan/inf.
    probs = np.clip(y_hat, eps, 1 - eps)
    losses = -(y * np.log(probs) + (1 - y) * np.log(1 - probs))
    m = probs.size  # total number of predictions

    summed_losses = (1 / m) * np.sum(losses, axis=1)

    return np.sum(summed_losses)