In [1]:
# Handwritten Neural Network test: HandwrittenNN.ipynb
# Author: comtalyst

'''
TODO:
- 
'''

In [2]:
import numpy as np
import matplotlib.pyplot as plot

In [3]:
def sigmoid(Z):
    """Apply the logistic function 1 / (1 + e^(-Z)) element-wise.

    Z may be a scalar or a NumPy array; the result follows Z's shape.
    """
    denominator = 1 + np.exp(-Z)
    return 1/denominator

def sigmoid_deriv(Z):
    '''
    Derivative of the sigmoid evaluated element-wise at Z: s(Z) * (1 - s(Z)).
    (Coursera DL Notes, p. 8)
    '''
    s = sigmoid(Z)
    complement = 1 - s
    return np.multiply(s, complement)

In [4]:
def relu(Z):
    """Apply the rectified linear unit element-wise: max(0, z) for every entry of Z.

    Z may be a scalar or a NumPy array; the result follows Z's shape.
    """
    # np.maximum compares element-wise (broadcasting the 0); np.max(0, Z) would
    # instead treat Z as the `axis` argument of a reduction.
    return np.maximum(0, Z)

def relu_deriv(Z):
    '''
    Derivative of ReLU evaluated element-wise at Z: 0 where Z < 0, 1 elsewhere
    (so Z == 0 maps to 1).
    (Coursera DL Notes, p. 9)

    Z may be a scalar or a NumPy array; the result follows Z's shape.
    '''
    # Vectorised on purpose: a plain `if Z < 0` cannot be evaluated for arrays
    # holding more than one element.
    return np.where(np.asarray(Z) < 0, 0, 1)

In [82]:
def activate(Z, activation):
    """Apply the activation named by `activation` to Z.

    "sigmoid" and "relu" dispatch to the matching function; any other name
    leaves Z untouched (identity / linear activation).
    """
    if activation == "relu":
        return relu(Z)
    if activation == "sigmoid":
        return sigmoid(Z)
    # Unrecognised name: pass the input straight through.
    return Z

def activate_deriv(Z, activation):
    """Evaluate the derivative of the activation named by `activation` at Z.

    "sigmoid" and "relu" dispatch to the matching derivative function; any
    other name is treated as the identity activation, whose derivative is the
    scalar 1.
    """
    if activation == "relu":
        return relu_deriv(Z)
    if activation == "sigmoid":
        return sigmoid_deriv(Z)
    # Unrecognised name: derivative of the identity.
    return 1

In [92]:
# These are untested, will be tested when running descent
def compute_loss(Y_, Y):
    """Cross-entropy style loss: -(Y . log(Y_)^T + (1 - Y) . log(1 - Y_)^T).

    Y_: presumably the predicted probabilities -- confirm against callers.
    Y:  presumably the target labels -- confirm against callers.

    The products are matrix (dot) products, so the shared axis is summed away:
    for 2-D inputs of shape (k, m) the result has shape (k, k).

    NOTE(review): the result is a sum over the shared axis, not a per-sample
    loss, and for k > 1 the off-diagonal entries pair row i of Y with row j of
    Y_. Confirm this is the intended quantity.
    """
    positive_term = np.dot(Y, np.log(Y_).T)
    negative_term = np.dot(1-Y, np.log(1-Y_).T)
    return -(positive_term + negative_term)

def compute_cost(Y_, Y):
    """Return the mean of compute_loss(Y_, Y) as a single number.

    Y_ and Y are forwarded unchanged to compute_loss.

    NOTE(review): compute_loss already sums over the sample axis through its
    dot products, so this mean averages the entries of that result rather
    than dividing by the number of samples -- confirm the intended
    normalisation.
    """
    cost = np.mean(compute_loss(Y_, Y))
    # The value has to be handed back; without this the function yields None.
    return cost


In [87]:
# X: input in [features x samples]
# W: learnable parameters in [layer x n(l) x n(l-1)]
# b: bias parameters in [layer x n(l) x 1]
# activations: a list of strings, one per layer (length L), naming the activation to use for each layer
#   example: ["relu", "relu", "sigmoid"] means relu in l = 1,2, sigmoid in l = 3
def forward_propagation(X, W, b, activations):
    """Run the forward pass through layers 1..L and keep every layer's result.

    X: input array; both X.shape[0] and X.shape[1] are read, so it needs at
       least two axes.
    W, b: per-layer parameter containers indexed from 1 (slot 0 is unused);
       W.shape[0] - 1 is taken as the layer count L.
    activations: sequence of activation names; entry l-1 is used for layer l.

    Returns (A, Z): two object arrays of length L+1 where
    Z[l] = dot(W[l], A[l-1]) + b[l] (with X standing in for A[0]) and
    A[l] = activate(Z[l], activations[l-1]). Slot 0 of both stays None.
    """
    num_layers = W.shape[0] - 1
    # Not used below, but indexing both axes fails early on inputs that have
    # fewer than two dimensions.
    n_features, n_samples = X.shape[0], X.shape[1]

    # Object-dtype arrays are used as per-layer containers: every slot can
    # hold a matrix of a different shape, which suits layers of uneven width.
    linear = np.empty(num_layers + 1, dtype=object)
    activated = np.empty(num_layers + 1, dtype=object)

    # Layer 1 consumes the raw input.
    linear[1] = np.dot(W[1], X) + b[1]
    activated[1] = activate(linear[1], activations[0])

    # Layers 2..L consume the previous layer's activation.
    for layer in range(2, num_layers + 1):
        previous = activated[layer - 1]
        linear[layer] = np.dot(W[layer], previous) + b[layer]
        # `activations` is 0-based while layers are 1-based.
        activated[layer] = activate(linear[layer], activations[layer - 1])

    return activated, linear
        

In [88]:
# This is untested; it will be tested using gradient checking
def backward_propagation(X, Z, A, W, b, Y, activations):
    """Back-propagate from layer L down to layer 1.

    X: input array; X.shape[1] is used as the sample count m.
    Z, A: per-layer object arrays indexed from 1, as returned by
       forward_propagation. The caller's A is left unmodified.
    W, b: per-layer parameter containers indexed from 1 (slot 0 is unused);
       W.shape[0] - 1 is taken as the layer count L.
    Y: targets, subtracted element-wise from A[L].
    activations: sequence of activation names; entry l-1 belongs to layer l,
       the same convention forward_propagation uses.

    Returns (dA, db), both object arrays; slots that are never assigned
    (index 0, and dA[L]) stay None.
    (Coursera DL Notes, p. 10)

    NOTE(review): dW is computed for every layer but is not part of the
    return value; the (dA, db) return is kept as-is so existing callers keep
    working. Confirm whether dW should be returned as well.
    NOTE(review): the output-layer step dZ[L] = A[L] - Y never consults
    activations; presumably it assumes a sigmoid output paired with the
    cross-entropy loss -- confirm.
    """
    L = W.shape[0]-1        # layers
    m = X.shape[1]          # samples

    dZ = np.ndarray(shape=Z.shape, dtype=object)
    dA = np.ndarray(shape=A.shape, dtype=object)
    dW = np.ndarray(shape=W.shape, dtype=object)
    db = np.ndarray(shape=b.shape, dtype=object)

    # Slot 0 must hold the input so the dW formula also works for l = 1.
    # Write it into a shallow copy so the caller's array is not altered.
    A = A.copy()
    A[0] = X

    # base case: output layer
    dZ[L] = A[L] - Y
    dW[L] = (1/m)*np.dot(dZ[L], A[L-1].T)
    db[L] = (1/m)*np.sum(dZ[L], axis = 1, keepdims = True)

    # loop the layers L-1 down to 1
    for l in reversed(range(1, L)):
        dA[l] = np.dot(W[l+1].T, dZ[l+1])
        # Layer l's activation name lives at index l-1 (0-based list).
        dZ[l] = np.multiply(dA[l], activate_deriv(Z[l], activations[l-1]))
        dW[l] = (1/m)*np.dot(dZ[l], A[l-1].T)
        db[l] = (1/m)*np.sum(dZ[l], axis = 1, keepdims = True)

    return dA, db


In [90]:
##########      BELOW THIS LINE IS EXPERIMENTAL AREA, THE CODE MAY BE MESSY      ##########








In [7]:
# Scratch check: allocate an uninitialised 4x5 float array, transpose it to
# 5x4, and display it (the values are whatever happened to be in memory).
dumb = np.empty((4, 5)).T
dumb

array([[6.23042070e-307, 2.22523004e-307, 1.69120145e-306,
        9.34583987e-307],
       [4.67296746e-307, 1.29062229e-306, 9.34598246e-307,
        1.24610723e-306],
       [1.69121096e-306, 1.69121367e-306, 9.34599604e-307,
        2.04722549e-306],
       [1.29061821e-306, 8.45603440e-307, 7.56593696e-307,
        4.45051101e-307],
       [2.22522053e-306, 5.11799242e-307, 1.33511562e-306,
        6.23060065e-307]])

In [85]:
def forward_propagation_test():
    """Smoke-run forward_propagation on a random 2-layer network and print it.

    Layout: 2 input features -> 3 units -> 4 units, with 5 samples. Every
    entry is a whole number in 0..9 stored as a float. The activation names
    are all "dumb", i.e. neither "sigmoid" nor "relu".

    Prints X, W, b and A (each preceded by its label) and returns the (A, Z)
    pair produced by forward_propagation.
    """
    num_layers = 2
    n_features, hidden_units, output_units = 2, 3, 4
    n_samples = 5
    activations = ["dumb"] * 3

    def to_digits(raw):
        # Scale by ten, round, and wrap into 0..9.
        return np.round(raw*10) % 10

    # Only X is drawn from the uniform generator; the parameters use randn.
    X = to_digits(np.random.rand(n_features, n_samples))

    W = np.empty(num_layers + 1, dtype=object)
    b = np.empty(num_layers + 1, dtype=object)
    W[1] = to_digits(np.random.randn(hidden_units, n_features))
    W[2] = to_digits(np.random.randn(output_units, hidden_units))
    b[1] = to_digits(np.random.randn(hidden_units, 1))
    b[2] = to_digits(np.random.randn(output_units, 1))

    A, Z = forward_propagation(X, W, b, activations)
    for label, value in (("X", X), ("W", W), ("b", b), ("A", A)):
        print(label)
        print(value)
    return A, Z

def backward_propagation_test():
    """Smoke-run forward_propagation + backward_propagation on random data.

    Layout: 2 input features -> 3 units -> 4 units, with 5 samples; Y has one
    row per output unit. Every entry is a whole number in 0..9 stored as a
    float. The activation names are all "dumb", i.e. neither "sigmoid" nor
    "relu".

    Prints X, W, b and A, then the dA and db returned by backward_propagation
    (each preceded by its label). Returns nothing.
    """
    num_layers = 2
    n_features, hidden_units, output_units = 2, 3, 4
    n_samples = 5
    activations = ["dumb"] * 3

    def random_digits(rows, cols):
        # Normal draw scaled by ten, rounded, and wrapped into 0..9.
        return np.round(np.random.randn(rows, cols)*10) % 10

    X = random_digits(n_features, n_samples)
    Y = random_digits(output_units, n_samples)

    W = np.empty(num_layers + 1, dtype=object)
    b = np.empty(num_layers + 1, dtype=object)
    W[1] = random_digits(hidden_units, n_features)
    W[2] = random_digits(output_units, hidden_units)
    b[1] = random_digits(hidden_units, 1)
    b[2] = random_digits(output_units, 1)

    A, Z = forward_propagation(X, W, b, activations)
    # Inputs and activations are printed before the backward pass runs.
    for label, value in (("X", X), ("W", W), ("b", b), ("A", A)):
        print(label)
        print(value)

    dA, db = backward_propagation(X, Z, A, W, b, Y, activations)
    for label, value in (("dA", dA), ("db", db)):
        print(label)
        print(value)


In [86]:
# Smoke-run the backward pass on random data; the function prints its inputs
# followed by the dA and db it gets back.
backward_propagation_test()

X
[[9. 2. 3. 7. 7.]
 [9. 1. 1. 2. 8.]]
W
[None array([[1., 3.],
       [3., 5.],
       [1., 4.]])
 array([[3., 6., 2.],
       [0., 2., 3.],
       [2., 5., 8.],
       [5., 7., 9.]])]
b
[None array([[4.],
       [6.],
       [8.]])
 array([[1.],
       [1.],
       [1.],
       [6.]])]
A
[None
 array([[40.,  9., 10., 17., 35.],
       [78., 17., 20., 37., 67.],
       [53., 14., 15., 23., 47.]])
 array([[ 695.,  158.,  181.,  320.,  602.],
       [ 316.,   77.,   86.,  144.,  276.],
       [ 895.,  216.,  241.,  404.,  782.],
       [1229.,  296.,  331.,  557., 1073.]])]
dA
[None
 array([[ 9950.,  2357.,  2622.,  4506.,  8672.],
       [17755.,  4186.,  4655.,  8042., 15455.],
       [20402.,  4879.,  5403.,  9197., 17805.]])
 None]
db
[None array([[ 5621.4],
       [10018.6],
       [11537.2]])
 array([[386.2],
       [175.2],
       [501.4],
       [692. ]])]


In [76]:
# range(3, 0) uses the default step of +1, so it yields nothing and this loop
# body never runs; counting down needs range(3, 0, -1) or reversed(range(1, 4)).
for i in range(3, 0):
    print(i)