In [2]:
# Handwritten Neural Network test: HandwrittenNN.ipynb
# Author: comtalyst

'''
TODO:
- continue on fundamentals structure (probably start with initialize variables)
- test backprop by gradient checking 
- test cost function by running grad
- generate test data if possible
'''

'\nTODO:\n- continue on fundamentals structure (probably start with initialize variables)\n- test backprop by gradient checking \n- test cost function by running grad\n- generate test data if possible\n'

In [3]:
import numpy as np
import matplotlib.pyplot as plt

In [45]:
### Activation functions

## Sigmoid
def sigmoid(Z):
    '''
    Logistic sigmoid applied element-wise: 1 / (1 + e^(-Z)).

    Z may be a scalar or a numpy array; the result has the same shape.
    '''
    denominator = 1 + np.exp(-Z)
    return 1/denominator

def sigmoid_deriv(Z):
    '''
    Element-wise derivative of the sigmoid evaluated at Z: s(Z) * (1 - s(Z)).
    (Coursera DL Notes, p. 8)
    '''
    s = sigmoid(Z)
    complement = 1 - s
    return np.multiply(s, complement)

## ReLu
def relu(Z):
    '''
    Rectified linear unit applied element-wise: max(0, Z).

    Z may be a scalar or a numpy array; the result has the same shape.
    '''
    # np.maximum is the element-wise maximum; np.max(0, Z) would treat Z as the
    # `axis` argument of a reduction and raise.
    A = np.maximum(0, Z)
    return A

def relu_deriv(Z):
    '''
    Element-wise derivative of ReLU evaluated at Z.
    (Coursera DL Notes, p. 9)

    Returns 0 where Z < 0 and 1 elsewhere (including Z == 0).
    A scalar Z yields a plain 0 or 1; an array Z yields an array of the same shape.
    '''
    if np.ndim(Z) == 0:
        # scalar input: a plain comparison is unambiguous
        return 0 if Z < 0 else 1
    # array input: `if Z < 0` would raise, so select per element instead
    return np.where(np.asarray(Z) < 0, 0, 1)

## Global interface
def activate(Z, activation):
    '''
    Apply the activation named by `activation` to Z.

    activation: string, matched case-insensitively. "sigmoid" and "relu" are
                recognized; any other name leaves Z unchanged (identity).
    Returns the activated values (Z itself for the identity case).
    '''
    # str.lower() is a method; there is no builtin lower() function
    activation = activation.lower()
    if activation == "sigmoid":
        return sigmoid(Z)
    elif activation == "relu":
        return relu(Z)
    else:
        return Z

def activate_deriv(Z, activation):
    '''
    Derivative, evaluated at Z, of the activation named by `activation`.

    activation: string, matched case-insensitively. "sigmoid" and "relu" are
                recognized; any other name is treated as the identity, whose
                derivative is the constant 1.
    '''
    # str.lower() is a method; there is no builtin lower() function
    activation = activation.lower()
    if activation == "sigmoid":
        return sigmoid_deriv(Z)
    elif activation == "relu":
        return relu_deriv(Z)
    else:
        return 1

In [5]:
### Cost functions

# These are untested, will be tested when running descent
def compute_loss(Y_, Y):
    '''
    Cross-entropy loss, summed over the last (sample) axis.
    (Coursera DL Notes, p. 4)
    (C1W4 , Block 5)

    Y_: predictions, same shape as Y
    Y:  0-1 targets

    For 2-D inputs of shape (k, m) the result has shape (k, 1): one summed loss
    per output row. With k == 1 this is the same (1, 1) value the previous
    np.dot formulation produced. With k > 1 the np.dot form returned a (k, k)
    matrix whose off-diagonal entries paired the targets of one row with the
    predictions of another; those cross terms are no longer computed.
    '''
    # element-wise terms, so each target is only combined with its own prediction
    per_entry = np.multiply(Y, np.log(Y_)) + np.multiply(1 - Y, np.log(1 - Y_))
    if np.ndim(per_entry) == 0:
        # scalar inputs: nothing to sum over
        return -per_entry
    # keep the trailing axis for 2-D inputs so (1, m) still yields (1, 1);
    # 1-D inputs collapse to a scalar as before
    loss = -np.sum(per_entry, axis=-1, keepdims=np.ndim(per_entry) > 1)
    return loss

def compute_cost(Y_, Y):
    '''
    Collapse the output of compute_loss(Y_, Y) into a single scalar with np.mean.

    NOTE(review): compute_loss already sums over the samples, so this mean is
    taken over its (small) output and is not divided by the number of samples
    -- confirm that is the intended normalization.
    '''
    return np.mean(compute_loss(Y_, Y))

In [6]:
### Initialization

def initialize_parameters(layers):
    '''
    Create the weight and bias containers for a fully connected network.

    layers: sequence of layer sizes, layers[0] being the input size.

    Returns (W, b): two 1-D object arrays of length len(layers). Slot 0 is left
    empty (None) so that index l refers to layer l. W[l] is drawn from a
    standard normal with shape (layers[l], layers[l-1]); b[l] is a zero column
    of shape (layers[l], 1).
    '''
    slots = len(layers)
    W = np.ndarray(shape=[slots], dtype=object)
    b = np.ndarray(shape=[slots], dtype=object)

    # walk consecutive (previous size, current size) pairs, numbering layers from 1
    for idx, (fan_in, fan_out) in enumerate(zip(layers[:-1], layers[1:]), start=1):
        W[idx] = np.random.randn(fan_out, fan_in)
        b[idx] = np.zeros((fan_out, 1))

    return W, b

In [7]:
### Forward propagation

def forward_propagation(X, W, b, activations):
    '''
    Forward pass through every layer.

    X: input in [features x samples]
    W: learnable parameters in [layer x n(l) x n(l-1)]
    b: bias parameters in [layer x n(l) x 1]
    activations: list of strings, one per layer, naming the activation of each layer
        example: [relu, relu, sigmoid] means relu in l = 1,2, sigmoid in l = 3

    Returns (A, Z): object arrays indexed by layer number, holding the
    activated and the linear output of each layer. Slot 0 of both is left
    unset.
    '''
    depth = W.shape[0] - 1                          # number of layers
    # not used below; kept so X is still indexed on two axes exactly as before
    num_features, num_samples = X.shape[0], X.shape[1]

    # Object-dtype arrays act as ragged containers: each slot can hold a matrix
    # of a different shape, which an uneven network needs.
    Z = np.ndarray(shape=[depth + 1], dtype=object)
    A = np.ndarray(shape=[depth + 1], dtype=object)

    # layer 1 reads the raw input
    first_linear = np.dot(W[1], X) + b[1]
    Z[1] = first_linear
    A[1] = activate(first_linear, activations[0])

    # layers 2..depth read the previous layer's activation;
    # activations[] is offset by one because it has no entry for the input layer
    for layer in range(2, depth + 1):
        previous = A[layer - 1]
        Z[layer] = np.dot(W[layer], previous) + b[layer]
        A[layer] = activate(Z[layer], activations[layer - 1])

    return A, Z
        

In [8]:
### Backward Propagation

# This is untested, will be tested using gradient checking
def backward_propagation(X, Z, A, W, b, Y, activations):
    '''
    Backward pass: gradients of the cost with respect to every W[l] and b[l].
    (Coursera DL Notes, p. 10)

    X: input in [features x samples]
    Z, A: per-layer linear / activated outputs as returned by forward_propagation
    W, b: parameters, object arrays indexed by layer number (slot 0 unused)
    Y: targets, same shape as A[L]
    activations: list of strings; entry l-1 names the activation of layer l
                 (same convention as forward_propagation)

    Returns (dW, db): object arrays shaped like W and b; slot 0 is left unset.

    Side effect: A[0] is set to X on the caller's array.

    NOTE(review): the output-layer step uses dZ[L] = A[L] - Y without consulting
    activations; presumably this assumes a sigmoid output with the
    cross-entropy loss -- confirm.
    '''
    L = W.shape[0]-1        # layers
    m = X.shape[1]          # samples

    dZ = np.ndarray(shape=Z.shape, dtype=object)
    dW = np.ndarray(shape=W.shape, dtype=object)
    db = np.ndarray(shape=b.shape, dtype=object)

    A[0] = X                # so that A[l-1] works when l = 1

    # base case: output layer
    dZ[L] = A[L] - Y
    dW[L] = (1/m)*np.dot(dZ[L], A[L-1].T)
    db[L] = (1/m)*np.sum(dZ[L], axis = 1, keepdims = True)

    # loop the layers L-1 to 1
    for l in reversed(range(1, L)):
        dA_l = np.dot(W[l+1].T, dZ[l+1])
        # activations[] is offset by one: layer l was activated with
        # activations[l-1] in the forward pass, so differentiate that same one
        dZ[l] = np.multiply(dA_l, activate_deriv(Z[l], activations[l-1]))
        dW[l] = (1/m)*np.dot(dZ[l], A[l-1].T)
        db[l] = (1/m)*np.sum(dZ[l], axis = 1, keepdims = True)

    return dW, db


In [9]:
### Update parameters

def _gradient_step(params, grads, learning_rate):
    # Apply params <- params - learning_rate * grads, skipping unset (None) layer slots.
    if getattr(params, "dtype", None) != object:
        # plain numeric container: the vectorized in-place update works as-is
        params -= learning_rate*grads
        return params
    for l in range(len(params)):
        if params[l] is None or grads[l] is None:
            # slot 0 of the per-layer arrays is never filled; multiplying None
            # by the learning rate would raise TypeError
            continue
        params[l] = params[l] - learning_rate*grads[l]
    return params


def update_parameters(W, b, dW, db, learning_rate=0.01):
    '''
    One gradient-descent step on the weights and biases.

    W, b:   parameters; either per-layer object arrays (unset slots are None)
            or plain numeric arrays
    dW, db: gradients laid out like W and b
    learning_rate: step size

    The containers W and b are updated in place and also returned.
    '''
    W = _gradient_step(W, dW, learning_rate)
    b = _gradient_step(b, db, learning_rate)
    return W, b

In [10]:
### Train model

def train_model(X, Y, layers, activations, learning_rate, iterations):
    '''
    Train a network with plain gradient descent.

    X, Y:          training inputs and targets
    layers:        layer sizes handed to initialize_parameters
    activations:   activation names handed to the forward and backward passes
    learning_rate: step size handed to update_parameters
    iterations:    number of forward / backward / update rounds

    Returns the parameters (W, b) after the last round.
    '''
    W, b = initialize_parameters(layers)
    for _ in range(iterations):
        A, Z = forward_propagation(X, W, b, activations)
        gradients = backward_propagation(X, Z, A, W, b, Y, activations)
        W, b = update_parameters(W, b, gradients[0], gradients[1], learning_rate)
    return W, b

In [11]:
##########      BELOW THIS LINE IS PERFORMANCE AREA      ##########

In [44]:
### Process raw data
## read file
path = "data/"
filename = "2d_2color_dots.txt"

## generate a long, combined string from the file
# `with` closes the handle even if reading fails
with open(path + filename, "r") as f:
    fstr = f.read()
fstr = fstr.replace('\n', ' ')
fstr = fstr.strip()

## process file
# split() without an argument collapses runs of whitespace, so a space before a
# newline or a double space does not produce empty tokens that fail to parse
flist = fstr.split()
fnparray = np.array(flist)
if len(flist) % 3 != 0:
    # the tokens are read as three equal-length runs (x, y, label)
    raise ValueError("expected a multiple of 3 values in " + path + filename + ", got " + str(len(flist)))
m = len(flist)//3

## ready to put to our inputs
# layout of the token stream: first m values -> X[0], next m -> X[1], last m -> labels
X = np.ndarray([2, m])
X[0] = fnparray[0:m].astype(float)
X[1] = fnparray[m:2*m].astype(float)
Y_r = fnparray[2*m:].astype(int)                # raw labels; not ready for training because it is not a 0-1 array yet

## convert Y to 0-1 2d array
# in this case, we have two possible outcomes (true-false)
Y = np.ndarray([2, m])
Y[0] = (Y_r == 0)
Y[1] = (Y_r == 1)

In [None]:
### Create and train a model from provided data
layers = [2, 3, 3, 3, 2]            # first and last is defined by input type (2D coords, true-false)
# forward_propagation reads activations[l-1] for every layer l = 1..len(layers)-1,
# so one entry per non-input layer is required (4 here). Hidden relu + output
# sigmoid follows the example given with forward_propagation.
activations = ['relu', 'relu', 'relu', 'sigmoid']
# These two were not defined anywhere in the notebook; the values are starting
# points to tune (0.01 mirrors the default of update_parameters).
learning_rate = 0.01
iterations = 1000
W, b = train_model(X, Y, layers, activations, learning_rate, iterations)

In [16]:



##########      BELOW THIS LINE IS EXPERIMENTAL AREA, THE CODE MAY BE MESSY      ##########




In [7]:
# Scratch check: np.ndarray allocates without initializing, so the 5x4
# transpose shown below contains whatever happened to be in memory.
dumb = np.ndarray(shape=[4, 5]).T
dumb

array([[6.23042070e-307, 2.22523004e-307, 1.69120145e-306,
        9.34583987e-307],
       [4.67296746e-307, 1.29062229e-306, 9.34598246e-307,
        1.24610723e-306],
       [1.69121096e-306, 1.69121367e-306, 9.34599604e-307,
        2.04722549e-306],
       [1.29061821e-306, 8.45603440e-307, 7.56593696e-307,
        4.45051101e-307],
       [2.22522053e-306, 5.11799242e-307, 1.33511562e-306,
        6.23060065e-307]])

In [31]:
def forward_propagation_test():
    '''
    Manual check of forward_propagation on a random 2-layer network
    (2 inputs -> 3 units -> 4 units, 5 samples).

    All values are rounded to single digits so the printed matrices can be
    verified by hand. The activation names are unrecognized on purpose, so
    every layer is linear. Prints X, W, b and A, then returns (A, Z).
    '''
    n, n1, n2, m = 2, 3, 4, 5
    L = 2
    activations = ["dumb"] * 3

    def digits(rows, cols):
        # random single-digit values drawn from a scaled standard normal
        return np.round(np.random.randn(rows, cols)*10) % 10

    # note: X alone is drawn with rand (uniform), not randn
    X = np.round(np.random.rand(n, m)*10) % 10

    W = np.ndarray(shape=[L+1], dtype=object)
    b = np.ndarray(shape=[L+1], dtype=object)

    W[1] = digits(n1, n)
    W[2] = digits(n2, n1)
    b[1] = digits(n1, 1)
    b[2] = digits(n2, 1)

    A, Z = forward_propagation(X, W, b, activations)
    for label, value in (("X", X), ("W", W), ("b", b), ("A", A)):
        print(label)
        print(value)
    return A, Z

def backward_propagation_test():
    '''
    Manual check of backward_propagation on a random 2-layer network
    (2 inputs -> 3 units -> 4 units, 5 samples).

    All values are rounded to single digits so the printed matrices can be
    followed by hand. The activation names are unrecognized on purpose, so
    every layer is linear. Prints X, W, b, A and the gradients dW, db.
    '''
    L = 2
    n1 = 3
    n2 = 4
    n = 2
    m = 5
    activations = ["dumb", "dumb", "dumb"]

    X = np.round(np.random.randn(n, m)*10) % 10
    Y = np.round(np.random.randn(n2, m)*10) % 10

    W = np.ndarray(shape=[L+1], dtype=object)
    b = np.ndarray(shape=[L+1], dtype=object)

    W[1] = np.round(np.random.randn(n1, n)*10) % 10
    W[2] = np.round(np.random.randn(n2, n1)*10) % 10
    b[1] = np.round(np.random.randn(n1, 1)*10) % 10
    b[2] = np.round(np.random.randn(n2, 1)*10) % 10

    A, Z = forward_propagation(X, W, b, activations)
    print("X")
    print(X)
    print("W")
    print(W)
    print("b")
    print(b)
    print("A")
    print(A)
    # backward_propagation returns the weight gradients first, so name and
    # label them dW (they were previously printed under "dA")
    dW, db = backward_propagation(X, Z, A, W, b, Y, activations)
    print("dW")
    print(dW)
    print("db")
    print(db)

def initialize_parameters_test():
    '''
    Manual check of initialize_parameters: build a [2, 3, 4] network, print its
    W and b, then push a random single-digit X (3 samples) through
    forward_propagation and print the activations.

    The activation names are unrecognized on purpose, so every layer is linear.
    '''
    def show(prefix, value):
        print(prefix + str(value))

    layers = [2, 3, 4]
    m = 3

    W, b = initialize_parameters(layers)
    show("W: ", W)
    show("b: ", b)

    X = np.round(np.random.randn(layers[0], m)*10) % 10
    show("X: ", X)

    activations = ["dumb"] * 3
    A, Z = forward_propagation(X, W, b, activations)
    show("A: ", A)


In [32]:
# Run the manual check; it prints W, b, X and A for a [2, 3, 4] network
initialize_parameters_test()

W: [None array([[4., 4.],
       [0., 4.],
       [5., 1.]])
 array([[4., 8., 6.],
       [2., 0., 6.],
       [4., 7., 5.],
       [1., 5., 5.]])]
b: [None array([[0.],
       [0.],
       [0.]])
 array([[0.],
       [0.],
       [0.],
       [0.]])]
X: [[2. 8. 2.]
 [6. 9. 6.]]
A: [None
 array([[32., 68., 32.],
       [24., 36., 24.],
       [16., 49., 16.]])
 array([[416., 854., 416.],
       [160., 430., 160.],
       [376., 769., 376.],
       [232., 493., 232.]])]


In [76]:
# range(3, 0) uses the default step of +1, so it is empty and this loop prints
# nothing. Counting down needs range(3, 0, -1) or reversed(range(1, 4)).
for i in range(3, 0):
    print(i)