# Intro to Neural Networks
[Udacity](https://classroom.udacity.com/nanodegrees/nd101)

A Playground to explore the math and code behind the Intro to Neural networks lessons

In [2]:
import numpy as np

## The Simplest Neural network
[Lesson 9](https://classroom.udacity.com/nanodegrees/nd101/parts/2a9dba0b-28eb-4b0e-acfa-bdcf35680d90/modules/329a736b-1700-43d4-9bf0-753cc461bebc/lessons/dc37fa92-75fd-4d41-b23e-9659dde80866/concepts/53e1d894-17d0-40d3-be1d-314a683bd042)

In [9]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

inputs = np.array([0.7, -0.3])
print(inputs.shape, "One-dimensional array, e.g. (length, )")
inputs

(2,) One-dimensional array, e.g. (length, )


array([ 0.7, -0.3])

In [22]:
weights1 = np.array([0.1, 0.8])
weights2 = np.array([[0.1, 0.8]])
weights3 = np.array([0.1, 0.8, 0.3])
print(weights2.shape, "A 2-dimensional array, e.g. (Num records, Num attributes per record), (rows, columns)")
weights2

(1, 2) A 2-dimensional array, e.g. (Num records, Num attributes per record), (rows, columns)


array([[ 0.1,  0.8]])

In [20]:
#A dot product of two 1d arrays of the same length works as element-wise multiplication
dot = np.dot(inputs, weights1)
# input[0]*weights[0] + input[1]*weights[1] ... input[i]weights[i]
print("Result:", dot)

Result: -0.17


In [23]:
#However, the 1d arrays must have the same length for this to work

dot1 = np.dot(inputs, weights3)

ValueError: shapes (2,) and (3,) not aligned: 2 (dim 0) != 3 (dim 0)

In [24]:
# dot products from left to right must align dimensionally i.e. the number of records on 
# the left must match the number of attributes on the right and since Numpy stores a 1d array 
# as a single row, 1 record of multiple attributes, the following won't work.
dot2 = np.dot(inputs, weights2)

ValueError: shapes (2,) and (1,2) not aligned: 2 (dim 0) != 1 (dim 0)

In [29]:
# However, you can transform the array / matrix
inputs1 = inputs[:, None]  # Create a copy of the input array[rows, columns]
weights4 = weights2.T  # Transpose (flip) the matrix
print("inputs", inputs.shape)
print("inputs1", inputs1.shape)
print("weights2", weights2.shape)
print("weights4", weights4.shape)
dot3 = np.dot(inputs, weights4)
dot4 = np.dot(inputs1, weights2)
print(dot3.shape)

inputs (2,)
inputs1 (2, 1)
weights2 (1, 2)
weights4 (2, 1)
(1,)


In [30]:
bias = -0.1
# TODO: Calculate the output
output = sigmoid(dot + bias)

print('Output:')
print(output)

Output:
0.432907095035


# Gradient Descent
* [Lesson 10](https://classroom.udacity.com/nanodegrees/nd101/parts/2a9dba0b-28eb-4b0e-acfa-bdcf35680d90/modules/329a736b-1700-43d4-9bf0-753cc461bebc/lessons/dc37fa92-75fd-4d41-b23e-9659dde80866/concepts/7d480208-0453-4457-97c3-56c720c23a89)
* [Lesson 11: The Math](https://classroom.udacity.com/nanodegrees/nd101/parts/2a9dba0b-28eb-4b0e-acfa-bdcf35680d90/modules/329a736b-1700-43d4-9bf0-753cc461bebc/lessons/dc37fa92-75fd-4d41-b23e-9659dde80866/concepts/3156ccf8-9bd0-4019-83b9-ab39c53bf541)
* [Lesson 12: The Code](https://classroom.udacity.com/nanodegrees/nd101/parts/2a9dba0b-28eb-4b0e-acfa-bdcf35680d90/modules/329a736b-1700-43d4-9bf0-753cc461bebc/lessons/dc37fa92-75fd-4d41-b23e-9659dde80866/concepts/f7c2a82b-7a05-45ac-9e3b-b881a5fb29c1)

In [31]:

learnrate = 0.5
x = np.array([1, 2])
y = np.array(0.5)

# Initial weights
w = np.array([0.5, -0.5])

# Calculate one gradient descent step for each weight
# TODO: Calculate output of neural network
# x is (2,) and w is (2, 1), which numpy treats as elementwise multiplication
nn_output = sigmoid(np.dot(x, w)) 

# TODO: Calculate error of neural network
error = y - nn_output

# TODO: Calculate the error term
# Remember, the error term is a convenience function used to combine several related steps in 
# the gradient descent optimization, which can then be used in the 
# overall gradient descent equation calculated below
error_term = error * (sigmoid(np.dot(x, w)) * (1-sigmoid(np.dot(x, w))))

# TODO: Calculate change in weights
# 
del_w = [learnrate * error_term * x[0], learnrate * error_term * x[1]]

print('Neural Network output:')
print(nn_output)
print('Amount of Error:')
print(error)
print('Change in Weights:')
print(del_w)

Neural Network output:
0.377540668798
Amount of Error:
0.122459331202
Change in Weights:
[0.014389198713080189, 0.028778397426160379]


# Implementing Gradient Descent
[Lesson 13](https://classroom.udacity.com/nanodegrees/nd101/parts/2a9dba0b-28eb-4b0e-acfa-bdcf35680d90/modules/329a736b-1700-43d4-9bf0-753cc461bebc/lessons/dc37fa92-75fd-4d41-b23e-9659dde80866/concepts/4b167ce0-9d45-45e1-bfe6-891b2c68ac94)

In [32]:
from data_prep import features, targets, features_test, targets_test

In [43]:
# Use to same seed to make debugging easier
np.random.seed(42)

n_records, n_features = features.shape
last_loss = None

# Initialize weights
weights = np.random.normal(scale=1 / n_features**.5, size=n_features)
weights1 = weights[:, None]
weights2 = weights1.T

print("Weights", weights.shape)
print("Weights1", weights1.shape)
print("Weights2", weights2.shape)
print("Features", features.shape)

Weights (6,)
Weights1 (6, 1)
Weights2 (1, 6)
Features (360, 6)


In [47]:
# Exploring Matrix math
# in 2d Matrices, the number of columns (attributes)on the left
# must match the number of records(row) on the right
# Matrix multiplication transforms
dot = np.dot(features, weights) # (360, 6)x(6,)
dot1 = np.dot(features, weights1)  #(360, 6)x(6, 1)
dot2 = np.dot(weights2, features.T)  #(1, 6)x(6, 360)
print("dot", dot.shape)
print("dot1", dot1.shape)
print("dot2", dot2.shape)

dot (360,)
dot1 (360, 1)
dot2 (1, 360)


In [48]:
# Transposing the feature set only works on the right side of the dot product?
# Because this is now considered 6 records with 360 attributes each
features1 = features.T
print(features1.shape)

(6, 360)


In [34]:
# Neural Network hyperparameters
epochs = 1000
learnrate = 0.5

for e in range(epochs):
    del_w = np.zeros(weights.shape)
    for x, y in zip(features.values, targets):
        # Loop through all records, x is the input, y is the target

        # TODO: Calculate the output
        # For every record, multiply and add the attributes
        output = sigmoid(np.dot(x, weights))

        # TODO: Calculate the error
        error = y - output

        # TODO: Calculate the error gradient
        error_term = error * (output * (1 - output))

        # TODO: Calculate the change in weights for this sample
        #       and add it to the total weight change
        del_w += error_term * x

    # TODO: Update weights using the learning rate and the average change in weights
    weights += (learnrate * del_w) / len(targets)

    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        out = sigmoid(np.dot(features, weights))
        loss = np.mean((out - targets) ** 2)
        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss


# Calculate accuracy on test data
tes_out = sigmoid(np.dot(features_test, weights))
predictions = tes_out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))

Weights (6,)
Features (360, 6)
Train loss:  0.2627609385
Train loss:  0.209286194093
Train loss:  0.200842929081
Train loss:  0.198621564755
Train loss:  0.197798513967
Train loss:  0.197425779122
Train loss:  0.197235077462
Train loss:  0.197129456251
Train loss:  0.197067663413
Train loss:  0.197030058018
Prediction accuracy: 0.725


# Multilayer Perceptrons
[Lesson 14](https://classroom.udacity.com/nanodegrees/nd101/parts/2a9dba0b-28eb-4b0e-acfa-bdcf35680d90/modules/329a736b-1700-43d4-9bf0-753cc461bebc/lessons/dc37fa92-75fd-4d41-b23e-9659dde80866/concepts/7d0a1958-be25-4efb-ab81-360d9aa4f764)

In [49]:
# Network size
N_input = 4
N_hidden = 3
N_output = 2

np.random.seed(42)
# Make some fake data
X = np.random.randn(4)
print("Features", X.shape)
weights_input_to_hidden = np.random.normal(0, scale=0.1, size=(N_input, N_hidden))
weights_hidden_to_output = np.random.normal(0, scale=0.1, size=(N_hidden, N_output))
print("Input Weights:", weights_input_to_hidden.shape)
print("Hidden Weights:", weights_hidden_to_output.shape)

Features (4,)
Input Weights: (4, 3)
Hidden Weights: (3, 2)


In [50]:
# TODO: Make a forward pass through the network

hidden_layer_in = np.dot(X, weights_input_to_hidden)
hidden_layer_out = sigmoid(hidden_layer_in)

print('Hidden-layer Output:')
print(hidden_layer_out.shape)
print(hidden_layer_out)

output_layer_in = np.dot(hidden_layer_out, weights_hidden_to_output)
output_layer_out = sigmoid(output_layer_in)

print('Output-layer Output:')
print(output_layer_out.shape)
print(output_layer_out)

Hidden-layer Output:
(3,)
[ 0.41492192  0.42604313  0.5002434 ]
Output-layer Output:
(2,)
[ 0.49815196  0.48539772]


# Backpropagation
[Lesson 15](https://classroom.udacity.com/nanodegrees/nd101/parts/2a9dba0b-28eb-4b0e-acfa-bdcf35680d90/modules/329a736b-1700-43d4-9bf0-753cc461bebc/lessons/dc37fa92-75fd-4d41-b23e-9659dde80866/concepts/87d85ff2-db15-438b-9be8-d097ea917f1e)

In [53]:
x = np.array([0.5, 0.1, -0.2])
target = 0.6
learnrate = 0.5

weights_input_hidden = np.array([[0.5, -0.6],
                                 [0.1, -0.2],
                                 [0.1, 0.7]])

weights_hidden_output = np.array([0.1, -0.3])

## Forward pass
hidden_layer_input = np.dot(x, weights_input_hidden)
hidden_layer_output = sigmoid(hidden_layer_input)
print("Hidden Shape", hidden_layer_output.shape)
print("Hidden Output", hidden_layer_output)

output_layer_in = np.dot(hidden_layer_output, weights_hidden_output)
output = sigmoid(output_layer_in)
print("Output Shape:", output.shape)
print("Output", output)

Hidden Shape (2,)
Hidden Output [ 0.55971365  0.38698582]
Output Shape: ()
Output 0.48497343085


In [55]:
## Backwards pass
## TODO: Calculate error
error = (target-output)

# TODO: Calculate error gradient for output layer
del_err_output = error * (output * (1-output))
print("Error Grad Out", del_err_output)
# TODO: Calculate error gradient for hidden layer
del_err_hidden = np.dot(del_err_output, weights_hidden_output) *\
                    (hidden_layer_output * (1-hidden_layer_output))
print("Error Grad Out", del_err_hidden)
# TODO: Calculate change in weights for hidden layer to output layer
delta_w_h_o = learnrate * del_err_output * hidden_layer_output

# TODO: Calculate change in weights for input layer to hidden layer
delta_w_i_h = learnrate * del_err_hidden * x[:,  None]

print('Change in weights for hidden layer to output layer:')
print(delta_w_h_o)
print('Change in weights for input layer to hidden layer:')
print(delta_w_i_h)

Error Grad Out 0.0287306695435
[ 0.00070802 -0.00204471]
Change in weights for hidden layer to output layer:
[ 0.00804047  0.00555918]
Change in weights for input layer to hidden layer:
[[  1.77005547e-04  -5.11178506e-04]
 [  3.54011093e-05  -1.02235701e-04]
 [ -7.08022187e-05   2.04471402e-04]]


# Implementing Backpropagation
[Lesson 16](https://classroom.udacity.com/nanodegrees/nd101/parts/2a9dba0b-28eb-4b0e-acfa-bdcf35680d90/modules/329a736b-1700-43d4-9bf0-753cc461bebc/lessons/dc37fa92-75fd-4d41-b23e-9659dde80866/concepts/b2bbdc9a-9f48-4735-b408-71cf67f5b000)

In [56]:
# Hyperparameters
n_hidden = 2  # number of hidden units
epochs = 900
learnrate = 0.005

n_records, n_features = features.shape
last_loss = None
# Initialize weights
weights_input_hidden = np.random.normal(scale=1 / n_features ** .5,
                                        size=(n_features, n_hidden))
weights_hidden_output = np.random.normal(scale=1 / n_features ** .5,
                                         size=n_hidden)

for e in range(epochs):
    del_w_input_hidden = np.zeros(weights_input_hidden.shape)
    del_w_hidden_output = np.zeros(weights_hidden_output.shape)
    for x, y in zip(features.values, targets):
        ## Forward pass ##
        # TODO: Calculate the output
        # (Num Records, Features)x(Num features, Transformations)
        hidden_input = np.dot(x, weights_input_hidden)  
        hidden_output = sigmoid(hidden_input)
       
        output = sigmoid(np.dot(hidden_output, weights_hidden_output))
        
        ## Backward pass ##
        # TODO: Calculate the network's prediction error
        error = y - output

        # TODO: Calculate error gradient in output unit
        # (this is the error term from before)
        output_error = error * output * (1 - output)

        # TODO: propagate errors to hidden layer
        hidden_error = np.dot(output_error, weights_hidden_output) *\
							hidden_output * (1 - hidden_output)

        # TODO: Update the change in weights
        del_w_hidden_output += output_error * hidden_output
        del_w_input_hidden += hidden_error * x[:, None]

    # TODO: Update weights
    weights_input_hidden += learnrate * del_w_input_hidden / n_records
    weights_hidden_output += learnrate * del_w_hidden_output / n_records

    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        hidden_output = sigmoid(np.dot(x, weights_input_hidden))
        out = sigmoid(np.dot(hidden_output,
                             weights_hidden_output))
        loss = np.mean((out - targets) ** 2)

        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss

# Calculate accuracy on test data
hidden = sigmoid(np.dot(features_test, weights_input_hidden))
out = sigmoid(np.dot(hidden, weights_hidden_output))
predictions = out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))


Train loss:  0.23743809064
Train loss:  0.236854616464
Train loss:  0.23628925276
Train loss:  0.235741461878
Train loss:  0.235210719736
Train loss:  0.234696515706
Train loss:  0.234198352486
Train loss:  0.233715745948
Train loss:  0.233248224968
Train loss:  0.23279533124
Prediction accuracy: 0.750
