In [2]:
# Package imports
import matplotlib
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import pandas as pd
import numpy as np


In [3]:
# Use pandas to read the CSV file as a dataframe
df = pd.read_csv("moons400.csv")
# The y values are those labelled 'Class': extract their values
y = df['Class'].values

# The x values are all other columns
del df['Class']    # drop the 'Class' column from the dataframe
X = df.as_matrix() # convert the remaining columns to a numpy array
# Some examples of working with the data, to look at rows/columns
print ("len(X):", len(X))            # outer array: one per sample
print ("len(X[0]):", len(X[0]))      # each inner array is the attributes of one sample
print ("len(X[:,0]):", len(X[:,0]))  # select column 0 from array

# np.shape returns all dimensions of the array
(nsamples, nattribs) = np.shape(X)
print ("X: nsamples =", nsamples, ", nattribs =", nattribs)

len(X): 400
len(X[0]): 2
len(X[:,0]): 400
X: nsamples = 400 , nattribs = 2


In [4]:
datasubset_x = X[0:1]
(dsamples, dattribs) = np.shape(datasubset_x)
print("datasubset: dsamples =", dsamples, ", dattribs =", dattribs)
print(datasubset_x)


datasubset: dsamples = 1 , dattribs = 2
[[ 2.07106946  0.41152931]]


In [5]:
datasubset_y = y[0:1]
print(datasubset_y)

[1]


In [6]:
#layer 1 weights
W11_1 = -0.1
W12_1 = -0.1
W21_1 = -0.1
W22_1 = -0.1
#layer 1 bias
b1_1 = 1
b2_1 = 0.9
#layer 2 weights
W11_2 = 0.1
W12_2 = 0.1
#layer 2 bias
b_2 = 0.5


In [7]:
# a1_2 = f(w11_1*x1 + w12_1*x2 +b1_1 )
#print(datasubset_x[0,0])

a1_2 = (W11_1 * datasubset_x[0,0] + W12_1 * datasubset_x[0,1] + b1_1 * 1)
print("a1_2: ",a1_2)

a1_2 = 1/1 - np.exp(a1_2)
print("a1_2exp: ",a1_2)

a2_2 = (W21_1 * datasubset_x[0,0] + W22_1 * datasubset_x[0,1] + b2_1 * 1)
print("a2_2: ",a2_2)
a2_2 = 1/1 - np.exp(a2_2)
print("a2_2exp: ",a2_2)


a1_3 = (W11_2 * a1_2 + W12_2 * a2_2 + b_2 * 1 )
print("a1_3: ",a1_3)
#hW,b(X) = a1_3
a1_3 = 1/1 - np.exp(a1_3)
print("a1_3exp: ",a1_3)


a1_2:  0.751740122949
a1_2exp:  -1.12068706395
a2_2:  0.651740122949
a2_2exp:  -0.918877007408
a1_3:  0.296043592864
a1_3exp:  -0.344528767414


In [8]:
# Calculate the Total Error SSE = ∑ 1/2(Y-YP)^2 
#E_total = 1/2(target_01 - out_01)^2 

# sum of squared errors of prediction
sse = 1/2 * np.power((datasubset_y - a1_3), 2)
print("SSE: ",sse)



SSE:  [ 0.9038788]


In [17]:
#The Backwards back propagation

# The goal with backpropagation is to update each of the weights in the network so that 
# they cause the actual output to be closer the target output, thereby minimizing the error 
# for each output neuron and the network as a whole.

#Consider W11_2. We want to know how much a change in W11_2 affects the total error, 
#aka the partial derivative of a1_3 with respect to W11_2 or the gradient with respect to W11_2

#First, how much does the total error change with respect to the output?
#E_total = 1/2(target_01 - out_01)^2 + 1/2(target_02 - out_02)^2 
print("SSE: ",sse)

# The partial derivitive of the total error with respect to the output a1_3
# ∂SSE/∂a1_3 = 2 * 1/2(target- a1_3)^2-1 * -1 + 0 
derivitive_a1_3 = 2 * 1/2 * np.power((datasubset_y - a1_3), 2-1) * -1 + 0
print("derivitive_a1_3: ",derivitive_a1_3)

## results in same output as above
pd_sse_a1_3 = -(datasubset_y - a1_3)
print("Partial derivitive of total error with respect to the output of a1_3: ",pd_sse_a1_3)

#The partial derivative of the logistic function is the output multiplied by 1 minus the output:
# partial derivitive of output a1_3 with respect to the net a1_3
# out_o1 (1 - out_01)
# a1_3 (1- a1_3)
pd_a1_3 = a1_3 * (1 - a1_3)
print("partial_derivitive_output a1_3: ",pd_a1_3)


# ∂SSE / ∂a1_3 = sse(1 - sse)
derivitive_sse = sse * (1 - sse) 

print("derivitive_sse: ",derivitive_sse)

# How much does the total net input of a1_3 change with respect to W11_2
# ∂a1_3/∂W11_2 = 1 * a1_2 * W11_2^(1-1) + 0 + 0
pd_a1_2 = 1 * a1_2 * np.power(W11_2,(1-1)) + 0 + 0
print("Partial derivitive of a1_2 with respect to w11_2: ",pd_a1_2)

# putting it all together
# ∂SSE/∂W11_2 = ∂SSE / ∂a1_3 * ∂a1_3/∂net_a1_3 * ∂net_a1_3/∂W11_2
pd_sse_w11_2 = pd_sse_a1_3 * pd_a1_3 * pd_a1_2
print("Partial derivitive of total SSE with respect to w11_2: ",pd_sse_w11_2)


SSE:  [ 0.9038788]
derivitive_a1_3:  [-1.34452877]
Partial derivitive of total error with respect to the output of a1_3:  [-1.34452877]
partial_derivitive_output a1_3:  -0.46322883899
derivitive_sse:  [ 0.08688191]
Partial derivitive of a1_2 with respect to w11_2:  -1.12068706395
Partial derivitive of total SSE with respect to w11_2:  [-0.69799136]


In [None]:
#delta rule
#delta_o1 = -(target_{o1} - out_{o1}) * out_{o1}(1 - out_{o1})

#partial E_total\partial w_5 = delta_o1*out_h1