# A Few Basic Examples of Neural Networks

### Short Version

In [2]:
import numpy as np
def _sigmoid(z):
    """Logistic function, applied element-wise."""
    return 1/(1+np.exp(-z))


# four training rows; the third column is always 1
X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])
# targets as a (4, 1) column: the XOR of the first two input columns
y = np.array([[0, 1, 1, 0]]).T

# weights drawn uniformly from [-1, 1): 3 inputs -> 4 hidden units -> 1 output
syn0 = 2*np.random.random((3, 4)) - 1
syn1 = 2*np.random.random((4, 1)) - 1

for j in range(60000):
    # forward pass through both layers
    l1 = _sigmoid(np.dot(X, syn0))
    l2 = _sigmoid(np.dot(l1, syn1))

    # output layer: miss scaled by the sigmoid slope at l2
    l2_error = y - l2
    l2_delta = l2_error*(l2*(1-l2))

    # hidden layer: push the output delta back through syn1
    # (must use syn1 as it was *before* this iteration's update)
    l1_error = l2_delta.dot(syn1.T)
    l1_delta = l1_error * (l1 * (1-l1))

    # in-place weight updates
    syn1 += l1.T.dot(l2_delta)
    syn0 += X.T.dot(l1_delta)

### Longer Version w/ Explanation

In [3]:
import numpy as np

# sigmoid function
def nonlin(x,deriv=False):
    """Sigmoid nonlinearity, or the sigmoid's slope.

    Args:
        x: scalar or NumPy array.
            With ``deriv=False`` this is the raw value to squash.
            With ``deriv=True`` this must already be a sigmoid *output*
            (a value previously returned by this function), not a raw input.
        deriv: when truthy, return the slope instead of the sigmoid.

    Returns:
        ``1/(1+exp(-x))`` when ``deriv`` is falsy, otherwise ``x*(1-x)``,
        which is the sigmoid's derivative expressed through its own output.
    """
    # plain truthiness test instead of comparing against True
    if deriv:
        return x*(1-x)
    return 1/(1+np.exp(-x))
    
# input dataset: one training example per row
X = np.array([  [0,0,1],
                [0,1,1],
                [1,0,1],
                [1,1,1] ])

# output dataset: one target per row (here equal to the first input column)
y = np.array([[0,0,1,1]]).T

# seed random numbers to make calculation
# deterministic (just a good practice)
np.random.seed(1)

# initialize weights randomly with mean 0 (uniform in [-1, 1))
syn0 = 2*np.random.random((3,1)) - 1

# The counter is named `iteration` rather than `iter`: a module-level `iter`
# would shadow the built-in iter() for every cell that runs afterwards.
for iteration in range(10000):

    # forward propagation
    l0 = X
    l1 = nonlin(np.dot(l0,syn0))

    # how much did we miss?
    l1_error = y - l1

    # multiply how much we missed by the
    # slope of the sigmoid at the values in l1
    l1_delta = l1_error * nonlin(l1,True)

    # update weights in place
    syn0 += np.dot(l0.T,l1_delta)

print("Output After Training:")
print(l1)
print(syn0)

Output After Training:
[[0.00966449]
 [0.00786506]
 [0.99358898]
 [0.99211957]]
[[ 9.67299303]
 [-0.2078435 ]
 [-4.62963669]]


#### Explanation

<table style='font-size:15px'>
    <tr>
        <th>Variable</th>
        <th>Definition</th>
    </tr>
    <tr>
        <td>X</td>
        <td>Input training dataset where each row is a training example</td>
    </tr>
    <tr>
        <td>y</td>
        <td>Output training dataset where each row is a training example</td>
    </tr>
    <tr>
        <td>l0</td>
        <td>First Layer of the Network, specified by training data</td>
    </tr>
    <tr>
        <td>l1</td>
        <td>Second Layer of the Network; in this example it is the output layer (the prediction), since this network has no hidden layer</td>
    </tr>
    <tr>
        <td>syn0</td>
        <td>Synapse 0, the set of weights between and connecting l0 and l1</td>
    </tr>
    <tr>
        <td>*</td>
        <td>Elementwise multiplication, aka two vectors multiplying 1-to-1 producing a vector of equal size</td>
    </tr>
    <tr>
        <td>-</td>
        <td>Elementwise subtraction, aka two vectors subtracting 1-to-1 producing a vector of equal size</td>
    </tr>
    <tr>
        <td>X.dot(y)</td>
        <td>Given X and y as vectors, this would be the vector dot product. If they are matrices, it would be a matrix-matrix multiplication. If only one is a matrix, then it's matrix-vector multiplication.</td>
    </tr>
</table>

In [4]:
def nonlin(x,deriv=False):
    """Sigmoid nonlinearity, or the sigmoid's slope.

    Args:
        x: scalar or NumPy array.
            With ``deriv=False`` this is the raw value to squash.
            With ``deriv=True`` this must already be a sigmoid *output*
            (a value previously returned by this function), not a raw input.
        deriv: when truthy, return the slope instead of the sigmoid.

    Returns:
        ``1/(1+exp(-x))`` when ``deriv`` is falsy, otherwise ``x*(1-x)``,
        which is the sigmoid's derivative expressed through its own output.
    """
    # plain truthiness test instead of comparing against True
    if deriv:
        return x*(1-x)
    return 1/(1+np.exp(-x))

<p style='font-size:15px'>The above function is the "nonlinearity" function, which squashes any number into the range (0, 1) so that it can be read as a probability. The sigmoid itself is f(x) = 1/(1+e^(-x)). When deriv=True, the function instead returns the derivative of the sigmoid, i.e. the slope of the sigmoid curve at a given point. This is very cheap to compute, because the slope can be written in terms of the sigmoid's own output: f'(x) = f(x)\*(1-f(x)). For that reason the deriv=True branch expects x to already be an output of the sigmoid (such as l1) and simply returns x\*(1-x).</p>

In [5]:
# training inputs: four examples (rows), three values each
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])

# training targets, reshaped into a (4, 1) column so row i of y pairs with row i of X
y = np.array([0, 0, 1, 1]).reshape(-1, 1)

print(X)
print(y)

[[0 0 1]
 [0 1 1]
 [1 0 1]
 [1 1 1]]
[[0]
 [0]
 [1]
 [1]]


<p style='font-size:15px'>The above lines of code simply initialize the input and output datasets. The first array is the input dataset, and the second array is the output dataset. Each row of the inputs corresponds to the same row of the outputs: the first row of X is the input whose expected output is the first row of y, the second row of X goes with the second row of y, and so on.</p>

In [6]:
# Seed NumPy's global random number generator so that the random
# numbers drawn afterwards (the initial weights) are the same on
# every run - deterministic results are just a good practice.
np.random.seed(1)

<p style='font-size:15px'>The above code is useful when you want random numbers but reproducible results: after seeding, np.random.random still returns randomly distributed numbers, but the same sequence of them on every run. <b>A good practice in general.</b></p>

In [12]:
# draw a (3, 1) column of weights uniformly from [-1, 1), so they are centred on 0
syn0 = np.random.random((3, 1)) * 2 - 1
print(syn0)

[[ 0.34093502]
 [-0.1653904 ]
 [ 0.11737966]]


<p style='font-size:15px'>This initializes synapse 0 to a shape (3, 1) array of weights. Since the second layer is the output, which is only 1 node, all we need is a set of weights to connect the 3 nodes of the first layer to the 1 node of the second, hence 3 weights. The weights themselves are random and distributed around a mean of 0. <b>Initializing weights with a mean of zero is a best practice</b></p>

In [13]:
# The counter is named `iteration` rather than `iter`: a module-level `iter`
# would shadow the built-in iter() for every cell that runs afterwards.
for iteration in range(10000):

    # forward propagation
    l0 = X
    l1 = nonlin(np.dot(l0,syn0))

    # how much did we miss?
    l1_error = y - l1

    # multiply how much we missed by the
    # slope of the sigmoid at the values in l1
    l1_delta = l1_error * nonlin(l1,True)

    # update weights in place
    syn0 += np.dot(l0.T,l1_delta)

<body>
    <p style='font-size:15px'>Finally, the actual training part of the code is above. The very first line just repeats the training step 10,000 times, so that the weights are gradually fitted to the dataset.</p>
    <p style='font-size:15px'>The next line of code sets l0 to X, explicitly setting up the first layer as the inputs.</p>
    <p style='font-size:15px'>The next line of code sets up the second layer, l1, as the dot product of l0 and syn0, put through the sigmoid function. This does the multiplication/propagation while also turning the result into a probability through the sigmoid.</p>
    <p style='font-size:15px'>The next line calculates the error between the prediction and actual output. This is useful for adjusting later.</p>
    <p style='font-size:15px'>The next line is the part where the recommended adjustment is calculated, aka the l1 delta (change in l1). This is simply the previous line's error multiplied by the derivative of the sigmoid at l1's values. This is an elementwise (one-to-one) multiplication: the first row's error is multiplied by the first row's slope, the second row's error by the second row's slope, and so on.</p>
    <p style='font-size:15px'>This final line updates syn0, or the weights. It gets the update per weight by performing the dot product of l0.T and l1_delta; in other words, the weights are updated by the dot product of the transposed inputs and the recommended adjustment.</p>
</body>

## Following Youtube Series
https://www.youtube.com/playlist?list=PLQVvvaa0QuDcjD5BAw2DxE6OF2tius3V3

#### Neuron, Essentially

In [51]:
inputs = [1, 2, 3, 2.5]
weights = [0.2, 0.8, -0.5, 1]
bias = 2

# unpack so the formula below reads like the maths: one weight per input
x1, x2, x3, x4 = inputs
w1, w2, w3, w4 = weights

# a neuron is the weighted sum of its inputs plus its bias
output = x1*w1 + x2*w2 + x3*w3 + x4*w4 + bias

print(output)

4.8


#### Now, An Entire Layer

In [52]:
inputs = [1, 2, 3, 2.5]

# every neuron has its own list of four weights (one per input) ...
weights1 = [0.2, 0.8, -0.5, 1]
weights2 = [0.5, -0.91, 0.26, -0.5]
weights3 = [-0.26, -0.27, 0.17, 0.87]

# ... and its own bias
bias1 = 2
bias2 = 3
bias3 = 0.5

x1, x2, x3, x4 = inputs

# all three neurons see the same inputs; only weights and bias differ
neuron1 = x1*weights1[0] + x2*weights1[1] + x3*weights1[2] + x4*weights1[3] + bias1
neuron2 = x1*weights2[0] + x2*weights2[1] + x3*weights2[2] + x4*weights2[3] + bias2
neuron3 = x1*weights3[0] + x2*weights3[1] + x3*weights3[2] + x4*weights3[3] + bias3

output = [neuron1, neuron2, neuron3]

print(output)

[4.8, 1.21, 2.385]


#### Using Numpy

In [53]:
# cleaner version in python
inputs = [1, 2, 3, 2.5]

# row k holds the weights of neuron k
weights = [[0.2, 0.8, -0.5, 1],
           [0.5, -0.91, 0.26, -0.5],
           [-0.26, -0.27, 0.17, 0.87]]

# biases[k] belongs to neuron k
biases = [2, 3, 0.5]

layer_outputs = []

for neuron_weights, neuron_bias in zip(weights, biases):
    # accumulate input*weight left to right, then add the bias last
    weighted_sum = 0
    for position in range(min(len(inputs), len(neuron_weights))):
        weighted_sum = weighted_sum + inputs[position]*neuron_weights[position]
    layer_outputs.append(weighted_sum + neuron_bias)

print(layer_outputs)

[4.8, 1.21, 2.385]


In [54]:
# single neuron - understand the dot product
import numpy as np

inputs = [1, 2, 3, 2.5]
weights = [0.2, 0.8, -0.5, 1]
bias = 2

# For two plain vectors the dot product is symmetric, so the order of
# weights and inputs doesn't matter here. With a matrix * vector dot product
# (which is what we get with multiple neurons) the order does matter.
weighted_sum = np.dot(weights, inputs)
output = weighted_sum + bias

print(output)

4.8


In [55]:
# layer dot product
inputs = [1, 2, 3, 2.5]                   # still the same four input values

weights = [[0.2, 0.8, -0.5, 1],           # one row per neuron:
           [0.5, -0.91, 0.26, -0.5],      # each row holds that neuron's four weights,
           [-0.26, -0.27, 0.17, 0.87]]    # one weight for each of the four inputs

biases = [2, 3, 0.5]                      # one bias per neuron, hence a list of three

# The (3, 4) weight matrix has to come first, so that every row of weights is
# dotted with the 4 inputs. np.dot(inputs, weights) raises a ValueError instead,
# because a length-4 vector does not line up with the 3 rows of the matrix.
neuron_sums = np.dot(weights, inputs)
output = neuron_sums + biases

print(output)

[4.8   1.21  2.385]


#### Batches, Layers, and Objects

In [56]:
# using batches, 1 layer
inputs = [[1, 2, 3, 2.5],                 # a batch: every row is one sample of four inputs
          [2.0, 5.0, -1.0, 2.0],
          [-1.5, 2.7, 3.3, -0.8]]

weights = [[0.2, 0.8, -0.5, 1],           # still one row of four weights per neuron
           [0.5, -0.91, 0.26, -0.5],
           [-0.26, -0.27, 0.17, 0.87]]

biases = [2, 3, 0.5]

# The inputs come first now. Transposing the (3, 4) weights gives a (4, 3)
# matrix, so (3, 4) inputs times (4, 3) weights is a valid matrix product and
# yields a (3, 3) result: one row per sample, one column per neuron.
weights_transposed = np.array(weights).T
outputs = np.dot(inputs, weights_transposed) + biases

print(outputs)

[[ 4.8    1.21   2.385]
 [ 8.9   -1.81   0.2  ]
 [ 1.41   1.051  0.026]]


In [57]:
# using batches, 2 layers
inputs = [[1, 2, 3, 2.5],
          [2.0, 5.0, -1.0, 2.0],
          [-1.5, 2.7, 3.3, -0.8]]

# layer 1: three neurons with four weights each
weights = [[0.2, 0.8, -0.5, 1],
           [0.5, -0.91, 0.26, -0.5],
           [-0.26, -0.27, 0.17, 0.87]]
biases = [2, 3, 0.5]

# layer 2: three neurons with three weights each (one per layer-1 output)
weights2 = [[0.1, -0.14, 0.5],
            [-0.5, 0.12, 0.33],
            [-0.44, 0.73, -0.13]]
biases2 = [-1, 2, -0.5]

# feed the batch through layer 1, then feed that result through layer 2
layer1_weights_T = np.array(weights).T
layer1_outputs = np.dot(inputs, layer1_weights_T) + biases

layer2_weights_T = np.array(weights2).T
layer2_outputs = np.dot(layer1_outputs, layer2_weights_T) + biases2

print(layer2_outputs)

[[ 0.5031   0.53225 -2.03875]
 [ 0.2434  -2.6012  -5.7633 ]
 [-0.99314  1.4297  -0.35655]]


In [71]:
# fix the global NumPy RNG so the randomly initialised layers are repeatable
np.random.seed(0)

# now as an object (with inputs as X - training dataset);
# each row is one sample with four values
X = [
    [1, 2, 3, 2.5],
    [2.0, 5.0, -1.0, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
]

class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        Weights are standard-normal draws scaled by 0.1 and stored with shape
        (n_inputs, n_neurons), so forward() needs no transpose. Biases start
        at zero with shape (1, n_neurons).
        """
        weight_shape = (n_inputs, n_neurons)
        self.weights = np.random.randn(*weight_shape) * 0.1
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute the layer's response to ``inputs`` and keep it in ``self.output``."""
        weighted = np.dot(inputs, self.weights)
        self.output = weighted + self.biases
    
    
# 4 input values -> 5 neurons -> 2 neurons. Tuple assignment evaluates left to
# right, so layer1's weights are still drawn from the RNG before layer2's.
layer1, layer2 = Layer_Dense(4, 5), Layer_Dense(5, 2)

# chain the layers: what layer1 produces is what layer2 consumes
layer1.forward(X)
#print(layer1.output)
layer2.forward(layer1.output)
print(layer2.output)

[[ 0.148296   -0.08397602]
 [ 0.14100315 -0.01340469]
 [ 0.20124979 -0.07290616]]


#### Finally: Activation Function

In [74]:
# rectified linear example (ReLU)

np.random.seed(0)

X = [
    [1, 2, 3, 2.5],
    [2.0, 5.0, -1.0, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
]

# sample values to rectify: anything at or below zero should come out as 0
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]
output = []

'''
for i in inputs:
    if i>0:
        output.append(i)
    if i<=0:
        output.append(0)
'''
# or instead: max(0, value) keeps positive values and turns the rest into 0
output.extend(max(0, value) for value in inputs)

print(output)

[0, 2, 0, 3.3, 0, 1.1, 2.2, 0]


In [None]:
# now adding the activation function as an object

# fix the global NumPy RNG so the randomly initialised layers are repeatable
np.random.seed(0)

# batch of three samples, four values each
X = [
    [1, 2, 3, 2.5],
    [2.0, 5.0, -1.0, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
]

class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        Weights are standard-normal draws scaled by 0.1 and stored with shape
        (n_inputs, n_neurons), so forward() needs no transpose. Biases start
        at zero with shape (1, n_neurons).
        """
        weight_shape = (n_inputs, n_neurons)
        self.weights = np.random.randn(*weight_shape) * 0.1
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute the layer's response to ``inputs`` and keep it in ``self.output``."""
        weighted = np.dot(inputs, self.weights)
        self.output = weighted + self.biases
    
class Activation_ReLU:
    """Rectified linear activation: values below 0 become 0, the rest pass through."""

    def forward(self, inputs):
        """Apply ReLU element-wise to ``inputs`` and store the result.

        Args:
            inputs: number or array-like of numbers (e.g. a Layer_Dense output).

        The result is stored in ``self.outputs``. ``self.output`` refers to the
        same array, so the attribute name also matches Layer_Dense.output.
        """
        # np.maximum compares every element against 0. np.max(0, inputs) is not
        # equivalent: it would reduce the scalar 0 and take `inputs` as the
        # `axis` argument, which raises for a batch of inputs.
        self.outputs = np.maximum(0, inputs)
        self.output = self.outputs


# 4 input values -> 5 neurons -> 2 neurons. Tuple assignment evaluates left to
# right, so layer1's weights are still drawn from the RNG before layer2's.
layer1, layer2 = Layer_Dense(4, 5), Layer_Dense(5, 2)

# NOTE(review): Activation_ReLU is defined in this cell but is not applied
# anywhere below - layer1's raw output feeds layer2 directly. Confirm intent.
layer1.forward(X)
#print(layer1.output)
layer2.forward(layer1.output)
print(layer2.output)