In [1]:
# ----------
# 
# In this exercise, you will put the finishing touches on a perceptron class.
#
# Finish writing the activate() method by using np.dot to compute signal
# strength and then add in a threshold for perceptron activation.
#
# ----------


import numpy as np


class Perceptron0(object):
    """
    A single artificial neuron whose output is a hard 0/1 step.
    """

    def __init__(self, weights=np.array([1]), threshold=0):
        """
        Store the given weights and firing threshold on the instance.
        Neither argument is validated or converted.
        """
        self.threshold = threshold
        self.weights = weights

    def activate(self, inputs):
        """
        Compute the unit's output for @param inputs, a sequence of numbers
        with one entry per weight.
        @return 1 when the weighted sum of the inputs is strictly greater than
        the threshold, otherwise 0 (a sum exactly on the threshold gives 0).
        """
        # Weighted sum of the inputs: the raw signal reaching the unit.
        signal = np.dot(inputs, self.weights)

        # Step activation: fire only when the signal exceeds the threshold.
        return 1 if signal > self.threshold else 0


def test():
    """
    Check Perceptron0 on inputs whose weighted sum lands below, above, and
    exactly on the threshold. An AssertionError is raised on the first
    mismatch; otherwise True is returned and nothing is printed.
    """
    unit = Perceptron0(np.array([1, 2]), 0.)

    # (inputs, expected output)
    cases = [
        (np.array([ 1, -1]), 0),  # sum = -1: < threshold  --> 0
        (np.array([-1,  1]), 1),  # sum =  1: > threshold  --> 1
        (np.array([ 2, -1]), 0),  # sum =  0: on threshold --> 0
    ]
    for inputs, expected in cases:
        assert unit.activate(inputs) == expected
    return True

# Report success only when test() ran through all of its assertions.
# print is written in call form so the line parses under both Python 2 and
# Python 3; with a single string argument the printed text is identical.
if test():
    print("All tests completed successfully")

All tests completed successfully


### Question:
What are the advantages of using some threshold and step function rather than just outputting the weighted inputs (dot product)?
### Answer(s)
There's not really one right answer here. One thought is that it creates the discrete outputs needed for classification. It also creates a tunable hyperparameter that can control the sensitivity of the perceptron unit. More mathematically, the threshold moves the position of the decision boundary. I'm sure there are other answers as well.

### Question
What _parameter_ is learnable in a perceptron? In other words, what can be modified to allow the perceptron or a network of perceptrons to model an arbitrary function?
### Answer
Weights. We could also modify the thresholds, but a threshold change can be folded into the weights: treat the threshold as the weight on an extra input that is always 1 (a bias term).

### Question
What does the input to a network of perceptrons look like?
A) Tensor network of weights
B) Matrix of numerical values
C) Matrix of classifications
D) Matrix of numerical values and classifications for each row
### Answer
D) Matrix of numerical values and classifications for each row


### Question
Are Neural Networks used for classification or regression?
### Answer
Trick question: neural networks can be used for both.

In [2]:
# ----------
#
# In this exercise, you will update the perceptron class so that it can update
# its weights.
#
# Finish writing the update() method so that it updates the weights according
# to the perceptron update rule. Updates should be performed online, revising
# the weights after each data point.
# 
# ----------


class Perceptron1(Perceptron0):
    """
    Step-activation perceptron (behaviour inherited from Perceptron0) that can
    also learn: update() adjusts the weights with the perceptron training rule.
    """

    def __init__(self, *args, **kwargs):
        """
        Forward every argument unchanged to Perceptron0.__init__. Keyword
        arguments are forwarded as well, so
        Perceptron1(weights=..., threshold=...) works just like the positional
        form. No type-checking is performed.
        """
        super(Perceptron1, self).__init__(*args, **kwargs)

    def update(self, X, y, eta=.1):
        """
        Takes in a 2D array @param X consisting of a LIST of inputs and a
        1D array @param y, consisting of a corresponding list of expected
        outputs. Updates internal weights according to the perceptron training
        rule using these values and an optional learning rate, @param eta.

        Updates are performed online: the weights are revised after each data
        point, so later points are classified with the already-updated weights.
        @return None; the result is stored in self.weights.
        """
        for i, x_i in enumerate(X):
            y_hat = self.activate(x_i)

            # Perceptron rule: move the weights along the input, scaled by the
            # learning rate and the signed error (zero when the prediction is
            # already correct, so the weights stay put).
            delta_w = (eta * (y[i] - y_hat)) * x_i

            # Rebind rather than add in place, so the array that was passed to
            # the constructor is never modified.
            self.weights = self.weights + delta_w

def test():
    """
    Train fresh Perceptron1 units (threshold 0, default learning rate) on small
    data sets and compare the resulting weights with the expected values.
    An AssertionError is raised on the first mismatch; otherwise True is
    returned and nothing is printed.
    """
    def sum_almost_equal(array1, array2, tol = 1e-6):
        # Equal when the total absolute element-wise difference is below tol.
        return sum(abs(array1 - array2)) < tol

    # (initial weights, training inputs, training targets, expected weights)
    cases = [
        (np.array([1,1,1]),
         np.array([[2,0,-3]]),
         np.array([1]),
         np.array([1.2, 1, 0.7])),
        (np.array([1,2,3]),
         np.array([[3,2,1],[4,0,-1]]),
         np.array([0,0]),
         np.array([0.7, 1.8, 2.9])),
        (np.array([3,0,2]),
         np.array([[2,-2,4],[-1,-3,2],[0,2,1]]),
         np.array([0,1,0]),
         np.array([2.7, -0.3, 1.7])),
    ]
    for start_weights, inputs, targets, expected in cases:
        unit = Perceptron1(start_weights, 0)
        unit.update(inputs, targets)
        assert sum_almost_equal(unit.weights, expected)
    return True
# Report success only when test() ran through all of its assertions.
# print is written in call form so the line parses under both Python 2 and
# Python 3; with a single string argument the printed text is identical.
if test():
    print("All tests completed sucessfully")

All tests completed sucessfully


### Layered Network
Layered networks consist of an input layer, some number of hidden nodes, and an output layer that outputs the classification or regression results.

Given the network structure below, with weights on the edges of the graph, what will be the output of this network?

![](Network1.png "NN")

```
[
[ [1], [2], [3] ],      # Input layer (these are not weights, but input values)
[ [1,1,-5],[3,-4,2] ],  # Hidden layer
[ [2,-1] ]              # Output layer
]
```

## Answer
Inputs * hidden layer weights
$$ \begin{pmatrix} 1 & 2 & 3 \end{pmatrix}  \begin{pmatrix} 1  & 3 \\ 1  & -4 \\ -5 & 2 \end{pmatrix} = \begin{pmatrix} -12 & 1 \end{pmatrix} $$

hidden layer outputs * output layer weights = final output
$$ \begin{pmatrix} -12 & 1 \end{pmatrix}  \begin{pmatrix} 2 \\ -1  \end{pmatrix} = -25 $$



In [3]:
# ----------
#
# In this exercise, you will create a network of perceptrons that can represent
# the XOR function, using a network structure like those shown in the previous
# quizzes.
#
# You will need to do two things:
# First, create a network of perceptrons with the correct weights
# Second, define a procedure EvalNetwork() which takes in a list of inputs and
# outputs the value of this network.
#
# ----------

            
# Two-layer perceptron network for XOR on two binary inputs.
Network = [
    # Hidden layer: one unit with weights (1, 1) and threshold 1. The sum
    # x1 + x2 exceeds 1 only when both inputs are 1, so this unit is an AND.
    [Perceptron0(np.array([1, 1]), 1)],
    # Output layer: one unit over (x1, x2, AND output). Either input alone
    # gives 2 > 1 and fires; when both are on, the -4 weight on the AND
    # output pulls the sum back to 0, which does not fire.
    [Perceptron0(np.array([2, 2, -4]), 1)],
]

# Part 2: Define a procedure to compute the output of the network, given inputs
def EvalNetwork(inputValues, Network, verbose=True):
    """
    Evaluate a two-layer network of the shape used for XOR on one input vector.

    The hidden unit Network[0][0] is activated on the raw inputs. Its output
    is appended to a copy of the inputs, and that extended list is fed to the
    output unit Network[1][0].

    @param inputValues the input numbers, as a NumPy array or a plain list.
           The caller's object is never modified.
    @param Network nested list of units; only Network[0][0] and Network[1][0]
           are used, and each must provide an activate(inputs) method.
    @param verbose when True (the default) the intermediate values are
           printed; pass False to evaluate silently.
    @return whatever the output unit's activate() returns (a single number
            for perceptron units).
    """
    def report(message):
        # Call-form print with one pre-formatted string behaves the same under
        # Python 2 and Python 3.
        if verbose:
            print(message)

    # asarray().tolist() yields a fresh Python list for both arrays and lists,
    # so appending below cannot leak back into the caller's data.
    values = np.asarray(inputValues).tolist()
    report("Inputs %s" % (values,))

    # Evaluate the hidden unit once and reuse the result for both the log line
    # and the output unit's input.
    hidden_output = Network[0][0].activate(values)
    report("AND activation %s" % (hidden_output,))

    values.append(hidden_output)
    report("Inputs plus output from AND, %s" % (values,))

    OutputValue = Network[1][0].activate(values)
    report("Output Value: %s" % (OutputValue,))
    report('\n\n')
    return OutputValue


def test():
    """
    Evaluate the network on all four binary input pairs and check each result
    against XOR. EvalNetwork prints its intermediate values for every case;
    an AssertionError is raised if any output differs from the XOR truth table.
    """
    assert EvalNetwork(np.array([0,0]), Network) == 0 # 0 XOR 0 = 0
    assert EvalNetwork(np.array([0,1]), Network) == 1 # 0 XOR 1 = 1
    assert EvalNetwork(np.array([1,0]), Network) == 1 # 1 XOR 0 = 1
    assert EvalNetwork(np.array([1,1]), Network) == 0 # 1 XOR 1 = 0

# Run the four XOR cases; EvalNetwork prints the intermediate values of each.
test()


Inputs [0, 0]
AND activation 0
Inputs plus output from AND, [0, 0, 0]
Output Value: 0



Inputs [0, 1]
AND activation 0
Inputs plus output from AND, [0, 1, 0]
Output Value: 1



Inputs [1, 0]
AND activation 0
Inputs plus output from AND, [1, 0, 0]
Output Value: 1



Inputs [1, 1]
AND activation 1
Inputs plus output from AND, [1, 1, 1]
Output Value: 0





## Discretion

The outputs of perceptron units are discrete. Consider a network with the structure [2,2,1], that is, 2 input nodes, 2 hidden nodes, and 1 output node. How many possible outputs does this network have? _Hint: The answer is NOT two_

### Answer
4 - The output of the last node is a combination of the previous two hidden nodes, each of which can take on two possible values. Note that the output is not binary, because each hidden node can output either of two numerical values, and these are then combined with weights. Thus, there is a maximum of 4 different values that can be sent to the final node.

## Continuity

In [4]:

def activate(strength, function="square"):
    """
    Apply one of several candidate activation functions to a signal strength.

    @param strength the input number.
    @param function which activation to apply:
           "square"  - strength squared (the default);
           "sigmoid" - 1 / (1 + e^-strength);
           "tanh"    - 2 / (1 + e^(-2 * strength)) - 1;
           "relu"    - 0 for negative strength, otherwise strength itself.
    @return the activated value, a number.
    @raise ValueError if @param function is not one of the names above.
    """
    if function == "square":
        return np.power(strength, 2)
    if function == "sigmoid":
        return 1 / (1 + np.exp(-strength))
    if function == "tanh":
        # Logistic form of tanh; the exponent must depend on the input.
        return 2 / (1 + np.exp(-2 * strength)) - 1
    if function == "relu":
        return 0 if strength < 0 else strength
    raise ValueError("unknown activation function: %r" % (function,))

    
def activation_derivative(activate, strength, eps=1e-5):
    """
    Numerically approximate the derivative of an activation function.

    @param activate a callable that maps one number to one number.
    @param strength the point at which to estimate the slope.
    @param eps half-width of the central-difference step (default 1e-5).
    @return (activate(strength + eps) - activate(strength - eps)) / (2 * eps).
    """
    # Central difference: sample symmetrically on both sides of the point.
    rise = activate(strength + eps) - activate(strength - eps)
    return rise / (2 * eps)


## Sigmoid

In [5]:
# ----------
# 
# As with the previous perceptron exercises, you will complete some of the core
# methods of a sigmoid unit class.
#
# There are two functions for you to finish:
# First, in activate(), write the sigmoid activation function.
# Second, in update(), write the gradient descent update rule. Updates should be
#   performed online, revising the weights after each data point.
# 
# ----------


class Sigmoid:
    """
    An artificial neuron whose output is the logistic (sigmoid) function of
    the weighted sum of its inputs.
    """

    def __init__(self, weights=np.array([1])):
        """
        Store the given weights as-is (no type-checking or conversion) and
        reset the bookkeeping attributes.
        """
        self.weights = weights

        # Not needed by update() itself; kept as hooks for anyone building a
        # network out of these sigmoid units.
        self.last_input = 0 # weighted input sum from the latest activate()
        self.delta      = 0 # error signal (not written anywhere in this class)

    def logistic(self, x):
        """
        @return the logistic function 1 / (1 + e^-x) of @param x.
        """
        return 1 / (1 + np.exp(-x))

    def activate(self, values):
        """
        Takes in @param values, a list of numbers equal to length of weights.
        Records the weighted input sum in self.last_input.
        @return the output of the sigmoid unit for these inputs, i.e. the
        logistic function of the weighted sum.
        """
        self.last_input = np.dot(values, self.weights)
        return self.logistic(self.last_input)

    def update(self, values, train, eta=.1):
        """
        Takes in a 2D array @param values consisting of a LIST of inputs and a
        1D array @param train, consisting of a corresponding list of expected
        outputs. Updates internal weights by gradient descent using these
        values and an optional learning rate, @param eta.

        Updates are performed online: the weights are revised after each data
        point. Nothing is returned.
        """
        for sample, target in zip(values, train):
            # Output of the unit under the current weights.
            output = self.activate(sample)

            # Slope of the logistic function at this output: s * (1 - s).
            slope = output * (1 - output)

            # Step = learning rate * error * slope * input.
            step = eta * (target - output) * slope * sample
            self.weights = self.weights + step
            

def test():
    """
    Check Sigmoid.activate() and Sigmoid.update() against reference values.
    An AssertionError is raised on the first mismatch; otherwise True is
    returned and nothing is printed.
    """
    def sum_almost_equal(array1, array2, tol = 1e-5):
        # Equal when the total absolute element-wise difference is below tol.
        return sum(abs(array1 - array2)) < tol

    # Activation, followed by a single-sample update on the same unit.
    first = Sigmoid(weights=[3,-2,1])
    output = first.activate(np.array([1,2,3]))
    assert abs(output - 0.880797) < 1e-5

    first.update(np.array([[1,2,3]]),np.array([0]))
    expected_first = np.array([2.990752, -2.018496, 0.972257])
    assert sum_almost_equal(first.weights, expected_first)

    # Two-sample online update on a fresh unit.
    second = Sigmoid(weights=[0,3,-1])
    second.update(np.array([[-3,-1,2],[2,1,2]]),np.array([1,0]))
    expected_second = np.array([-0.030739, 2.984961, -1.027437])
    assert sum_almost_equal(second.weights, expected_second)
    return True
# Report success only when test() ran through all of its assertions.
# print is written in call form so the line parses under both Python 2 and
# Python 3; with a single string argument the printed text is identical.
if test():
    print("All tests completed sucessfully")

All tests completed sucessfully
