# Perceptrons

In [None]:
from typing import List

# the simplest from-scratch approach is to represent vectors as lists of numbers
Vector = List[float]

def dot(v: Vector, w: Vector) -> float:
    """Computes v_1 * w_1 + ... + v_n * w_n"""
    assert len(v) == len(w), "vectors must be the same length"

    return sum(v_i * w_i for v_i, w_i in zip(v, w))

The perceptron computes a weighted sum of its inputs and “fires” if that weighted sum is 0 or greater:

In [None]:
def step_function(x: float) -> float:
    """Returns 1.0 when x is 0 or greater, and 0.0 otherwise"""
    if x >= 0:
        return 1.0
    return 0.0


def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    """Returns 1.0 if the weighted sum of x plus the bias is at least 0
    (the perceptron 'fires'), and 0.0 if not"""
    return step_function(dot(weights, x) + bias)

# Feed-Forward Neural Networks

In order to train a neural network, we need to use calculus, and in order to use calculus, we need smooth functions.
`step_function` isn’t even continuous, and sigmoid is a good smooth approximation of it.

In [None]:
import math

def sigmoid(t: float) -> float:
    """Returns the logistic function 1 / (1 + e^(-t)).

    The value is computed in a form that never exponentiates a large
    positive number, so a very negative t yields 0.0 instead of raising
    OverflowError.
    """
    if t >= 0:
        return 1 / (1 + math.exp(-t))

    # For t < 0, exp(-t) can overflow. The algebraically equal form
    # e^t / (1 + e^t) only ever underflows harmlessly toward 0.
    exp_t = math.exp(t)
    return exp_t / (1 + exp_t)

In [None]:
import numpy as np

def sigmoid2(t: float) -> float:
    """Returns 1 / (1 + e^(-t)), evaluated with numpy's exp"""
    denominator = 1 + np.exp(-t)
    return 1 / denominator

We can represent a neural network as a list of layers, where each layer is just a list of the neurons (vectors of weights) in that layer.

Neural Network = list (layers) of lists (neurons) of vectors (weights).

In [None]:
def neuron_output(weights: Vector, inputs: Vector) -> float:
    """Returns the sigmoid of the dot product of weights and inputs"""
    weighted_sum = dot(weights, inputs)
    return sigmoid(weighted_sum)

## Feed-Forward

In [None]:
def feed_forward(neural_network: List[List[Vector]],
                 input_vector: Vector) -> List[Vector]:
    """
    Pushes input_vector through each layer of neural_network in order.
    Returns one output vector per layer, with the final layer's output last.
    """
    layer_outputs: List[Vector] = []
    current_input = input_vector

    for layer in neural_network:
        # A trailing constant 1 pairs with each neuron's last weight,
        # so that weight plays the role of a bias.
        biased_input = current_input + [1]
        activations = [neuron_output(neuron, biased_input) for neuron in layer]
        layer_outputs.append(activations)

        # What this layer produced is what the next layer consumes.
        current_input = activations

    return layer_outputs

In [None]:
# Hand-picked weights for a two-layer network that computes XOR of two 0/1 inputs.
# Each neuron is [weight for 1st input, weight for 2nd input, bias weight];
# feed_forward appends a constant 1 to the inputs, which the last weight multiplies.
xor_network = [# hidden layer
               [[20., 20, -30],      # 'and' neuron
                [20., 20, -10]],     # 'or'  neuron
               # output layer
               [[-60., 60, -30]]]    # 'or but not and' neuron

In [None]:
# Outputs of every layer for input [0, 0]: the hidden layer's vector, then the output layer's.
feed_forward(xor_network, [0, 0])

In [None]:
# feed_forward returns the outputs of all layers, so the [-1] gets the
# final output, and the [0] gets the value out of the resulting vector.
# For input [0, 0] (0 xor 0) this value should be close to 0.
feed_forward(xor_network, [0, 0])[-1][0]

In [None]:
# For input [0, 1] (0 xor 1) the final output should be close to 1.
feed_forward(xor_network, [0, 1])[-1][0]

## Backpropagation

We use data to train neural networks. The typical approach is an algorithm called backpropagation, 
which uses the gradient descent algorithm.

Imagine we have a training set that consists of input vectors and corresponding target
output vectors. For example, in our previous xor_network example, the input vector
[1, 0] corresponded to the target output [1]. Imagine that our network has some set
of weights. We then adjust the weights using the following algorithm:

1. Run feed_forward on an input vector to produce the outputs of all the neurons
in the network.
2. We know the target output, so we can compute a loss that’s the sum of the
squared errors.
3. Compute the gradient of this loss as a function of the output neuron’s weights.
4. “Propagate” the gradients and errors backward to compute the gradients with
respect to the hidden neurons’ weights.
5. Take a gradient descent step to update the weights.

In [None]:
def gradients(network: List[List[Vector]],
              input_vector: Vector,
              target_vector: Vector) -> List[List[Vector]]:
    """
    Runs a forward pass of the network on input_vector and returns the
    gradient of the squared error loss (measured against target_vector)
    with respect to the neuron weights, as [hidden_grads, output_grads].
    """
    # forward pass; the result is unpacked as exactly two layers
    hidden_outputs, outputs = feed_forward(network, input_vector)
    output_layer = network[-1]

    # delta of each output neuron (gradient w.r.t. its pre-activation value);
    # the sigmoid's derivative is output * (1 - output)
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, target_vector)]

    # each output weight's gradient is its neuron's delta times the value
    # that weight multiplies: a hidden output, or the constant 1 for the bias
    hidden_with_bias = hidden_outputs + [1]
    output_grads = [[output_deltas[i] * value for value in hidden_with_bias]
                    for i, _ in enumerate(output_layer)]

    # delta of each hidden neuron: its sigmoid derivative times the output
    # deltas weighted by the output-layer weights attached to that neuron
    hidden_deltas = []
    for i, hidden_output in enumerate(hidden_outputs):
        downstream_weights = [neuron[i] for neuron in output_layer]
        hidden_deltas.append(hidden_output * (1 - hidden_output) *
                             dot(output_deltas, downstream_weights))

    # each hidden weight's gradient is its neuron's delta times the value
    # that weight multiplies: a network input, or the constant 1 for the bias
    input_with_bias = input_vector + [1]
    hidden_grads = [[hidden_deltas[i] * value for value in input_with_bias]
                    for i, _ in enumerate(network[0])]

    return [hidden_grads, output_grads]

Typically we run this algorithm many times for our entire training set until the network
converges.

In [None]:
def sum_of_squares(v: Vector) -> float:
    """Returns the dot product of v with itself: v_1^2 + ... + v_n^2"""
    total = dot(v, v)
    return total


def magnitude(v: Vector) -> float:
    """Returns the length of v: the square root of its sum of squares"""
    squared_length = sum_of_squares(v)
    return math.sqrt(squared_length)


def subtract(v: Vector, w: Vector) -> Vector:
    """Returns the elementwise difference v - w"""
    assert len(v) == len(w), "vectors must be the same length"

    difference = [left - right for left, right in zip(v, w)]
    return difference


def distance(v: Vector, w: Vector) -> float:
    """Returns the magnitude of the difference v - w"""
    difference = subtract(v, w)
    return magnitude(difference)


def scalar_multiply(c: float, v: Vector) -> Vector:
    """Returns a new vector with each element of v scaled by c"""
    scaled = [c * element for element in v]
    return scaled


def add(v: Vector, w: Vector) -> Vector:
    """Returns the elementwise sum v + w"""
    assert len(v) == len(w), "vectors must be the same length"

    total = [left + right for left, right in zip(v, w)]
    return total

In [None]:
def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Returns v + step_size * gradient, i.e. `v` moved `step_size`
    along the `gradient` direction"""
    assert len(v) == len(gradient)
    return add(v, scalar_multiply(step_size, gradient))

In [None]:
import tqdm

In [None]:
def main():
    """
    Trains a randomly initialized network (2 inputs, 2 hidden neurons,
    1 output neuron) on the XOR truth table, taking one gradient step per
    training example for 20000 epochs.
    """
    import random
    random.seed(0)

    # training data
    xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    ys = [[0.], [1.], [1.], [0.]]

    # start with random weights
    network = [ # hidden layer: 2 inputs -> 2 outputs
                [[random.random() for _ in range(2 + 1)],   # 1st hidden neuron
                 [random.random() for _ in range(2 + 1)]],  # 2nd hidden neuron
                # output layer: 2 inputs -> 1 output
                [[random.random() for _ in range(2 + 1)]]   # 1st output neuron
              ]

    learning_rate = 1.0

    for epoch in tqdm.trange(20000, desc="neural net for xor"):
        for x, y in zip(xs, ys):
            # This local must not be called `gradients`: assigning to that
            # name would make it local to main(), and the call itself would
            # then raise UnboundLocalError.
            grads = gradients(network, x, y)

            # Take a gradient step for each neuron in each layer
            # (negative step size: move against the gradient to reduce the loss)
            network = [[gradient_step(neuron, grad, -learning_rate)
                        for neuron, grad in zip(layer, layer_grad)]
                       for layer, layer_grad in zip(network, grads)]