# Feed-forward and backprop from scratch

### Feed-forward

<div>
<img src="feedforward.png" width="500"/>
</div>

In [1]:
import matplotlib.pyplot as plt
import numpy as np

#### Step 1: Create training data

In [2]:
from sklearn.datasets import make_moons

In [3]:
# 50 noisy 2-D training points with their labels; the fixed seed keeps the
# data identical between runs (shapes are checked in the next cells)
X, y = make_moons(n_samples=50, noise=0.2, random_state=42)

#### Step 2: Take a look at the data

In [6]:
X.shape

(50, 2)

In [7]:
y.shape

(50,)

In [None]:
# first feature on the horizontal axis, second on the vertical, coloured by label
plt.scatter(X[:, 0], X[:, 1], c=y)

#### Step 3: Add a bias (parameter b) column to X

In [None]:
def add_bias(X):
    """Return a copy of X with a column of ones appended as the last column.

    The constant column plays the role of the bias input: multiplying it
    with the last row of a weight matrix adds the bias term b.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_features + 1)
        The input is left unmodified.
    """
    X = np.asarray(X)
    ones = np.ones((X.shape[0], 1))
    return np.hstack([X, ones])

In [None]:
X = add_bias(X)

#### Step 4: calculate the sigmoid activation function

In [None]:
def sigmoid(x):
    """Apply the logistic sigmoid 1 / (1 + exp(-x)) element-wise.

    Parameters
    ----------
    x : float or numpy.ndarray

    Returns
    -------
    float or numpy.ndarray
        Values in the open interval (0, 1), same shape as ``x``.
    """
    return 1.0 / (1.0 + np.exp(-x))

In [None]:
# reference points: far-left tail, -1, the midpoint, +1, far-right tail
a = np.array([-10.0, -1.0, 0.0, 1.0, 10.0])
expected = np.array([0.0, 0.27, 0.5, 0.73, 1.0])
# the outputs have to match the reference values to two decimals
assert (sigmoid(a).round(2) == expected).all()

#### Step 5: Initialize the weights

In [None]:
n_neurons = 2

In [None]:
# One randomly initialised weight matrix per layer; the last row of each
# matrix holds the weights that multiply the bias column.
w = [
    # hidden layer: (2 input features + bias) x n_neurons
    np.random.normal(size=(3, n_neurons)),
    # output layer: (n_neurons hidden outputs + bias) x 1
    np.random.normal(size=(n_neurons + 1, 1)),
]

#### Step 6: Put it all together

In [None]:
def feed_forward(X, weights):
    """Run one forward pass through the two-layer network.

    1. Multiply the input matrix X
       with the weights of the first layer.

    2. Apply the sigmoid function on the result.

    3. Append an extra column of ones to the result (i.e. the bias).

    4. Multiply the output of the previous step
       with the weights of the second (i.e. outer) layer.

    5. Apply the sigmoid function on the result.

    6. Return all intermediate results (i.e. anything that is outputted
       by an activation function).

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_inputs)
        Input data with the bias column already appended.
    weights : sequence of two numpy.ndarray
        ``weights[0]`` has shape (n_inputs, n_hidden) and ``weights[1]``
        has shape (n_hidden + 1, n_outputs).

    Returns
    -------
    output1 : numpy.ndarray of shape (n_samples, n_hidden)
        Activations of the hidden layer (without the bias column).
    output2 : numpy.ndarray of shape (n_samples, n_outputs)
        Activations of the outer layer, i.e. the network's prediction.
    """
    w_hidden = weights[0]
    w_outer = weights[1]

    # steps 1 + 2: weighted sum of the inputs, squashed by the sigmoid
    output1 = 1.0 / (1.0 + np.exp(-np.dot(X, w_hidden)))

    # step 3: the outer layer gets its own bias input (last column of ones)
    hidden_with_bias = np.hstack([output1, np.ones((output1.shape[0], 1))])

    # steps 4 + 5: weighted sum of the hidden activations, squashed again
    output2 = 1.0 / (1.0 + np.exp(-np.dot(hidden_with_bias, w_outer)))

    return output1, output2

In [None]:
# shape check on the training data: one row per sample,
# two hidden activations and a single network output
out1, out2 = feed_forward(X, w)
assert out1.shape == (50, 2)
assert out2.shape == (50, 1)

# small reference case with hand-picked weights
# (the last input column and the last weight rows belong to the bias)
Xref = np.array([[1.0, 2.0, 1.0]])
whidden = np.array([
    [1.0, 2.0, 0.0],
    [-1.0, -2.0, 0.0],
]).T
wout = np.array([[1.0, -1.0, 0.5]]).T

out1, out2 = feed_forward(Xref, [whidden, wout])
assert (out1.round(2) == np.array([[0.99, 0.01]])).all()
assert (out2.round(2) == np.array([[0.82]])).all()

### Backpropagation

<div>
<img src="neuron_w_backprop.png" width="500"/>
</div>

**Let's talk about loss function!**

* Here we'll use log loss as a loss function:
$$ loss = -\left(y_{true} \log(y_{pred}) + (1-y_{true}) \log(1-y_{pred})\right) $$

#### Step 1: Run feed-forward and make sure it works

Check!

#### Step 2: Write a Loss Function

##### 2a: Log-loss

$$ loss = -\left(y_{true} \log(y_{pred}) + (1-y_{true}) \log(1-y_{pred})\right) $$

In [None]:
def log_loss(ytrue, ypred):
    """Return the element-wise log loss (binary cross-entropy).

    loss = -(ytrue * log(ypred) + (1 - ytrue) * log(1 - ypred))

    Parameters
    ----------
    ytrue : numpy.ndarray
        True labels (0 or 1).
    ypred : numpy.ndarray
        Predicted probabilities; must lie strictly between 0 and 1,
        otherwise the logarithm produces inf/nan.

    Returns
    -------
    numpy.ndarray
        One loss value per element (not summed or averaged).
    """
    loss = -(ytrue * np.log(ypred) + (1 - ytrue) * np.log(1 - ypred))
    return loss

In [None]:
# confident-and-right predictions cost almost nothing,
# confident-and-wrong predictions are punished heavily
ytrue = np.array([0.0, 0.0, 1.0, 1.0])
ypred = np.array([0.01, 0.99, 0.01, 0.99])
expected = np.array([0.01, 4.61, 4.61, 0.01])
assert (log_loss(ytrue, ypred).round(2) == expected).all()

##### 2b: Log-loss derivative

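_Formula_ (derivative of the log loss with respect to the prediction):

$$ \frac{\partial\, loss}{\partial\, y_{pred}} = -\left(\frac{y_{true}}{y_{pred}} - \frac{1-y_{true}}{1-y_{pred}}\right) $$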

In [None]:
def log_loss_deriv(ytrue, ypred):
    """Return the element-wise derivative of the log loss w.r.t. ``ypred``.

    d(loss)/d(ypred) = -(ytrue / ypred - (1 - ytrue) / (1 - ypred))

    Parameters
    ----------
    ytrue : numpy.ndarray
        True labels.
    ypred : numpy.ndarray
        Predicted probabilities; must lie strictly between 0 and 1 to
        avoid a division by zero.

    Returns
    -------
    numpy.ndarray
        Same shape as the (broadcast) inputs.
    """
    loss_deriv = -(ytrue / ypred - (1 - ytrue) / (1 - ypred))
    return loss_deriv

In [None]:
a = np.array([0.5, 0.3, 0.99, 0.2])
b = np.array([0.4, 0.2, 0.10, 0.3])
expected = np.array([-0.42, -0.62, -9.89, 0.48])
# Compare with a tolerance instead of `.round(2) == expected`: the exact
# derivative for the second pair is -0.625, a rounding tie, so the rounded
# value would hinge on floating-point noise in the chosen formula.
assert np.allclose(log_loss_deriv(a, b), expected, rtol=0, atol=0.01)

##### Extra — 2c: Sigmoid derivative

_Formula_ (with $\sigma$ denoting the sigmoid function):

$$ \sigma'(x) = \sigma(x)\,(1 - \sigma(x)) $$

In [None]:
def sigmoid_deriv(X):
    """Return the element-wise derivative of the sigmoid evaluated at X.

    Uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).

    Parameters
    ----------
    X : float or numpy.ndarray
        Pre-activation values (the inputs to the sigmoid).

    Returns
    -------
    float or numpy.ndarray
        Same shape as ``X``; the maximum of 0.25 is reached at 0.
    """
    s = 1.0 / (1.0 + np.exp(-X))
    return s * (1.0 - s)

#### Step 3: Calculate initial loss

In [None]:
out1, out2 = feed_forward(X, w)
# column vector, so the labels line up with the (n_samples, 1) network output
ytrue = y.reshape(-1, 1)
# compare the true labels with the final output of the network
log_loss(ytrue, out2)

In [None]:
loss = log_loss(ytrue, out2)

#### Step 4: Write a backpropagation function

Fill in the blanks of the following function, which transcribes the equations from earlier (equations A - E) to run one iteration of the backpropagation algorithm. It takes in a handful of arguments:
* the initial weights,
* the outputs from the feed-forward process (i.e. both the hidden output and the final output),
* the true labels,
* the input data,
* and the learning rate `LR` (a single value that scales the weight updates of both layers).

The function (representing a single iteration of the backpropagation algorithm), should return the modified hidden weights and the modified outer weights.

In [None]:
def backprop(weights,
             output1,
             output2,
             ytrue,
             X_input,
             LR):
    """Run one iteration of backpropagation (one gradient-descent step).

    The bias column and the sigmoid derivative are computed inline with
    NumPy, so the function only needs ``np`` to be in scope.

    Parameters
    ----------
    weights : sequence of two numpy.ndarray
        ``weights[0]``: hidden weights, shape (n_inputs, n_hidden);
        ``weights[1]``: outer weights, shape (n_hidden + 1, 1), whose last
        row multiplies the bias column.
    output1 : numpy.ndarray of shape (n_samples, n_hidden)
        Hidden-layer activations from the feed-forward pass.
    output2 : numpy.ndarray of shape (n_samples, 1)
        Final network output from the feed-forward pass.
    ytrue : numpy.ndarray of shape (n_samples, 1)
        True labels.
    X_input : numpy.ndarray of shape (n_samples, n_inputs)
        Input data including the bias column.
    LR : float
        Learning rate applied to the updates of both layers.

    Returns
    -------
    (wH_new, wO_new) : tuple of numpy.ndarray
        Updated hidden and outer weights; the inputs are not modified.
    """

    def _sigmoid_deriv(z):
        # derivative of the sigmoid at pre-activation z: s * (1 - s)
        s = 1.0 / (1.0 + np.exp(-z))
        return s * (1.0 - s)

    wH = weights[0]
    wO = weights[1]

    # EQUATION A: derivative of the log loss w.r.t. the network output
    error = -(ytrue / output2 - (1 - ytrue) / (1 - output2))

    # EQUATION B: gradient at the input of the output neuron
    # don't forget the bias!
    hidden_out_with_bias = np.hstack(
        [output1, np.ones((output1.shape[0], 1))]
    )
    # derivative of the sigmoid function with respect to the
    # hidden output * weights
    sig_deriv_1 = _sigmoid_deriv(np.dot(hidden_out_with_bias, wO))

    y_grad = sig_deriv_1 * error

    # EQUATION C: update for the outer weights
    delta_wo = -np.dot(y_grad.T, hidden_out_with_bias) * LR

    # and finally, old weights + delta weights -> new weights!
    wO_new = wO + delta_wo.T

    # EQUATION D: gradient at the inputs of the hidden neurons
    sig_deriv_2 = _sigmoid_deriv(np.dot(X_input, wH))
    # exclude the bias (last row) of the outer weights,
    # since it is not backpropagated! The *old* outer weights are used,
    # because they produced output1/output2.
    H_grad = sig_deriv_2 * np.dot(y_grad, wO[:-1].T)

    # EQUATION E: update for the hidden weights
    delta_wH = -np.dot(H_grad.T, X_input) * LR
    # old weights + delta weights -> new weights!
    wH_new = wH + delta_wH.T

    # new hidden weights, new output weights
    return wH_new, wO_new

#### Step 5: Run the Backpropagation Algorithm

Run your backpropagation algorithm in a loop! Inside the loop:
* Run your feed-forward function with the X data and the starting weights (which are initially random!).
* Collect the total sum of the log-loss values into a list, so we can track them over time.
* Run your backprop function to get the modified weights.
* At the end of the loop, make your modified weights the new weights for the next cycle.

In [None]:
# setup
X, y = make_moons(n_samples=200, noise=0.1, random_state=42)
X = add_bias(X)
# column vector, so the labels line up with the (n_samples, 1) network output
y = y.reshape(-1, 1)
# random starting weights: (2 features + bias) x 2 hidden neurons,
# then (2 hidden outputs + bias) x 1 output neuron
weights = [
    np.random.normal(size=(3, 2)),
    np.random.normal(size=(3, 1)),
]

# step size for the weight updates of both layers
LEARNING_RATE = 0.01

# train
LOSS_VEC = []

for _ in range(1000):
    # predictions with the current weights
    out1, out2 = feed_forward(X, weights)
    # total log loss over all samples, tracked to plot the learning curve
    LOSS_VEC.append(log_loss(y, out2).sum())
    new_weights = backprop(weights, out1, out2, y, X, LEARNING_RATE)
    # the modified weights become the starting weights of the next cycle
    weights = list(new_weights)

#### Step 6: Plot results

##### 6a: Plot loss function

In [None]:
plt.plot(LOSS_VEC)

##### 6b: Plot decision boundary

In [None]:
# evaluation grid: every (x1, x2) pair over [-3, 3] x [-3, 3],
# ordered with x1 changing slowest and x2 changing fastest
x = np.linspace(-3, 3, 200)
X_vis = np.column_stack([np.repeat(x, len(x)), np.tile(x, len(x))])
# the network expects the bias input as an extra column
X_vis = add_bias(X_vis)

# network output for every grid point, using the current weights
# (the hidden-layer activations are not needed here)
_, y_pred = feed_forward(X_vis, weights)

In [None]:
# reshape the predictions for visualization
# X_vis was built with x1 as the outer (slowest) loop, so a column-major
# reshape puts x1 along the columns and x2 along the rows of Z, i.e.
# Z[row, col] is the prediction at (x1=x[col], x2=x[row])
Z = y_pred.reshape((len(x), len(x)), order='F')

In [None]:
# draw a contour plot
fig,ax=plt.subplots(1,1)
cp = ax.contourf(x, x, Z, alpha=0.8, cmap='coolwarm')
ax.contour(x, x, Z, levels=[0.5])
fig.colorbar(cp) # Add a colorbar to a plot

# draw the original data
X, y = make_moons(n_samples=200, noise=0.1, random_state=42)
ax.scatter(X[:,0], X[:,1], c=y, cmap='coolwarm')