In [61]:
import matplotlib.pyplot as plt

import numpy as np
from sklearn.datasets import make_moons
import math
from sklearn.metrics import accuracy_score

X,y = make_moons(n_samples=50, noise=0.2, random_state=42)

def sigmoid(x):
    return 1/(1+np.exp(-x))


In [62]:
def ffnn (X, w0, w1):
    '''
    Feed-Forward Neural Network
    Input needed:
        X: data
        w0: weights of the first layer
        w1: weights of the second layer
    Output:
        ypred0: results from the first layer
        ypred: results from the final layer
    '''
    input_layer = np.hstack((X,np.ones((X.shape[0],1)))) # shape (50,3)
    weight_matrix_layer = w0 # shape (3,2)
    dot_product1 = np.dot(input_layer,weight_matrix_layer) # shape (50,2)
    hidden_layer =  np.hstack((sigmoid(dot_product1),np.ones((X.shape[0],1)))) # shape (50,3)
    weight_matrix_layer_2 = w1 # shape (3,1)
    dot_product2 = np.dot(hidden_layer,weight_matrix_layer_2) # shape (50,1)
    output_layer = sigmoid(dot_product2) # shape (50,1)
    ypred0 = hidden_layer # shape (50,3)
    ypred = output_layer # shape (50,1)

    return ypred0, ypred

In [64]:
def backprop(weights,output1,output2,ytrue,X_input, LR_O, LR_H):
    #separate learning rates for outer and inner weights.
    wH = weights[0]
    w0 = weights[1]
    #STEP A:
    ytrue = ytrue.reshape(-1, 1)
    error = (output2.reshape(-1,1) - ytrue) * loss(ytrue , output2.reshape(-1,1))
    #STEP B: may have to reshape or not
    sig_deriv = output2 * ( 1 - output2)
    #derivative of the sigmoid function with respect to the hidden output * weights
    y_grad = sig_deriv * error
    #STEP C:
    hidden_out_with_bias = np.hstack([output1,np.ones((output1.shape[0] ,1))]) #don't forget the bias!
    delta_wo = np.dot( -y_grad.transpose(), hidden_out_with_bias ) * LR_O
    #and finally, old weights + delta weights -> new weights!
    w0_new = w0 + delta_wo.transpose()
    #STEP D:
    sig_deriv_2 = output1 * ( 1 - output1)
    H_grad = sig_deriv_2 * np.dot(y_grad , w0_new[:2].transpose())
    #exclude the bias (3rd column) of the outer weights, since it is not backpropagated!
    #STEP E:
    delta_wH = np.dot(-H_grad.transpose(), X_input ) * LR_H # with or without bias?
    wH_new = wH + delta_wH.transpose() #old weights + delta weights -> new weights!
    return wH_new, w0_new

In [68]:
# Intial random weights instansiation
w0 = np.random.random([3,2])
w1 = np.random.random([3,1])
w = [w0, w1]

In [69]:
# Predict with the initial random weights
ypred0 = ffnn(X,w0,w1)[0]
ypred = ffnn(X,w0,w1)[1]

In [70]:
# Loop and store all calculated loss results
LOSS_VEC = []
for i in range(500):
    ypred0, ypred = ffnn(X, w0, w1)
    LOSS_VEC.append(np.sum(loss(y, ypred)))
    w1 = backprop(w, ypred0, ypred, X, y, 0.1, 0.01)


plt.plot(LOSS_VEC)
plt.title('Loss values over time')


ValueError: operands could not be broadcast together with shapes (50,1) (100,1) 