In [1]:
import numpy as np
import pandas as pd

# Lab 9 - Multi-layer Perceptron Forward Pass & Backpropagation

## Part I
For this exercise you will implement a simple 2-layer perceptron with the forward pass and the backpropagation to learn the weights

For the first part you'll build and train a 2-layer neural network that predicts the prices of houses, using the usual Boston housing dataset.

In [2]:
boston = pd.read_csv('data/BostonHousing.txt')
boston.head(5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


As usual, consider the MEDV as your target variable. 
* Split the data into training, validation and testing (70,15,15)%
* Experiment with different number of neurons per layer for your network, using the validation set

In [3]:
# your code goes here
from sklearn.model_selection import train_test_split

X = boston.drop('medv', axis=1)
y = boston['medv']

X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.50, random_state=42)

X_train.head(5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat
427,37.6619,0.0,18.1,0,0.679,6.202,78.7,1.8629,24,666,20.2,18.82,14.52
490,0.20746,0.0,27.74,0,0.609,5.093,98.0,1.8226,4,711,20.1,318.43,29.68
429,9.33889,0.0,18.1,0,0.679,6.38,95.6,1.9682,24,666,20.2,60.72,24.08
327,0.24103,0.0,7.38,0,0.493,6.083,43.7,5.4159,5,287,19.6,396.9,12.79
97,0.12083,0.0,2.89,0,0.445,8.069,76.0,3.4952,2,276,18.0,396.9,4.21


In [16]:
# your code goes here
class Layer:
    def __init__(self):
        self.input = None
        self.output = None

    # computes the output Y of a layer for a given input X
    def forward_propagation(self, input):
        raise NotImplementedError

    # computes dE/dX for a given dE/dY (and update parameters if any)
    def backward_propagation(self, output_error, learning_rate):
        raise NotImplementedError

# inherit from base class Layer
class FCLayer(Layer):
    def __init__(self, input_size, output_size, activation, activation_prime):
        self.weights = np.random.rand(input_size, output_size) * 0.01
        self.bias = np.random.rand(1, output_size) * 0.01
        self.activation = activation
        self.activation_prime = activation_prime

    # returns output for a given input
    def forward_propagation(self, input_data):
        self.input = input_data.reshape(1, -1)
        self.output = self.activation(np.dot(self.input, self.weights) + self.bias)
        return self.output

    # computes dE/dW, dE/dB for a given output_error=dE/dY. Returns input_error=dE/dX.
    def backward_propagation(self, grad_output, learning_rate):
        grad_input = np.dot(grad_output, self.weights.T)
        grad_weights = np.dot(self.input.T, grad_output)
        grad_weights = self.activation_prime(grad_weights) * grad_output

        # update parameters
        # print("Erro da camada: ", grad_weights, "\n")
        self.weights -= learning_rate * grad_weights
        self.bias -= learning_rate * grad_output
        return grad_input

In [17]:
# your code goes here
class Network:
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    # add layer to network
    def add(self, layer):
        self.layers.append(layer)

    # set loss to use
    def use(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime

    # predict output for given input
    def predict(self, input_data):
        # sample dimension first
        result = []
        
        if isinstance(input_data, pd.DataFrame): input_data = input_data.values

        # run network over all samples
        for output in input_data:
            # forward propagation
            for layer in self.layers:
                output = layer.forward_propagation(output)
            result.append(output)

        return result

    # train the network
    def fit(self, X_train, y_train, epochs, learning_rate):
        # sample dimension first
        samples = len(X_train)
        
        if isinstance(X_train, pd.DataFrame): X_train = X_train.values

        # training loop
        for i in range(epochs):
            err = 0
            for output, y_real in zip(X_train, y_train):
                
                if not isinstance(y_real, np.ndarray): y_real = np.array(y_real)
                
                # forward propagation
                for layer in self.layers:
                    output = layer.forward_propagation(output)

                # compute loss (for display purpose only)
                err += self.loss(y_real, output)

                # backward propagation
                error = self.loss_prime(y_real, output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print("epoch %d/%d   error=%f" % (i+1, epochs, err))
            
            

In [6]:
def mse(y_true, y_pred):
    return np.mean(np.power(y_true-y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2*(y_pred-y_true)/y_true.size

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

def identity(x):
    return x

def identity_prime(x):
    return 1

In [15]:
net = Network()
net.add(FCLayer(len(X_train.iloc[0]), 20, sigmoid, sigmoid_prime))
net.add(FCLayer(20, 1, identity, identity_prime))

# train
net.use(mse, mse_prime)
net.fit(X_train, y_train, epochs=10, learning_rate=0.12)

# test
out = net.predict(X_train)
print(out)

epoch 1/10   error=60176011353.643959
epoch 2/10   error=1006.197168
epoch 3/10   error=1001.140626
epoch 4/10   error=1001.140628
epoch 5/10   error=1001.140628
epoch 6/10   error=1001.140628
epoch 7/10   error=1001.140628
epoch 8/10   error=1001.140628
epoch 9/10   error=1001.140628
epoch 10/10   error=1001.140628
[array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[42.22738236]]), array([[4

  return 1 / (1 + np.exp(-x))


## Part II 

For this exercise you will build and train a 2-layer neural network that predicts the exact digit from a hand-written image, using the MNIST dataset. 
For this exercise, add weight decay to your network.

In [8]:
from sklearn.datasets import load_digits

In [9]:
digits = load_digits()

In [10]:
X = digits.data
y = digits.target

In [11]:
X.shape

(1797, 64)

Again, you will split the data into training, validation and testing.

In [12]:
# your code goes here:


In [13]:
# your code goes here:
