In [153]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings

# Lab 8 - Multi-layer Perceptron Forward Pass

## Part I
For this exercise you will implement a simple 2-layer perceptron (forward pass)

For the first part you'll write a function that computes the forward pass of a 2-layer perceptron that predicts the prices of houses, using the usual Boston housing dataset.

In [154]:
# Read the Boston housing table from the local text file and preview its first rows.
boston = pd.read_csv(filepath_or_buffer='BostonHousing.txt')
boston.head(n=5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


As usual, consider the MEDV as your target variable. 
* Split the data into training, validation and testing (70,15,15)% (you will need this for the next lab as we will build from this lab)

In [155]:
# Separate the features from the target. The target is selected by name ('medv')
# rather than by position, so a reordered file cannot silently change it.
x = boston.drop(columns='medv').to_numpy(dtype=float)
t = boston['medv'].to_numpy(dtype=float)

# 70% train, then the remaining 30% is split evenly into validation and test (15% each).
# A fixed random_state keeps the three partitions identical between runs, which
# matters because the next lab builds on the same split.
x_train, x_temp, t_train, t_temp = train_test_split(x, t, train_size=0.7, random_state=42)
x_valid, x_test, t_valid, t_test = train_test_split(x_temp, t_temp, train_size=0.5, random_state=42)

Now you will write the function that computes the forward pass. 
* I provide here a structure that you can follow for your function, but again, feel free to modify it as you see fit.
* Use the sigmoid function as the activation of the hidden layer.
* Don't forget about the biases!
* *It is up to you to think what should be the activation for the output layer.*

In [156]:
def sigmoid_activation(z):
    """
    Logistic sigmoid, 1 / (1 + exp(-z)), evaluated element-wise.

    Parameters
    ----------
    z : scalar or array-like
        the pre-activation values

    Returns
    -------
    numpy scalar or np.ndarray
        values in [0, 1] with the same shape as `z`
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way exp(-z)
    # does for large negative z.
    decay = np.exp(-np.abs(z))
    # Both branches are algebraically equal to 1 / (1 + exp(-z)); each one is
    # used on the side of zero where it is numerically safe.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where yields a 0-d array for scalar input; unwrap it to a NumPy scalar.
    return result if result.ndim else result[()]

In [157]:
def two_layer_perceptron(X: np.ndarray, activation_func, dim_input, dim_hidden, dim_output, output_func = None, apply_bias = True, seed = None):
    """
    Implements the forward pass of a two-layer fully connected perceptron
    with randomly drawn weights.

    Parameters
    ----------
    X : a 2-dimensional array
        the input data, one sample per row and `dim_input` columns
    activation_func : function
        the activation function to be used for the hidden layer
    dim_input : int
        the dimensionality of the input layer (must equal X.shape[1])
    dim_hidden : int
        the dimensionality of the hidden layer
    dim_output : int
        the dimensionality of the output layer
    output_func : function
        the activation function to be used for the output. if None, uses an "Identity" transformation
    apply_bias : bool
        if True, prepend a 'ones' column to the data array and increase the input dimension by one.
        A 'ones' column is always prepended to the hidden activations, regardless of this flag.
    seed : int or None
        if given, the weights are drawn from a generator seeded with this value so the
        result is reproducible; if None, NumPy's global random state is used

    Returns
    -------
    y_pred : np.ndarray
        the output of the forward pass; shape (n_samples, dim_output) before
        `output_func` is applied

    Raises
    ------
    ValueError
        if X is not 2-dimensional or its column count differs from `dim_input`
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
    if X.shape[1] != dim_input:
        raise ValueError(f"X has {X.shape[1]} columns but dim_input is {dim_input}")

    if apply_bias:
        # The bias is modelled as an extra constant input of 1.
        X = np.insert(X, 0, 1, axis=1)
        dim_input += 1

    # Random weights: the goal here is only to exercise the forward pass, not to
    # obtain meaningful predictions.
    # NOTE(review): weights are all in [0, 1); with unscaled non-negative inputs the
    # hidden pre-activations are probably large and the sigmoid may saturate - confirm.
    rng = np.random if seed is None else np.random.default_rng(seed)
    W1 = rng.random((dim_input, dim_hidden))
    W2 = rng.random((dim_hidden + 1, dim_output))  # +1 row for the hidden-layer bias

    # Forward propagation: hidden activations, then a 'ones' column for the output bias.
    hidden1 = activation_func(X @ W1)
    hidden1 = np.insert(hidden1, 0, 1, axis=1)

    # Apply the output activation function, if one was given.
    output = hidden1 @ W2
    if output_func is None:
        return output
    return output_func(output)

Calculate the RMSE of the forward pass. 

In [158]:
def root_mean_square(predict, target):
    """
    Root-mean-square error between predictions and targets.

    Parameters
    ----------
    predict : array-like
        the predicted values
    target : array-like
        the true values

    Returns
    -------
    float
        sqrt(mean((predict - target)**2)) taken over all elements
    """
    predict = np.asarray(predict, dtype=float)
    target = np.asarray(target, dtype=float)
    # A (n, 1) prediction minus a (n,) target would broadcast to an (n, n) matrix
    # and give a wrong error. When both hold the same number of elements but differ
    # in rank, align the target with the prediction instead.
    if predict.ndim != target.ndim and predict.size == target.size:
        target = target.reshape(predict.shape)
    squared_diff = (predict - target) ** 2
    return np.sqrt(np.mean(squared_diff))

In [168]:
# Forward pass on the training set with 5 hidden units and a single linear output.
# Warnings are silenced only around this call rather than for the whole kernel,
# so diagnostics raised by later cells stay visible.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    y_pred = two_layer_perceptron(x_train, sigmoid_activation, x_train.shape[1], 5, 1)

# The target is reshaped to a column so it lines up with the (n, 1) prediction.
print("RMSE =", root_mean_square(y_pred, t_train.reshape(-1,1)))

RMSE = 20.768116111153684


## Part II 

For this exercise you will write a function that calculates the forward pass of a 2-layer perceptron that predicts the exact digit from a hand-written image, using scikit-learn's hand-written digits dataset (`load_digits`, a small MNIST-like collection of digit images).

In [169]:
from sklearn.datasets import load_digits

In [170]:
# Load scikit-learn's bundled hand-written digits dataset; its `data` and `target`
# attributes are read in the next cell.
digits = load_digits()

In [171]:
# Feature matrix and class labels of the digits dataset.
X, y = digits.data, digits.target

Again, you will split the data into training, validation and testing.

In [174]:
# 70% train, then the remaining 30% is split evenly into validation and test (15% each).
# random_state pins the shuffle so the partitions (and the preview below) are reproducible.
x_train, x_temp, t_train, t_temp = train_test_split(X, y, train_size=0.7, random_state=42)
x_valid, x_test, t_valid, t_test = train_test_split(x_temp, t_temp, train_size=0.5, random_state=42)
t_train[:5]

array([3, 0, 4, 7, 2])

In [175]:
# One-hot encode the training labels so they can be compared with the network's
# probability vector: row i gets a 1 in column t_train[i], zeros elsewhere.
t_prob = np.zeros((t_train.shape[0], 10))
t_prob[np.arange(t_train.shape[0]), t_train] = 1

print(t_prob[:5,:])

[[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]


Write a function that calculates the forward pass for this multi-class classification problem.
* You will use the sigmoid activation function for the hidden layer.
* For the output layer you will have to write the softmax activation function (you can check the slides)
* __Note:__ you can easily re-use the function that you coded for Part I if you do a simple modification and also include an input argument for the activation of the output layer.

In [176]:
def softmax_activation(z: np.ndarray):
    """
    Softmax over the last axis: exp(z) / sum(exp(z)).

    Parameters
    ----------
    z : np.ndarray
        the pre-activation values; for a 2-dimensional array each row is
        normalised independently

    Returns
    -------
    np.ndarray
        array with the same shape as `z` whose entries along the last axis
        are non-negative and sum to 1
    """
    z = np.asarray(z, dtype=float)
    # Subtracting the per-row maximum leaves the softmax unchanged (the factor
    # cancels in the ratio) but keeps every exponent <= 0, so exp cannot overflow.
    shifted = z - np.max(z, axis=-1, keepdims=True)
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis=-1, keepdims=True)

def CE_multi(predict, target):
    """
    Multi-class cross-entropy, summed (not averaged) over all samples and classes.

    Parameters
    ----------
    predict : array-like
        the predicted class probabilities
    target : array-like
        the target weights (e.g. one-hot rows), broadcastable against `predict`

    Returns
    -------
    float
        -sum(target * log(predict)), using the convention 0 * log(0) = 0
    """
    predict, target = np.broadcast_arrays(
        np.asarray(predict, dtype=float), np.asarray(target, dtype=float)
    )
    # Only entries with a non-zero target contribute. Skipping the others avoids
    # 0 * log(0) = nan when a non-target probability is exactly 0.
    active = target != 0
    return -np.sum(target[active] * np.log(predict[active]))

In [190]:
# Reuse the Part I forward pass: the output layer has one unit per digit class (10)
# and softmax is supplied as the output activation.
# NOTE(review): dim_hidden=1 sends every sample through a single hidden unit; the
# identical rows in the output below suggest the sigmoid is saturated - consider
# a wider hidden layer and/or scaled inputs.
y_pred = two_layer_perceptron(
    X=x_train,
    activation_func=sigmoid_activation,
    dim_input=x_train.shape[1],
    dim_hidden=1,
    dim_output=10,
    output_func=softmax_activation,
)
y_pred[:5,:]

array([[0.14359725, 0.13299504, 0.11305141, 0.10786231, 0.04723124,
        0.12870463, 0.09101544, 0.09799319, 0.0525075 , 0.085042  ],
       [0.14359725, 0.13299504, 0.11305141, 0.10786231, 0.04723124,
        0.12870463, 0.09101544, 0.09799319, 0.0525075 , 0.085042  ],
       [0.14359725, 0.13299504, 0.11305141, 0.10786231, 0.04723124,
        0.12870463, 0.09101544, 0.09799319, 0.0525075 , 0.085042  ],
       [0.14359725, 0.13299504, 0.11305141, 0.10786231, 0.04723124,
        0.12870463, 0.09101544, 0.09799319, 0.0525075 , 0.085042  ],
       [0.14359725, 0.13299504, 0.11305141, 0.10786231, 0.04723124,
        0.12870463, 0.09101544, 0.09799319, 0.0525075 , 0.085042  ]])

Lastly, calculate the error of this forward pass using the cross-entropy loss.

In [191]:
# Cross-entropy between the forward-pass predictions and the one-hot training targets.
error = CE_multi(predict=y_pred, target=t_prob)
print("Multiclass Cross Entropy error =", error)

Multiclass Cross Entropy error = 2965.02692235057
