In [2]:
import numpy as np
import pandas as pd

# Lab 4 - Multi-layer Perceptron Forward Pass

## Part I
For this exercise you will implement a simple 2-layer perceptron (forward pass)

For the first part you'll write a function that computes the forward pass of a 2-layer perecptron that predicts the prices of houses, using the usual Boston housing dataset.

In [3]:
boston = pd.read_csv('data/BostonHousing.txt')
boston.head(5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


As usual, consider the MEDV as your target variable. 
* Split the data into training, validation and testing (70,15,15)% (you will need this for the next lab as we will build from this lab)

In [4]:
# your code goes here
from sklearn.model_selection import train_test_split

X = boston.drop('medv', axis=1)
y = boston['medv']

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=13)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=13)

Now you will write the function that computes the forward pass. 
* I provide here a structure that you can follow for your function, but again, feel free to modify it as you see fit.
* Use the sigmoid function as the activation of the hidden layer.
* Don't forget about the biases!
* *It is up to you to think what should be the activation for the output layer.*

In [5]:
def sigmoid_activation(z):
    """
    Sigmoid activation function.

    Parameters
    ----------
    z : array-like
        The input to the activation function.

    Returns
    -------
    array-like
        Output of the sigmoid activation function.
    """
    # your code goes here
    
    return 1 / (1 + np.exp(-z))
    

In [6]:
def two_layer_perceptron(X, activation, dim_input, dim_hidden, dim_output):
    """
    Implements the forward pass of a two-layer fully connected perceptron.
    
    Parameters
    ----------
    X : a 2-dimensional array
        the input data
    activation : function
        the activation function to be used for the hidden layer
    dim_input : int
        the dimensionality of the input layer
    dim_hidden : int
        the dimensionality of the hidden layer
    dim_output : int
        the dimensionality of the output layer
    Returns
    -------
    y_pred : float
        the output of the computation of the forward pass of the network
    """
    # your code goes here
    np.random.seed(13)
    W1 = np.random.randn(dim_input + 1, dim_hidden) * 0.01  # +1 for the bias
    W2 = np.random.randn(dim_hidden + 1, dim_output) * 0.01  # +1 for the bias
    
    X_bias = np.insert(X, obj=0, values = 1, axis =1)

    # Forward pass, 1º layer
    A1 = np.dot(X_bias, W1)
    Z1 = activation(A1)
    
    Z1_bias = np.insert(Z1, obj=0, values = 1, axis =1)
    
    # Forward pass, output layer
    A2 = np.dot(Z1_bias, W2) 
    
    # Activation function = identity
    Z2 = A2

    return Z2
    

Calculate the RMSE of the forward pass. 

In [7]:
# your code goes here
def get_RMSE_vec(y_real, y_pred):
    """
    Calculate the Root Mean Squared Error (RMSE) between true and predicted values, without loops.

    Parameters
    ----------
    y_real: numpy.ndarray
        True values.
    y_pred: numpy.ndarray
        Predicted values.

    Returns
    ----------
    root_mean_squared_error: float
        Root Mean Squared Error (RMSE) score.
    """
    squared_errors = (np.array(y_pred) - np.array(y_real)) ** 2 #use numpy arrays to calculate the squared error for every instance, the result is an array
    
    mean_squared_error = np.mean(squared_errors) #calculate the mean of all values in the squared_errors array, representing the MSE
    
    root_mean_squared_error = np.sqrt(mean_squared_error) #root of MSE
    
    return root_mean_squared_error

dim_input = X_train.shape[1]

dim_hidden = dim_input + 2

dim_output = 1 

y_train_pred = two_layer_perceptron(X_train, sigmoid_activation, dim_input, dim_hidden, dim_output)

rmse_train = get_RMSE_vec(y_train, y_train_pred)

print("RMSE on training set:", rmse_train)

RMSE on training set: 24.60052249643941


## Part II 

For this exercise you will write a function that calculates the forward pass of a 2-layer perceptron that predicts the exact digit from a hand-written image, using the MNIST dataset. 

In [8]:
from sklearn.datasets import load_digits

In [9]:
digits = load_digits()

In [10]:
X = digits.data
y = digits.target

In [11]:
X.shape

(1797, 64)

Again, you will split the data into training, validation and testing.

In [12]:
# your code goes here:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=13)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=13)

Write a function that calculates the forward pass for this multi-class classification problem.
* You will use the sigmoid activation function for the hidden layer.
* For the output layer you will have to write the softmax activation function (you can check the slides)
* __Note:__ you can easily re-use the function that you coded for Part I if you do a simple modification and also include an input argument for the activation of the output layer.

In [13]:
def softmax_activation(z):
    """
    Softmax activation function.

    Parameters
    ----------
    z : array-like
        The input to the activation function.

    Returns
    -------
    array-like
        Output of the softmax activation function.
    """
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True)) #scale the z's
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

In [14]:
# your code goes here: 
def two_layer_perceptron(X, hidden_activation, output_activation, dim_input, dim_hidden, dim_output):
    """
    Implements the forward pass of a two-layer fully connected perceptron.

    Parameters
    ----------
    X : array-like
        The input data.
    hidden_activation : function
        The activation function to be used for the hidden layer.
    output_activation : function
        The activation function to be used for the output layer.
    dim_input : int
        The dimensionality of the input layer.
    dim_hidden : int
        The dimensionality of the hidden layer.
    dim_output : int
        The dimensionality of the output layer.

    Returns
    -------
    y_pred : array-like
        The output of the computation of the forward pass of the network.
    """
    np.random.seed(13)
    W1 = np.random.randn(dim_input + 1, dim_hidden) * 0.01
    W2 = np.random.randn(dim_hidden + 1, dim_output) * 0.01
    
    X_bias = np.insert(X, obj=0, values = 1, axis =1)

    # Forward pass, 1º layer
    A1 = np.dot(X_bias, W1)
    Z1 = hidden_activation(A1)
    
    Z1_bias = np.insert(Z1, obj=0, values = 1, axis =1)
    
    # Forward pass, output layer
    A2 = np.dot(Z1_bias, W2)
    Z2 = output_activation(A2)

    return Z2

Lastly, calculate the error of this forward pass using the cross-entropy loss.

In [26]:
# your code goes here:

def cross_entropy(y_true, y_pred_probs):
    """
    Calculate the Cross-Entropy loss between true and predicted values.

    Parameters
    ----------
    y_true: numpy.ndarray
        True labels in a vector or one-hot encoded format.
    y_pred: numpy.ndarray
        Predicted probabilities for each class.

    Returns
    -------
    cross_entropy_loss: float
        Cross-Entropy loss score.
    """
    if len(y_true.shape) == 1:
        num_classes = len(np.unique(y_train))
        y_true = np.eye(num_classes)[y_true]

    # Clip y_pred to avoid log(0) issues
    epsilon = 1e-15
    y_pred_clipped = np.clip(y_pred_probs, epsilon, 1 - epsilon)

    # Calculate cross-entropy
    cross_entropy_loss = -np.sum(y_true * np.log(y_pred_clipped)) / len(y_true)

    return cross_entropy_loss

dim_input = X_train.shape[1]

dim_hidden = dim_input + 2

dim_output = len(np.unique(y))

y_train_pred = two_layer_perceptron(X_train, sigmoid_activation, softmax_activation, dim_input, dim_hidden, dim_output)

entropy = cross_entropy(y_train, y_train_pred)

print("Cross_validtion on training set:", entropy)

Cross_validtion on training set: 2.3009819431549627
