In [48]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Lab 4 - Multi-layer Perceptron Forward Pass

## Part I
For this exercise you will implement a simple 2-layer perceptron (forward pass)

For the first part you'll write a function that computes the forward pass of a 2-layer perecptron that predicts the prices of houses, using the usual Boston housing dataset.

In [49]:
boston = pd.read_csv('BostonHousing.csv')
boston

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


As usual, consider the MEDV as your target variable. 
* Split the data into training, validation and testing (70,15,15)% (you will need this for the next lab as we will build from this lab)

In [50]:
# Separate features (X) and target variable (y)
X = boston.drop('medv', axis=1).values
y = boston['medv'].values

# Split the data into training, validation, and testing sets (70%, 15%, 15%)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


Now you will write the function that computes the forward pass. 
* I provide here a structure that you can follow for your function, but again, feel free to modify it as you see fit.
* Use the sigmoid function as the activation of the hidden layer.
* Don't forget about the biases!
* *It is up to you to think what should be the activation for the output layer.*

In [51]:
def sigmoid_activation(z):
    return 1 / (1 + np.exp(-z))

In [52]:
def two_layer_perceptron(X, activation, dim_input, dim_hidden, dim_output):
    """
    Implements the forward pass of a two-layer fully connected perceptron.
    
    Parameters
    ----------
    X : array-like
        The input data
    activation : function
        The activation function to be used for the hidden layer
    dim_input : int
        The dimensionality of the input layer
    dim_hidden : int
        The dimensionality of the hidden layer
    dim_output : int
        The dimensionality of the output layer
    
    Returns
    -------
    y_pred : array-like
        The output of the computation of the forward pass of the network
    """
    # Initialize weights and biases
    np.random.seed(42)  # For reproducibility
    W1 = np.random.randn(dim_input, dim_hidden)
    b1 = np.random.randn(dim_hidden)
    W2 = np.random.randn(dim_hidden, dim_output)
    b2 = np.random.randn(dim_output)

    # Compute the activation of the hidden layer
    z1 = np.dot(X, W1) + b1
    a1 = activation(z1)

    # Compute the activation of the output layer
    z2 = np.dot(a1, W2) + b2
    # Assuming linear activation for the output layer (for regression)
    # If you want to use a different activation function, you can modify it here
    a2 = z2

    # Return the predicted output
    return a2



Calculate the RMSE of the forward pass. 

In [53]:
def calculate_rmse(y_pred, y_true):
    """
    Calculate the Root Mean Squared Error (RMSE) between predicted and true values.

    Parameters
    ----------
    y_pred : array-like
        Predicted values
    y_true : array-like
        True values

    Returns
    -------
    rmse : float
        The RMSE value
    """
    rmse = np.sqrt(np.mean((y_pred - y_true)**2))
    return rmse

# Assuming you have the two_layer_perceptron function implemented and data prepared
# Call the two_layer_perceptron function to get the predicted output
y_pred = two_layer_perceptron(X_val, sigmoid_activation, dim_input=X_train.shape[1], dim_hidden=10, dim_output=1)

# Calculate the RMSE using the predicted output and true target values
rmse = calculate_rmse(y_pred, y_val)
print("RMSE:", rmse)

RMSE: 22.088469252143494


  return 1 / (1 + np.exp(-z))


## Part II 

For this exercise you will write a function that calculates the forward pass of a 2-layer perceptron that predicts the exact digit from a hand-written image, using the MNIST dataset. 

In [54]:
from sklearn.datasets import load_digits

In [55]:
digits = load_digits()

In [56]:
X = digits.data
y = digits.target

In [57]:
X.shape

(1797, 64)

Again, you will split the data into training, validation and testing.

In [58]:
# Load the dataset
digits = load_digits()
X = digits.data
y = digits.target

# Split the data into training, validation, and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print("Training set shape:", X_train.shape, y_train.shape)
print("Validation set shape:", X_val.shape, y_val.shape)
print("Testing set shape:", X_test.shape, y_test.shape)

Training set shape: (1437, 64) (1437,)
Validation set shape: (180, 64) (180,)
Testing set shape: (180, 64) (180,)


Write a function that calculates the forward pass for this multi-class classification problem.
* You will use the sigmoid activation function for the hidden layer.
* For the output layer you will have to write the softmax activation function (you can check the slides)
* __Note:__ you can easily re-use the function that you coded for Part I if you do a simple modification and also include an input argument for the activation of the output layer.

In [59]:
def softmax(x):
    """Softmax activation function"""
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True)) # Avoiding numerical instability
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def forward_pass(X, W1, b1, W2, b2, output_activation='softmax'):
    """
    Perform the forward pass of a 2-layer perceptron
    
    Parameters:
        X (numpy.ndarray): Input data of shape (num_samples, num_features)
        W1 (numpy.ndarray): Weights of the first layer of shape (num_features, hidden_units)
        b1 (numpy.ndarray): Bias of the first layer of shape (hidden_units,)
        W2 (numpy.ndarray): Weights of the second layer of shape (hidden_units, num_classes)
        b2 (numpy.ndarray): Bias of the second layer of shape (num_classes,)
        output_activation (str): Activation function for the output layer. Options: 'sigmoid', 'softmax'
    
    Returns:
        numpy.ndarray: Output predictions of shape (num_samples, num_classes)
    """
    # Forward pass through the first layer
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)
    
    # Forward pass through the second layer
    z2 = np.dot(a1, W2) + b2
    
    # Apply activation function for the output layer
    if output_activation == 'sigmoid':
        output = sigmoid(z2)
    elif output_activation == 'softmax':
        output = softmax(z2)
    else:
        raise ValueError("Invalid output_activation. Choose 'sigmoid' or 'softmax'.")
    
    return output


Lastly, calculate the error of this forward pass using the cross-entropy loss.

In [60]:
def cross_entropy_loss(y_pred, y_true):
    """
    Calculate the cross-entropy loss
    
    Parameters:
        y_pred (numpy.ndarray): Predicted probabilities of shape (num_samples, num_classes)
        y_true (numpy.ndarray): True labels of shape (num_samples,)
    
    Returns:
        float: Cross-entropy loss
    """
    # Clip probabilities to avoid log(0) = -inf
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    
    # Calculate cross-entropy loss
    loss = -np.mean(np.sum(y_true * np.log(y_pred), axis=1))
    
    return loss


# Assuming you have already performed the forward pass and obtained predictions
# y_pred = forward_pass(X_val, W1, b1, W2, b2, output_activation='softmax')

# Calculate cross-entropy loss on the validation set
loss = cross_entropy_loss(y_pred, y_val)
print("Cross-entropy loss on validation set:", loss)


Cross-entropy loss on validation set: 2309.553442617581
