In this notebook, we will implement a simple neural network from scratch using only Python and NumPy.
This will help you understand the fundamental concepts behind neural networks, including:
- Forward propagation
- Backward propagation
- Gradient descent

First of all, let us import the essential modules

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits

Let us start with a neural network used for binary classification.
For this, we create a synthetic dataset with two classes. We use matplotlib to plot the dataset in 2D

In [None]:
# Generate a simple dataset for binary classification
np.random.seed(0)  # Fix NumPy's global seed so the same points are drawn on every run
X = np.random.randn(400, 2)  # 400 data points with 2 features (standard normal draws)
y = (X[:, 0]**2 + X[:, 1]**2 < 1).astype(int)  # Circular decision boundary: 1 inside the unit circle, 0 outside
y = y.reshape(-1, 1)  # Column vector: one label per row of X

# Plot the dataset, colored by class label
plt.scatter(X[:, 0], X[:, 1], c=y.flatten(), cmap="viridis")
plt.title("Synthetic Dataset")
plt.show()

In [None]:
# Split the dataset: 80% for training, 20% for testing
# (random_state is fixed so the same split is produced on every run)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print the shape of the resulting datasets
print(f"Training set: X_train shape = {X_train.shape}, y_train shape = {y_train.shape}")
print(f"Testing set: X_test shape = {X_test.shape}, y_test shape = {y_test.shape}")

### Question 1
How many inputs does our neural network require?
How many outputs does our neural network provide?

In [None]:
input_dim = 2    # Number of features (the two columns of X)
output_dim = 1   # Number of output units (binary classification)

We build a neural network with a given number of hidden layers. For the moment, we will use a single hidden layer with four neurons. Later, we will study the impact of this number on the training.
### Question 2
Fill in the cell below with the missing information, using a list of integers to provide the architecture of the neural network

In [None]:
# Generalized architecture setup: one entry per layer, from input to output
layer_dims = [input_dim, 4, output_dim]  # Input layer (2), one hidden layer (4 units), and output layer (1)

### Question 3
Before training the model, we need to initialize the weights and biases randomly. Fill in the cell below with the missing information to initialize the weights and biases

In [None]:
# Initialize weights and biases
def initialize_parameters(layer_dims, seed=42, scale=0.01):
    """
    Initialize parameters for a neural network with multiple layers.

    Weights are drawn from a standard normal distribution and multiplied by
    `scale`; biases start at zero.

    Arguments:
    - layer_dims: List containing the dimensions of each layer, input layer first.
    - seed: Seed of the private random generator (default 42, which gives the
      same values on every call). Pass None for a fresh random initialization.
    - scale: Factor applied to the normal draws (default 0.01).

    Returns:
    - parameters: Dictionary containing initialized weights and biases for all layers.
      For l = 1 .. len(layer_dims) - 1 it holds "W{l}" with shape
      (layer_dims[l-1], layer_dims[l]) and "b{l}" with shape (1, layer_dims[l]).
    """
    # A private RandomState produces the same draws as np.random.seed(seed)
    # followed by np.random.randn, but leaves NumPy's global generator alone,
    # so calling this function no longer resets randomness used elsewhere.
    rng = np.random.RandomState(seed)
    parameters = {}
    for l, (fan_in, fan_out) in enumerate(zip(layer_dims[:-1], layer_dims[1:]), start=1):
        parameters[f"W{l}"] = rng.randn(fan_in, fan_out) * scale
        parameters[f"b{l}"] = np.zeros((1, fan_out))
    return parameters

# Create the initial weights and biases for the architecture described by layer_dims
parameters = initialize_parameters(layer_dims)


### Question 4
Write a Python function for a sigmoid activation function and its derivative

In [None]:
# Sigmoid activation function
def sigmoid(z):
    """
    Compute the logistic sigmoid 1 / (1 + exp(-z)) element-wise.

    The exponential is only ever taken of a non-positive number, so very
    negative inputs no longer overflow (the direct formula evaluates
    exp(-z), which overflows to inf for large negative z).

    Arguments:
    - z: Scalar or array-like of pre-activations.

    Returns:
    - Sigmoid of z, with the same shape as z (a scalar for scalar input).
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # exp of a non-positive number: always in [0, 1]
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same function
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()]  # Unwrap 0-d results so scalar input yields a scalar

# Derivative of the sigmoid function
def sigmoid_derivative(z):
    """
    Compute the derivative of the sigmoid at z, element-wise.

    Uses the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)).

    Arguments:
    - z: Pre-activation values (anything accepted by sigmoid).

    Returns:
    - Derivative of the sigmoid evaluated at z.
    """
    activation = sigmoid(z)
    complement = 1 - activation
    return activation * complement

### Question 5
Write a Python function for a ReLU activation function and its derivative

In [None]:
# ReLU activation function
def relu(z):
    """
    Apply the rectified linear unit element-wise: max(0, z).

    Arguments:
    - z: Scalar or array of pre-activations.

    Returns:
    - z with every negative entry replaced by 0.
    """
    floor = 0  # Lower bound applied to every entry
    return np.maximum(floor, z)

# Derivative of the ReLU function
def relu_derivative(z):
    """
    Compute the derivative of ReLU element-wise: 1 where z > 0, else 0.

    The derivative at exactly z == 0 is taken to be 0.

    Arguments:
    - z: Scalar, list or array of values.

    Returns:
    - Float array (or float scalar for scalar input) of 0.0 / 1.0 values
      with the same shape as z.
    """
    # np.asarray lets plain Python numbers and lists through: a bare `z > 0`
    # on a Python number gives a bool without .astype, and fails for a list.
    return (np.asarray(z) > 0).astype(float)

### Question 6
Now we need to write a function to evaluate the neural network. This is the so-called forward pass. Fill in the cell below with the missing information so that each neuron produces as output W.x+b, with x the output of the previous layer.

In [None]:
def forward_propagation(X, parameters):
    """
    Run the forward pass through every layer of the network.

    Each layer computes A_prev . W + b; hidden layers then apply ReLU and
    the last layer applies a sigmoid.

    Arguments:
    - X: Input data, one sample per row.
    - parameters: Dictionary containing weights and biases for all layers
      ("W1", "b1", ..., "WL", "bL").

    Returns:
    - activations: Dictionary of activations for all layers, keyed "A0"
      (the input itself) up to "AL" (the network output).
    """
    n_layers = len(parameters) // 2  # One W and one b per layer
    activations = {'A0': X}
    previous = X

    for l in range(1, n_layers + 1):
        pre_activation = np.dot(previous, parameters[f"W{l}"]) + parameters[f"b{l}"]
        # Sigmoid on the output layer, ReLU on every hidden layer
        activate = sigmoid if l == n_layers else relu
        previous = activate(pre_activation)
        activations[f"A{l}"] = previous

    return activations

### Question 7
The most important part of training a model is the backward pass, by which the parameters are updated using the gradient and the learning rate. Fill in the cell below with the missing information to update the parameters following a gradient descent algorithm

In [None]:
def backward_propagation(activations, y, parameters, learning_rate=0.01):
    """
    Perform backward propagation for an arbitrary number of layers and apply
    one gradient descent step.

    The output-layer error is taken as A_L - y, which is the gradient of the
    binary cross-entropy loss with respect to the pre-activation of a sigmoid
    output unit. Hidden layers are assumed to use ReLU.

    Arguments:
    - activations: Dictionary of activations for all layers ("A0" .. "AL").
    - y: True labels. Any shape that can be reshaped to the shape of the
      network output is accepted (e.g. (m,) or (m, 1)).
    - parameters: Dictionary containing weights and biases.
    - learning_rate: Learning rate for gradient descent.

    Returns:
    - Updated parameters. The arrays in `parameters` are modified in place
      and the same dictionary is returned.
    """
    L = len(parameters) // 2  # Number of layers
    output = activations[f"A{L}"]
    # Align the labels with the network output: a 1-D y of shape (m,) would
    # otherwise broadcast against the (m, 1) output into an (m, m) matrix.
    y = np.asarray(y).reshape(output.shape)
    m = output.shape[0]  # Number of training examples
    gradients = {}

    # Output layer gradients
    dZ = output - y
    gradients[f"dW{L}"] = np.dot(activations[f"A{L-1}"].T, dZ) / m
    gradients[f"db{L}"] = np.sum(dZ, axis=0, keepdims=True) / m

    # Backpropagate through hidden layers
    for l in range(L-1, 0, -1):
        dA = np.dot(dZ, parameters[f"W{l+1}"].T)
        # A ReLU output is positive exactly where its input is positive, so
        # the stored activation can be used to evaluate the derivative.
        dZ = dA * relu_derivative(activations[f"A{l}"])
        gradients[f"dW{l}"] = np.dot(activations[f"A{l-1}"].T, dZ) / m
        gradients[f"db{l}"] = np.sum(dZ, axis=0, keepdims=True) / m

    # Update parameters only after all gradients are known, so every gradient
    # is computed from the weights used in the forward pass
    for l in range(1, L + 1):
        parameters[f"W{l}"] -= learning_rate * gradients[f"dW{l}"]
        parameters[f"b{l}"] -= learning_rate * gradients[f"db{l}"]

    return parameters

### Question 8
Once the forward and backward passes are correctly implemented, the idea is to train the model iteratively. Each iteration is called an epoch. Fill in the cell below with the missing information. Choose in particular a number of epochs and a value for the learning rate. Try different numbers of epochs and learning rates and describe what you observe.

In [None]:
# Example: Training the generalized network
# NOTE: `parameters` is not re-initialized in this cell, so running the cell
# again continues training from the current weights instead of starting over.
epochs = 30000
learning_rate = 0.1
train_losses = []
val_losses = []
output_key = f"A{len(layer_dims)-1}"  # Key of the output layer activation
eps = 1e-12  # Keeps predictions away from exactly 0 and 1 so np.log stays finite

for epoch in range(epochs):
    # Forward propagation on training data
    activations = forward_propagation(X_train, parameters)
    # Output layer activation on training data, clipped: a saturated sigmoid
    # returns exactly 0 or 1, and log(0) would turn the loss into inf/nan
    A_train_final = np.clip(activations[output_key], eps, 1 - eps)

    # Compute training loss (binary cross-entropy)
    train_loss = -np.mean(y_train * np.log(A_train_final) + (1 - y_train) * np.log(1 - A_train_final))
    train_losses.append(train_loss)

    # Forward propagation on validation data (the held-out test split)
    val_activations = forward_propagation(X_test, parameters)
    A_val_final = np.clip(val_activations[output_key], eps, 1 - eps)

    # Compute validation loss
    val_loss = -np.mean(y_test * np.log(A_val_final) + (1 - y_test) * np.log(1 - A_val_final))
    val_losses.append(val_loss)

    # Backward propagation on training data (uses the unclipped activations)
    parameters = backward_propagation(activations, y_train, parameters, learning_rate)

    # Print training and validation loss every 100 epochs
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Training Loss: {train_loss}, Validation Loss: {val_loss}")

### Question 9
The best way to assess the performance of the training and of the model is to plot the training and validation losses as a function of the number of epochs. Write the Python lines in the cell below to plot these two curves for different values of epochs and learning rate.

In [None]:
# Plot training and validation loss
# Both lists hold one value per epoch, so the x-axis index is the epoch number
plt.plot(train_losses, label="Training Loss")
plt.plot(val_losses, label="Validation Loss")
plt.title("Training and Validation Loss Over Epochs")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

### Question 10
Perform several training runs, increasing the complexity of the model. You can do that by increasing the number of neurons in each hidden layer and/or the number of hidden layers. Plot the training/validation losses as a function of the number of epochs. What do you observe?

In [None]:
# Larger model: two hidden layers of 10 units each, trained from a fresh initialization
layer_dims = [input_dim, 10, 10, output_dim]
parameters = initialize_parameters(layer_dims)
epochs = 30000
learning_rate = 0.1
train_losses = []
val_losses = []
output_key = f"A{len(layer_dims)-1}"  # Key of the output layer activation
eps = 1e-12  # Keeps predictions away from exactly 0 and 1 so np.log stays finite

for epoch in range(epochs):
    # Forward propagation on training data
    activations = forward_propagation(X_train, parameters)
    # Output layer activation on training data, clipped: a saturated sigmoid
    # returns exactly 0 or 1, and log(0) would turn the loss into inf/nan
    A_train_final = np.clip(activations[output_key], eps, 1 - eps)

    # Compute training loss (binary cross-entropy)
    train_loss = -np.mean(y_train * np.log(A_train_final) + (1 - y_train) * np.log(1 - A_train_final))
    train_losses.append(train_loss)

    # Forward propagation on validation data (the held-out test split)
    val_activations = forward_propagation(X_test, parameters)
    A_val_final = np.clip(val_activations[output_key], eps, 1 - eps)

    # Compute validation loss
    val_loss = -np.mean(y_test * np.log(A_val_final) + (1 - y_test) * np.log(1 - A_val_final))
    val_losses.append(val_loss)

    # Backward propagation on training data (uses the unclipped activations)
    parameters = backward_propagation(activations, y_train, parameters, learning_rate)

    # Print training and validation loss every 100 epochs
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Training Loss: {train_loss}, Validation Loss: {val_loss}")

In [None]:
# Plot training and validation loss
# Both lists hold one value per epoch, so the x-axis index is the epoch number
plt.plot(train_losses, label="Training Loss")
plt.plot(val_losses, label="Validation Loss")
plt.title("Training and Validation Loss Over Epochs")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

### Question 11
Now we will use different trained models and will predict the decision boundary. For this, we will first write a function that plots the decision boundary for a given model. Fill in the cell below with the missing information.

In [None]:
def plot_decision_boundary(forward_propagation, X, y, parameters, layer_dims, step=0.01):
    """
    Plot the decision boundary for a given model.

    The model is evaluated on a regular grid covering the data (with a margin
    of 1 on every side); grid points are colored by predicted class and the
    data points are drawn on top.

    Arguments:
    - forward_propagation: Function to perform forward propagation.
    - X: Input data, used to define the feature space for plotting.
      Only the first two columns are used.
    - y: True labels, used for scatter plot coloring.
    - parameters: Dictionary of weights and biases for the neural network.
    - layer_dims: List defining the number of units in each layer; its length
      determines the key of the output activation.
    - step: Spacing of the evaluation grid (default 0.01). Larger values make
      a coarser but much cheaper plot, since the number of grid points grows
      as 1 / step**2.

    Returns:
    - None (displays the decision boundary plot).

    Raises:
    - ValueError: If step is not strictly positive.
    """
    if step <= 0:
        raise ValueError("step must be strictly positive")

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    # Generate a grid of points to evaluate the model
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, step),
        np.arange(y_min, y_max, step)
    )
    grid = np.c_[xx.ravel(), yy.ravel()]  # Flatten and combine the meshgrid into input data shape

    # Predict for each point on the grid
    activations = forward_propagation(grid, parameters)
    predictions = activations[f"A{len(layer_dims) - 1}"]  # Final output layer activations
    predictions = (predictions > 0.5).astype(int)  # Convert probabilities to binary labels

    # Plot the decision boundary
    plt.contourf(xx, yy, predictions.reshape(xx.shape), alpha=0.7, cmap="viridis")
    plt.scatter(X[:, 0], X[:, 1], c=y.flatten(), edgecolors="k", cmap="viridis")
    plt.title("Decision Boundary")
    plt.show()

# Example call: draw the boundary of the current `parameters` over the full dataset (X, y)
plot_decision_boundary(forward_propagation, X, y, parameters, layer_dims)

The example we have considered is very simple, and one would like to apply this approach to more complex boundaries. Let us therefore consider the example of the two-moons dataset.

In [None]:
# Generate a dataset with two interleaving moon shapes
# (1000 samples with noise 0.15; random_state fixed for reproducibility)
# NOTE: this overwrites the X and y of the synthetic circular dataset
X, y = make_moons(n_samples=1000, noise=0.15, random_state=0)
y = y.reshape(-1, 1)  # Reshape to match the required format (column vector of labels)

# Visualize the dataset
plt.scatter(X[:, 0], X[:, 1], c=y.flatten(), cmap="viridis")
plt.title("Dataset: Moons with Noise")
plt.show()

### Question 12
Perform the previous steps for this dataset. Compare the performance of the same model trained and evaluated with the synthetic dataset that we generated at the beginning and with the moon dataset.

In [None]:
# Split the dataset: 80% for training, 20% for testing
# (random_state is fixed so the same split is produced on every run)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print the shape of the resulting datasets
print(f"Training set: X_train shape = {X_train.shape}, y_train shape = {y_train.shape}")
print(f"Testing set: X_test shape = {X_test.shape}, y_test shape = {y_test.shape}")

In [None]:
# Model for the moons dataset: hidden layers of 20 and 10 units, trained from a fresh initialization
layer_dims = [input_dim, 20, 10, output_dim]
parameters = initialize_parameters(layer_dims)
epochs = 30000
learning_rate = 0.1
train_losses = []
val_losses = []
output_key = f"A{len(layer_dims)-1}"  # Key of the output layer activation
eps = 1e-12  # Keeps predictions away from exactly 0 and 1 so np.log stays finite

for epoch in range(epochs):
    # Forward propagation on training data
    activations = forward_propagation(X_train, parameters)
    # Output layer activation on training data, clipped: a saturated sigmoid
    # returns exactly 0 or 1, and log(0) would turn the loss into inf/nan
    A_train_final = np.clip(activations[output_key], eps, 1 - eps)

    # Compute training loss (binary cross-entropy)
    train_loss = -np.mean(y_train * np.log(A_train_final) + (1 - y_train) * np.log(1 - A_train_final))
    train_losses.append(train_loss)

    # Forward propagation on validation data (the held-out test split)
    val_activations = forward_propagation(X_test, parameters)
    A_val_final = np.clip(val_activations[output_key], eps, 1 - eps)

    # Compute validation loss
    val_loss = -np.mean(y_test * np.log(A_val_final) + (1 - y_test) * np.log(1 - A_val_final))
    val_losses.append(val_loss)

    # Backward propagation on training data (uses the unclipped activations)
    parameters = backward_propagation(activations, y_train, parameters, learning_rate)

    # Print training and validation loss every 100 epochs
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Training Loss: {train_loss}, Validation Loss: {val_loss}")

In [None]:
# Plot training and validation loss
# Both lists hold one value per epoch, so the x-axis index is the epoch number
plt.plot(train_losses, label="Training Loss")
plt.plot(val_losses, label="Validation Loss")
plt.title("Training and Validation Loss Over Epochs")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [None]:
# Decision boundary of the model trained on the moons dataset, drawn over all samples (X, y)
plot_decision_boundary(forward_propagation, X, y, parameters, layer_dims)