In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [2]:
# loading in the data, then one-hot encoding the target variable (because it's categorical) and scaling the input variables (because they're continuous and on different scales, so we want to normalise them), finally splitting the data into training and testing sets

iris = datasets.load_iris()
X = iris.data
y = iris.target

encoder = OneHotEncoder(sparse_output=False)
y_onehot = encoder.fit_transform(y.reshape(-1, 1))

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_onehot, test_size=0.2, random_state=42
)

In [9]:
# we will need an activation function, I have decided to go with ReLU - the purpose of this function, in simple terms, is to basically round off all negative values to zero and keep the positive values as they are
def relu(x):
    return np.maximum(0, x)

# we will also need a softmax function for the output layer to get the probabilities of each class (3 in this case because of the iris dataset) - we use keepdims=True to keep the dimensions of the output the same as the input so that we can use it in the backpropagation step (which I will define later)
def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

# now we will define the forward propagation function which will take the input and return the output of the network, you can see that we are using the weights and biases that we initialised earlier and then each step is shown as per the architecture of the network and the defined activation functions for each layer
def forward_propagation(X):
    Z1 = np.dot(X, W1) + b1
    A1 = relu(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = softmax(Z2)
    return A1, A2

# now we will define the loss function, which is the cross-entropy loss function in this case, which is used for classification problems
def compute_loss(y_true, y_pred):
    m = y_true.shape[0]
    loss = -np.sum(y_true * np.log(y_pred + 1e-8)) / m
    return loss

In [23]:
# we'll now define the architecture of MLP
input_size = X_train.shape[1]
hidden_size = 10
output_size = y_train.shape[1]

# also initialising weights and biases
# we use np.random.randn() to initialise the weights and np.zeros() to initialise the biases because it's a good practice to initialise the weights randomly and the biases to zero
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

In [24]:
# with the functions defined above, we can do an example forward pass to see the output of the network
A1, A2 = forward_propagation(X_train)
print("Output of forward pass:", A2)

# we can also calculate the loss of the network using the loss function defined above, but this will be a random value because the weights and biases are initialised randomly, it is setup to show that the loss function is working correctly and will be used later to optimise the network
loss = compute_loss(y_train, A2)
print("Cross-entropy loss:", loss)

Output of forward pass: [[1.00000000e+00 1.62049238e-10 1.08503351e-11]
 [9.70467781e-01 2.95322187e-02 1.01675898e-10]
 [8.40736681e-03 9.91174545e-01 4.18088133e-04]
 [9.99999996e-01 3.66163943e-09 6.78230390e-10]
 [1.00000000e+00 1.02661180e-10 7.63385131e-11]
 [9.18152382e-01 3.67866021e-02 4.50610158e-02]
 [5.86613847e-03 9.93484283e-01 6.49578504e-04]
 [9.99999984e-01 1.53033705e-08 7.21251001e-10]
 [9.99999994e-01 5.47681847e-09 2.34268368e-10]
 [9.99999163e-01 8.36703773e-07 4.25209602e-11]
 [7.12766285e-01 1.01394078e-01 1.85839636e-01]
 [3.55883745e-03 9.95613295e-01 8.27867556e-04]
 [5.51647493e-03 9.94180368e-01 3.03156982e-04]
 [9.99997975e-01 2.02452407e-06 1.75932654e-10]
 [9.99999895e-01 1.04781170e-07 6.63420967e-10]
 [9.85608755e-01 8.34115919e-04 1.35571290e-02]
 [2.93620260e-01 5.82515661e-01 1.23864079e-01]
 [6.19509787e-03 9.92915852e-01 8.89049720e-04]
 [4.26482318e-02 9.53854756e-01 3.49701198e-03]
 [1.25823886e-07 9.99999874e-01 9.45632506e-11]
 [4.73398487e-01