In [2]:
import numpy as np
np.set_printoptions(precision=4, suppress=True)

# Sigmoid and its derivative
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def d_sigmoid(a):
    return a * (1.0 - a)

# Initialize weights, biases, inputs, outputs
def initialize():
    X = np.array([[0.05, 0.10]])           # (1,2)
    Y = np.array([[0.01, 0.99]])           # (1,2)
    W1 = np.array([[0.15, 0.20],
                   [0.25, 0.30]])          # (2,2)
    b1 = np.array([[0.35, 0.35]])          # (1,2)
    W2 = np.array([[0.40, 0.45],
                   [0.50, 0.55]])          # (2,2)
    b2 = np.array([[0.60, 0.60]])          # (1,2)
    return X, Y, W1, b1, W2, b2

def forward(X, Y, W1, b1, W2, b2):
    Z1 = X @ W1 + b1
    A1 = sigmoid(Z1)
    Z2 = A1 @ W2 + b2
    A2 = sigmoid(Z2)
    E = 0.5 * np.sum((A2 - Y) ** 2)   # Mean squared error * 0.5
    return A1, A2, E

def backward(X, Y, W1, b1, W2, b2, A1, A2, lr=0.5):
    # Output layer
    dE_dA2 = (A2 - Y)
    dA2_dZ2 = d_sigmoid(A2)
    dE_dZ2 = dE_dA2 * dA2_dZ2

    dE_dW2 = A1.T @ dE_dZ2
    dE_db2 = dE_dZ2

    # Hidden layer
    dE_dA1 = dE_dZ2 @ W2.T
    dA1_dZ1 = d_sigmoid(A1)
    dE_dZ1 = dE_dA1 * dA1_dZ1

    dE_dW1 = X.T @ dE_dZ1
    dE_db1 = dE_dZ1

    # Update weights & biases
    W2 -= lr * dE_dW2
    b2 -= lr * dE_db2
    W1 -= lr * dE_dW1
    b1 -= lr * dE_db1

    return W1, b1, W2, b2

def train(epochs=1000, lr=0.5, print_every=100):
    X, Y, W1, b1, W2, b2 = initialize()
    for i in range(1, epochs + 1):
        A1, A2, E = forward(X, Y, W1, b1, W2, b2)
        W1, b1, W2, b2 = backward(X, Y, W1, b1, W2, b2, A1, A2, lr)
        if i % print_every == 0 or i == 1:
            print(f"Epoch {i:>4} | Loss: {E:.6f} | Output: {A2}")

    print("\nFinal:")
    print("W1=\n", W1)
    print("b1=\n", b1)
    print("W2=\n", W2)
    print("b2=\n", b2)
    print("Final Output=", A2, " Desired=", Y, " Loss=", E)

# Run training
train(epochs=1000, lr=0.5, print_every=100)


Epoch    1 | Loss: 0.303658 | Output: [[0.7569 0.7677]]
Epoch  100 | Loss: 0.006229 | Output: [[0.0958 0.9186]]
Epoch  200 | Loss: 0.002514 | Output: [[0.0626 0.9425]]
Epoch  300 | Loss: 0.001470 | Output: [[0.0497 0.9531]]
Epoch  400 | Loss: 0.000998 | Output: [[0.0425 0.9594]]
Epoch  500 | Loss: 0.000736 | Output: [[0.0378 0.9636]]
Epoch  600 | Loss: 0.000571 | Output: [[0.0344 0.9667]]
Epoch  700 | Loss: 0.000459 | Output: [[0.0319 0.969 ]]
Epoch  800 | Loss: 0.000379 | Output: [[0.0298 0.9709]]
Epoch  900 | Loss: 0.000319 | Output: [[0.0282 0.9725]]
Epoch 1000 | Loss: 0.000273 | Output: [[0.0268 0.9738]]

Final:
W1=
 [[0.1725 0.2209]
 [0.295  0.3419]]
b1=
 [[0.8    0.7686]]
W2=
 [[-1.2021  1.2326]
 [-1.0943  1.3276]]
b2=
 [[-1.995   1.8341]]
Final Output= [[0.0268 0.9738]]  Desired= [[0.01 0.99]]  Loss= 0.00027304492283882663
