In [1]:
import numpy as np

# Toy regression set: four samples of y = 2x, stored as (n_samples, 1) column vectors.
X = np.array([1, 2, 3, 4]).reshape(-1, 1)
y = np.array([2, 4, 6, 8]).reshape(-1, 1)



In [2]:
# Seed the global NumPy RNG so the initial weights are identical on every run.
np.random.seed(0)

# Layer widths (input -> hidden -> output) and the gradient-descent step size.
input_size, hidden_size, output_size = 1, 2, 1
lr = 0.01

# Weight matrices are drawn from a standard normal; W1 must be drawn before W2
# so the seeded random stream yields the same values.
W1 = np.random.randn(input_size, hidden_size)
W2 = np.random.randn(hidden_size, output_size)

# Biases start at zero, kept as row vectors so they broadcast over the batch.
b1 = np.zeros(shape=(1, hidden_size))
b2 = np.zeros(shape=(1, output_size))


In [6]:
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

    Evaluated as exp(-log(1 + exp(-z))) through ``np.logaddexp`` so that
    large negative inputs never overflow ``np.exp`` (the direct formula
    emits a RuntimeWarning there). Results agree with the direct formula
    up to floating-point rounding.

    Parameters
    ----------
    z : scalar or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Sigmoid of ``z``, same shape as ``z``.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) without forming exp(-z) explicitly.
    return np.exp(-np.logaddexp(0, -z))

def sigmoid_derivative(a):
    """Return a * (1 - a), the sigmoid's slope expressed via its output.

    ``a`` is expected to already be a sigmoid activation (not the
    pre-activation); works element-wise on scalars and arrays.
    """
    complement = 1 - a
    return complement * a


In [4]:
# Forward pass, stage 1: affine map into the hidden layer, then sigmoid.
z1 = X.dot(W1) + b1
a1 = sigmoid(z1)

# Forward pass, stage 2: affine map to the output. No activation is applied,
# so the prediction is simply the pre-activation itself.
z2 = a1.dot(W2) + b2
y_pred = z2


In [5]:
# Mean squared error between the targets and the current predictions.
loss = ((y - y_pred) ** 2).mean()
print("Loss:", loss)


Loss: 9.951073387267853


# Backpropagation

In [7]:
# Gradient of the mean-squared-error loss with respect to the predictions:
# d/dy_pred mean((y - y_pred)^2) = -2 (y - y_pred) / n_samples.
dL_dy = (-2 * (y - y_pred)) / y.shape[0]


In [9]:
# Output-layer gradients. Since z2 = a1 . W2 + b2, the weight gradient is
# a1^T . dL_dy and the bias gradient is dL_dy summed over the batch axis.
dW2 = a1.T.dot(dL_dy)
db2 = dL_dy.sum(axis=0, keepdims=True)


In [10]:
# Push the error back through W2 to the hidden activations, then through the
# sigmoid by scaling with its derivative evaluated at a1.
da1 = dL_dy.dot(W2.T)
dz1 = sigmoid_derivative(a1) * da1


In [11]:
# Hidden-layer gradients. Since z1 = X . W1 + b1, the weight gradient is
# X^T . dz1 and the bias gradient is dz1 summed over the batch axis.
dW1 = X.T.dot(dz1)
db1 = dz1.sum(axis=0, keepdims=True)


In [12]:
# One gradient-descent step. The arrays are modified in place, so re-running
# this cell applies another step to the same parameters.
for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
    param -= lr * grad


In [13]:
# Full-batch gradient descent: 1000 rounds of forward -> loss -> backward -> update.
for epoch in range(1000):

    # Forward pass: sigmoid hidden layer followed by a linear output.
    z1 = X.dot(W1) + b1
    a1 = sigmoid(z1)
    z2 = a1.dot(W2) + b2
    y_pred = z2

    # Mean squared error, measured before this epoch's update is applied.
    loss = ((y - y_pred) ** 2).mean()

    # Backward pass, output layer first.
    dL_dy = (-2 * (y - y_pred)) / y.shape[0]
    dW2 = a1.T.dot(dL_dy)
    db2 = dL_dy.sum(axis=0, keepdims=True)

    # Propagate through W2 and the sigmoid to reach the hidden layer.
    da1 = dL_dy.dot(W2.T)
    dz1 = sigmoid_derivative(a1) * da1
    dW1 = X.T.dot(dz1)
    db1 = dz1.sum(axis=0, keepdims=True)

    # In-place parameter step along the negative gradient.
    for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
        param -= lr * grad

    # Report progress on every 100th epoch (0, 100, ..., 900).
    if not epoch % 100:
        print(f"Epoch {epoch}, Loss {loss:.4f}")


Epoch 0, Loss 9.0003
Epoch 100, Loss 2.7812
Epoch 200, Loss 1.6771
Epoch 300, Loss 0.8981
Epoch 400, Loss 0.4768
Epoch 500, Loss 0.2524
Epoch 600, Loss 0.1379
Epoch 700, Loss 0.0803
Epoch 800, Loss 0.0512
Epoch 900, Loss 0.0360
