In [3]:
import numpy as np

def relu(x):
    """Apply the rectified linear unit element-wise.

    Args:
        x: Array-like (or scalar) of pre-activation values.

    Returns:
        The element-wise maximum of 0 and ``x``.
    """
    floor = 0
    return np.maximum(floor, x)

def relu_derivative(x):
    """Return the element-wise derivative of ReLU evaluated at ``x``.

    Args:
        x: Array-like of pre-activation values.

    Returns:
        An integer array holding 1 where ``x`` is strictly positive and 0
        everywhere else (including at exactly zero).
    """
    is_active = x > 0
    return np.where(is_active, 1, 0)

def linear(x):
    """Identity activation: hand the input back untouched.

    Args:
        x: Any value (typically the output layer's pre-activation array).

    Returns:
        The very same object that was passed in; no copy is made.
    """
    passthrough = x
    return passthrough

def linear_derivative(x):
    """Return the derivative of the identity activation.

    Args:
        x: Array-like whose shape and dtype the result should mirror.

    Returns:
        An array of ones with the same shape and dtype as ``x``.
    """
    return np.full_like(x, 1)

def initialize_parameters(input_size, hidden_size, output_size, seed=42, scale=0.01):
    """Create the initial weights and biases for a one-hidden-layer network.

    Weights are drawn from a standard normal distribution and multiplied by
    ``scale``; biases start at zero.

    The draws come from a private ``RandomState`` rather than from the global
    NumPy RNG, so calling this function no longer resets random state that
    the caller may rely on. With the default ``seed`` the generated weights
    are the same values that ``np.random.seed(42)`` followed by
    ``np.random.randn`` produces.

    Args:
        input_size: Number of input features (rows of ``W1``).
        hidden_size: Number of hidden units (columns of ``W1``, rows of ``W2``).
        output_size: Number of outputs (columns of ``W2``).
        seed: Seed for the private generator. ``None`` lets NumPy pick a
            fresh, non-reproducible seed.
        scale: Factor applied to the standard-normal weight draws.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(input_size, hidden_size)``,
        ``(1, hidden_size)``, ``(hidden_size, output_size)`` and
        ``(1, output_size)``.
    """
    rng = np.random.RandomState(seed)
    # Draw order matters for reproducibility: W1 first, then W2.
    W1 = rng.randn(input_size, hidden_size) * scale
    b1 = np.zeros((1, hidden_size))
    W2 = rng.randn(hidden_size, output_size) * scale
    b2 = np.zeros((1, output_size))
    return W1, b1, W2, b2

def forward_propagation(X, W1, b1, W2, b2):
    """Run the network forward: affine -> ReLU -> affine -> identity.

    Args:
        X: Input matrix, one sample per row.
        W1: Hidden-layer weights.
        b1: Hidden-layer bias.
        W2: Output-layer weights.
        b2: Output-layer bias.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden activation,
        output pre-activation and final output, in that order.
    """
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)
    output_pre = np.dot(hidden_act, W2) + b2
    return hidden_pre, hidden_act, output_pre, linear(output_pre)

def compute_loss(Y, A2):
    """Compute the squared-error loss averaged over samples.

    Args:
        Y: Target array; its first dimension is the number of samples.
        A2: Prediction array, broadcast-compatible with ``Y``.

    Returns:
        The sum of all squared residuals divided by the number of rows of
        ``Y``.
    """
    residual = A2 - Y
    sample_count = Y.shape[0]
    return (1/sample_count) * np.sum(np.square(residual))

def backward_propagation(X, Y, Z1, A1, Z2, A2, W1, b1, W2, b2, learning_rate):
    """Backpropagate the error and take one gradient-descent step.

    The output-layer error is taken as ``A2 - Y``, i.e. the gradient of
    *half* the per-sample squared error. Relative to ``compute_loss`` (which
    has no 1/2 factor) the constant 2 is therefore folded into
    ``learning_rate``.

    The parameter arrays passed in are NOT modified; fresh arrays are
    returned. The previous in-place ``-=`` updates silently changed the
    caller's arrays and raised a casting error for integer-dtype parameters.

    Args:
        X: Input matrix, one sample per row.
        Y: Target matrix.
        Z1: Hidden-layer pre-activation from the forward pass.
        A1: Hidden-layer activation from the forward pass.
        Z2: Output-layer pre-activation. Unused; kept so the signature
            mirrors the forward-pass outputs.
        A2: Network output from the forward pass.
        W1: Hidden-layer weights.
        b1: Hidden-layer bias.
        W2: Output-layer weights.
        b2: Output-layer bias.
        learning_rate: Step size applied to every gradient.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` holding the updated parameters.
    """
    m = X.shape[0]

    # Output layer (identity activation, so dZ2 equals the raw error).
    dZ2 = A2 - Y
    dW2 = (1/m) * np.dot(A1.T, dZ2)
    db2 = (1/m) * np.sum(dZ2, axis=0, keepdims=True)

    # Hidden layer: push the error back through W2, then gate it by the
    # ReLU derivative so inactive units receive no gradient.
    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * relu_derivative(Z1)
    dW1 = (1/m) * np.dot(X.T, dZ1)
    db1 = (1/m) * np.sum(dZ1, axis=0, keepdims=True)

    # Out-of-place updates: build new arrays instead of mutating the inputs.
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    return W1, b1, W2, b2

def train_neural_network(X, Y, hidden_size, learning_rate, epochs):
    """Train the one-hidden-layer network with full-batch gradient descent.

    Layer sizes are read from the second dimension of ``X`` and ``Y``. The
    loss is printed on every 100th epoch (starting with epoch 0) and is
    measured before that epoch's parameter update.

    Args:
        X: Input matrix, one sample per row.
        Y: Target matrix, one sample per row.
        hidden_size: Number of hidden units.
        learning_rate: Step size for each gradient-descent update.
        epochs: Number of full passes over the data.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of trained parameters.
    """
    params = initialize_parameters(X.shape[1], hidden_size, Y.shape[1])
    for epoch in range(epochs):
        # cache is (Z1, A1, Z2, A2); the last entry is the prediction.
        cache = forward_propagation(X, *params)
        loss = compute_loss(Y, cache[3])
        params = backward_propagation(X, Y, *cache, *params, learning_rate)
        if not epoch % 100:
            print(f'Epoch {epoch}, Loss: {loss}')
    return params

def predict(X, W1, b1, W2, b2):
    """Compute network outputs for ``X`` using the given parameters.

    Args:
        X: Input matrix, one sample per row.
        W1: Hidden-layer weights.
        b1: Hidden-layer bias.
        W2: Output-layer weights.
        b2: Output-layer bias.

    Returns:
        The final activation ``A2`` from the forward pass.
    """
    forward_cache = forward_propagation(X, W1, b1, W2, b2)
    # The forward pass yields (Z1, A1, Z2, A2); only A2 is needed here.
    return forward_cache[3]

# Build a synthetic regression problem: Y = 3*X + 2 plus Gaussian noise.
# The global RNG is seeded first so the generated data are reproducible.
np.random.seed(42)
X = np.random.rand(100, 1)  # 100 samples, 1 feature, uniform in [0, 1)
Y = 3 * X + 2 + np.random.randn(100, 1) * 0.1  # standard-normal noise scaled by 0.1

# Training hyperparameters.
hidden_size = 10
learning_rate = 0.01
epochs = 1000
W1, b1, W2, b2 = train_neural_network(X, Y, hidden_size, learning_rate, epochs)

# Predict on the same inputs used for training (no held-out set here).
predictions = predict(X, W1, b1, W2, b2)

# Loss of the final parameters, i.e. after the last update step.
final_loss = compute_loss(Y, predictions)
print(f'Final Loss: {final_loss}')


Epoch 0, Loss: 12.405581338022717
Epoch 100, Loss: 2.3239173369106996
Epoch 200, Loss: 0.9614894227290948
Epoch 300, Loss: 0.7731390816052459
Epoch 400, Loss: 0.7362796403659259
Epoch 500, Loss: 0.7108126450600815
Epoch 600, Loss: 0.6767555387503528
Epoch 700, Loss: 0.6291622743841474
Epoch 800, Loss: 0.5656454559805529
Epoch 900, Loss: 0.48621437448885096
Final Loss: 0.3945224514981141
