In [None]:
import numpy as np

In [None]:
class NeuralNetwork:
    """Fully connected network with two ReLU hidden layers and a linear output.

    ``forward`` caches the pre-activations (``z1``, ``z2``, ``z3``) and the
    hidden activations (``a1``, ``a2``) on the instance.  ``backward`` reads
    those caches, so it must be called after ``forward`` on the same batch;
    ``update_parameters`` then applies the gradients stored by ``backward``.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, seed=0):
        """Create the three layers with small random weights and zero biases.

        Args:
            input_size: Number of input features.
            hidden_size1: Width of the first hidden layer.
            hidden_size2: Width of the second hidden layer.
            output_size: Number of output units.
            seed: Seed for the private weight-initialisation generator.  The
                default of 0 yields the same weights as seeding NumPy's global
                generator with 0; pass ``None`` for non-deterministic weights.
        """
        # A private generator keeps initialisation reproducible without
        # reseeding the global NumPy RNG that the caller may be relying on.
        rng = np.random.RandomState(seed)

        self.weights1 = rng.randn(input_size, hidden_size1) * 0.1
        self.bias1 = np.zeros((1, hidden_size1))

        self.weights2 = rng.randn(hidden_size1, hidden_size2) * 0.1
        self.bias2 = np.zeros((1, hidden_size2))

        self.weights3 = rng.randn(hidden_size2, output_size) * 0.1
        self.bias3 = np.zeros((1, output_size))

    def forward(self, X):
        """Run a forward pass and cache the intermediates needed by ``backward``.

        Args:
            X: Batch of inputs whose last axis has ``input_size`` entries.

        Returns:
            The raw (un-activated) output of the last layer, ``z3``.
        """
        # Input to first hidden layer
        self.z1 = np.dot(X, self.weights1) + self.bias1
        self.a1 = np.maximum(0, self.z1)  # ReLU activation

        # First hidden layer to second hidden layer
        self.z2 = np.dot(self.a1, self.weights2) + self.bias2
        self.a2 = np.maximum(0, self.z2)  # ReLU activation

        # Second hidden layer to output layer
        self.z3 = np.dot(self.a2, self.weights3) + self.bias3
        return self.z3  # No activation for the output layer

    def backward(self, X, y, output):
        """Compute mean-squared-error gradients for every weight and bias.

        Results are stored on the instance as ``dweights1..3`` and
        ``dbias1..3``; nothing is returned.

        Args:
            X: The batch that was passed to the preceding ``forward`` call.
            y: Regression targets.  A shape that differs from ``output`` only
                by singleton axes (e.g. ``(N,)`` versus ``(N, 1)``) is
                reshaped to match ``output``.
            output: The value returned by the preceding ``forward`` call.
        """
        output = np.asarray(output)
        y = np.asarray(y)
        # Without this, an (N,) target broadcasts against an (N, 1) output
        # into an (N, N) error matrix instead of an element-wise difference.
        if y.shape != output.shape and np.squeeze(y).shape == np.squeeze(output).shape:
            y = y.reshape(output.shape)

        # Gradient of mean((output - y) ** 2) with respect to the output
        dvalues = 2 * (output - y) / y.size
        self.dweights3 = np.dot(self.a2.T, dvalues)
        self.dbias3 = np.sum(dvalues, axis=0, keepdims=True)
        d_a2 = np.dot(dvalues, self.weights3.T)

        # ReLU backward for the second hidden layer: no gradient flows through
        # units whose activation was clamped to zero
        d_a2[self.a2 <= 0] = 0
        self.dweights2 = np.dot(self.a1.T, d_a2)
        self.dbias2 = np.sum(d_a2, axis=0, keepdims=True)
        d_a1 = np.dot(d_a2, self.weights2.T)

        # ReLU backward for the first hidden layer
        d_a1[self.a1 <= 0] = 0
        self.dweights1 = np.dot(X.T, d_a1)
        self.dbias1 = np.sum(d_a1, axis=0, keepdims=True)

    def update_parameters(self, learning_rate):
        """Take one gradient-descent step using the gradients from ``backward``.

        Args:
            learning_rate: Step size multiplied into every gradient.
        """
        self.weights1 -= learning_rate * self.dweights1
        self.bias1 -= learning_rate * self.dbias1
        self.weights2 -= learning_rate * self.dweights2
        self.bias2 -= learning_rate * self.dbias2
        self.weights3 -= learning_rate * self.dweights3
        self.bias3 -= learning_rate * self.dbias3


In [None]:
class ReLU:
    """Rectified linear unit activation that caches state for backpropagation."""

    def forward(self, inputs):
        """Clamp negative entries to zero.

        The raw ``inputs`` are kept on the instance so ``backward`` can tell
        which entries were clamped; the result is also stored as ``output``.
        """
        self.inputs = inputs
        activated = np.maximum(0, inputs)
        self.output = activated
        return activated

    def backward(self, dvalues):
        """Pass the upstream gradient through wherever the input was positive.

        ``dvalues`` itself is left untouched; a masked copy is stored as
        ``dinputs`` and returned.
        """
        grad = dvalues.copy()
        self.dinputs = grad
        # Entries whose forward input was <= 0 contributed nothing downstream
        grad[self.inputs <= 0] = 0
        return grad


In [None]:
class MSELoss:
    """Mean squared error loss with a cached backward pass."""

    def forward(self, predictions, targets):
        """Return the mean squared error between ``predictions`` and ``targets``.

        Both arrays are stored on the instance for use by ``backward``.

        Args:
            predictions: Model outputs.
            targets: Ground-truth values.  A shape that differs from
                ``predictions`` only by singleton axes (e.g. ``(N,)`` versus
                ``(N, 1)``) is reshaped to match ``predictions``.

        Returns:
            The squared error averaged over the last axis per sample, then
            averaged over the samples.
        """
        predictions = np.asarray(predictions)
        targets = np.asarray(targets)
        # Without this, (N, 1) predictions minus (N,) targets broadcasts to an
        # (N, N) matrix and the "loss" averages over every prediction/target pair.
        if (targets.shape != predictions.shape
                and np.squeeze(targets).shape == np.squeeze(predictions).shape):
            targets = targets.reshape(predictions.shape)

        self.predictions = predictions
        self.targets = targets
        sample_losses = np.mean((predictions - targets) ** 2, axis=-1)  # Mean squared error per sample
        return np.mean(sample_losses)  # Overall mean squared error

    def backward(self):
        """Return the gradient of the loss with respect to the predictions.

        Uses the arrays cached by the most recent ``forward`` call, so
        ``forward`` must have been called first.  The result is also stored
        as ``dinputs``.
        """
        self.dinputs = 2 * (self.predictions - self.targets) / self.targets.size
        return self.dinputs



In [None]:
import numpy as np

# Data generation: 1000 samples of 3 uniform features with a noiseless linear target
np.random.seed(0)
X = np.random.rand(1000, 3)
y = X @ np.array([1.5, -2., 1.]) + 0.5
y = y.reshape(-1, 1)  # column vector, matching the network's (N, output_size) output

input_size = X.shape[1]
hidden_size1 = 64
hidden_size2 = 64
output_size = 1
network = NeuralNetwork(input_size, hidden_size1, hidden_size2, output_size)

# The loss object is used for reporting only: network.backward derives the
# MSE gradient from (predictions, y) itself.
loss_function = MSELoss()

# Training parameters
learning_rate = 0.01
epochs = 1000
log_every = 100  # print the loss once every this many epochs

# Full-batch gradient descent
for epoch in range(epochs):
    # Forward pass
    predictions = network.forward(X)

    # Loss for this epoch, measured before the parameter update
    loss = loss_function.forward(predictions, y)

    # Backward pass (stores the gradients on the network)
    network.backward(X, y, predictions)

    # Update network parameters
    network.update_parameters(learning_rate)

    if epoch % log_every == 0:
        print(f"Epoch {epoch}, Loss: {loss}")


Epoch 0, Loss: 1.2489425022776073
Epoch 100, Loss: 0.5324440026175669
Epoch 200, Loss: 0.3722095675461662
Epoch 300, Loss: 0.1666979420600178
Epoch 400, Loss: 0.044124444092422783
Epoch 500, Loss: 0.012941489608402777
Epoch 600, Loss: 0.006809485393597196
Epoch 700, Loss: 0.004621909598283907
Epoch 800, Loss: 0.003402765782158795
Epoch 900, Loss: 0.002614256636146956
