In [2]:
import numpy as np

In [3]:
class LinearLayer:
    """Fully connected (affine) layer computing ``Z = X @ W + B``.

    Attributes:
        W: Weight matrix of shape ``(input_dim, output_dim)``.
        B: Bias vector of shape ``(output_dim,)``.
        learning_rate: Step size used by ``update_parameters``.
    """

    def __init__(self, input_dim, output_dim, learning_rate=0.01):
        """Create the layer with randomly initialized weights and a zero bias.

        Args:
            input_dim: Number of input features (rows of ``W``).
            output_dim: Number of output features (columns of ``W``).
            learning_rate: Step size for gradient descent.
        """
        # Zero-mean weights scaled by sqrt(2 / fan_in). Uniform [0, 1) weights
        # are all positive, so pre-activations grow with the layer width and
        # saturate the softmax of a stacked network.
        # max(..., 1) keeps a zero-width layer constructible.
        scale = np.sqrt(2.0 / max(input_dim, 1))
        self.W = np.random.randn(input_dim, output_dim) * scale
        self.B = np.zeros(output_dim)
        self.learning_rate = learning_rate

    def forward(self, X):
        """Compute and cache ``Z = X @ W + B``.

        Args:
            X: 2-D input array of shape ``(n_samples, input_dim)``.

        Returns:
            The layer output ``Z`` of shape ``(n_samples, output_dim)``.
        """
        # X is cached because backward() needs it for the weight gradient.
        self.X = X
        self.Z = X @ self.W + self.B
        return self.Z

    def backward(self, dL_dZ):
        """Compute and store gradients for the most recent ``forward`` call.

        Args:
            dL_dZ: Gradient of the loss with respect to this layer's output.

        Returns:
            ``dL_dX``, the gradient of the loss with respect to the layer input.
        """
        self.dL_dW = self.X.T @ dL_dZ
        self.dL_dB = np.sum(dL_dZ, axis=0)  # bias is shared across the batch
        self.dL_dX = dL_dZ @ self.W.T
        return self.dL_dX

    def update_parameters(self):
        """Apply one gradient-descent step: ``theta_new = theta - alpha * dL/dtheta``."""
        # Rebind instead of using ``-=`` so that an array the caller assigned
        # to ``W``/``B`` is not modified behind their back.
        self.W = self.W - self.learning_rate * self.dL_dW
        self.B = self.B - self.learning_rate * self.dL_dB

In [7]:
# Unit tests
def test_linear_layer():
    """Check LinearLayer's forward, backward and update steps with fixed parameters."""
    # Deterministic fixtures
    lr = 0.01
    inputs = np.array([[1.0, 2.0], [3.0, 4.0]])
    weights = np.array([[0.5, -0.5], [1.0, -1.0]])
    bias = np.array([0.1, -0.2])
    upstream_grad = np.array([[0.5, -0.5], [0.5, -0.5]])

    # Build the layer and overwrite its random parameters with the fixtures
    layer = LinearLayer(input_dim=2, output_dim=2, learning_rate=lr)
    layer.W, layer.B = weights, bias

    # Forward pass
    expected_Z = inputs @ weights + bias
    assert np.allclose(layer.forward(inputs), expected_Z), "Forward pass error"

    # Backward pass
    expected_dX = upstream_grad @ weights.T
    expected_dW = inputs.T @ upstream_grad
    expected_dB = np.sum(upstream_grad, axis=0)

    grad_input = layer.backward(upstream_grad)
    backward_checks = (
        (grad_input, expected_dX, "Backward pass error in dL/dX"),
        (layer.dL_dW, expected_dW, "Backward pass error in dL/dW"),
        (layer.dL_dB, expected_dB, "Backward pass error in dL/dB"),
    )
    for actual, expected, message in backward_checks:
        assert np.allclose(actual, expected), message

    # Parameter update. The expected values are computed before the update
    # because layer.W / layer.B are the same array objects as weights / bias,
    # so an in-place update would change the fixtures too.
    expected_W = weights - lr * expected_dW
    expected_B = bias - lr * expected_dB

    layer.update_parameters()
    update_checks = (
        (layer.W, expected_W, "Update error in W"),
        (layer.B, expected_B, "Update error in B"),
    )
    for actual, expected, message in update_checks:
        assert np.allclose(actual, expected), message

    print("All tests passed!")

# Run the unit test when this cell executes; it prints "All tests passed!"
# only if every assertion in test_linear_layer holds.
test_linear_layer()

All tests passed!


In [8]:
class NeuralNetwork:
    """Classifier made of three ReLU hidden layers followed by a softmax output layer."""

    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.01):
        """Build the four linear layers.

        Args:
            input_dim: Number of input features.
            hidden_dim: Width shared by the three hidden layers.
            output_dim: Number of output classes.
            learning_rate: Step size handed to every layer.
        """
        self.layer1 = LinearLayer(input_dim, hidden_dim, learning_rate)
        self.layer2 = LinearLayer(hidden_dim, hidden_dim, learning_rate)
        self.layer3 = LinearLayer(hidden_dim, hidden_dim, learning_rate)
        self.output_layer = LinearLayer(hidden_dim, output_dim, learning_rate)

    def forward(self, X):
        """Run the network and cache the hidden activations for ``backward``.

        Args:
            X: 2-D input array of shape ``(n_samples, input_dim)``.

        Returns:
            Row-wise softmax probabilities of shape ``(n_samples, output_dim)``.
        """
        self.Z1 = np.maximum(0, self.layer1.forward(X))  # ReLU activation
        self.Z2 = np.maximum(0, self.layer2.forward(self.Z1))  # ReLU activation
        self.Z3 = np.maximum(0, self.layer3.forward(self.Z2))  # ReLU activation
        self.output = self.softmax(self.output_layer.forward(self.Z3))
        return self.output

    @staticmethod
    def _relu_backward(upstream, activation):
        """Zero the gradient wherever the ReLU output was not positive."""
        # np.where builds a new array, so the layer's stored dL_dX stays intact.
        return np.where(activation <= 0, 0.0, upstream)

    def backward(self, dL_dY):
        """Back-propagate through all layers, storing each layer's gradients.

        Args:
            dL_dY: Gradient fed directly into the output layer's ``backward``;
                no softmax Jacobian is applied here, so it is treated as the
                gradient with respect to the output layer's pre-softmax values.

        Returns:
            The gradient of the loss with respect to the network input.
        """
        dL_dZ3 = self._relu_backward(self.output_layer.backward(dL_dY), self.Z3)
        dL_dZ2 = self._relu_backward(self.layer3.backward(dL_dZ3), self.Z2)
        dL_dZ1 = self._relu_backward(self.layer2.backward(dL_dZ2), self.Z1)
        return self.layer1.backward(dL_dZ1)

    def update_parameters(self):
        """Apply one gradient-descent step to every layer."""
        self.layer1.update_parameters()
        self.layer2.update_parameters()
        self.layer3.update_parameters()
        self.output_layer.update_parameters()

    def softmax(self, Z):
        """Row-wise softmax of a 2-D array."""
        # Subtracting the row maximum avoids overflow in exp() without
        # changing the result.
        expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return expZ / expZ.sum(axis=1, keepdims=True)

    def cross_entropy_loss(self, Y_pred, Y_true):
        """Mean cross-entropy between predicted probabilities and labels.

        Args:
            Y_pred: Predicted probabilities, shape ``(n_samples, n_classes)``.
            Y_true: Labels of the same shape; the row-wise argmax is taken as
                the true class (e.g. one-hot rows).

        Returns:
            The average negative log-probability assigned to the true class.
        """
        n_samples = Y_true.shape[0]
        true_class_prob = Y_pred[np.arange(n_samples), Y_true.argmax(axis=1)]
        # A saturated softmax can yield an exact 0, and log(0) would make the
        # loss infinite; floor the probability at a tiny positive value.
        log_likelihood = -np.log(np.maximum(true_class_prob, 1e-12))
        return np.sum(log_likelihood) / n_samples

    def compute_accuracy(self, Y_pred, Y_true):
        """Fraction of rows whose predicted argmax matches the label argmax."""
        predictions = np.argmax(Y_pred, axis=1)
        labels = np.argmax(Y_true, axis=1)
        return np.mean(predictions == labels)

# Training Loop
def train_network(network, X_train, Y_train, X_test, Y_test, epochs=100, batch_size=32):
    """Train ``network`` with mini-batch gradient descent and report per-epoch metrics.

    Args:
        network: Object providing ``forward``, ``backward``, ``update_parameters``,
            ``cross_entropy_loss`` and ``compute_accuracy``.
        X_train: Training inputs, indexed along the first axis.
        Y_train: Training labels aligned with ``X_train`` (same shape as the
            network's predictions, since they are subtracted from them).
        X_test: Evaluation inputs.
        Y_test: Evaluation labels.
        epochs: Number of passes over the training data.
        batch_size: Number of samples per mini-batch; must be at least 1.

    Returns:
        A dict with the keys ``"loss"``, ``"accuracy"``, ``"test_loss"`` and
        ``"test_accuracy"``, each holding one float per epoch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    n_train = X_train.shape[0]
    history = {"loss": [], "accuracy": [], "test_loss": [], "test_accuracy": []}

    for epoch in range(epochs):
        # Shuffle into fresh arrays; the caller's data is left untouched.
        indices = np.arange(n_train)
        np.random.shuffle(indices)
        X_epoch = X_train[indices]
        Y_epoch = Y_train[indices]

        # Sample-weighted running totals so the reported training metrics
        # cover the whole epoch rather than only the final mini-batch.
        loss_total = 0.0
        accuracy_total = 0.0

        # Mini-batch training
        for start in range(0, n_train, batch_size):
            X_batch = X_epoch[start:start + batch_size]
            Y_batch = Y_epoch[start:start + batch_size]
            n_batch = X_batch.shape[0]

            # Forward pass
            Y_pred = network.forward(X_batch)

            # Accumulate loss and accuracy
            loss_total += float(network.cross_entropy_loss(Y_pred, Y_batch)) * n_batch
            accuracy_total += float(network.compute_accuracy(Y_pred, Y_batch)) * n_batch

            # Gradient of the batch-mean cross-entropy with respect to the
            # pre-softmax outputs. The division by the batch size matches the
            # averaging done in the loss.
            dL_dY = (Y_pred - Y_batch) / n_batch
            network.backward(dL_dY)
            network.update_parameters()

        # With no training samples there is nothing to average.
        loss = loss_total / n_train if n_train else float("nan")
        accuracy = accuracy_total / n_train if n_train else float("nan")

        # Evaluate on test data after each epoch
        Y_test_pred = network.forward(X_test)
        test_loss = float(network.cross_entropy_loss(Y_test_pred, Y_test))
        test_accuracy = float(network.compute_accuracy(Y_test_pred, Y_test))

        history["loss"].append(loss)
        history["accuracy"].append(accuracy)
        history["test_loss"].append(test_loss)
        history["test_accuracy"].append(test_accuracy)

        print(f"Epoch {epoch+1}/{epochs} - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}, "
              f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

    return history

# Network configuration. X_train, Y_train, X_test and Y_test are assumed to be
# available; they are not defined in this cell.
input_dim = 784  # For example, 28x28 images flattened
hidden_dim = 128
output_dim = 10  # 10 classes for digits 0-9
learning_rate = 0.01

# Seed NumPy's global RNG so the random weight initialization (and the
# shuffling done by train_network) is reproducible after a kernel restart.
SEED = 42
np.random.seed(SEED)

network = NeuralNetwork(input_dim, hidden_dim, output_dim, learning_rate)
# Call train_network with training and test data.
# train_network(network, X_train, Y_train, X_test, Y_test)
