# In [9]:
import math
import numpy as np
from typing import List


def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Evaluating ``exp(-x)`` directly overflows for large negative ``x`` and
    makes NumPy emit a RuntimeWarning. This implementation only ever
    exponentiates ``-|x|``, whose result lies in [0, 1], so overflow cannot
    happen. For ``x >= 0`` the arithmetic is the same as the direct formula.

    Args:
        x: A real scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``; every value lies in [0, 1].
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer/bool input: promote once so all arithmetic is in floats.
        x = x.astype(np.float64)

    decay = np.exp(-np.abs(x))  # exp(-|x|) <= 1, so this never overflows
    # x >= 0:  1 / (1 + exp(-x))
    # x <  0:  exp(x) / (1 + exp(x))   (algebraically the same value)
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Indexing with () unwraps a 0-d result into a NumPy scalar and leaves
    # higher-dimensional arrays as arrays.
    return result[()]


def sigmoid_derivative(x):
    """Return the derivative of the sigmoid function evaluated at ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.

    Args:
        x: A scalar or NumPy array of pre-activation values.

    Returns:
        The derivative, with the same shape as ``sigmoid(x)``.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(x)
    return s * (1 - s)


class SimpleNetwork:
    """A simple feedforward neural network with sigmoid activation.

    The network is a stack of fully connected layers without bias terms.
    Each layer is a weight matrix of shape ``(n_in, n_out)``; a layer's output
    is ``sigmoid(inputs @ weights)``, and the sigmoid is applied after every
    layer, including the last one.
    """

    @classmethod
    def random(cls, *layer_units: int):
        """Create a network with randomly initialised weights.

        Args:
            *layer_units: Number of units in each layer, input layer first.
                ``k`` sizes produce ``k - 1`` weight matrices.

        Returns:
            A new network whose ``i``-th weight matrix has shape
            ``(layer_units[i], layer_units[i + 1])``.
        """
        def xavier_init(n_in, n_out):
            # Uniform draw from [-epsilon, epsilon) with
            # epsilon = sqrt(6) / sqrt(n_in + n_out).
            epsilon = math.sqrt(6) / math.sqrt(n_in + n_out)
            return np.random.uniform(-epsilon, epsilon, size=(n_in, n_out))

        layer_weights = [
            xavier_init(n_in, n_out)
            for n_in, n_out in zip(layer_units, layer_units[1:])
        ]
        return cls(*layer_weights)

    def __init__(self, *layer_weights: np.ndarray):
        """Create a network from explicit weight matrices.

        Args:
            *layer_weights: One ``(n_in, n_out)`` matrix per layer, in
                input-to-output order. Each matrix is copied, and integer
                or boolean matrices are converted to floats.
        """
        self.layer_weights = [self._owned_float_array(w) for w in layer_weights]

    @staticmethod
    def _owned_float_array(weights) -> np.ndarray:
        """Return a private, floating-point copy of ``weights``."""
        # np.array always copies, so the in-place updates done by train()
        # can never leak into arrays owned by the caller.
        matrix = np.array(weights)
        if not np.issubdtype(matrix.dtype, np.inexact):
            # In-place ``-=`` with float gradients is rejected by NumPy for
            # integer arrays, so store such weights as floats up front.
            matrix = matrix.astype(np.float64)
        return matrix

    def predict(self, input_matrix: np.ndarray) -> np.ndarray:
        """Run a forward pass and return the output-layer activations.

        Args:
            input_matrix: One example per row.

        Returns:
            The activations of the last layer, one row per example. With no
            layers, ``input_matrix`` itself is returned.
        """
        activations = input_matrix
        for weights in self.layer_weights:
            activations = sigmoid(np.dot(activations, weights))
        return activations

    def predict_zero_one(self, input_matrix: np.ndarray) -> np.ndarray:
        """Return ``predict`` thresholded at 0.5, as an integer array of 0/1."""
        return (self.predict(input_matrix) >= 0.5).astype(int)

    def gradients(self, input_matrix: np.ndarray, output_matrix: np.ndarray) -> List[np.ndarray]:
        """Compute the gradient for every weight matrix by backpropagation.

        The output error is ``prediction - output_matrix`` and each gradient
        is averaged over the examples (rows) of ``input_matrix``.

        Args:
            input_matrix: One example per row. A 1-D input is treated as a
                single example.
            output_matrix: Target outputs, compatible in shape with the
                network's predictions for ``input_matrix``.

        Returns:
            One gradient per layer, in the same order and with the same
            shapes as ``self.layer_weights``.
        """
        inputs = np.atleast_2d(input_matrix)
        n_examples = inputs.shape[0]

        # Forward pass: activations[i] is the input to layer i and
        # activations[i + 1] is its output.
        activations = [inputs]
        for weights in self.layer_weights:
            activations.append(sigmoid(np.dot(activations[-1], weights)))

        # Backward pass, from the output layer back to the first layer.
        error = activations[-1] - output_matrix
        gradients = []
        for i in reversed(range(len(self.layer_weights))):
            layer_output = activations[i + 1]
            # The sigmoid derivative at the layer's weighted sum equals
            # a * (1 - a) for the stored activation a, so nothing has to be
            # re-evaluated here.
            delta = error * (layer_output * (1 - layer_output))
            gradients.append(np.dot(activations[i].T, delta) / n_examples)
            if i > 0:
                # Error seen by the previous layer; the first layer has no
                # predecessor, so the product is skipped there.
                error = np.dot(delta, self.layer_weights[i].T)

        # Gradients were collected last-layer-first; restore layer order.
        gradients.reverse()
        return gradients

    def train(self, input_matrix: np.ndarray, output_matrix: np.ndarray, iterations: int = 10000, learning_rate: float = 0.1) -> None:
        """Fit the weights with full-batch gradient descent.

        Args:
            input_matrix: One example per row.
            output_matrix: Target outputs for those examples.
            iterations: Number of gradient-descent steps to take.
            learning_rate: Factor applied to each gradient before it is
                subtracted from the weights.
        """
        for _ in range(iterations):
            grads = self.gradients(input_matrix, output_matrix)
            for weights, grad in zip(self.layer_weights, grads):
                # In-place update of the network's own copy of the weights.
                weights -= learning_rate * grad
