In [10]:
class Sequential:
    """A linear stack of layers trained with plain gradient descent.

    Each layer must provide ``forward_prop(inputs)`` and
    ``backward_prop(dvalues)``, both returning the value handed to the next
    layer in the chain. Layers that hold trainable parameters expose them as
    ``W`` / ``B`` with matching gradients ``dW`` / ``dB``; layers without
    those attributes are left alone by ``update_params``.
    """

    def __init__(self, layers=None):
        """Create the model, optionally pre-populated with ``layers``.

        Args:
            layers: Optional iterable of layer objects. It is copied, so
                later calls to ``add`` do not mutate the caller's list.
        """
        self.layers = list(layers) if layers is not None else []
        self.loss = None
        # Stored by compile() but not consulted anywhere in this class yet.
        self.optimiser = None  # TODO - Research Adam optimiser

    def add(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def forward_prop(self, inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        for layer in self.layers:
            inputs = layer.forward_prop(inputs)
        return inputs

    def backward_prop(self, dvalues):
        """Propagate the gradient ``dvalues`` from the last layer to the first.

        Each layer receives the gradient returned by the layer after it.
        Nothing is returned.
        """
        for layer in reversed(self.layers):
            dvalues = layer.backward_prop(dvalues)

    def compile(self, loss, optimiser=None):
        """Attach the loss object (and an optional optimiser) to the model.

        Args:
            loss: Object providing ``loss(y, output)`` and
                ``derivative(y, output)``; both are called by ``fit``.
            optimiser: Stored on the model; currently unused by ``fit``.
        """
        self.loss = loss
        self.optimiser = optimiser

    def update_params(self, lr):
        """Apply one gradient-descent step of size ``lr`` to every layer.

        A parameter the layer does not define is skipped, so parameter-free
        layers can sit in the stack without raising ``AttributeError``.
        A layer that has ``W`` (or ``B``) must also have ``dW`` (or ``dB``).
        """
        for layer in self.layers:
            if hasattr(layer, "W"):
                layer.W = layer.W - lr * layer.dW
            if hasattr(layer, "B"):
                layer.B = layer.B - lr * layer.dB

    def fit(self, X, y, epochs=1, lr=0.00001):
        """Train the model on the full ``X`` / ``y`` for ``epochs`` iterations.

        ``compile`` must have been called first so that ``self.loss`` is set.

        Args:
            X: Inputs passed to the first layer's ``forward_prop``.
            y: Targets passed to the loss object.
            epochs: Number of full forward/backward/update passes.
            lr: Step size for ``update_params``. The default is deliberately
                tiny: the learning rate has to be very small without an
                optimiser.
        """
        for epoch in range(epochs):
            # Forward propagation
            output = self.forward_prop(X)

            # Compute the loss
            loss = self.loss.loss(y, output)

            # Compute the gradient of the loss with respect to the output
            dvalues = self.loss.derivative(y, output)

            # Backward propagation
            self.backward_prop(dvalues)

            # Update the parameters
            self.update_params(lr)

            # Print the loss every 100 epochs
            if (epoch + 1) % 100 == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.6f}")

    def predict(self, X):
        """Return the model output for ``X`` (a forward pass, no training)."""
        return self.forward_prop(X)