In [9]:
import numpy as np

def relu(x):
    return np.maximum(0, x)


def relu_derivative(x):
    return np.where(x > 0, 1, 0)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

def binary_cross_entropy(y_true, y_pred):
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))



In [6]:
class DenseNetwork:
    def __init__(self, layers):
        self.layers = layers
    
    def forward(self, X):
        outputs = X
        for layer in self.layers:
            outputs = layer.forward(outputs)
        return outputs
    
    def backward(self, grad_output, learning_rate):
        for layer in reversed(self.layers):
            grad_output = layer.backward(grad_output, learning_rate)
    
    def train(self, X, y, epochs, learning_rate, loss_func):
        for epoch in range(epochs):
            outputs = self.forward(X)
            loss = loss_func(y, outputs)
            grad_output = outputs - y
            self.backward(grad_output, learning_rate)
            if epoch % 100 == 0:
                print(f"Epoch {epoch}/{epochs} - Loss: {loss:.4f}")
    
    def predict(self, X):
        return self.forward(X)

In [7]:
class DenseLayer:
    def __init__(self, input_size, output_size, activation=None):
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)
        self.activation = activation
        self.inputs = None
        self.outputs = None
        self.d_weights = None
        self.d_bias = None
    
    def forward(self, inputs):
        self.inputs = inputs
        self.outputs = np.dot(self.weights, inputs.T) + self.bias
        if self.activation:
            self.outputs = self.activation(self.outputs)
        return self.outputs
    
    def backward(self, grad_output, learning_rate):
        if self.activation:
            grad_output *= self.activation(self.outputs, derivative=True)
        self.d_weights = np.dot(grad_output, self.inputs)
        self.d_bias = np.sum(grad_output, axis=1, keepdims=True)
        grad_input = np.dot(self.weights.T, grad_output)
        self.weights -= learning_rate * self.d_weights.T
        self.bias -= learning_rate * self.d_bias
        return grad_input

In [8]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

network = DenseNetwork([
    DenseLayer(2, 64, activation=relu),
    DenseLayer(64, 1, activation=sigmoid)])


network.train(X, y, epochs=1000, learning_rate=0.1, loss_func=mean_squared_error)

predictions = network.predict(X)
print("Predictions:", predictions)


ValueError: shapes (1,64) and (4,64) not aligned: 64 (dim 1) != 4 (dim 0)