# My Neural Network Library

In [14]:
import numpy as np

class Layer:
    """Common interface implemented by every network component.

    Subclasses override both methods; the base versions only signal that
    the operation has not been provided.
    """

    def forward(self, input_data):
        """Map ``input_data`` to this layer's output (must be overridden)."""
        raise NotImplementedError

    def backward(self, grad_output):
        """Propagate ``grad_output`` back through the layer (must be overridden)."""
        raise NotImplementedError

class LinearLayer(Layer):
    """Dense (affine) layer computing ``input @ weights.T + bias.T``.

    ``weights`` has shape ``(output_dim, input_dim)`` and ``bias`` has shape
    ``(output_dim, 1)``. Inputs are expected to be 2-D with one sample per
    row (the code transposes them, so 1-D inputs are not handled by
    ``backward``).
    """

    def __init__(self, input_dim, output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim
        # Standard-normal draws scaled by sqrt(2 / fan_in); biases start at 0.
        gaussian_draws = np.random.randn(output_dim, input_dim)
        self.weights = gaussian_draws * np.sqrt(2 / input_dim)
        self.bias = np.zeros((output_dim, 1))

    def forward(self, input_data):
        """Return the affine projection of ``input_data``.

        The input is cached on the instance because ``backward`` needs it
        to form the weight gradient.
        """
        self.input_data = input_data
        projected = np.dot(self.weights, input_data.T)
        return projected.T + self.bias.T

    def backward(self, grad_output, learning_rate=0.01):
        """Apply one gradient-descent step and return the input gradient.

        ``grad_output`` is the loss gradient with respect to this layer's
        output. The weights and bias are updated in place with step size
        ``learning_rate``; the returned array is the gradient with respect
        to the input seen in the last ``forward`` call.
        """
        # All three gradients are taken from the pre-update parameters.
        d_input = np.dot(grad_output, self.weights)
        d_weights = np.dot(grad_output.T, self.input_data)
        d_bias = np.sum(grad_output, axis=0, keepdims=True).T

        self.weights -= learning_rate * d_weights
        self.bias -= learning_rate * d_bias

        return d_input
    
class Sigmoid(Layer):
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``, applied elementwise."""

    def forward(self, input_data):
        """Return the sigmoid of ``input_data`` and cache it for ``backward``.

        The value is computed from ``exp(-|x|)``, which always lies in
        ``(0, 1]``, so large-magnitude negative inputs no longer overflow
        ``np.exp`` (the naive formula emits a RuntimeWarning there).
        """
        decay = np.exp(-np.abs(input_data))
        # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
        self.output = np.where(input_data >= 0,
                               1 / (1 + decay),
                               decay / (1 + decay))
        return self.output

    def backward(self, grad_output):
        """Scale ``grad_output`` by the sigmoid derivative ``s * (1 - s)``.

        Uses the output cached by the most recent ``forward`` call.
        """
        return grad_output * self.output * (1 - self.output)
    
class Tanh(Layer):
    """Hyperbolic-tangent activation applied elementwise."""

    def forward(self, input_data):
        """Return ``tanh(input_data)``, keeping the result for ``backward``."""
        activated = np.tanh(input_data)
        self.output = activated
        return activated

    def backward(self, grad_output):
        """Scale ``grad_output`` by the tanh derivative ``1 - tanh(x)^2``.

        The derivative is built from the output cached by ``forward``.
        """
        local_slope = 1 - np.square(self.output)
        return grad_output * local_slope
    
class ReLU(Layer):
    """Rectified linear activation, ``max(0, x)``, applied elementwise."""

    def forward(self, input_data):
        """Return ``input_data`` with negative entries replaced by zero.

        The raw input is cached so ``backward`` can tell which entries
        were positive.
        """
        self.input_data = input_data
        rectified = np.maximum(0, input_data)
        return rectified

    def backward(self, grad_output):
        """Pass ``grad_output`` through where the cached input was > 0.

        Entries whose input was zero or negative are multiplied by False.
        """
        passes_gradient = self.input_data > 0
        return grad_output * passes_gradient

class BinaryCrossEntropyLoss(Layer):
    """Mean binary cross-entropy between predictions and 0/1 targets.

    Unlike the other layers, ``forward`` takes two arguments and
    ``backward`` takes none; both operate on values cached on the instance.
    """

    def forward(self, predictions, targets):
        """Return the mean cross-entropy of ``predictions`` against ``targets``.

        Predictions are clipped to ``[1e-12, 1 - 1e-12]`` before the
        logarithms so neither ``log`` receives 0. The clipped predictions
        and the targets are cached for ``backward``.
        """
        tiny = 1e-12
        self.predictions = np.clip(predictions, tiny, 1 - tiny)
        self.targets = targets
        clipped = self.predictions
        log_likelihood = targets * np.log(clipped) + (1 - targets) * np.log(1 - clipped)
        return -np.mean(log_likelihood)

    def backward(self):
        """Return ``(clipped_predictions - targets) / targets.shape[0]``.

        NOTE(review): this is the cross-entropy gradient with respect to
        pre-sigmoid logits, not with respect to the predictions themselves
        (that would carry an extra ``1 / (p * (1 - p))`` factor). Confirm
        this is intended before pairing it with an explicit output
        activation whose ``backward`` applies its own derivative.
        """
        batch_size = self.targets.shape[0]
        return (self.predictions - self.targets) / batch_size

class Sequential(Layer):
    """Container that runs a list of layers in the order they were added."""

    def __init__(self):
        self.layers = []

    def add(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def forward(self, input_data):
        """Feed ``input_data`` through every layer and return the final output."""
        for layer in self.layers:
            input_data = layer.forward(input_data)
        return input_data

    def backward(self, grad_output, learning_rate=0.01):
        """Back-propagate ``grad_output`` through the layers in reverse order.

        ``learning_rate`` is forwarded only to ``LinearLayer`` instances,
        which update their parameters in place. Returns the gradient with
        respect to the model input, matching the contract of the other
        layers' ``backward`` methods.
        """
        for layer in reversed(self.layers):
            if isinstance(layer, LinearLayer):
                grad_output = layer.backward(grad_output, learning_rate)
            else:
                grad_output = layer.backward(grad_output)
        return grad_output

    def _linear_layers(self):
        """Return the ``LinearLayer`` instances in stack order."""
        return [layer for layer in self.layers if isinstance(layer, LinearLayer)]

    def save_weights(self, filename):
        """Write every ``LinearLayer``'s parameters to a NumPy ``.npz`` archive.

        Arrays are stored positionally: all weight matrices first
        (``arr_0 .. arr_{k-1}``), then all biases (``arr_k .. arr_{2k-1}``),
        where ``k`` is the number of linear layers. ``np.savez`` appends
        ``.npz`` to ``filename`` when it is a path without that extension.
        """
        linear = self._linear_layers()
        weights = [layer.weights for layer in linear]
        biases = [layer.bias for layer in linear]
        np.savez(filename, *weights, *biases)

    def load_weights(self, filename):
        """Restore parameters written by ``save_weights``.

        ``filename`` may be the exact name passed to ``save_weights``; if
        that path does not exist, the ``.npz``-suffixed name that
        ``np.savez`` actually created is tried.

        Raises:
            FileNotFoundError: if neither candidate file exists.
            ValueError: if the archive does not hold exactly one weight and
                one bias array per ``LinearLayer`` in this model.
        """
        linear = self._linear_layers()
        count = len(linear)

        try:
            archive = np.load(filename)
        except FileNotFoundError:
            if str(filename).endswith(".npz"):
                raise
            archive = np.load(f"{filename}.npz")

        with archive as data:
            if len(data.files) != 2 * count:
                raise ValueError(
                    f"expected {2 * count} arrays for {count} linear layers, "
                    f"found {len(data.files)}"
                )
            # Index by position among the linear layers, not among all
            # layers: activations occupy slots in self.layers but have no
            # arrays in the archive.
            for index, layer in enumerate(linear):
                layer.weights = data[f"arr_{index}"]
                layer.bias = data[f"arr_{index + count}"]


# Testing Library Against XOR Problem

In [15]:
# XOR dataset: the four binary input pairs and their exclusive-or labels.
np.random.seed(0)  # fixes the random weight initialisation below
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.bitwise_xor(X[:, :1], X[:, 1:])  # column vector [[0], [1], [1], [0]]

# 2-2-1 network with a tanh after each linear layer.
model = Sequential()
model.add(LinearLayer(2, 2))  # hidden layer, 2 units
model.add(Tanh())
model.add(LinearLayer(2, 1))  # single output unit
model.add(Tanh())

# Full-batch gradient descent settings.
epochs = 10000
learning_rate = 0.1
loss_function = BinaryCrossEntropyLoss()

for epoch in range(epochs):
    # Forward: model output, then the scalar loss against the labels.
    predictions = model.forward(X)
    loss = loss_function.forward(predictions, y)

    # Backward: the loss gradient seeds back-propagation, and the linear
    # layers update themselves as the gradient passes through.
    grad_output = loss_function.backward()
    model.backward(grad_output, learning_rate)

    # Progress report on every 1000th epoch (starting with epoch 0).
    if not epoch % 1000:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Persist the trained parameters. np.savez appends ".npz", so the file on
# disk is "XOR_solved.w.npz".
model.save_weights("XOR_solved.w")

# Evaluate on the same four inputs.
predictions = model.forward(X)
print("Final Predictions:", predictions, sep="\n")

Epoch 0, Loss: 7.2639
Epoch 1000, Loss: 0.3836
Epoch 2000, Loss: 0.0517
Epoch 3000, Loss: 0.0263
Epoch 4000, Loss: 0.0193
Epoch 5000, Loss: 0.0158
Epoch 6000, Loss: 0.0136
Epoch 7000, Loss: 0.0121
Epoch 8000, Loss: 0.0110
Epoch 9000, Loss: 0.0101
Final Predictions:
[[4.67735818e-04]
 [9.82337554e-01]
 [9.81713307e-01]
 [9.25608756e-04]]


## XOR Problem Results

Using tanh activations produced the correct XOR predictions; I could not get the network to train well with sigmoid activations.