# Building a Deep Neural Network from Scratch Using NumPy

## Activation Functions

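**Sigmoid**: Maps any real input into the range (0, 1), which makes it useful for modelling probabilities. It saturates for inputs of large magnitude, so it may suffer from vanishing gradients.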

**ReLU**: Returns `max(0, x)`. Commonly used because it is simple and cheap to compute.

In [1]:
import numpy as np

# Activation Functions
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    Parameters
    ----------
    x : scalar or array_like
        Input value(s).

    Returns
    -------
    numpy scalar or ndarray
        Element-wise sigmoid of ``x``; an array input gives an array of the
        same shape, a scalar input gives a NumPy scalar.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow the way
    # exp(-x) does for large negative x.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically equal.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result into a scalar and leaves
    # higher-dimensional arrays untouched.
    return result[()]


In [2]:
def sigmoid_derivative(x):
    """Derivative of the sigmoid at ``x``: ``s * (1 - s)`` where ``s = sigmoid(x)``."""
    s = sigmoid(x)
    return s * (1 - s)

In [3]:
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)


In [4]:
def relu_derivative(x):
    """Derivative of ReLU: 1.0 where ``x > 0`` and 0.0 elsewhere.

    Parameters
    ----------
    x : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy float or ndarray of float
        Element-wise derivative; at ``x == 0`` the value 0.0 is used.
    """
    # np.asarray lets plain Python numbers and lists through; a bare
    # ``x > 0`` on a Python float yields a bool that has no ``.astype``.
    return (np.asarray(x) > 0).astype(float)


## Loss Function

In [5]:
# Loss Function
def mean_squared_error(y_true, y_pred):
    """Mean of the squared element-wise differences between ``y_true`` and ``y_pred``."""
    residual = y_true - y_pred
    return np.mean(np.square(residual))


In [6]:
def mse_derivative(y_true, y_pred):
    """Gradient of the mean squared error with respect to ``y_pred``.

    The result has the shape of ``y_true - y_pred`` and is scaled by the
    number of elements in ``y_true``.
    """
    n_elements = y_true.size
    residual = y_true - y_pred
    return -2 * residual / n_elements

## Neural Network

In [7]:
# Neural Network Class
class DeepNeuralNetwork:
    """Fully connected feed-forward network trained with full-batch gradient descent.

    Every layer, including the output layer, applies the same activation
    function. The loss is the mean squared error.

    Parameters
    ----------
    layers : sequence of int
        Layer widths, input width first and output width last. At least two
        entries are required.
    activation : {"relu", "sigmoid"}, default "relu"
        Activation applied after every layer.

    Raises
    ------
    ValueError
        If fewer than two layer sizes are given or ``activation`` is not one
        of the supported names.
    """

    _SUPPORTED_ACTIVATIONS = ("relu", "sigmoid")

    def __init__(self, layers, activation="relu"):
        if len(layers) < 2:
            raise ValueError(
                "layers must contain at least an input size and an output size"
            )
        # Reject unknown names up front; otherwise any typo would silently
        # select the sigmoid branch in _activate/_activate_derivative.
        if activation not in self._SUPPORTED_ACTIVATIONS:
            raise ValueError(
                f"Unsupported activation {activation!r}; "
                f"expected one of {self._SUPPORTED_ACTIVATIONS}"
            )

        self.layers = layers
        self.activation = activation
        self.weights = []
        self.biases = []

        # One (n_in, n_out) weight matrix and one (1, n_out) bias row per
        # pair of consecutive layers. Weights are small Gaussian values
        # drawn from NumPy's global random state; biases start at zero.
        for n_in, n_out in zip(layers[:-1], layers[1:]):
            self.weights.append(np.random.randn(n_in, n_out) * 0.1)
            self.biases.append(np.zeros((1, n_out)))

    def _activate(self, x):
        """Apply the configured activation function element-wise."""
        return relu(x) if self.activation == "relu" else sigmoid(x)

    def _activate_derivative(self, x):
        """Derivative of the configured activation, evaluated at pre-activation ``x``."""
        return relu_derivative(x) if self.activation == "relu" else sigmoid_derivative(x)

    def forward(self, x):
        """Run a forward pass.

        Parameters
        ----------
        x : ndarray of shape (n_samples, layers[0])
            Input batch.

        Returns
        -------
        activations : list of ndarray
            ``[x, a_1, ..., a_L]`` -- the input followed by each layer's output.
        zs : list of ndarray
            Pre-activation values ``z_1, ..., z_L`` of each layer.
        """
        activations = [x]
        zs = []

        for w, b in zip(self.weights, self.biases):
            z = np.dot(activations[-1], w) + b
            zs.append(z)
            activations.append(self._activate(z))

        return activations, zs

    def backward(self, activations, zs, y_true):
        """Backpropagate the MSE loss through the network.

        Parameters
        ----------
        activations, zs : list of ndarray
            The two lists returned by :meth:`forward`.
        y_true : ndarray
            Targets, compared element-wise against ``activations[-1]``.

        Returns
        -------
        grad_weights, grad_biases : list of ndarray
            Gradients in the same order and with the same shapes as
            ``self.weights`` and ``self.biases``.
        """
        grad_weights = []
        grad_biases = []

        # Error signal at the output layer: dLoss/da * da/dz.
        delta = mse_derivative(y_true, activations[-1]) * self._activate_derivative(zs[-1])

        # Walk from the last layer to the first. Gradients are appended in
        # that (reversed) order and flipped once at the end, which avoids
        # the repeated front-insertions of list.insert(0, ...).
        for i in range(len(self.layers) - 2, -1, -1):
            grad_weights.append(np.dot(activations[i].T, delta))
            grad_biases.append(np.sum(delta, axis=0, keepdims=True))
            if i != 0:
                # Push the error signal back through layer i's weights and
                # the activation of the layer below it.
                delta = np.dot(delta, self.weights[i].T) * self._activate_derivative(zs[i - 1])

        grad_weights.reverse()
        grad_biases.reverse()
        return grad_weights, grad_biases

    def update_params(self, grad_weights, grad_biases, lr):
        """Take one gradient-descent step of size ``lr``, updating parameters in place."""
        for i in range(len(self.weights)):
            self.weights[i] -= lr * grad_weights[i]
            self.biases[i] -= lr * grad_biases[i]

    def fit(self, x, y, epochs=1000, lr=0.01, log_every=100):
        """Train on the full batch ``(x, y)`` for ``epochs`` iterations.

        Parameters
        ----------
        x : ndarray of shape (n_samples, layers[0])
            Training inputs.
        y : ndarray
            Training targets, matching the shape of the network output.
        epochs : int, default 1000
            Number of full-batch gradient steps.
        lr : float, default 0.01
            Learning rate.
        log_every : int or None, default 100
            Print the loss every ``log_every`` epochs (starting at epoch 0).
            Pass ``None`` or ``0`` to disable logging.
        """
        for epoch in range(epochs):
            activations, zs = self.forward(x)

            # The loss is only needed for reporting, so compute it just on
            # the epochs that are printed. It reflects the parameters
            # before this epoch's update.
            if log_every and epoch % log_every == 0:
                loss = mean_squared_error(y, activations[-1])
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

            grad_weights, grad_biases = self.backward(activations, zs, y)
            self.update_params(grad_weights, grad_biases, lr)

    def predict(self, x):
        """Return the output-layer activations for input batch ``x``."""
        activations, _ = self.forward(x)
        return activations[-1]

## Trying It Out

In [10]:
# Reproducible toy problem: 500 points drawn uniformly from the unit square,
# labelled 1 when the two coordinates sum to more than 1 and 0 otherwise.
np.random.seed(42)
x = np.random.rand(500, 2)
# Labels are kept as a column vector of shape (500, 1).
y = np.where(x[:, 0] + x[:, 1] > 1, 1, 0).reshape(-1, 1)

In [12]:
# Standardize each feature column to zero mean and unit standard deviation.
x = (x - np.mean(x, axis=0)) / np.std(x, axis=0)

In [13]:
# Build the network and train it on the full dataset.
model = DeepNeuralNetwork(
    layers=[2, 8, 8, 1],  # 2 inputs -> two hidden layers of 8 units -> 1 output
    activation="relu",
)
model.fit(
    x,
    y,
    epochs=1000,
    lr=0.01,
)

Epoch 0, Loss: 0.4649
Epoch 100, Loss: 0.2484
Epoch 200, Loss: 0.2414
Epoch 300, Loss: 0.2360
Epoch 400, Loss: 0.2269
Epoch 500, Loss: 0.2099
Epoch 600, Loss: 0.1788
Epoch 700, Loss: 0.1335
Epoch 800, Loss: 0.0910
Epoch 900, Loss: 0.0694


In [14]:
# Threshold the network output at 0.5 to obtain hard 0/1 class labels.
predictions = (model.predict(x) > 0.5).astype(int)
# Fraction of correct labels, measured on the same x and y used for training.
accuracy = (predictions == y).mean()
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.97
