In [1]:
import numpy as np

# Helper function to create a spiral dataset
def create_spiral_data(samples, classes):
    """
    Build a 2-D, multi-class spiral dataset for classification experiments.

    Every class forms one arm of the spiral: the radius grows linearly from
    0 to 1 along the arm, while the angle sweeps a class-specific range that
    is perturbed with Gaussian noise drawn from NumPy's global random state.

    Args:
        samples: Number of points generated per class.
        classes: Number of spiral arms (distinct labels).

    Returns:
        A tuple ``(X, y)``. ``X`` is a float array of shape
        ``(samples * classes, 2)`` holding the point coordinates and ``y``
        is a ``uint8`` array of shape ``(samples * classes,)`` holding the
        class index of each row.
    """
    total_points = samples * classes
    X = np.zeros((total_points, 2))
    y = np.zeros(total_points, dtype='uint8')

    for label in range(classes):
        # Rows [start, stop) of X and y belong to this class.
        start = samples * label
        stop = samples * (label + 1)
        rows = slice(start, stop)

        radius = np.linspace(0.0, 1, samples)
        base_angle = np.linspace(label * 4, (label + 1) * 4, samples)
        # The noise keeps the arms from being perfectly clean curves.
        noisy_angle = base_angle + np.random.randn(samples) * 0.2
        scaled_angle = noisy_angle * 2.5

        X[rows] = np.column_stack(
            (radius * np.sin(scaled_angle), radius * np.cos(scaled_angle))
        )
        y[rows] = label

    return X, y

# ==============================================================================
# Component 1: The Dense Layer
# ==============================================================================
class DenseLayer:
    """
    Fully-connected (dense) layer computing ``inputs . weights + biases``.

    Attributes:
        weights: Array of shape (n_inputs, n_neurons) drawn from a standard
            normal distribution and scaled by 0.01.
        biases: Array of shape (1, n_neurons), initialised to zeros.
        inputs: The most recent value passed to ``forward`` (set by
            ``forward``).
        output: The most recent result of ``forward`` (set by ``forward``).
    """

    def __init__(self, n_inputs, n_neurons):
        """
        Create the layer's parameters.

        Args:
            n_inputs: Number of features each input sample carries.
            n_neurons: Number of neurons (outputs) in this layer.
        """
        weight_shape = (n_inputs, n_neurons)
        # Small random weights; the 0.01 factor keeps initial outputs near zero.
        self.weights = np.random.randn(*weight_shape) * 0.01
        # One bias per neuron, stored as a row so it broadcasts over the batch.
        self.biases = np.zeros(shape=(1, n_neurons))

    def forward(self, inputs):
        """
        Run the layer on a batch and return the result.

        Args:
            inputs: Batch of samples whose last dimension is ``n_inputs``.

        Returns:
            The layer output, also stored on ``self.output``.
        """
        # Keep the inputs around; backpropagation needs them later.
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases
        return self.output

# ==============================================================================
# Component 2: The ReLU Activation Function
# ==============================================================================
class Activation_ReLU:
    """
    Rectified Linear Unit activation: negative values become zero,
    everything else passes through unchanged.
    """

    def forward(self, inputs):
        """
        Apply ReLU element-wise.

        Args:
            inputs: Values to rectify (typically a dense layer's output).

        Returns:
            ``max(0, input)`` for every element, also stored on
            ``self.output``.
        """
        # Remember what came in; backpropagation needs it later.
        self.inputs = inputs
        rectified = np.maximum(0, inputs)
        self.output = rectified
        return rectified

# ==============================================================================
# Component 3: The Softmax Activation Function (for the output layer)
# ==============================================================================
class Activation_Softmax:
    """
    Softmax activation for the output layer: turns each row of raw scores
    into a probability distribution that sums to 1.
    """

    def forward(self, inputs):
        """
        Apply softmax independently to every row of ``inputs``.

        Args:
            inputs: 2-D batch of raw scores, one row per sample.

        Returns:
            Row-wise probabilities with the same shape as ``inputs``, also
            stored on ``self.output``.
        """
        # Remember what came in; backpropagation needs it later.
        self.inputs = inputs

        # Subtracting each row's maximum leaves the result unchanged but keeps
        # exp() from overflowing on large scores.
        row_peaks = np.max(inputs, axis=1, keepdims=True)
        unnormalized = np.exp(inputs - row_peaks)

        # Divide by the row totals so every row sums to 1.
        row_totals = np.sum(unnormalized, axis=1, keepdims=True)
        self.output = unnormalized / row_totals
        return self.output

# ==============================================================================
# Component 4: The Loss Function
# ==============================================================================
class Loss_CategoricalCrossentropy:
    """
    Categorical cross-entropy loss for a batch of predicted class
    probabilities.

    The ground truth may be given either as sparse integer class labels or
    as one-hot encoded rows.
    """

    def calculate(self, y_pred, y_true):
        """
        Return the mean negative log-likelihood of the true classes.

        Args:
            y_pred: Predicted probabilities, shape (n_samples, n_classes).
            y_true: Either integer class indices of shape (n_samples,), or
                one-hot encoded targets with the same shape as ``y_pred``.

        Returns:
            The average loss over the batch.

        Raises:
            ValueError: If ``y_true`` has more than two dimensions, or is
                2-D but does not match the shape of ``y_pred``.
        """
        y_true = np.asarray(y_true)
        # Get the number of samples in the batch
        n_samples = len(y_pred)
        # Clip so log() never sees exactly 0 (which would give an infinite
        # loss); the upper bound is clipped by the same margin so the
        # clipping does not bias the mean in one direction.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim <= 1:
            # Sparse labels: pick the probability at each row's true class.
            correct_confidences = y_pred_clipped[range(n_samples), y_true]
        elif y_true.ndim == 2:
            if y_true.shape != y_pred_clipped.shape:
                raise ValueError(
                    "one-hot y_true must have the same shape as y_pred: "
                    f"got {y_true.shape} and {y_pred_clipped.shape}"
                )
            # One-hot labels: the mask zeroes every probability except the
            # true class, so the row sum is that class's probability.
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            raise ValueError(
                "y_true must be 1-D class labels or 2-D one-hot rows, "
                f"got an array with {y_true.ndim} dimensions"
            )

        # Calculate the negative log likelihoods
        negative_log_likelihoods = -np.log(correct_confidences)
        # Calculate the average loss for the batch
        data_loss = np.mean(negative_log_likelihoods)
        return data_loss

# ==============================================================================
# Setup and Execution
# ==============================================================================

# 1. Build the dataset
# Three spiral arms (classes) with 100 points each.
X, y = create_spiral_data(samples=100, classes=3)

# 2. Assemble the network components
# Hidden stage: 2 input features -> 64 neurons, followed by ReLU.
dense1, activation1 = DenseLayer(2, 64), Activation_ReLU()
# Output stage: 64 hidden features -> 3 class scores, followed by Softmax.
dense2, activation2 = DenseLayer(64, 3), Activation_Softmax()

# 3. Create the loss function
loss_function = Loss_CategoricalCrossentropy()

# 4. Forward pass
# Every forward() call stores its result on `.output` and also returns it,
# so the stages can be chained directly.
activation1.forward(dense1.forward(X))
# The Softmax output is the network's prediction: one probability row per sample.
predictions = activation2.forward(dense2.forward(activation1.output))

# 5. Loss
# Score the predicted probabilities against the true labels.
loss = loss_function.calculate(predictions, y)

# 6. Report
print("--- First 5 Predictions (Probabilities) ---")
print(predictions[:5])
print("\n--- Network Loss ---")
print(f"Loss: {loss}")

# Sanity check: an untrained network should score roughly -log(1/n_classes).
# With 3 classes that is -log(1/3), about 1.0986. A loss near this value means
# the network starts out with uniform, uninformed predictions, as expected.

--- First 5 Predictions (Probabilities) ---
[[0.33333333 0.33333333 0.33333333]
 [0.33333194 0.33333415 0.33333391]
 [0.33333113 0.3333347  0.33333418]
 [0.33332924 0.33333608 0.33333468]
 [0.33332765 0.33333684 0.3333355 ]]

--- Network Loss ---
Loss: 1.098616900184321
