In [3]:
import numpy as np

# Helper function to create a spiral dataset
def create_spiral_data(samples, classes):
    """Generate a 2-D spiral dataset for classification experiments.

    Each class forms one spiral arm of ``samples`` points. Along an arm the
    radius grows linearly from 0 to 1 while the angle sweeps the interval
    ``[4 * c, 4 * (c + 1)]`` for class index ``c``; Gaussian noise (scaled by
    0.2) is added to the angle before it is multiplied by 2.5 and fed to
    sin/cos.

    The noise is drawn from NumPy's global random state via
    ``np.random.randn``, so seed it beforehand for reproducible data.

    Args:
        samples: Number of points generated per class.
        classes: Number of spiral arms / class labels.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(samples * classes, 2)``
        and ``y`` is a ``uint8`` vector of length ``samples * classes``
        holding the class index of each row of ``X``.
    """
    n_points = samples * classes
    X = np.zeros((n_points, 2))
    y = np.zeros(n_points, dtype='uint8')

    # The radius profile is identical for every arm, so build it once.
    radius = np.linspace(0.0, 1, samples)

    for class_number in range(classes):
        # Rows [start, stop) of X and y belong to this class.
        start = samples * class_number
        stop = start + samples

        sweep = np.linspace(class_number * 4, (class_number + 1) * 4, samples)
        jitter = np.random.randn(samples) * 0.2
        angle = (sweep + jitter) * 2.5

        X[start:stop, 0] = radius * np.sin(angle)
        X[start:stop, 1] = radius * np.cos(angle)
        y[start:stop] = class_number

    return X, y

# ==============================================================================
# Component 1: Dense Layer (with forward and backward passes)
# ==============================================================================
class DenseLayer:
    """Fully connected layer: ``output = dot(inputs, weights) + biases``.

    Attributes set by the methods:
        weights: Array of shape ``(n_inputs, n_neurons)``.
        biases: Array of shape ``(1, n_neurons)``.
        inputs / output: Cached by ``forward``.
        dweights / dbiases / dinputs: Gradients stored by ``backward``.
    """

    def __init__(self, n_inputs, n_neurons):
        """Create small random weights and zero biases.

        Args:
            n_inputs: Number of features feeding each neuron.
            n_neurons: Number of neurons (output features) in the layer.
        """
        weight_shape = (n_inputs, n_neurons)
        # Standard-normal draws scaled down by 0.01 to start near zero.
        self.weights = np.random.randn(*weight_shape) * 0.01
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute and return the layer output for ``inputs``.

        The inputs are remembered on the instance because ``backward``
        needs them to form the weight gradient.
        """
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases
        return self.output

    def backward(self, dvalues):
        """Store gradients given ``dvalues``, the gradient w.r.t. the output.

        Sets ``dweights`` (inputs transposed times ``dvalues``), ``dbiases``
        (``dvalues`` summed over axis 0, kept 2-D) and ``dinputs``
        (``dvalues`` times the transposed weights). Returns nothing.
        """
        inputs_t = self.inputs.T
        self.dweights = np.dot(inputs_t, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        weights_t = self.weights.T
        self.dinputs = np.dot(dvalues, weights_t)

# ==============================================================================
# Component 2: ReLU Activation (with forward and backward passes)
# ==============================================================================
class Activation_ReLU:
    """Rectified linear unit: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Clamp negative entries of ``inputs`` to zero and return the result.

        The raw inputs are cached on the instance so ``backward`` can tell
        which positions were clamped.
        """
        self.inputs = inputs
        rectified = np.maximum(0, inputs)
        self.output = rectified
        return rectified

    def backward(self, dvalues):
        """Store in ``dinputs`` the gradient w.r.t. the inputs.

        ``dvalues`` is the gradient w.r.t. this activation's output. The
        gradient passes through unchanged wherever the cached input was
        positive and is zeroed wherever the input was <= 0.
        """
        # Work on a copy so the caller's dvalues array is left untouched.
        passthrough = dvalues.copy()
        self.dinputs = passthrough
        # Zero the gradient at every position whose input was non-positive.
        passthrough[self.inputs <= 0] = 0

# ==============================================================================
# Component 3: Softmax Activation (forward pass only)
# ==============================================================================
class Activation_Softmax:
    """Softmax activation applied independently to each row."""

    def forward(self, inputs):
        """Turn each row of ``inputs`` into a probability distribution.

        The row maximum is subtracted before exponentiating so the largest
        exponent is 0, which keeps ``np.exp`` from overflowing on large
        scores. The result is stored in ``self.output`` and returned.
        """
        row_max = np.max(inputs, axis=1, keepdims=True)
        shifted = inputs - row_max
        exp_values = np.exp(shifted)
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals
        return self.output

# ==============================================================================
# Component 4: Combined Loss and Softmax Backward Pass
# ==============================================================================
class Activation_Softmax_Loss_CategoricalCrossentropy:
    """Softmax activation fused with categorical cross-entropy loss.

    ``forward`` chains the softmax and the loss; ``backward`` computes the
    gradient of the pair in a single step as
    ``(predictions - one_hot_targets) / n_samples``.
    """

    def __init__(self):
        """Instantiate the wrapped softmax activation and loss objects."""
        self.activation = Activation_Softmax()
        self.loss = Loss_CategoricalCrossentropy()

    def forward(self, inputs, y_true):
        """Run softmax on ``inputs`` and return the loss against ``y_true``.

        The softmax probabilities are kept in ``self.output``.
        """
        probabilities = self.activation.forward(inputs)
        self.output = probabilities
        return self.loss.calculate(probabilities, y_true)

    def backward(self, y_pred, y_true):
        """Store in ``dinputs`` the gradient w.r.t. the softmax inputs.

        Args:
            y_pred: Predicted probabilities, one row per sample.
            y_true: Targets, either a 1-D array of class indices or an
                array already shaped like ``y_pred`` (one-hot).
        """
        batch_size = len(y_pred)

        # Class indices are expanded to one-hot rows by picking rows of an
        # identity matrix whose size is the number of predicted classes.
        if len(y_true.shape) == 1:
            n_classes = len(y_pred[0])
            y_true = np.eye(n_classes)[y_true]

        # Dividing by the batch size averages the gradient over the samples.
        self.dinputs = (y_pred - y_true) / batch_size

# ==============================================================================
# Component 5: The Loss Function (calculation only)
# ==============================================================================
class Loss_CategoricalCrossentropy:
    """Categorical cross-entropy loss over a batch of predictions."""

    def calculate(self, y_pred, y_true):
        """Return the mean negative log-likelihood of the true classes.

        Args:
            y_pred: Predicted probabilities, one row per sample.
            y_true: Targets as an array-like: either 1-D class indices
                (one per sample) or a 2-D one-hot array shaped like
                ``y_pred``.

        Returns:
            The negative log of the probability assigned to each sample's
            true class, averaged over the batch.

        Raises:
            ValueError: If ``y_true`` is neither 1-D nor 2-D.
        """
        n_samples = len(y_pred)
        # Accept plain Python sequences as labels, not only ndarrays.
        y_true = np.asarray(y_true)

        # Keep probabilities away from exactly 0 (log would be -inf); the
        # upper bound is clipped by the same amount.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Sparse labels: pick the predicted probability of the true class.
            correct_confidences = y_pred_clipped[range(n_samples), y_true]
        elif y_true.ndim == 2:
            # One-hot labels: the mask-and-sum keeps only the true-class term.
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Previously this fell through to an UnboundLocalError below.
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {y_true.ndim} dimension(s)"
            )

        negative_log_likelihoods = -np.log(correct_confidences)
        data_loss = np.mean(negative_log_likelihoods)
        return data_loss

# ==============================================================================
# Component 6: The Optimizer
# ==============================================================================
class Optimizer_SGD:
    """Plain gradient-descent optimizer with a fixed learning rate."""

    def __init__(self, learning_rate=1.0):
        """Remember the step size applied to every gradient."""
        self.learning_rate = learning_rate

    def update_params(self, layer):
        """Move ``layer.weights`` and ``layer.biases`` against their gradients.

        Reads ``layer.dweights`` and ``layer.dbiases`` and updates the
        parameters with an in-place ``+=``.
        """
        # Negating the rate once lets both updates be written as additions.
        negative_rate = -self.learning_rate
        layer.weights += negative_rate * layer.dweights
        layer.biases += negative_rate * layer.dbiases

# ==============================================================================
# Main Training Loop
# ==============================================================================

# 1. Create the Dataset
X, y = create_spiral_data(samples=100, classes=3)

# 2. Create Network and Optimizer
# 2 input features -> 64 hidden units (ReLU) -> 3 class scores, which feed
# the combined softmax / cross-entropy object.
dense1 = DenseLayer(2, 64)
activation1 = Activation_ReLU()
dense2 = DenseLayer(64, 3)
loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()
optimizer = Optimizer_SGD(learning_rate=0.85)

# Accuracy is measured against class indices. The labels never change during
# training, so a one-hot `y` is collapsed to indices once, not every epoch.
if len(y.shape) == 2: # If y is one-hot
    y_labels = np.argmax(y, axis=1)
else: # If y is sparse
    y_labels = y

# 3. Train for 20,001 epochs (epoch 0 through epoch 20000 inclusive)
for epoch in range(20001):
    # --- Forward Pass ---
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)
    loss = loss_activation.forward(dense2.output, y)

    # --- Accuracy Calculation ---
    # The predicted class is the index of the largest softmax probability.
    predictions = np.argmax(loss_activation.output, axis=1)
    accuracy = np.mean(predictions == y_labels)

    # Print progress
    if epoch % 1000 == 0:
        print(f'epoch: {epoch}, acc: {accuracy:.3f}, loss: {loss:.3f}')

    # --- Backward Pass ---
    # Gradients flow through the layers in the reverse of the forward order.
    loss_activation.backward(loss_activation.output, y)
    dense2.backward(loss_activation.dinputs)
    activation1.backward(dense2.dinputs)
    dense1.backward(activation1.dinputs)

    # --- Update Weights and Biases ---
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)

epoch: 0, acc: 0.323, loss: 1.099
epoch: 1000, acc: 0.467, loss: 1.022
epoch: 2000, acc: 0.480, loss: 0.948
epoch: 3000, acc: 0.563, loss: 0.892
epoch: 4000, acc: 0.623, loss: 0.768
epoch: 5000, acc: 0.687, loss: 0.640
epoch: 6000, acc: 0.687, loss: 0.611
epoch: 7000, acc: 0.687, loss: 0.622
epoch: 8000, acc: 0.697, loss: 0.591
epoch: 9000, acc: 0.663, loss: 0.618
epoch: 10000, acc: 0.697, loss: 0.592
epoch: 11000, acc: 0.733, loss: 0.544
epoch: 12000, acc: 0.810, loss: 0.525
epoch: 13000, acc: 0.740, loss: 0.560
epoch: 14000, acc: 0.757, loss: 0.534
epoch: 15000, acc: 0.787, loss: 0.490
epoch: 16000, acc: 0.753, loss: 0.553
epoch: 17000, acc: 0.763, loss: 0.532
epoch: 18000, acc: 0.750, loss: 0.544
epoch: 19000, acc: 0.773, loss: 0.529
epoch: 20000, acc: 0.777, loss: 0.528
