In [1]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

nnfs.init()	

# Dense layer
class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        Weights are sampled with ``np.random.randn`` and scaled by 0.01,
        giving an ``(n_inputs, n_neurons)`` matrix; biases start out as a
        ``(1, n_neurons)`` row of zeros.
        """
        weight_shape = (n_inputs, n_neurons)
        self.weights = 0.01 * np.random.randn(*weight_shape)
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Store ``inputs`` and compute ``self.output`` from them."""
        # Kept for the backward pass, which needs the inputs
        # to compute the weight gradient
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

    def backward(self, dvalues):
        """Compute gradients from the gradient of the layer's output.

        Sets ``self.dweights`` and ``self.dbiases`` (gradients on the
        parameters) and ``self.dinputs`` (gradient on the layer inputs).
        """
        cached_inputs = self.inputs
        # Gradient passed on to whatever produced this layer's inputs
        self.dinputs = np.dot(dvalues, self.weights.T)
        # Parameter gradients; biases sum over the sample axis and keep
        # the same 2-D row layout as ``self.biases``
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.dweights = np.dot(cached_inputs.T, dvalues)

# ReLU activation
class Activation_ReLU:
    """Rectified linear activation: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Store ``inputs`` and set ``self.output`` to their ReLU."""
        # The backward pass needs to know which inputs were non-positive
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Set ``self.dinputs`` to ``dvalues`` with blocked entries zeroed.

        An entry is blocked when the matching forward input was <= 0.
        ``dvalues`` itself is left untouched.
        """
        blocked = self.inputs <= 0
        # Work on a copy so the caller's gradient array is not modified
        gradient = dvalues.copy()
        gradient[blocked] = 0
        self.dinputs = gradient

# Softmax activation
class Activation_Softmax:
    """Softmax activation applied independently to every row (sample)."""

    # Forward pass
    def forward(self, inputs):
        """Store ``inputs`` and set ``self.output`` to row-wise softmax.

        ``inputs`` must be 2-D (samples x classes) because the reductions
        run over ``axis=1``.
        """
        # Remember input values
        self.inputs = inputs

        # Subtracting the row maximum keeps np.exp from overflowing and
        # does not change the normalized result
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(shifted)

        # Normalize them for each sample
        self.output = exp_values / np.sum(exp_values, axis=1,
                                          keepdims=True)

    # Backward pass
    def backward(self, dvalues):
        """Set ``self.dinputs`` to the gradient on the softmax inputs.

        For one sample with output ``s`` the Jacobian is
        ``diag(s) - s s^T``, so the Jacobian-vector product with ``d`` is
        ``s * d - s * (s . d)``. Evaluating that closed form for the whole
        batch avoids building a Jacobian matrix per sample in a Python
        loop.

        The result takes its dtype from the arithmetic on ``self.output``
        and ``dvalues``. It is no longer written into a buffer with the
        dtype of ``dvalues``, so integer ``dvalues`` do not truncate the
        gradient.
        """
        dvalues = np.asarray(dvalues)

        # s * d for every sample
        weighted = self.output * dvalues
        # s . d for every sample, kept as a column for broadcasting
        row_dot = np.sum(weighted, axis=1, keepdims=True)

        self.dinputs = weighted - self.output * row_dot
# SGD optimizer
class Optimizer_SGD:
    """Plain stochastic gradient descent with a fixed learning rate."""

    def __init__(self, learning_rate=1.0):
        """Store the optimizer settings; the default learning rate is 1.0."""
        self.learning_rate = learning_rate

    def update_params(self, layer):
        """Step ``layer.weights`` and ``layer.biases`` against their gradients.

        Reads ``layer.dweights`` and ``layer.dbiases`` and updates the
        parameters in place.
        """
        step_size = self.learning_rate
        layer.weights -= step_size * layer.dweights
        layer.biases -= step_size * layer.dbiases

# Common loss class
class Loss:
    """Base class for losses; subclasses supply ``forward(output, y)``."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        ``output`` (model output) and ``y`` (ground truth values) are
        passed unchanged to ``self.forward``, and the values it returns
        are averaged with ``np.mean``.
        """
        return np.mean(self.forward(output, y))

# Cross-entropy loss
class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy loss for sparse or one-hot labels."""

    # Forward pass
    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of each sample.

        ``y_pred`` holds predicted probabilities, one row per sample.
        ``y_true`` is either 1-D (class indices) or 2-D (one-hot rows,
        same shape as ``y_pred``).

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        # Accept plain sequences as well as arrays
        y_true = np.asarray(y_true)

        # Clip data to prevent log(0)
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Categorical labels: pick the probability of the target class
            samples = len(y_pred)
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif y_true.ndim == 2:
            # One-hot labels: mask out everything but the target class
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Fail clearly instead of leaving correct_confidences unbound
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {y_true.ndim} dimension(s)"
            )

        # Losses
        return -np.log(correct_confidences)

    # Backward pass
    def backward(self, dvalues, y_true):
        """Set ``self.dinputs`` to the gradient of the mean loss.

        ``dvalues`` holds the predicted values the loss was computed on.
        ``y_true`` may be class indices (1-D) or one-hot rows.
        """
        # Accept plain sequences as well as arrays
        y_true = np.asarray(y_true)

        # Number of samples
        samples = len(dvalues)
        # Number of labels in every sample,
        # counted from the first sample
        labels = len(dvalues[0])

        # If labels are sparse, turn them into one-hot vectors
        if y_true.ndim == 1:
            y_true = np.eye(labels)[y_true]

        # Gradient of -log(p) on the predictions, then normalized by
        # the batch size to match the mean taken in Loss.calculate
        gradient = -y_true / dvalues
        self.dinputs = gradient / samples

# Softmax classifier - combined Softmax activation
# and cross-entropy loss for faster backward step
class Activation_Softmax_Loss_CategoricalCrossentropy:
    """Softmax activation fused with categorical cross-entropy loss.

    Combining the two allows a simpler, faster backward step.
    """

    def __init__(self):
        """Create the wrapped activation and loss objects."""
        self.loss = Loss_CategoricalCrossentropy()
        self.activation = Activation_Softmax()

    def forward(self, inputs, y_true):
        """Run softmax on ``inputs`` and return the loss against ``y_true``.

        The softmax result is exposed as ``self.output``.
        """
        softmax = self.activation
        softmax.forward(inputs)
        self.output = softmax.output
        return self.loss.calculate(self.output, y_true)

    def backward(self, dvalues, y_true):
        """Set ``self.dinputs`` to ``(dvalues - one_hot(y_true)) / samples``.

        ``dvalues`` is left unmodified; ``y_true`` may be class indices
        (1-D) or one-hot rows (2-D).
        """
        batch_size = len(dvalues)

        # One-hot rows are reduced to class indices so they can be used
        # for fancy indexing below
        targets = y_true
        if len(targets.shape) == 2:
            targets = np.argmax(targets, axis=1)

        # Copy first: the subtraction below happens in place
        gradient = dvalues.copy()
        gradient[np.arange(batch_size), targets] -= 1

        # Normalize by the number of samples
        self.dinputs = gradient / batch_size

    
# Dataset
X, y = spiral_data(samples=100, classes=3)

# Model: dense (2 -> 64) -> ReLU -> dense (64 -> 3) -> softmax + loss
dense1 = Layer_Dense(2, 64)
activation1 = Activation_ReLU()
dense2 = Layer_Dense(64, 3)
loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()

# Optimizer with its default settings
optimizer = Optimizer_SGD()

# Training loop
for epoch in range(10001):

    # Forward pass through the whole model; the last step
    # also returns the loss
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)
    loss = loss_activation.forward(dense2.output, y)

    # Accuracy: fraction of samples whose most confident class
    # matches the label (one-hot labels are reduced to indices)
    predictions = np.argmax(loss_activation.output, axis=1)
    if len(y.shape) == 2:
        y = np.argmax(y, axis=1)
    accuracy = np.mean(predictions == y)

    # Report progress every 100 epochs
    if epoch % 100 == 0:
        print(f'epoch:{epoch}, '
              f'acc:{accuracy:.3f}, '
              f'loss:{loss:.3f}')

    # Backward pass, in reverse order of the forward pass
    loss_activation.backward(loss_activation.output, y)
    dense2.backward(loss_activation.dinputs)
    activation1.backward(dense2.dinputs)
    dense1.backward(activation1.dinputs)

    # Apply the gradients to both dense layers
    for trainable_layer in (dense1, dense2):
        optimizer.update_params(trainable_layer)
    
    

epoch:0, acc:0.360, loss:1.099
epoch:100, acc:0.400, loss:1.087
epoch:200, acc:0.417, loss:1.077
epoch:300, acc:0.413, loss:1.076
epoch:400, acc:0.400, loss:1.074
epoch:500, acc:0.400, loss:1.071
epoch:600, acc:0.417, loss:1.067
epoch:700, acc:0.437, loss:1.062
epoch:800, acc:0.423, loss:1.055
epoch:900, acc:0.387, loss:1.064
epoch:1000, acc:0.400, loss:1.063
epoch:1100, acc:0.443, loss:1.063
epoch:1200, acc:0.403, loss:1.061
epoch:1300, acc:0.390, loss:1.053
epoch:1400, acc:0.447, loss:1.060
epoch:1500, acc:0.420, loss:1.043
epoch:1600, acc:0.430, loss:1.032
epoch:1700, acc:0.403, loss:1.048
epoch:1800, acc:0.450, loss:1.041
epoch:1900, acc:0.427, loss:1.025
epoch:2000, acc:0.417, loss:1.038
epoch:2100, acc:0.463, loss:1.021
epoch:2200, acc:0.490, loss:1.007
epoch:2300, acc:0.450, loss:1.002
epoch:2400, acc:0.480, loss:0.994
epoch:2500, acc:0.467, loss:0.994
epoch:2600, acc:0.497, loss:0.975
epoch:2700, acc:0.443, loss:1.002
epoch:2800, acc:0.523, loss:0.965
epoch:2900, acc:0.533, los