In [40]:
from typing import Type, Tuple

import numpy as np
import nnfs
from nnfs.datasets import spiral_data, vertical_data

In [5]:
nnfs.init()

In [205]:
# Activation functions
class ActivationFunction:
    def __init__(self) -> None:
        self.inputs: np.ndarray
        self.output: np.ndarray

    def forward(self, inputs: np.ndarray) -> None:
        self.inputs = inputs

class Activation_ReLu(ActivationFunction):
    def __init__(self) -> None:
        super().__init__()
    
    def forward(self, inputs) -> None:
        self.inputs = inputs
        self.output = np.maximum(0, inputs)
        
    def backward(self, dvalues):
        self.dinputs = dvalues.copy()
        self.dinputs[self.inputs <= 0] = 0


class Activation_Softmax(ActivationFunction):
    def __init__(self) -> None:
        super().__init__()
        
    def forward(self, inputs: np.ndarray) -> None:
        self.inputs = inputs
        
        exponientiated = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        self.output =  exponientiated / np.sum(exponientiated, axis=1, keepdims=True)

In [132]:
# Loss
class Loss:
    def __init__(self) -> None:
        self.inputs: np.ndarray
        self.output: np.ndarray
        
    def forward(self, y_pred: np.ndarray, y: np.ndarray) -> None:
        self.inputs = y_pred
    
    def calculate(self, output: np.ndarray, y: np.ndarray) -> float:
        self.forward(self, output, y)
        return np.mean(self.output)
    
    
class CategoricalCrossEntropy(Loss):
    def __init__(self) -> None:
        super().__init__()
        
    def forward(self, y_pred: np.ndarray, y_true: np.ndarray):
        
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)
        
        if len(y_true.shape) == 1:
            correct_confidences = y_pred_clipped[range(len(y_pred)), y_true]
        
        elif len(y_true.shape) == 2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
            
        self.output = -np.log(correct_confidences)

In [134]:
class Activation_Softmax_Loss_CategoricalCrossEntropy:
    def __init__(self) -> None:
        self.activation_function = Activation_Softmax
        self.loss = CategoricalCrossEntropy
        
    def forward(self, inputs: np.ndarray, y_true: np.ndarray) -> float:
        self.activation_function.forward(self.activation_function, inputs)
        
        self.output = self.activation_function.output
        
        return self.loss.calculate(self.loss, self.output, y_true)
    
    def backward(self, dvalues: np.ndarray, y_true: np.ndarray) -> None:
        samples = len(dvalues)
        
        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis=1)
            
        self.dinputs = dvalues.copy()
        
        self.dinputs[range(samples), y_true] -= 1
        
        self.dinputs = self.dinputs / samples

In [179]:
class Layer_Dense:
    def __init__(self, size: Tuple[int, int]):
        self.size = size
        
        self.weights = np.random.rand(*size)
        self.biases = np.zeros((1, size[1]))
        
        self.inputs: np.ndarray
        self.output: np.ndarray
        
    def forward(self, inputs: np.ndarray):
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights) + self.biases
        
        
    def backward(self, dvalues) -> None:
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.dinputs = np.dot(dvalues, self.weights.T)
        
        

In [201]:
class Optimiser_SGD:
    def __init__(self, learning_rate=1.0) -> None:
        self.learning_rate = learning_rate
        
    def update_params(self, layer: Layer_Dense )-> None:
        layer.weights += -self.learning_rate * layer.dweights
        layer.biases += -self.learning_rate * layer.dbiases

In [180]:
def accuracy(y_pred: np.ndarray, y_true: np.ndarray) -> float:
    
    if len(y_true.shape) == 2:
        y_true = np.argmax(y_true, axis=1)
        
    return np.mean(y_pred == y_true)

In [208]:
X, y = spiral_data(100, 3)


dense1 = Layer_Dense((2, 64))
activation1 = Activation_ReLu()
dense2 = Layer_Dense((64, 3))
loss_activation = Activation_Softmax_Loss_CategoricalCrossEntropy()
optmiser = Optimiser_SGD()

for epoch in range(10001):
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)
    loss = loss_activation.forward(dense2.output, y)


    loss_activation.backward(loss_activation.output, y)
    dense2.backward(loss_activation.dinputs)
    activation1.backward(dense2.dinputs)
    dense1.backward(activation1.dinputs)

    optmiser.update_params(dense1)
    optmiser.update_params(dense2)


    predictions = np.argmax(loss_activation.output, axis=1)
    
    if epoch % 1000 == 0:
        print(f"{epoch}. Acc: {accuracy(predictions, y)} | Loss: {loss}")


0. Acc: 0.3233333333333333 | Loss: 1.177062749862671
1000. Acc: 0.47333333333333333 | Loss: 0.9750047922134399
2000. Acc: 0.5166666666666667 | Loss: 0.9293308258056641
3000. Acc: 0.5466666666666666 | Loss: 0.8958826661109924
4000. Acc: 0.56 | Loss: 0.8663973808288574
5000. Acc: 0.5633333333333334 | Loss: 0.8581197261810303
6000. Acc: 0.5166666666666667 | Loss: 0.9957481026649475
7000. Acc: 0.54 | Loss: 0.8755113482475281
8000. Acc: 0.64 | Loss: 0.723547637462616
9000. Acc: 0.64 | Loss: 0.7193661332130432
10000. Acc: 0.66 | Loss: 0.6661862134933472
