# Scratch Artificial Neural Network

In [1208]:
# Import libraries
# External libraries
import torch

# Python built-in libraries
from functools import lru_cache
from enum import Enum


In [1209]:
# Disable __grad based on assistant direction
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x323dec4d0>

# Classes for Artificial Neural Network

In [1210]:
class ActivationFunction:
    """
    ActivationFunction Class.
    
    This class is used to define the activation function.
    """
    
    @staticmethod
    @lru_cache(maxsize=None)
    def linear(x):
        return x
    
    @staticmethod
    @lru_cache(maxsize=None)
    def relu(x):
        return torch.relu(x)

    @staticmethod
    @lru_cache(maxsize=None)
    def sigmoid(x):
        return torch.sigmoid(x)

    @staticmethod
    @lru_cache(maxsize=None)
    def tanh(x):
        return torch.tanh(x)

    @staticmethod
    @lru_cache(maxsize=None)
    def softmax(x):
        return torch.softmax(x, dim=0)
    
    @staticmethod
    def derivative_linear(_):
        return 1
    
    @staticmethod
    def derivative_relu(x):
        return torch.where(x > 0, 1, 0)
    
    @staticmethod
    def derivative_sigmoid(x):
        return torch.sigmoid(x) * (1 - torch.sigmoid(x))
    
    @staticmethod
    def derivative_tanh(x):
        return (2 / torch.exp(x) - torch.exp(-x)) ** 2

    @staticmethod
    def derivative_softmax(x):
        s = torch.softmax(x, dim=0).reshape(-1, 1)
        return torch.diagflat(s) - torch.mm(s, s.T)

In [1211]:
class LossFunction:
    """
    LossFunction Class.
    
    This class is used to define the loss function.
    """

    @staticmethod
    def mean_squared_error(y_pred, y_true):
        return torch.mean((y_pred - y_true) ** 2)

    @staticmethod
    def binary_cross_entropy(y_pred, y_true):
        return -torch.sum(y_true * torch.log(y_pred) + (1 - y_true) * torch.log(1 - y_pred))

    @staticmethod
    def categorical_cross_entropy(y_pred, y_true):
        return -torch.sum(y_true * torch.log(y_pred))

In [1212]:
class InitializerType(Enum):
    """
    InitializerType Enum.
    
    This enum is used to define the type of weight initialization.
    Attributes:
        ZERO: Zero initialization
        RANDOM_DIST_UNIFORM: Random distribution uniform initialization
        RANDOM_DIST_NORMAL: Random distribution normal initialization
    """
    ZERO = 0
    RANDOM_DIST_UNIFORM = 1
    RANDOM_DIST_NORMAL = 2

In [1213]:
class Initializer:
    """
    Initializer Class.
    
    This class is used to initialize weights and biases.
    """

    @staticmethod
    def init_weights(weight_init : InitializerType, param_1, param_2, size: int):
        """
        Initialize weights
        :param weight_init: Type of weight initialization
        :param param_1: Lower bound or mean
        :param param_2: Upper bound or standard deviation
        :param size: Size of the weight
        :return: Initialized weights
        """
        if weight_init == InitializerType.ZERO:
            return Initializer.zero_init(size)
        elif weight_init == InitializerType.RANDOM_DIST_UNIFORM:
            return Initializer.random_dist_uniform(size, param_1, param_2)
        elif weight_init == InitializerType.RANDOM_DIST_NORMAL:
            return Initializer.random_dist_normal(size, param_1, param_2)

    @staticmethod
    def init_bias(bias_init: InitializerType, param_1, param_2):
        """
        Initialize Bias
        :param bias_init: Type of bias initialization
        :param param_1: Lower bound or mean
        :param param_2: Upper bound or standard deviation
        :return: Initialized bias
        """
        if bias_init == InitializerType.ZERO:
            return 0
        elif bias_init == InitializerType.RANDOM_DIST_UNIFORM:
            return torch.rand(1) * (param_2 - param_1) + param_1
        elif bias_init == InitializerType.RANDOM_DIST_NORMAL:
            return torch.randn(1) * param_2 + param_1


    @staticmethod
    def zero_init(size):
        return torch.zeros(size)

    @staticmethod
    def random_dist_uniform(size, lower_bound, upper_bound):
        return torch.rand(size) * (upper_bound - lower_bound) + lower_bound

    @staticmethod
    def random_dist_normal(size, mean, std):
        return torch.randn(size) * std + mean

In [1214]:
class Neuron:
    """
    Neuron Class.
    
    Consist of weights and bias for every input feature.
    """
    def __init__(self, weight_init : InitializerType, bias_init : InitializerType, input_size, param_1, param_2):
        self.weights = Initializer.init_weights(weight_init, param_1, param_2, input_size)
        self.bias = Initializer.init_bias(bias_init, param_1, param_2)
        self.cost_weight = torch.zeros(input_size)
        self.cost_bias = torch.zeros(input_size)
        self.error_node = None

    def forward(self, x):
        return torch.sum(x * self.weights) + self.bias

    def weight_update(self):
        self.weights += self.cost_weight
        
    def bias_update(self):
        self.bias += self.cost_bias

In [1215]:
class Utils:
    """
    Encoder Class

    Encode values
    """
    @staticmethod
    def output_minus_target(output, target):
        """
        Calculate output minus target
        :param output: Tensor Array of output
        :param target: Int target
        :return: Tensor Array of output minus target
        """
        target_array = torch.zeros_like(output)
        target_array[target] = 1
        return output - target_array

In [1216]:
class Layer:
    """
    Layer Class.

    Consist of many neurons of Type Layer.
    """

    def __init__(self, weight_init : InitializerType, bias_init : InitializerType, input_size, output_size, param_1, param_2, activation = ActivationFunction.linear, layer_name = None):
        self.layer_name = layer_name
        self.neurons = [Neuron(weight_init, bias_init, input_size, param_1, param_2) for _ in range(output_size)]
        self.activation_func = activation
        self.sum = None
        self.output = None
        self.derivative_activation = None
        self.error_node = None

        match activation:
            case ActivationFunction.linear:
                self.derivative_activation = ActivationFunction.derivative_linear
            case ActivationFunction.relu:
                self.derivative_activation = ActivationFunction.derivative_relu
            case ActivationFunction.sigmoid:
                self.derivative_activation = ActivationFunction.derivative_sigmoid
            case ActivationFunction.tanh:
                self.derivative_activation = ActivationFunction.derivative_tanh
            case ActivationFunction.softmax:
                self.derivative_activation = ActivationFunction.derivative_softmax

    def forward(self, x):
        self.sum = torch.stack([neuron.forward(x) for neuron in self.neurons])
        self.output = self.activation_func(self.sum)
        return self.output

    def backward(self, lr, prev_layer, target=None):
        # Iterate through all neurons
        self.error_node = torch.zeros_like(self.output)
        for i, neuron in enumerate(self.neurons):
            # Calculate error node
            sum_of_weight = torch.sum(neuron.weights * prev_layer.error_node)
            error_node = self.output[i] * (1 - self.output[i]) * sum_of_weight
            neuron.error_node = error_node
            self.error_node[i] = error_node
            # Calculate cost weight
            self.neurons[i].cost_weight = -lr * error_node * self.output[i]
            # Calculate cost bias
            self.neurons[i].cost_bias = -lr * error_node * self.neurons[i].bias

    def update_weight(self):
        for neuron in self.neurons:
            neuron.weight_update()

    def update_bias(self):
        for neuron in self.neurons:
            neuron.bias_update()

    def __str__(self):
        return f"Layer Name: {self.layer_name}\nNeurons: {len(self.neurons)}\n"

In [1217]:
class OutputLayer(Layer):
    """
    OutputLayer Class.
    
    Consist of many neurons of Output Layer
    """
    def __init__(self, weight_init : InitializerType, bias_init : InitializerType, input_size, output_size, param_1, param_2, activation = ActivationFunction.linear, layer_name = None):
        super().__init__(weight_init, bias_init, input_size, output_size, param_1, param_2, activation, layer_name)
        
    def forward(self, x):
        self.sum = torch.stack([neuron.forward(x) for neuron in self.neurons])
        self.output = ActivationFunction.sigmoid(self.sum)
        return self.output

    def backward(self, lr, target = None, layer = None):
        self.error_node = torch.zeros_like(self.output)
        target_min_output = Utils.output_minus_target(self.output, target)
        for i, neuron in enumerate(self.neurons):
            target_delta = target_min_output[i]
            error_node = self.output[i] * (1 - self.output[i]) * target_delta
            self.error_node[i] = error_node
            neuron.error_node = error_node
            self.neurons[i].cost_weight = -lr * error_node * self.output[i]
            self.neurons[i].cost_bias = -lr * error_node



In [1218]:
class InputLayer:
    """
    InputLayer Class.

    Consist of many neurons of Input Layer
    This layer only store input without weight.
    """

    def __init__(self, input_size, layer_name = None):
        self.input_size = input_size
        self.input = torch.zeros(input_size)
        self.layer_name = layer_name

    def forward(self, x):
        self.input = x
        return x

    def __str__(self):
        return f"Layer Name: {self.layer_name}\nNeurons: {self.input_size}\n"

In [1219]:
class ArtificialNeuralNetwork:
    """
    ArtificialNeuralNetwork Class.
    
    Consist of many layers of Type Layer.
    """
    def __init__(self, input_size, output_size, hidden_layers, hidden_size, weight_init, bias_init, param_1, param_2, seeds = 0, activation = ActivationFunction.linear, loss_func = LossFunction.mean_squared_error):
        self.layers = []
        self.layers.append(InputLayer(input_size, "Input Layer"))
        for i in range(hidden_layers):
            if i == 0:
                self.layers.append(Layer(weight_init, bias_init, input_size, hidden_size, param_1, param_2, activation, f"Hidden Layer {i}"))
            else:
                self.layers.append(Layer(weight_init, bias_init, hidden_size, hidden_size, param_1, param_2, activation, f"Hidden Layer {i}"))
        self.layers.append(OutputLayer(weight_init, bias_init, hidden_size, output_size, param_1, param_2, activation, "Output Layer"))
        self.loss_func = loss_func
        torch.manual_seed(seeds)
        self.target = None
        self.target_min_output = None

    def forward(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, lr, target):
        index = len(self.layers) - 1
        for layer in reversed(self.layers):
            if isinstance(layer, InputLayer):
                for layer_up in self.layers:
                    if isinstance(layer_up, InputLayer):
                        continue
                    layer_up.update_bias()
                    layer_up.update_weight()
            else:
                if isinstance(layer, OutputLayer):
                    layer.backward(lr, target)
                else:
                    layer.backward(lr, self.layers[index], target)
            index -= 1


    def train(self, x, y, lr, epochs, loss_func, verbose = False):
        for epoch in range(epochs):
            loss = 0
            iter = 1
            for x_input, y_input in zip(x, y):
                y_pred = self.forward(x_input)
                loss = loss_func(y_pred, y_input)
                self.target_min_output = Utils.output_minus_target(y_pred, y_input)
                self.backward(lr, y_input)
                if iter % 1000 == 0 and verbose:
                    print(f"Iter {iter} - Loss: {loss}")
                iter += 1
            if verbose:
                print(f"Epoch {epoch} - Loss: {loss}")

# Pipeline for testing MNIST dataset

In [1220]:
# Const variables
input_size = 784
hidden_layers = 1
output_size = 10
learning_rate = 0.01
param_1 = 1
param_2 = 1

In [1221]:
# Import Libraries
import numpy as np
import pandas as pd

In [1222]:
# Import Dataset
train = pd.read_csv("data/train.csv")

In [1223]:
# Data Preprocessing
data = np.array(train)
m, n = data.shape
np.random.shuffle(data)

data_dev = data[0:1000].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n]
X_dev = X_dev / 255.

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.
_,m_train = X_train.shape
X_train = X_train.T

In [1224]:
# Initialize Artificial Neural Network
ann = ArtificialNeuralNetwork(input_size, output_size, hidden_layers, 128, InitializerType.RANDOM_DIST_NORMAL, InitializerType.RANDOM_DIST_NORMAL, param_1, param_2, 0, ActivationFunction.sigmoid, LossFunction.mean_squared_error)

In [1225]:
# Train Artificial Neural Network
ann.train(torch.tensor(X_train), torch.tensor(Y_train), learning_rate, 100, LossFunction.mean_squared_error, verbose = True)

Iter 1000 - Loss: 1.0
Iter 2000 - Loss: 4.0
Iter 3000 - Loss: 0.0
Iter 4000 - Loss: 0.0
Iter 5000 - Loss: 1.0
Iter 6000 - Loss: 64.0
Iter 7000 - Loss: 64.0
Iter 8000 - Loss: 1.0
Iter 9000 - Loss: 0.0
Iter 10000 - Loss: 49.0
Iter 11000 - Loss: 64.0
Iter 12000 - Loss: 49.0
Iter 13000 - Loss: 1.0
Iter 14000 - Loss: 0.0
Iter 15000 - Loss: 0.0
Iter 16000 - Loss: 49.0
Iter 17000 - Loss: 1.0
Iter 18000 - Loss: 25.0
Iter 19000 - Loss: 64.0
Iter 20000 - Loss: 16.0
Iter 21000 - Loss: 36.0
Iter 22000 - Loss: 49.0
Iter 23000 - Loss: 16.0
Iter 24000 - Loss: 25.0
Iter 25000 - Loss: 49.0
Iter 26000 - Loss: 1.0
Iter 27000 - Loss: 25.0
Iter 28000 - Loss: 9.0
Iter 29000 - Loss: 4.0
Iter 30000 - Loss: 9.0
Iter 31000 - Loss: 36.0
Iter 32000 - Loss: 25.0
Iter 33000 - Loss: 1.0
Iter 34000 - Loss: 64.0
Iter 35000 - Loss: 25.0
Iter 36000 - Loss: 0.0
Iter 37000 - Loss: 9.0
Iter 38000 - Loss: 4.0
Iter 39000 - Loss: 25.0
Iter 40000 - Loss: 1.0
Iter 41000 - Loss: 9.0
Epoch 0 - Loss: 9.0
Iter 1000 - Loss: 1.0
Iter

KeyboardInterrupt: 