# Scratch Artificial Neural Network

In [1549]:
# Import libraries
# External libraries
import torch

# Python built-in libraries
from functools import lru_cache
from enum import Enum

In [1550]:
# Disable __grad based on assistant direction
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x19290a76a30>

# Classes for Artificial Neural Network

In [1551]:
class ActivationFunction:
    """
    ActivationFunction Class.
    
    This class is used to define the activation function.
    """
    
    @staticmethod
    @lru_cache(maxsize=None)
    def linear(x):
        return x
    
    @staticmethod
    @lru_cache(maxsize=None)
    def relu(x):
        return torch.relu(x)

    @staticmethod
    @lru_cache(maxsize=None)
    def sigmoid(x):
        return torch.sigmoid(x)

    @staticmethod
    @lru_cache(maxsize=None)
    def tanh(x):
        return torch.tanh(x)

    @staticmethod
    @lru_cache(maxsize=None)
    def softmax(x):
        return torch.softmax(x, dim=0)
    
    @staticmethod
    def derivative_linear(_):
        return 1
    
    @staticmethod
    def derivative_relu(x):
        return torch.where(x > 0, 1, 0)
    
    @staticmethod
    def derivative_sigmoid(x):
        return torch.sigmoid(x) * (1 - torch.sigmoid(x))
    
    @staticmethod
    def derivative_tanh(x):
        return (2 / torch.exp(x) - torch.exp(-x)) ** 2

    @staticmethod
    def derivative_softmax(x):
        s = torch.softmax(x, dim=0).reshape(-1, 1)
        return torch.diagflat(s) - torch.mm(s, s.T)

In [1552]:
class LossFunction:
    """
    LossFunction Class.
    
    This class is used to define the loss function.
    """

    @staticmethod
    def mean_squared_error(y_pred, y_true):
        return torch.mean((y_pred - y_true) ** 2)

    @staticmethod
    def binary_cross_entropy(y_pred, y_true):
        return -torch.mean(y_true * torch.log(y_pred) + (1 - y_true) * torch.log(1 - y_pred))

    @staticmethod
    def categorical_cross_entropy(y_pred, y_true):
        return -torch.sum(y_true * torch.log(y_pred))

In [1553]:
class InitializerType(Enum):
    """
    InitializerType Enum.
    
    This enum is used to define the type of weight initialization.
    Attributes:
        ZERO: Zero initialization
        RANDOM_DIST_UNIFORM: Random distribution uniform initialization
        RANDOM_DIST_NORMAL: Random distribution normal initialization
    """
    ZERO = 0
    RANDOM_DIST_UNIFORM = 1
    RANDOM_DIST_NORMAL = 2

In [1554]:
class Initializer:
    """
    Initializer Class.
    
    This class is used to initialize weights and biases.
    """
    @staticmethod
    def init_weights(weight_init : InitializerType, param_1, param_2, size: int, seeds):
        """
        Initialize weights
        :param weight_init: Type of weight initialization
        :param param_1: Lower bound or mean
        :param param_2: Upper bound or standard deviation
        :param size: Size of the weight
        :param seeds: Random seed
        :return: Initialized weights
        """
        if weight_init == InitializerType.ZERO:
            return Initializer.zero_init(size)
        elif weight_init == InitializerType.RANDOM_DIST_UNIFORM:
            return Initializer.random_dist_uniform(size, param_1, param_2, seeds)
        elif weight_init == InitializerType.RANDOM_DIST_NORMAL:
            return Initializer.random_dist_normal(size, param_1, param_2, seeds)

    @staticmethod
    def zero_init(size):
        return torch.zeros(size)

    @staticmethod
    def random_dist_uniform(size, lower_bound, upper_bound, seeds):
        torch.manual_seed(seeds)
        return torch.rand(size) * (upper_bound - lower_bound) + lower_bound
    
    @staticmethod
    def random_dist_normal(size, mean, std, seeds):
        torch.manual_seed(seeds)
        return torch.randn(size) * std + mean

In [1555]:
class Layer:
    """
    Layer Class.
    
    Consist of many neurons of Type Layer.
    """

    def __init__(self, weight_init : InitializerType, bias_init : InitializerType, input_size, output_size, param_1, param_2, seeds = 0, activation = ActivationFunction.linear, layer_name = None):
        self.layer_name = layer_name
        self.neurons = [Neuron(weight_init, bias_init, input_size, param_1, param_2, seeds) for _ in range(output_size)]
        self.activation_func = activation
        self.sum = None
        self.output = None
    
    def forward(self, x):
        self.sum = torch.stack([neuron.forward(x) for neuron in self.neurons])[0]
        self.output = self.activation_func(self.sum)
        return self.output
    
    def weight_update(self, lr, grad):
        for neuron in self.neurons:
            neuron.weight_update(lr, grad)
    
    def bias_update(self, lr, grad):
        for neuron, grad in zip(self.neurons, grad):
            neuron.bias_update(lr, grad)
    
    def __str__(self):
        return f"Layer Name: {self.layer_name}\nNeurons: {len(self.neurons)}\n"

In [1556]:
class Neuron:
    """
    Neuron Class.
    
    Consist of weights and bias for every input feature.
    """
    def __init__(self, weight_init : InitializerType, bias_init : InitializerType, input_size, param_1, param_2, seeds = 0):
        self.weights = Initializer.init_weights(weight_init, param_1, param_2, input_size, seeds)
        self.bias = Initializer.init_weights(bias_init, param_1, param_2, input_size, seeds)

    def forward(self, x):
        res = 0
        for weight, x in zip(self.weights, x):
            res += weight * x
        return res + self.bias
    
    def weight_update(self, lr, grad):
        self.weights -= lr * grad
        
    def bias_update(self, lr, grad):
        self.bias -= lr * grad

In [1557]:
class OutputLayer(Layer):
    """
    OutputLayer Class.
    
    Consist of many neurons of Output Layer
    """
    def __init__(self, weight_init : InitializerType, bias_init : InitializerType, input_size, output_size, param_1, param_2, seeds = 0, activation = ActivationFunction.linear, layer_name = None):
        super().__init__(weight_init, bias_init, input_size, output_size, param_1, param_2, seeds, activation, layer_name)
        
    def forward(self, x):
        self.sum = torch.stack([neuron.forward(x) for neuron in self.neurons])[0]
        self.output = ActivationFunction.softmax(self.sum)
        return self.output

In [1558]:
class ArtificialNeuralNetwork:
    """
    ArtificialNeuralNetwork Class.
    
    Consist of many layers of Type Layer.
    """
    def __init__(self, input_size, output_size, hidden_layers, hidden_size, weight_init, bias_init, param_1, param_2, seeds = 0, activation = ActivationFunction.linear):
        self.layers = []
        self.layers.append(Layer(weight_init, bias_init, input_size, hidden_size, param_1, param_2, seeds, activation, "Input Layer"))
        for i in range(hidden_layers):
            self.layers.append(Layer(weight_init, bias_init, hidden_size, hidden_size, param_1, param_2, seeds, activation, f"Hidden Layer {i}"))
        self.layers.append(OutputLayer(weight_init, bias_init, hidden_size, output_size, param_1, param_2, seeds, activation, "Output Layer"))
        self.derivative_activation = None
        if activation == ActivationFunction.sigmoid:
            self.derivative_activation = ActivationFunction.derivative_sigmoid
        elif activation == ActivationFunction.linear:
            self.derivative_activation = ActivationFunction.derivative_linear
        elif activation == ActivationFunction.relu:
            self.derivative_activation = ActivationFunction.derivative_relu
        elif activation == ActivationFunction.tanh:
            self.derivative_activation = ActivationFunction.derivative_tanh
        elif activation == ActivationFunction.softmax:
            self.derivative_activation = ActivationFunction.derivative_softmax
        
    def forward(self, x):
        for layer in self.layers:
            print(layer)
            x = layer.forward(x)
        return x

    def backward(self, lr, output, target):
        print(target, output)
        for layer in reversed(self.layers):
            error_node = output * (1 - output) * (output - target)
            delta_w = lr * error_node * output
            delta_b = lr * error_node
            layer.weight_update(lr, delta_w)
            layer.bias_update(lr, delta_b)
            output = error_node
        return output
    
            
        
        
    def train(self, x, y, lr, epochs, loss_func, verbose = False):
        for epoch in range(epochs):
            loss = 0
            for x_input, y_input in zip(x, y):
                y_pred = self.forward(x_input)
                loss = loss_func(y_pred, y_input)
                self.backward(lr, y_pred, y_input)
            if verbose:
                print(f"Epoch {epoch} - Loss: {loss}")

# Pipeline for testing MNIST dataset

In [1559]:
# Const variables
input_size = 784
hidden_layers = 1
hidden_size = 41000
output_size = 10
learning_rate = 0.01

In [1560]:
# Import Libraries
import numpy as np
import pandas as pd

In [1561]:
# Import Dataset
train = pd.read_csv("data/train.csv")

In [1562]:
# Data Preprocessing
data = np.array(train)
m, n = data.shape
np.random.shuffle(data)

data_dev = data[0:1000].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n]
X_dev = X_dev / 255.

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.
_,m_train = X_train.shape
X_train = X_train.T

In [1563]:
# Initialize Artificial Neural Network
ann = ArtificialNeuralNetwork(input_size, output_size, hidden_layers, 128, InitializerType.RANDOM_DIST_NORMAL, InitializerType.RANDOM_DIST_NORMAL, 0, 0.01, 0, ActivationFunction.sigmoid)

In [1564]:
# Train Artificial Neural Network
ann.train(torch.tensor(X_train), torch.tensor(Y_train), learning_rate, 100, LossFunction.mean_squared_error, verbose = True)

Layer Name: Input Layer
Neurons: 128

Layer Name: Hidden Layer 0
Neurons: 128

Layer Name: Output Layer
Neurons: 10

tensor(8) tensor([0.0077, 0.0077, 0.0078, 0.0078, 0.0079, 0.0079, 0.0078, 0.0076, 0.0078,
        0.0077, 0.0078, 0.0078, 0.0078, 0.0079, 0.0079, 0.0078, 0.0077, 0.0077,
        0.0079, 0.0079, 0.0079, 0.0077, 0.0078, 0.0080, 0.0079, 0.0078, 0.0078,
        0.0078, 0.0079, 0.0079, 0.0079, 0.0077, 0.0078, 0.0078, 0.0078, 0.0078,
        0.0078, 0.0078, 0.0079, 0.0078, 0.0078, 0.0079, 0.0078, 0.0078, 0.0078,
        0.0080, 0.0077, 0.0077, 0.0078, 0.0079, 0.0079, 0.0078, 0.0078, 0.0078,
        0.0079, 0.0078, 0.0078, 0.0079, 0.0079, 0.0081, 0.0077, 0.0077, 0.0080,
        0.0078, 0.0078, 0.0079, 0.0079, 0.0079, 0.0077, 0.0080, 0.0078, 0.0078,
        0.0077, 0.0078, 0.0078, 0.0078, 0.0077, 0.0079, 0.0078, 0.0079, 0.0078,
        0.0079, 0.0077, 0.0078, 0.0079, 0.0078, 0.0079, 0.0078, 0.0078, 0.0080,
        0.0077, 0.0078, 0.0077, 0.0077, 0.0078, 0.0079, 0.0079, 0.0077, 0

RuntimeError: The size of tensor a (784) must match the size of tensor b (128) at non-singleton dimension 0