## Custom feed-forward network
Created using only the NumPy library.

In [6]:
import numpy as np
from sklearn.datasets import load_iris

In [29]:
class NN:
    """Fully connected feed-forward network trained with minibatch gradient descent.

    Every layer, including the output layer, uses the same activation
    function. Weights and biases are sampled from a standard normal
    distribution when the network is created.

    Args:
        layers: Number of neurons per layer, input layer first and output
            layer last, e.g. ``[4, 4, 3]``.
        activation: Name of the activation function: "relu", "sigmoid" or "tanh".
        loss_function: Name of the loss function: "mse" or "mae".

    Raises:
        KeyError: If ``activation`` or ``loss_function`` is not a known name.
    """

    def __init__(self, layers, activation, loss_function):
        self.layers = layers
        self.n_layers = len(layers)
        self.n_weights = len(layers) - 1

        # name -> implementation lookup tables
        self.activations = {
            "relu": self.relu,
            "sigmoid": self.sigmoid,
            "tanh": self.tanh
        }

        self.loss_functions = {
            "mse": self.mse,
            "mae": self.mae
        }

        self.activation_function = self.activations[activation]
        self.loss_function = self.loss_functions[loss_function]

        self.weights = self.default_weights_init(layers)
        self.biases = self.default_bias_init(layers)

    def default_weights_init(self, layers):
        """Return one weight matrix per pair of consecutive layers.

        Matrix ``i`` has shape ``(layers[i], layers[i + 1])`` and is sampled
        from the standard normal distribution.
        """
        return [
            np.random.randn(n_in, n_out)
            for n_in, n_out in zip(layers[:-1], layers[1:])
        ]

    def default_bias_init(self, layers):
        """Return one ``1 x n`` bias row per non-input layer.

        The biases are sampled from the standard normal distribution
        (they are not initialised to zero).
        """
        return [np.random.randn(1, n) for n in layers[1:]]

    def load_data(self, X, y, ratios, categorization=False):
        """Shuffle the data and split it into train / validation / test sets.

        The splits are stored on the instance as ``X_train``/``Y_train``,
        ``X_valid``/``Y_valid`` and ``X_test``/``Y_test``.

        Args:
            X: Input samples, one row per sample.
            y: Desired outputs, one entry per sample. With
                ``categorization=True`` these are integer class labels
                indexed from 0.
            ratios: Fractions of the data used for training, validation and
                testing (in that order); they must sum up to 1.
            categorization: When true, ``y`` is one-hot encoded to match the
                output layer.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length or the ratios do
                not sum up to 1.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples")
        # compare with a tolerance: e.g. 0.7 + 0.2 + 0.1 is not exactly 1.0 in floats
        if not np.isclose(sum(ratios), 1.0):
            raise ValueError("ratios should sum up to 1")
        if categorization:
            # one-hot encoding for the output layer - assumes labels indexed from 0
            num_cols = np.max(y) + 1
            y = np.eye(num_cols)[y]

        n_samples = len(X)
        idx1 = int(ratios[0] * n_samples)
        idx2 = int(sum(ratios[:2]) * n_samples)
        indices = np.arange(n_samples)
        np.random.shuffle(indices)
        X_shuffled, y_shuffled = X[indices], y[indices]
        self.X_train = X_shuffled[:idx1]
        self.Y_train = y_shuffled[:idx1]
        self.X_valid = X_shuffled[idx1:idx2]
        self.Y_valid = y_shuffled[idx1:idx2]
        self.X_test = X_shuffled[idx2:]
        self.Y_test = y_shuffled[idx2:]

    def train_NN(self, n_epochs, batch_size, learning_rate):
        """Train the network on the data stored by ``load_data``.

        Each epoch draws ``len(X_train) // batch_size`` random minibatches
        (at least one) and applies one gradient step per minibatch. The
        classification error is printed every 100 epochs. The loop runs for
        epochs ``0 .. n_epochs`` inclusive so that the final epoch is reported
        when ``n_epochs`` is a multiple of 100.

        Args:
            n_epochs: Index of the last epoch to run.
            batch_size: Number of samples per minibatch; capped at the size
                of the training set.
            learning_rate: Step size of the gradient descent update.

        Raises:
            ValueError: If ``batch_size`` is not positive or there is no
                training data.
        """
        n_samples = len(self.X_train)
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        if n_samples == 0:
            raise ValueError("training set is empty")
        # sampling without replacement cannot draw more samples than exist
        effective_batch = min(batch_size, n_samples)
        n_batches = max(1, n_samples // effective_batch)

        for epoch in range(n_epochs + 1):
            for _ in range(n_batches):
                # creating a minibatch on the fly
                minibatch_indexes = np.random.choice(n_samples, effective_batch, replace=False)
                minibatch_X = self.X_train[minibatch_indexes]
                minibatch_Y = self.Y_train[minibatch_indexes]
                self.update_minibatch(minibatch_X, minibatch_Y, learning_rate, effective_batch)
            if epoch % 100 == 0:
                self.evaluate_categorization(epoch)

    def update_minibatch(self, minibatch_X, minibatch_Y, learning_rate, batch_size):
        """Apply one gradient descent step computed from a single minibatch.

        The per-sample gradients from ``backpropagation`` are summed, divided
        by ``batch_size`` and scaled by ``learning_rate`` before being
        subtracted from the weights and biases.
        """
        nabla_weights = [np.zeros(w.shape) for w in self.weights]
        nabla_biases = [np.zeros(b.shape) for b in self.biases]
        for (X, Y) in zip(minibatch_X, minibatch_Y):
            delta_nabla_weights, delta_nabla_biases = self.backpropagation(X, Y)
            # accumulating the gradients of the individual samples
            nabla_weights = [nw + dnw for (nw, dnw) in zip(nabla_weights, delta_nabla_weights)]
            nabla_biases = [nb + dnb for (nb, dnb) in zip(nabla_biases, delta_nabla_biases)]
        # averaging the gradients and stepping against them
        step = learning_rate / batch_size
        self.weights = [w - step * delta_w for (w, delta_w) in zip(self.weights, nabla_weights)]
        self.biases = [b - step * delta_b for (b, delta_b) in zip(self.biases, nabla_biases)]

    def forward_pass(self, input):
        """Return the network output for ``input``.

        ``input`` is a sequence of ``layers[0]`` values (or a matrix with one
        such row per sample). Because the biases are ``1 x n`` rows, the
        result is always two-dimensional, one row per sample.
        """
        signal = np.asarray(input)
        for w, b in zip(self.weights, self.biases):
            signal = self.activation_function(signal @ w + b)
        return signal

    def backpropagation(self, x, y):
        """Compute the loss gradients for a single training sample.

        Args:
            x: One input sample with ``layers[0]`` values.
            y: The desired output for that sample.

        Returns:
            Tuple ``(nabla_weights, nabla_biases)`` of lists shaped like
            ``self.weights`` and ``self.biases``.
        """
        nabla_weights = [np.zeros(w.shape) for w in self.weights]
        nabla_biases = [np.zeros(b.shape) for b in self.biases]
        # storing activations and z's for each layer; the input is the first activation
        activations = [x]
        weighted_inputs = []

        # feed forward + storing all information
        for (w, b) in zip(self.weights, self.biases):
            z = x @ w + b
            x = self.activation_function(z)
            weighted_inputs.append(z)
            activations.append(x)

        # backward pass
        # error in the output layer: dLoss/dPrediction * activation'(z)
        delta = (self.loss_function(y, activations[-1], derivation=True)
                 * self.activation_function(weighted_inputs[-1], derivation=True))
        # gradient of biases = error in the corresponding layer
        nabla_biases[-1] = delta
        # gradient of weights = previous layer's activations (as a column) times the error (a row)
        nabla_weights[-1] = np.dot(np.reshape(activations[-2], (-1, 1)), delta)
        # iterating from the last hidden layer down to the first one
        # (the input layer has no parameters)
        for l in range(self.n_layers - 2, 0, -1):
            current_z_der = self.activation_function(weighted_inputs[l - 1], derivation=True)
            # propagating the error back through the weights of the next layer
            delta = np.dot(delta, self.weights[l].T) * current_z_der
            nabla_biases[l - 1] = delta
            nabla_weights[l - 1] = np.dot(np.reshape(activations[l - 1], (-1, 1)), delta)

        return (nabla_weights, nabla_biases)

    # ------------------------------------- Evaluation Functions ------------------------------------- #

    def _mean_loss(self, X, Y):
        """Return the loss summed over the outputs and averaged over the samples (NaN if empty)."""
        if len(X) == 0:
            return float("nan")
        total = 0.0
        for x, y in zip(X, Y):
            total += np.sum(self.loss_function(y, self.forward_pass(x)))
        return total / len(X)

    def _error_rate(self, X, Y):
        """Return the fraction of samples whose strongest output is not the one-hot class (NaN if empty)."""
        if len(X) == 0:
            return float("nan")
        predicted = np.argmax(self.forward_pass(X), axis=1)
        expected = np.argmax(Y, axis=1)
        return 1 - np.mean(predicted == expected)

    def evaluate(self):
        """Print the mean loss on the training and validation sets."""
        error_training = self._mean_loss(self.X_train, self.Y_train)
        error_validation = self._mean_loss(self.X_valid, self.Y_valid)
        print(f"train error: {error_training} | validation error: {error_validation}")

    def evaluate_categorization(self, epoch):
        """Print the classification error rate on the training and validation sets."""
        train_error = self._error_rate(self.X_train, self.Y_train)
        validation_error = self._error_rate(self.X_valid, self.Y_valid)
        print(f"epoch: {epoch:5} | train error: {train_error:.3f} | validation error: {validation_error:.3f}")

    # ------------------------------------- Activation Functions ------------------------------------- #

    def relu(self, input, derivation=False):
        """ReLU: ``max(0, x)``; with ``derivation=True`` its derivative (1 for x > 0, else 0)."""
        if not derivation:
            return np.maximum(0, input)
        return np.where(input > 0, 1, 0)

    def sigmoid(self, input, derivation=False):
        """Sigmoid: ``1 / (1 + e^-x)``; with ``derivation=True`` its derivative ``s * (1 - s)``."""
        value = 1 / (1 + np.exp(-input))
        if not derivation:
            return value
        return value * (1 - value)

    def tanh(self, input, derivation=False):
        """Hyperbolic tangent; with ``derivation=True`` its derivative ``1 - tanh(x)^2``."""
        # np.tanh stays finite for large |x|, where exp(2x) would overflow
        value = np.tanh(input)
        if not derivation:
            return value
        return 1 - value ** 2

    # ------------------------------------- Loss Functions ------------------------------------- #

    def mse(self, y, y_hat, derivation=False):
        """Element-wise squared error ``0.5 * (y - y_hat)^2``.

        Args:
            y: True values that SHOULD be predicted by the model.
            y_hat: ACTUAL prediction of the model.
            derivation: When true, return the derivative with respect to
                ``y_hat``, i.e. ``y_hat - y``.
        """
        if not derivation:
            return 0.5 * (y - y_hat) ** 2
        return y_hat - y

    def mae(self, y, y_hat, derivation=False):
        """Element-wise absolute error ``|y - y_hat|``.

        Args:
            y: True values that SHOULD be predicted by the model.
            y_hat: ACTUAL prediction of the model.
            derivation: When true, return the derivative with respect to
                ``y_hat``, i.e. ``sign(y_hat - y)`` (0 where they are equal).
        """
        if not derivation:
            return np.abs(y - y_hat)
        return np.sign(y_hat - y)


        

### Iris dataset
A simple dataset for classifying iris species.\
*Two of the three species were collected in the Gaspé Peninsula "all from the same pasture, and picked on the same day and measured at the same time by the same person with the same apparatus".*

data = [sepal.length, sepal.width, petal.length, petal.width] all floats\
labels = {0 : Setosa, 1: Versicolour, 2: Virginica}

In [47]:
# Load the Iris dataset and keep the measurements and the species labels separately.
iris = load_iris()
X_iris = iris.data
y_iris = iris.target

In [48]:
# Layer sizes are listed from the input layer to the output layer.
iris_nn = NN(layers=[4, 4, 3], activation="tanh", loss_function="mse")

In [49]:
# Ratios are the train / validation / test shares; labels get one-hot encoded.
iris_nn.load_data(X_iris, y_iris, ratios=(0.7, 0.2, 0.1), categorization=True)

In [50]:
# Train the network; the error rates are printed every 100 epochs.
iris_nn.train_NN(n_epochs=300, batch_size=16, learning_rate=0.1)

epoch:     0 | train error: 0.362 | validation error: 0.233
epoch:   100 | train error: 0.352 | validation error: 0.267
epoch:   200 | train error: 0.352 | validation error: 0.267
epoch:   300 | train error: 0.362 | validation error: 0.233


In [43]:
# Run one hand-written sample through the network and show the raw output.
sample = [6.0, 3.0, 4.8, 1.8]
network_output = iris_nn.forward_pass(sample)
print(network_output)

[[0. 0. 0.]]


In [17]:
# Print every sample with its true label and the class index the network predicts.
# The network defined in this notebook is `iris_nn`; the previously used name
# `example` is not defined in any visible cell and fails on a fresh run.
for x, y in zip(X_iris, y_iris):
    print(x, y, np.argmax(iris_nn.forward_pass(x)))

[5.1 3.5 1.4 0.2] 0 0
[4.9 3.  1.4 0.2] 0 0
[4.7 3.2 1.3 0.2] 0 0
[4.6 3.1 1.5 0.2] 0 0
[5.  3.6 1.4 0.2] 0 0
[5.4 3.9 1.7 0.4] 0 0
[4.6 3.4 1.4 0.3] 0 0
[5.  3.4 1.5 0.2] 0 0
[4.4 2.9 1.4 0.2] 0 0
[4.9 3.1 1.5 0.1] 0 0
[5.4 3.7 1.5 0.2] 0 0
[4.8 3.4 1.6 0.2] 0 0
[4.8 3.  1.4 0.1] 0 0
[4.3 3.  1.1 0.1] 0 0
[5.8 4.  1.2 0.2] 0 0
[5.7 4.4 1.5 0.4] 0 0
[5.4 3.9 1.3 0.4] 0 0
[5.1 3.5 1.4 0.3] 0 0
[5.7 3.8 1.7 0.3] 0 0
[5.1 3.8 1.5 0.3] 0 0
[5.4 3.4 1.7 0.2] 0 0
[5.1 3.7 1.5 0.4] 0 0
[4.6 3.6 1.  0.2] 0 0
[5.1 3.3 1.7 0.5] 0 0
[4.8 3.4 1.9 0.2] 0 0
[5.  3.  1.6 0.2] 0 0
[5.  3.4 1.6 0.4] 0 0
[5.2 3.5 1.5 0.2] 0 0
[5.2 3.4 1.4 0.2] 0 0
[4.7 3.2 1.6 0.2] 0 0
[4.8 3.1 1.6 0.2] 0 0
[5.4 3.4 1.5 0.4] 0 0
[5.2 4.1 1.5 0.1] 0 0
[5.5 4.2 1.4 0.2] 0 0
[4.9 3.1 1.5 0.2] 0 0
[5.  3.2 1.2 0.2] 0 0
[5.5 3.5 1.3 0.2] 0 0
[4.9 3.6 1.4 0.1] 0 0
[4.4 3.  1.3 0.2] 0 0
[5.1 3.4 1.5 0.2] 0 0
[5.  3.5 1.3 0.3] 0 0
[4.5 2.3 1.3 0.3] 0 0
[4.4 3.2 1.3 0.2] 0 0
[5.  3.5 1.6 0.6] 0 0
[5.1 3.8 1.9 0.4] 0 0
[4.8 3.  1