# Neural Network from Scratch

This notebook implements a neural network without using machine learning libraries.

This notebook applies code by Sylvain Gugger appearing 
in [A simple neural net in numpy](https://sgugger.github.io/a-simple-neural-net-in-numpy.html#a-simple-neural-net-in-numpy), 
posted on Tue 20 March 2018, on Iris Dataset.

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

np.random.seed(42)

In [83]:
def onehot_encode(y):
    """
    One-hot encoding
    :param y:
    :param unique_elements:
    :return:
    """
    unique_elements = np.unique(y)
    nb_unique_elements = len(unique_elements)
    onehot_encoded_y = []
    for category in y:
        values = [0] * nb_unique_elements
        index = np.where(unique_elements == category)[0][0]
        values[index] = 1
        onehot_encoded_y.append(values)
    
    return np.array(onehot_encoded_y)

In [37]:
class StandardScaler:
    """Data standardization"""
    def fit(self, data):
        """
        Calculate the mean and standard deviation of each column.
        :param data:
        :return:
        """
        self.means = np.mean(data, axis=0)
        self.stds = np.std(data, axis=0)

    def transform(self, data):
        """
        Standardize data
        :param data:
        :return:
        """
        return (data - self.means) / self.stds

In [38]:
class Relu:
    """ReLU activation function"""
    def forward(self, x):
        self.old_x = np.copy(x)
        return np.clip(x, 0, None)

    def backward(self, grad):
        return np.where(self.old_x > 0, grad, 0)

In [39]:
class Sigmoid:
    """Sigmoid activation function"""
    def forward(self, x):
        self.old_y = np.exp(x) / (1. + np.exp(x))
        return self.old_y

    def backward(self, grad):
        return self.old_y * (1. - self.old_y) * grad

In [40]:
class Softmax:
    """Softmax activation function"""
    def forward(self, x):
        self.old_y = np.exp(x) / np.exp(x).sum(axis=1)[:, None]
        return self.old_y

    def backward(self, grad):
        return self.old_y * (grad - (grad * self.old_y).sum(axis=1)[:, None])

In [41]:
class CrossEntropy:
    """Cross Entropy cost"""
    def forward(self, x, y):
        self.old_x = x.clip(min=1e-8, max=None)
        self.old_y = y
        return (np.where(y == 1, -np.log(self.old_x), 0)).sum(axis=1)

    def backward(self):
        return np.where(self.old_y == 1, -1 / self.old_x, 0)

In [42]:
class Linear:
    """Linear Layer"""
    def __init__(self, nb_in: int, nb_out: int):
        self.nb_in = nb_in
        self.nb_out = nb_out
        self.weights = np.random.randn(nb_in, nb_out) * np.sqrt(2 / nb_in)
        self.biases = np.zeros(nb_out)

    def forward(self, x):
        self.old_x = x
        return np.dot(x, self.weights) + self.biases

    def backward(self, grad):
        self.grad_b = grad.mean(axis=0)
        self.grad_w = (np.matmul(self.old_x[:, :, None], grad[:, None, :])).mean(axis=0)
        return np.dot(grad, self.weights.transpose())

In [43]:
class Model:
    """Model which is the neural network"""
    def __init__(self, layers, cost):
        self.layers = layers
        self.cost = cost

    def forward(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def loss(self, x, y):
        return self.cost.forward(self.forward(x), y)

    def backward(self):
        grad = self.cost.backward()
        for i in range(len(self.layers) - 1, -1, -1):
            grad = self.layers[i].backward(grad)

    def summary(self):
        """Show model structure"""
        print("Sommaire du modèle :")
        nb_params = 0
        for layer in self.layers:
            if isinstance(layer, Linear):
                n = layer.nb_in * layer.nb_out + layer.nb_out
                print(f"Linear({layer.nb_in}, {layer.nb_out}) with {n} parameters.")
                nb_params += n
            else:
                print(f"{layer.__class__.__name__}()")
        print(f"Total of {nb_params} parameters.")

In [81]:
class NeuralNet:
    def __init__(self, nb_in: int, nb_out: int, nb_hn: int = 10, nb_hl: int = 2):
        """
        This method creates the neural network.
        :param nb_in: Number of input neurons.
        :param nb_out: Number of output neurons.
        :param nb_hn: Number of neurons in a hidden layer.
        :param nb_hl: Number of hidden layers.
        """
        self.nb_in = nb_in
        self.nb_out = nb_out
        self.nb_hn = nb_hn
        self.nb_hl = nb_hl

        layers = []
        for i in range(nb_hl):
            if i == 0:
                layers.append(Linear(nb_in, nb_hn))
            else:
                layers.append(Linear(nb_hn, nb_hn))
            layers.append(Relu())
        layers.append(Linear(nb_hn, nb_out))
        layers.append(Softmax())

        self.model = Model(layers, CrossEntropy())
        self.model.summary()
        self.scaler = None

    def train(self, train, train_labels, lr=1e-2, decay=0., nb_epoch=1000):
        """
        Train the model on the training set.
        :param train: 
        :param train_labels: 
        :param lr: Learning rate.
        :param decay:
        :param nb_epoch:
        """
        self.scaler = StandardScaler()
        self.scaler.fit(train)
        X_scaled = self.scaler.transform(train)
        print(f"X_scaled.shape={X_scaled.shape}")

        # One-hot encode Y.
        print(f"train_labels.shape={train_labels.shape}")
        onehot_encoded_y = onehot_encode(train_labels)
        print(f"onehot_encoded_y.shape={onehot_encoded_y.shape}")
        
        # Concatenate X and Y matrices by columns to create the data matrix.
        data = np.concatenate((X_scaled, onehot_encoded_y), axis=1)

        # Create mini-batches of size 2 from the data matrix.
        mini_batches = np.array_split(data, len(data) // 2)

        position = self.nb_out
        modulo = int(nb_epoch/10)

        for epoch in range(nb_epoch):
            running_loss = 0.
            num_inputs = 0
            for mini_batch in mini_batches:
                inputs, targets = mini_batch[:, :-position], mini_batch[:, -position:]
                num_inputs += inputs.shape[0]
                # Forward pass and calculation of the loss.
                running_loss += self.model.loss(inputs, targets).sum()
                # Back propagation.
                self.model.backward()
                # Learning rate decay.
                current_lr = lr / (1 + decay * epoch)

                # Update parameters.
                for layer in self.model.layers:
                    if type(layer) == Linear:
                        layer.weights -= current_lr * layer.grad_w
                        layer.biases -= current_lr * layer.grad_b

            if not epoch % modulo and False:
                print(f'Epoch {epoch + 1}/{nb_epoch}, lr={current_lr:.4f}, loss={running_loss / num_inputs:.4f}')

    def predict(self, x) -> int:
        """
        Predict the class of a given example.
        :param x: 
        :return: 
        """
        x_scaled = self.scaler.transform(x)
        output = self.model.forward(x_scaled)
        y_pred = int(np.argmax(output, axis=1))
        return y_pred

    def evaluate(self, X, y):
        """
        Evaluate the model on a set of examples without sklearn.
        :param X: 
        :param y: 
        :return: 
        """
        y_pred == self.predict_all(X)
        accuracy = sum(y_pred == y) / len(y)
        return accuracy

    def predict_all(self, X):
        """
        Compute predictions on the set of attributes.
        :param X:
        :return: 
        """
        X_scaled = self.scaler.transform(X)
        output = self.model.forward(X_scaled)
        y_pred = np.argmax(output, axis=1)
        return y_pred

In [90]:
def evaluate(clf, X, y, source):
    """
    Evaluate the model and display the accuracy, precision, recall, and F1 score metrics,
    without sklearn.

    So, this function performs the following steps:
    - Evaluate the model on the provided data (training, validation or test).
    - Calculate metrics (confusion matrix, accuracy, precision, recall, F1-score) for each class
      (label).
    - Calculate the averages of these metrics over all the classes.
    - Store metrics in a dictionary to later produce a summary table.

    :param clf: Classifier.
    :param X: Attributes of the examples.
    :param y: Actual example labels.
    :param source: Character string indicating the source "train" or "test".
    :return: Dictionary containing all calculated metrics.
    """
    class_name = type(clf).__name__
    print(f"Evaluate {class_name} on {source}")

    y_pred = clf.predict_all(X)
    scores = dict()

    for label in np.unique(y):
        # The confusion matrix looks like this:
        #          predicted
        #           (+)  (-)
        #          ---------
        #     (+) | TP | FN |
        # actual   ---------
        #     (-) | FP | TN |
        #          ---------
        confmat = np.array(
            [[sum([
                ((y[i] == label) == actual) and ((y_pred[i] == label) == predicted)
                for i in range(len(y))])
                for actual in [True, False]]
                for predicted in [True, False]])

        # Capture values TP, FN, FP, TN.
        tp = confmat[0, 0]
        fn = confmat[0, 1]
        fp = confmat[1, 0]
        tn = confmat[1, 1]

        # Calculate the requested metrics.
        # Accuracy = (TP + TN) / (TP + TN + FP + FN)
        # Precision = TP / (TP + FP)
        # Recall = TP / (TP + FN)
        # F1 score = 2 * (Precision * Recall) / (Precision + Recall)
        # If the denominator is zero, the metric is set to zero.
        accuracy = 0.0 if tp + tn + fp + fn == 0 else (tp + tn) / (tp + tn + fp + fn)
        precision = 0.0 if tp + fp == 0 else tp / (tp + fp)
        recall = 0.0 if tp + fn == 0 else tp / (tp + fn)
        f1 = 0.0 if precision + recall == 0 else 2 * (precision * recall) / (precision + recall)

        scores[label] = {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1}

        print(f"Métrics for class {label} :")
        print(f"- Accuracy={accuracy}, Precision={precision}, Recall={recall}, F1-score={f1}")
        print(f"- Confusion matrix = \n{np.array2string(confmat)}")

    # Calculate the averages of the metrics over all the classes.
    n = len(scores)
    mean_accuracy = sum([scores[label]['accuracy'] for label in scores]) / n
    mean_precision = sum([scores[label]['precision'] for label in scores]) / n
    mean_recall = sum([scores[label]['recall'] for label in scores]) / n
    mean_f1 = sum([scores[label]['f1'] for label in scores]) / n
    scores[-1] = {'accuracy': mean_accuracy, 'precision': mean_precision, 'recall': mean_recall, 'f1': mean_f1}

    print(f"Average metrics across all classes :")
    print(f"- Exactitude={mean_accuracy}, Précision={mean_precision}, "
          f"Rappel={mean_recall}, F1-score={mean_f1}")

    return scores

In [76]:
# load the iris dataset and get X and Y data
iris = load_iris()
X = pd.DataFrame(iris.data)
y = pd.DataFrame(iris.target)

In [77]:
# set aside 20% of train and test data for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y,
    test_size=0.2, shuffle = True, random_state = 123)
print("X_train shape: {}".format(X_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("y_train shape: {}".format(y_train.shape))
print("y_test shape: {}".format(y_test.shape))

X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

X_train shape: (120, 4)
X_test shape: (30, 4)
y_train shape: (120, 1)
y_test shape: (30, 1)


In [86]:
nb_in = X_train.shape[1]  # Number of input neurons.
nb_out = len(np.unique(y_train))  # Number of output neurons.
nb_hn = 10  # Number of neurons per hidden layer.
nb_hl = 2  # Number of hidden layers.

print(f"nb_in={nb_in}, nb_out={nb_out}, nb_hn={nb_hn}, nb_hl={nb_hl}")

nb_in=4, nb_out=3, nb_hn=10, nb_hl=2


In [91]:
clf = NeuralNet(nb_in, nb_out, nb_hn, nb_hl)
clf.train(X_train, y_train)
scores = evaluate(clf, X_test, y_test, "test")

Sommaire du modèle :
Linear(4, 10) with 50 parameters.
Relu()
Linear(10, 10) with 110 parameters.
Relu()
Linear(10, 3) with 33 parameters.
Softmax()
Total de 193 paramètres.
X_scaled.shape=(120, 4)
train_labels.shape=(120, 1)
onehot_encoded_y.shape=(120, 3)
Evaluate NeuralNet on test
Métrics for class 0 :
- Accuracy=[1.], Precision=[1.], Recall=[1.], F1-score=[1.]
- Confusion matrix = 
[[[13]
  [ 0]]

 [[ 0]
  [17]]]
Métrics for class 1 :
- Accuracy=[0.93333333], Precision=[1.], Recall=[0.75], F1-score=[0.85714286]
- Confusion matrix = 
[[[ 6]
  [ 2]]

 [[ 0]
  [22]]]
Métrics for class 2 :
- Accuracy=[0.93333333], Precision=[0.81818182], Recall=[1.], F1-score=[0.9]
- Confusion matrix = 
[[[ 9]
  [ 0]]

 [[ 2]
  [19]]]
Average metrics across all classes :
- Exactitude=[0.95555556], Précision=[0.93939394], Rappel=[0.91666667], F1-score=[0.91904762]
