# Neural Network from scratch using only numpy.

In [1]:
#Importing libraries.
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report
import torchvision.datasets as datasets
import torchvision.transforms as transforms

%matplotlib inline

SEED = 42 # set random seed to 42

Helper function to plot decision boundary.

In [2]:
def plot_decision_boundary(pred_func, x_min, x_max, y_min, y_max, cmap, ax):
    '''Shades the plane according to the value "pred_func" assigns to each point.

    A regular grid with spacing 0.01 is laid over [x_min, x_max) x [y_min, y_max).
    "pred_func" is called once with every grid point stacked as a 2-column array
    and must return one value per point (anything that can be reshaped to the grid).
    The result is drawn on "ax" as a half-transparent filled contour using "cmap".'''
    step = 0.01
    x_ticks = np.arange(x_min, x_max, step)
    y_ticks = np.arange(y_min, y_max, step)
    xx, yy = np.meshgrid(x_ticks, y_ticks)

    # One row per grid point: column 0 is the x coordinate, column 1 the y coordinate.
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    zz = pred_func(grid_points).reshape(xx.shape)

    ax.contourf(xx, yy, zz, cmap=cmap, alpha=0.5)

## Neural Network class:

In [None]:
class NeuralNetwork():
    @staticmethod
    def len_check(y_pred,y_true):
        prediction_dim = y_pred.shape
        true_dim = y_true.shape
        if (prediction_dim!=true_dim):
            print(f'length of y_pred is {prediction_dim} while length of y_true is {true_dim}, there is a length mismatch. Raising error!')
            raise RuntimeError
        else:
            return

    @staticmethod
    def cross_entropy_loss(y_pred, y_true):
        row_wiseSummed = np.sum(y_true*np.log(y_pred))
        loss = -np.mean(row_wiseSummed)
        return loss

    @staticmethod
    def mean_squared_loss(y_pred, y_true)->np.ndarray:
        #1/N * summation (y_hat - y)^2
        loss = np.mean((y_pred-y_true)**2)
        return loss

    @staticmethod
    def accuracy(y_pred, y_true)->np.ndarray:
        correct_preds = np.sum((y_pred==y_true),axis=1)
        total_preds = len(y_pred)
        accuracy = (correct_preds/total_preds)*100
        return accuracy

    @staticmethod
    def softmax(x)->np.ndarray:
        x = x-np.max(x,axis=1,keepdims=True)
        sum_ofExps = np.sum(np.exp(x),axis=1,keepdims=True)
        softmax_vals = np.exp(x)/sum_ofExps
        return softmax_vals

    @staticmethod
    def sigmoid(x)->np.ndarray:
        #clipping results for numerical stability
        x=np.clip(x,-500,500)
        activations = 1/(1+np.exp(-x))
        return activations
    
    @staticmethod
    def Tanh(x)->np.ndarray:
        #clipping results for numerical stability
        x = np.clip(x,-500,500)
        activations = (np.exp(x)-np.exp(-x))/np.exp(x)+np.exp(-x)
        return activations

    @staticmethod
    def ReLU(x)->np.ndarray:
        #clipping results for numerical stability
        activations = np.where(x>0,x,0)
        return activations

    def Leaky_ReLU(self, x)->np.ndarray:
        #clipping results for numerical stability
        activations = np.where(x>self.leaky_slope*x,x,self.leaky_slope*x)
        return activations

    def __init__(self, nodes_per_layer: list[int], mode: str = 'classification', optimizer: str = 'fullbatch', activation: str = 'sigmoid', batch_size: int = 64):
        '''Creates a Feed-Forward Neural Network.
        "nodes_per_layer" is a list containing the number of nodes in each layer (including the input layer) e.g. [2, 10, 10, 2] (2 features, 2 hidden layers with 10 neurons each, 2 classes)"
        '''

        if len(nodes_per_layer) < 2:
            raise ValueError('Network must have at least 2 layers (input and output).')
        if not (np.array(nodes_per_layer) > 0).all():
            raise ValueError('The number of nodes in all layers must be positive.')

        assert mode.lower() in ['classification', 'regression'], "Only classification and regression modes are supported"
        assert optimizer.lower() in ['sgd', 'minibatch', 'fullbatch'], 'Unknown Optimizer'
        assert activation.lower() in ['sigmoid', 'relu', 'leaky_relu','tanh'], 'Unknown activation function'

        self.num_layers = np.size(nodes_per_layer)
        self.mode = mode
        self.nodes_per_layer = nodes_per_layer
        self.input_shape = nodes_per_layer[0]
        self.output_shape = nodes_per_layer[-1]
        self.optimizer = optimizer
        self.activation_strat = activation
        self.batch_size = batch_size

        if activation == 'leaky_relu':
            self.leaky_slope = 0.05

        # Initialize all weights based on a standard normal distribution and all biases to 0.
        self.weights_ = []
        self.biases_ = []
        self.__init_weights(nodes_per_layer)


    def __init_weights(self, nodes_per_layer):
        '''Initializes all weights based on a standard normal distribution and all biases to 0.'''
        np.random.seed(SEED)
        for i, _ in enumerate(nodes_per_layer):
            if i == 0:
                # skip the input layer; it does not have weights/bias
                continue

            weight_matrix = np.random.normal(size=(nodes_per_layer[i-1], nodes_per_layer[i]))
            self.weights_.append(weight_matrix)
            bias_vector = np.zeros(shape=(nodes_per_layer[i],))
            self.biases_.append(bias_vector)




    def fit(self, Xs, Ys, X_val, Y_val, epochs, lr=1e-3):
        '''Trains the model on the given dataset for "epoch" number of itterations with step size="lr".
        Returns list containing loss for each epoch.'''
        self.history = []
        self.val_history = []
        self._run_optimizer(Xs,Ys, X_val, Y_val, epochs, lr)
        return self.history, self.val_history


    def _run_optimizer(self, Xs, Ys, X_val, Y_val, epochs, lr):
        ''' Executes the gradient descent algorithm '''
        # Add functionality for converting integer labels to one-hot labels (optional)

        if self.optimizer == 'fullbatch':
            for epoch in tqdm(range(epochs)):
                activations = self.forward_pass(Xs) # Forward Pass over whole dataset
                deltas = self.backward_pass(Ys, activations) # Backward pass takes layer activations and true labels as input

                layer_inputs = [Xs] + activations[:-1]
                self.weight_update(deltas, layer_inputs, lr) # Weight update

                preds = self.predict(Xs) # Make prediction on input sample

                current_loss = self.cross_entropy_loss(preds, Ys) if self.mode == 'classification' else self.mean_squared_loss(preds, Ys)
                self.history.append(current_loss)

                # Compute Validation Loss at the end of each epoch
                preds = self.predict(X_val)
                current_loss = self.cross_entropy_loss(preds, Y_val) if self.mode == 'classification' else self.mean_squared_loss(preds, Y_val)
                self.val_history.append(current_loss)

        elif self.optimizer == 'sgd':
             # Code Here
             pass


        elif self.optimizer == 'minibatch':
            # Code Here
            pass

    def forward_pass(self, input_data):
        '''Executes the feed forward algorithm.
        "input_data" is the input to the network in row-major form
        Returns "activations", which is a list of all layer outputs (excluding input layer of course)'''

        # Code here


        return activations

    def backward_pass(self, targets, layer_activations):
        '''Executes the backpropagation algorithm.
        "targets" is the ground truth/labels.
        "layer_activations" are the return value of the forward pass step.
        Returns "deltas", which is a list containing weight update values for all layers (excluding the input layer of course).'''

        # Code here

        return deltas

    def weight_update(self, deltas, layer_inputs, lr):
        '''Executes the gradient descent algorithm.
        "deltas" is return value of the backward pass step
        "layer_inputs" is a list containing the inputs for all layers (including the input layer)
        "lr" is the learning rate'''

        # Code here




    def predict(self, Xs):
        '''Returns the model predictions (output of the last layer) for the given "Xs".'''
        return self.forward_pass(Xs)[-1]

    def evaluate(self, Xs, Ys):
        '''Returns appropriate metrics for the task, calculated on the dataset passed to this method.'''
        pred = ...
        if self.mode == 'regression': # return MSE
            return ...
        elif self.mode == 'classification': # return CE_Loss and accuracy in that order
            return ...