# Neural Nets from Scratch

This notebook contains material on implementing feed-forward neural networks from scratch, following the book of the same name: https://nnfs.io/


In [1]:
import nnfs
from nnfs.datasets import spiral_data
import numpy as np
import matplotlib.pyplot as plt
nnfs.init()

# Layers

When data forward-propagates through the network, it passes through successive `layers`. In **`Dense layers`**, each neuron in a layer receives the **weighted sum** of the inputs from the previous layer, plus a bias term.

In [2]:
class DenseLayer:
    """Fully connected layer: each neuron receives every input feature.

    Attributes:
        weights: array of shape (n_inputs, n_neurons), small random values.
        biases: array of shape (1, n_neurons), initialised to zero.
        output: result of the most recent `forward` call (only set by `forward`).
    """

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        Args:
            n_inputs: number of features each incoming sample has.
            n_neurons: number of neurons (output features) in this layer.
        """
        weight_shape = (n_inputs, n_neurons)

        # Standard-normal draws shrunk by 0.01 so the initial weights are small.
        self.weights = np.random.randn(*weight_shape) * 0.01

        # One bias per neuron, stored as a row vector so it broadcasts over samples.
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute `inputs . weights + biases` and store it in `self.output`.

        Args:
            inputs: batch of samples whose last dimension is `n_inputs`.

        Returns:
            None; the result is kept on the instance as `self.output`.
        """
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

# Activation functions


In [None]:
class ReLU:
    """Rectified Linear Unit activation.

    Things to keep in mind:
    - Outputs are not normalised: they lie anywhere in [0, infinity).
    - Each output depends only on its own input, never on its neighbours.
    - For both reasons ReLU is unsuitable as the final layer when the network
      must predict class probabilities.
    """

    def forward(self, inputs):
        """Zero out negative entries of `inputs` and store the result in `self.output`.

        Args:
            inputs: array-like of pre-activation values.

        Returns:
            None; the result is kept on the instance as `self.output`.
        """
        # np.maximum compares element-wise, so every value below 0 becomes 0.
        self.output = np.maximum(inputs, 0)
        
class Softmax:
    """Softmax activation: turns each row of scores into a probability distribution.

    Things to keep in mind:
    - Every output row is non-negative and sums to 1.
    - Each probability doubles as a confidence score (e.g. [.45, .55] means the
      model is not very sure about its prediction).
    - Softmax is almost exclusively used in the output layer.
    """

    def forward(self, inputs):
        """Apply softmax along axis 1 and store the result in `self.output`.

        Args:
            inputs: array-like of raw scores, one row per sample. A single 1-D
                sample is also accepted and is treated as a batch of one.

        Returns:
            None; the probabilities are kept on the instance as `self.output`
            (always at least 2-D, one row per sample).
        """
        # Promote a lone 1-D sample to shape (1, n) so that axis=1 always exists.
        scores = np.atleast_2d(inputs)

        # Subtract each row's maximum before exponentiating. The shifted values
        # lie in (-inf, 0], so np.exp yields values in (0, 1] and cannot overflow.
        # The shift cancels out in the normalisation below, so the resulting
        # probabilities are unchanged.
        # keepdims=True keeps the reduced axis (size 1) so the per-row maximum
        # broadcasts against the rows.
        shifted = scores - np.max(scores, axis=1, keepdims=True)

        # unnormalised probabilities
        exp_values = np.exp(shifted)

        # normalise per sample so every row sums to 1
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

# Loss functions


In [3]:
class Loss:
    """Base class for loss functions.

    Subclasses supply `forward(output, y)`, which returns one loss value per
    sample; `calculate` reduces those values to a single number.
    """

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        Args:
            output: model predictions, passed straight through to `forward`.
            y: target values, passed straight through to `forward`.

        Returns:
            The mean (via `np.mean`) of whatever `self.forward(output, y)` returns.
        """
        per_sample = self.forward(output, y)
        return np.mean(per_sample)
    
class CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy: negative log of the probability given to the true class."""

    def forward(self, y_pred, y_true):
        """Compute the per-sample negative log-likelihoods.

        Args:
            y_pred: predicted probabilities, one row per sample and one column
                per class.
            y_true: the targets, in one of two layouts:
                - 1-D: integer class indices, one per sample (sparse labels);
                - 2-D: one-hot rows matching the layout of `y_pred`.
                Plain Python lists are accepted as well as arrays.

        Returns:
            1-D array holding `-log(p_correct)` for every sample.

        Raises:
            ValueError: if `y_true` is neither 1-D nor 2-D.
        """
        # Accept plain lists as well as arrays (lists have no .shape / .ndim).
        y_true = np.asarray(y_true)

        # number of samples in the batch
        samples = len(y_pred)

        # Clip so that log(0) (= -inf) can never occur. Both ends are clipped by
        # the same amount so the mean is not dragged towards either extreme.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # sparse labels: pick, for each row, the probability at its class index
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif y_true.ndim == 2:
            # one-hot labels: the mask zeroes out every column but the true class
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Previously this case fell through and crashed with an unbound local.
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {y_true.ndim} dimension(s)"
            )

        return -np.log(correct_confidences)

# Code execution

The following cells run a single forward pass of the network on the spiral dataset and compute the resulting loss.

In [None]:
# --- setup -------------------------------------------------------------------
# NOTE: spiral_data, dense1 and dense2 are created in this order on purpose;
# each one draws random numbers, so reordering them would change the values.
X, y = spiral_data(samples=100, classes=3)

# hidden layer: 2 input features -> 3 neurons, followed by ReLU
dense1 = DenseLayer(2, 3)
relu = ReLU()

# output layer: 3 inputs (the hidden layer's outputs) -> 3 values, followed by softmax
dense2 = DenseLayer(3, 3)
softmax = Softmax()

# loss function
cce_loss = CategoricalCrossEntropy()

# --- forward pass ------------------------------------------------------------
# every stage consumes the `.output` stored by the stage before it
dense1.forward(X)
relu.forward(dense1.output)
dense2.forward(relu.output)
softmax.forward(dense2.output)

# peek at the predicted distributions for the first five samples
print(softmax.output[:5])

# --- loss --------------------------------------------------------------------
loss = cce_loss.calculate(softmax.output, y)
print(f"Loss: {loss}")