# NNFS - Final Code per Chapter

In [1]:
import numpy as np
import nnfs

# nnfs.init() sets the random seed to 0, sets the default dtype to float32,
# and overrides np.dot(). The call is commented out here, so none of that is applied.
#nnfs.init()

from nnfs.datasets import spiral_data
import math

## Chapter 2 Final Code

In [2]:
# Batch of three samples, four features each
inputs = [
    [1.0, 2.0, 3.0, 2.5],
    [2.0, 5.0, -1.0, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
]

# One row of weights per neuron (three neurons, four inputs each)
weights = [
    [0.2, 0.8, -0.5, 1.0],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]

# One bias per neuron
biases = [2.0, 3.0, 0.5]

# Transpose the weights so the inner dimensions of the product line up,
# then add the biases to every row of the result
weights_transposed = np.asarray(weights).T
outputs = np.dot(inputs, weights_transposed) + biases
outputs

array([[ 4.8  ,  1.21 ,  2.385],
       [ 8.9  , -1.81 ,  0.2  ],
       [ 1.41 ,  1.051,  0.026]])

## Chapter 3 Final Code

In [3]:
class Layer_Dense:
    """Fully connected layer: output = inputs . weights + biases."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        n_inputs  -- number of values each sample feeds into the layer
        n_neurons -- number of neurons (outputs) in the layer
        """
        # Weights are stored as (n_inputs, n_neurons), so the forward pass
        # needs no transpose. Scaling the Gaussian draw by .01 keeps the
        # starting weights small but non-zero.
        gaussian_draw = np.random.randn(n_inputs, n_neurons)
        self.weights = .01 * gaussian_draw
        # A single row holding one zero-initialised bias per neuron
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Run the forward pass and store the result in self.output."""
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

## Chapter 4 Final Code

In [4]:
class Activation_ReLU:
    """Rectified linear activation: negative values become 0."""

    def forward(self, inputs):
        """Apply ReLU element-wise and store the result in self.output."""
        rectified = np.maximum(0, inputs)
        self.output = rectified


In [5]:
class Activation_Softmax:
    """Softmax activation applied independently to each row of the input."""

    def forward(self, inputs):
        """Compute row-wise softmax and store the result in self.output."""
        # Shift each row by its own maximum before exponentiating so the
        # largest exponent is 0, which helps prevent overflow
        row_max = np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(inputs - row_max)
        # Divide by the per-row total so each row sums to 1
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals

## Chapter 5 Final Code

In [6]:
class Loss:
    """Base class for losses; subclasses supply forward(output, y)."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        output -- model output handed to self.forward
        y      -- ground truth values handed to self.forward
        """
        # forward() is provided by the subclass and yields one loss per sample
        per_sample = self.forward(output, y)
        return np.mean(per_sample)

In [13]:
class Loss_CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy loss: -log of the confidence at the true class."""

    # Forward Pass
    def forward(self, y_pred, y_true):
        """Return one negative log-likelihood per sample.

        y_pred -- predicted confidences, one row per sample
        y_true -- targets, either 1-D class indices (sparse) or a 2-D
                  one-hot array with the same layout as y_pred

        Raises ValueError when y_true is neither 1-D nor 2-D.
        """
        # Accept plain Python lists as well as arrays
        y_true = np.asarray(y_true)

        # Number of samples in a batch
        samples = len(y_pred)

        # Clip predictions away from 0 so log() never returns -inf.
        # The upper bound mirrors the lower bound (1 - 1e-7).
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Sparse labels: pick the confidence at each sample's class index
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif y_true.ndim == 2:
            # One-hot labels: mask the predictions and sum across each row
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Previously this fell through and failed later on an unbound name
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {y_true.ndim} dimension(s)"
            )

        # Negative log of the confidence assigned to the correct class
        neg_log_likelihoods = -np.log(correct_confidences)

        return neg_log_likelihoods

## Full Implementation

In [14]:
# Spiral dataset, requested with samples=100 and classes=3
X, y = spiral_data(samples=100, classes=3)

# Dense layers. dense1 is still created before dense2 because each
# constructor draws its weights from np.random.
dense1 = Layer_Dense(2, 3)  # 2 inputs -> 3 neurons
dense2 = Layer_Dense(3, 3)  # 3 inputs -> 3 neurons

# Activations: ReLU follows dense1, Softmax follows dense2
activation1 = Activation_ReLU()
activation2 = Activation_Softmax()

# Forward pass: dense1 -> ReLU -> dense2 -> Softmax
dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)

# Show the Softmax output for the first five samples
activation2.output[:5]

array([[0.33333333, 0.33333333, 0.33333333],
       [0.3333333 , 0.33333316, 0.33333354],
       [0.33333319, 0.3333326 , 0.33333421],
       [0.33333323, 0.33333325, 0.33333352],
       [0.33333317, 0.33333247, 0.33333436]])

In [16]:
# Categorical cross-entropy loss object
loss_function = Loss_CategoricalCrossEntropy()

# Mean loss of the Softmax output against the targets y
loss = loss_function.calculate(output=activation2.output, y=y)
loss

1.0986178263287691

In [17]:
# Accuracy of the Softmax output against the targets
# The predicted class is the index of the largest value in each row
predictions = np.argmax(activation2.output, axis=1)

# Two-dimensional targets are reduced to class indices the same way
if y.ndim == 2:
    y = np.argmax(y, axis=1)

# Fraction of samples whose predicted class matches the target
accuracy = np.mean(predictions == y)
accuracy

0.30333333333333334

## Chapter 6 Final Code

In [None]:
# A fruitless attempt at optimization: draw completely new random
# parameters every iteration and remember the set with the lowest loss.
# Helper variables
lowest_loss = 9999999 # arbitrary large initial value
best_dense1_weights = dense1.weights.copy()
best_dense1_biases = dense1.biases.copy()
best_dense2_weights = dense2.weights.copy()
# Fixed: this previously copied dense1.biases
best_dense2_biases = dense2.biases.copy()

for i in range(10000):
    # Generate a new set of weights for iteration.
    # Shapes are taken from the layers themselves instead of being hard-coded.
    dense1.weights = 0.05 * np.random.randn(*dense1.weights.shape)
    dense1.biases = 0.05 * np.random.randn(*dense1.biases.shape)
    dense2.weights = 0.05 * np.random.randn(*dense2.weights.shape)
    # Fixed: this previously overwrote dense1.biases a second time,
    # so dense2.biases was never randomized
    dense2.biases = 0.05 * np.random.randn(*dense2.biases.shape)

    # Forward pass of data through layers & activation functions
    dense1.forward(X)
    activation1.forward(dense1.output) #ReLU
    dense2.forward(activation1.output)
    activation2.forward(dense2.output) #softmax

    # get loss
    loss = loss_function.calculate(activation2.output, y)

    # Get Accuracy for output of softmax & targets
    # Calculate values along first axis
    predictions = np.argmax(activation2.output, axis=1)
    accuracy = np.mean(predictions == y)

    # if loss is less than current loss save values of weights/biases
    if loss < lowest_loss:
        print(f'New set of weights/biases found.\n Iteration: {i} loss: {loss} accuracy: {accuracy}')
        lowest_loss = loss
        best_dense1_weights = dense1.weights.copy()
        best_dense1_biases = dense1.biases.copy()
        best_dense2_weights = dense2.weights.copy()
        # Fixed: this previously saved dense1.biases
        best_dense2_biases = dense2.biases.copy()
    else:
        # Restore the best parameters found so far, so the layers hold them
        # when the loop ends. Copies keep the saved "best" arrays from being
        # aliased by the live layers.
        dense1.weights = best_dense1_weights.copy()
        dense1.biases = best_dense1_biases.copy()
        dense2.weights = best_dense2_weights.copy()
        # Fixed: this previously assigned dense2's best biases to dense1
        dense2.biases = best_dense2_biases.copy()