# Loss Functions

### Categorical Cross-Entropy Loss

In [4]:
import math
import numpy as np

# Example output of the network's softmax layer (one sample, three classes)
softmax_outputs = [0.7, 0.1, 0.2]

# One-hot encoded ground truth: class 0 is the correct one
class_targets = [1, 0, 0]

# Categorical cross-entropy: negated sum of log(prediction) * target
# accumulated over every class
loss = -sum(
    math.log(confidence) * target
    for confidence, target in zip(softmax_outputs, class_targets)
)

print(loss)

0.35667494393873245


That’s the full categorical cross-entropy calculation, but we can make a few assumptions given
one-hot target vectors. First, what are the values for class_targets[1] and
class_targets[2] in this case? They’re both 0, and anything multiplied by 0 is 0. Thus, we
don’t need to calculate these indices. Next, what’s the value for class_targets[0] in this
case? It’s 1. So this can be omitted as any number multiplied by 1 remains the same.

In [5]:
# One-hot shortcut: only the target class (index 0) contributes to the loss
target_confidence = softmax_outputs[0]
loss = -math.log(target_confidence)
print(loss)

0.35667494393873245


We need to modify our output in two additional ways. First, we’ll update our process
to work on batches of softmax output distributions; and second, make the negative log calculation
dynamic to the target index (the target index has been hard-coded so far).

In [6]:
# Softmax probabilities for a batch of 3 samples (one row per sample)
softmax_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])

# Sparse targets: the index of the correct class for each sample
class_targets = [0, 1, 1]

# Walk targets and rows in lockstep, showing the confidence that was
# predicted for the correct class of each sample
for target, row in zip(class_targets, softmax_outputs):
    print(row[target])


0.7
0.5
0.9


The zip() function, again, lets us iterate over multiple iterables at the same time in Python. This
can be further simplified using NumPy (we’re creating a NumPy array of the Softmax outputs this
time):

In [7]:
# Pair each (hard-coded) row index with that sample's target class index
row_indices = [0, 1, 2]
print(softmax_outputs[row_indices, class_targets])

[0.7 0.5 0.9]


In [8]:
# Same lookup, with the row indices derived from the batch size
n_samples = len(softmax_outputs)
print(softmax_outputs[range(n_samples), class_targets])

[0.7 0.5 0.9]


In [9]:
# Confidence predicted for the correct class of every sample
target_confidences = softmax_outputs[range(len(softmax_outputs)), class_targets]

# Per-sample loss: the negative log of that confidence
neg_log = -np.log(target_confidences)

print(neg_log)

[0.35667494 0.69314718 0.10536052]


In [10]:
# The batch loss is the mean of the per-sample losses
avg_loss = neg_log.mean()

print(avg_loss)

0.38506088005216804


#### Code for one-hot encoded ground truths

In [11]:
# Predicted class probabilities for a batch of 3 samples (one row per sample)
softmax_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08]
])

# One-hot encoded ground truth (one row per sample)
class_targets = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0]
])

# Pick out the confidence predicted for the correct class of each sample
if len(class_targets.shape) == 1:
    # Sparse (categorical) labels: index every row by its target class
    correct_confidences = softmax_outputs[
        range(len(softmax_outputs)),
        class_targets
    ]
elif len(class_targets.shape) == 2:
    # One-hot labels: the mask zeroes every non-target probability, so the
    # row sum is exactly the target-class confidence
    correct_confidences = np.sum(
        softmax_outputs * class_targets,
        axis=1
    )
else:
    # Fail loudly instead of hitting an undefined name further down
    raise ValueError(
        "class_targets must be 1-D (class indices) or 2-D (one-hot), "
        f"got shape {class_targets.shape}"
    )

# Per-sample negative log-likelihood
losses = -np.log(correct_confidences)

# Average the losses computed in THIS cell. The previous version averaged
# `neg_log`, a leftover from an earlier cell, so the one-hot code path was
# never actually reflected in the printed value.
avg_loss = np.mean(losses)

print(avg_loss)

0.38506088005216804


## Loss Class

In [12]:
# Common loss class
class Loss:
    """Base class for loss functions.

    Subclasses implement ``forward(output, y)`` and return one loss value per
    sample; ``calculate`` reduces those values to their mean.
    """

    def forward(self, output, y):
        """Return per-sample losses; must be provided by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Loss subclasses must implement forward()")

    def calculate(self, output, y):
        """Return the mean data loss for a batch.

        Args:
            output: Model output, passed through unchanged to ``forward``.
            y: Ground-truth values, passed through unchanged to ``forward``.

        Returns:
            The mean (``np.mean``) of the per-sample losses from ``forward``.
        """
        # Calculate sample losses
        sample_losses = self.forward(output, y)

        # Calculate mean loss
        data_loss = np.mean(sample_losses)

        return data_loss

    # Backward-compatible alias: the method was originally published under
    # this misspelled name, and existing callers still use it.
    claculate = calculate

In [13]:
# Cross-entropy loss
class Loss_CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy computed per sample."""

    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the target class per sample.

        Args:
            y_pred: Predicted probabilities, one row per sample.
            y_true: Either 1-D class indices or a 2-D one-hot mask with the
                same layout as ``y_pred``. Plain lists are accepted.

        Returns:
            1-D array with one loss value per sample.

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        # Accept lists as well as arrays (lists have no .shape / .ndim)
        y_true = np.asarray(y_true)

        # Number of samples in a batch
        samples = len(y_pred)

        # Clip data so log() never sees 0 (log(0) is -inf)
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Categorical labels: pick each row's target-class probability
            correct_confidences = y_pred_clipped[
                range(samples),
                y_true
            ]
        elif y_true.ndim == 2:
            # One-hot labels: mask out non-target classes, then sum each row
            correct_confidences = np.sum(
                y_pred_clipped * y_true,
                axis=1
            )
        else:
            # Previously this fell through and failed on an unbound local
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got shape {y_true.shape}"
            )

        # Losses
        negative_log_likelihood = -np.log(correct_confidences)
        return negative_log_likelihood

In [14]:
# Instantiate the loss and evaluate it on the batch defined in the earlier cell
loss_function = Loss_CategoricalCrossEntropy()
loss = loss_function.claculate(output=softmax_outputs, y=class_targets)

print(loss)

0.38506088005216804


## Combining everything up to this point:

In [15]:
import numpy as np # type: ignore
from nnfs.datasets import spiral_data   # type: ignore
import nnfs # type: ignore

nnfs.init()

# Dense Layer
class Layer_Dense:
    """Fully connected layer: ``output = inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create scaled random weights and zero biases.

        Args:
            n_inputs: Number of input features (rows of the weight matrix).
            n_neurons: Number of neurons (columns of the weight matrix).
        """
        weight_scale = 0.01
        random_weights = np.random.randn(n_inputs, n_neurons)
        self.weights = weight_scale * random_weights

        # A single row of biases, broadcast across the batch in forward()
        bias_shape = (1, n_neurons)
        self.biases = np.zeros(bias_shape)

    def forward(self, inputs):
        """Compute the layer output for ``inputs`` and store it in ``self.output``."""
        weighted_inputs = np.dot(inputs, self.weights)
        self.output = weighted_inputs + self.biases


class ReLU:
    """Rectified linear activation."""

    def forward(self, inputs):
        """Store the element-wise ``max(0, x)`` of ``inputs`` in ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

class Softmax:
    """Softmax activation applied independently to every row."""

    def forward(self, inputs):
        """Store the row-wise softmax of ``inputs`` in ``self.output``."""
        # Subtract each row's maximum before exponentiating
        row_max = np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(inputs - row_max)

        # Divide every row by its own total so it sums to 1
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals

# Common loss class
class Loss:
    """Base class for loss functions.

    Subclasses implement ``forward(output, y)`` and return one loss value per
    sample; ``calculate`` reduces those values to their mean.
    """

    def forward(self, output, y):
        """Return per-sample losses; must be provided by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Loss subclasses must implement forward()")

    def calculate(self, output, y):
        """Return the mean data loss for a batch.

        Args:
            output: Model output, passed through unchanged to ``forward``.
            y: Ground-truth values, passed through unchanged to ``forward``.

        Returns:
            The mean (``np.mean``) of the per-sample losses from ``forward``.
        """
        # Calculate sample losses
        sample_losses = self.forward(output, y)

        # Calculate mean loss
        data_loss = np.mean(sample_losses)

        return data_loss

    # Backward-compatible alias: the method was originally published under
    # this misspelled name, and existing callers still use it.
    claculate = calculate

# Cross-entropy loss
class Loss_CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy computed per sample."""

    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the target class per sample.

        Args:
            y_pred: Predicted probabilities, one row per sample.
            y_true: Either 1-D class indices or a 2-D one-hot mask with the
                same layout as ``y_pred``. Plain lists are accepted.

        Returns:
            1-D array with one loss value per sample.

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        # Accept lists as well as arrays (lists have no .shape / .ndim)
        y_true = np.asarray(y_true)

        # Number of samples in a batch
        samples = len(y_pred)

        # Clip data so log() never sees 0 (log(0) is -inf)
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Categorical labels: pick each row's target-class probability
            correct_confidences = y_pred_clipped[
                range(samples),
                y_true
            ]
        elif y_true.ndim == 2:
            # One-hot labels: mask out non-target classes, then sum each row
            correct_confidences = np.sum(
                y_pred_clipped * y_true,
                axis=1
            )
        else:
            # Previously this fell through and failed on an unbound local
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got shape {y_true.shape}"
            )

        # Losses
        negative_log_likelihood = -np.log(correct_confidences)
        return negative_log_likelihood
    
# Create dataset
# NOTE(review): presumably X holds the 2-feature samples and y the class
# labels for 3 classes — confirm against the nnfs spiral_data documentation
X, y = spiral_data(samples = 100, classes = 3)

# Create Dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)

# Create ReLU activation (to be used with Dense layer)
activation1 = ReLU()

# Create second Dense layer with 3 input features (as we take output of previous layer here) and 3 output values
dense2 = Layer_Dense(3, 3)

# Create Softmax activation (to be used with Dense layer)
activation2 = Softmax()

# Create loss function
loss_function = Loss_CategoricalCrossEntropy()

# Make a forward pass of our training data through the first Dense layer
dense1.forward(X)

# Make a forward pass through the ReLU activation function
# It takes the output of first dense layer
activation1.forward(dense1.output)

# Make a forward pass through second Dense layer
# It takes outputs of activation function of first layer as inputs
dense2.forward(activation1.output)

# Make a forward pass through the Softmax activation function
# It takes in output of second dense layer
activation2.forward(dense2.output)

# Let's see output of the first few samples
print(activation2.output[0:5])

# Perform a forward pass through loss function
# It takes the output of the Softmax activation (not the raw second dense
# layer output) together with the targets y, and returns the mean loss
loss = loss_function.claculate(activation2.output, y)

# Print loss value
print("loss:", loss)

[[0.33333334 0.33333334 0.33333334]
 [0.3333332  0.3333332  0.33333364]
 [0.3333329  0.33333293 0.3333342 ]
 [0.3333326  0.33333263 0.33333477]
 [0.33333233 0.3333324  0.33333528]]
loss: 1.0986104


## Accuracy Calculation

In [16]:
import numpy as np
# Softmax probabilities for a batch of 3 samples (one row per sample)
softmax_outputs = np.array([
    [0.7, 0.2, 0.1],
    [0.5, 0.1, 0.4],
    [0.02, 0.9, 0.08],
])

# Ground-truth class index for each of the 3 samples
class_targets = np.array([0, 1, 1])

# Predicted class = column holding the largest probability in each row
predictions = softmax_outputs.argmax(axis=1)

# One-hot targets are collapsed to class indices before comparing
if class_targets.ndim == 2:
    class_targets = class_targets.argmax(axis=1)

# Booleans average as 1/0, so the mean is the fraction of correct predictions
is_correct = predictions == class_targets
accuracy = is_correct.mean()

print('acc:', accuracy)

acc: 0.6666666666666666


We can add the following to the end of our full script above to calculate its accuracy:

In [17]:
# Predicted class per sample: the index of the largest softmax output
predictions = activation2.output.argmax(axis=1)

# One-hot labels are collapsed to class indices before comparing
if y.ndim == 2:
    y = y.argmax(axis=1)

# Fraction of samples whose predicted class matches the label
matches = predictions == y
accuracy = np.mean(matches)

# Print accuracy
print('acc:', accuracy)

acc: 0.34
