# Step 6: Coding Loss Functions in Python

Loss functions provide a way to quantify how wrong our answer is. By minimizing this function, we can optimize the weights in our neural network.

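The categorical cross-entropy loss for a single sample can be mathematically expressed as $L = -\sum_{j} y_{j} \log(\hat{y}_{j})$, where $y$ is the one-hot true label, $\hat{y}$ is the predicted distribution, and the sum runs over the classes. Because $y$ is zero everywhere except at the true class, every other term vanishes. In this way, we're only paying attention to the confidence score for the class that actually matters.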

In [2]:
import numpy as np
# Softmax confidences: one row per sample, one column per class.
softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])
# Index of the correct class for each sample.
class_targets = [0, 1, 1]
# Pair every row index with its target column to pull out the confidence
# assigned to the correct class.
softmax_outputs[np.arange(len(softmax_outputs)), class_targets]

array([0.7, 0.5, 0.9])

In [3]:
# Negative log of the confidence each sample assigned to its true class.
loss = -np.log(
    softmax_outputs[np.arange(len(softmax_outputs)), class_targets]
)
# Reduce the per-sample losses to a single batch value.
average_loss = loss.mean()
average_loss

np.float64(0.38506088005216804)

For one-hot encoded class labels, we can use element-wise multiplication followed by a sum over each row to pick out the relevant confidences:

![image](crossentropyloss.png)

In [4]:
# One-hot targets: a single 1 per row marks the correct class.
y_true = np.array([[1, 0, 0],
                   [0, 1, 0],
                   [0, 1, 0]])

# Predicted confidences, laid out like y_true.
y_pred = np.array([[0.7, 0.2, 0.1],
                   [0.1, 0.5, 0.4],
                   [0.02, 0.9, 0.08]])

# The one-hot mask zeroes every confidence except the true class's, so the
# row sum is exactly that confidence.
relevant_preds = (y_true * y_pred).sum(axis=1)
loss = -np.log(relevant_preds)
average_loss = loss.mean()
average_loss

np.float64(0.38506088005216804)

The last thing to do is to clip the predicted confidences before taking the logarithm, so that the resulting loss is neither undefined (from $\log 0$) nor exactly zero, as that would mess with the gradients.

In [5]:
# general Loss class
class Loss:
    """Base class that reduces per-sample losses to one batch-level value."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        The per-sample values come from ``self.forward(output, y)``, which
        this class does not define; a subclass has to provide it.

        Args:
            output: Model predictions, passed through to ``forward``.
            y: Ground-truth targets, passed through to ``forward``.

        Returns:
            The result of ``np.mean`` over the values ``forward`` returns.
        """
        return np.mean(self.forward(output, y))

In [6]:
# class implementation of the cross-entropy loss
class CategoricalCrossEntropyLoss(Loss):
    """Categorical cross-entropy for class-index or one-hot targets."""

    def forward(self, y_pred, y_true):
        """Compute the negative log-likelihood of each sample.

        Args:
            y_pred: Predicted confidences with one row per sample and one
                column per class.
            y_true: Either a 1-D sequence of class indices or a 2-D
                one-hot array laid out like ``y_pred``. Plain lists are
                accepted.

        Returns:
            A 1-D array holding one loss value per sample.

        Raises:
            ValueError: If ``y_true`` is neither 1-D nor 2-D.
        """
        # Lists have no ``.shape``; normalise so both label formats work
        # whether they arrive as lists or arrays.
        y_true = np.asarray(y_true)

        # Clip away exact 0 and 1 so the log is never infinite and the
        # loss is never exactly zero.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Targets are class indices: pick one confidence per row.
            correct_confidences = y_pred_clipped[
                np.arange(len(y_pred_clipped)), y_true
            ]
        elif y_true.ndim == 2:
            # Targets are one-hot rows: the mask zeroes all other classes.
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Fail loudly instead of hitting an unbound local below.
            raise ValueError(
                "y_true must be 1-D class indices or 2-D one-hot rows, "
                f"got {y_true.ndim} dimension(s)"
            )

        return -np.log(correct_confidences)

In [8]:
# testing out the implementation
# Same confidences as the first example: rows are samples, columns classes.
softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])

# One-hot targets this time, to exercise the 2-D branch of the loss.
class_targets = np.array([[1, 0, 0],
                          [0, 1, 0],
                          [0, 1, 0]])

loss_func = CategoricalCrossEntropyLoss()
# calculate() averages the per-sample losses returned by forward().
loss = loss_func.calculate(softmax_outputs, class_targets)
loss

np.float64(0.38506088005216804)

Here's a full forward pass through a small network, followed by the loss calculation:

In [11]:
class DenseLayer:
    """Fully connected layer: ``inputs · weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        Args:
            n_inputs: Number of features in each input row.
            n_neurons: Number of neurons, i.e. output columns.
        """
        # Small random starting weights, one column per neuron.
        self.weights = np.random.randn(n_inputs, n_neurons) * 0.01
        # A single row of zeros that broadcasts across the batch.
        self.biases = np.zeros(shape=(1, n_neurons))

    def forward(self, inputs):
        """Store the layer's response to ``inputs`` in ``self.output``."""
        weighted = np.dot(inputs, self.weights)
        self.output = weighted + self.biases

class ActivationReLU:
    """Rectified linear activation."""

    def forward(self, inputs):
        """Store ``inputs`` with every negative value replaced by 0.

        The result is written to ``self.output``; nothing is returned.
        """
        rectified = np.maximum(0, inputs)
        self.output = rectified

class ActivationSoftmax:
    """Row-wise softmax activation."""

    def forward(self, inputs):
        """Store the softmax of each row of ``inputs`` in ``self.output``.

        Each row is exponentiated and then divided by its own sum, so
        every output row adds up to 1.
        """
        # Subtract each row's maximum first so the largest exponent is 0.
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(shifted)
        self.output = exp_values / exp_values.sum(axis=1, keepdims=True)

In [13]:
import nnfs
from nnfs.datasets import spiral_data
# NOTE(review): nnfs.init() presumably fixes the random seed and default
# dtype so the run is reproducible — confirm against the nnfs documentation.
nnfs.init()

# initialize data
# NOTE(review): assumes X has 2 features per sample (dense1 takes 2 inputs)
# and y holds the class label of each sample — confirm against spiral_data.
X, y = spiral_data(samples=100, classes=3)

# initialize layers
# 2 inputs -> 3 neurons (ReLU) -> 3 neurons (softmax), then the loss.
# Keep this creation order: each DenseLayer draws its weights from
# np.random when it is constructed.
dense1 = DenseLayer(2, 3)
activation1 = ActivationReLU()
dense2 = DenseLayer(3, 3)
activation2 = ActivationSoftmax()
loss_func = CategoricalCrossEntropyLoss()

# forward pass
# Each stage stores its result on .output, which feeds the next stage.
dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)
# Show the softmax output for the first five samples.
print(activation2.output[:5])

# calculate loss
# calculate() returns the mean of the per-sample losses.
loss = loss_func.calculate(activation2.output, y)
print(f'loss: {loss}')

[[0.33333334 0.33333334 0.33333334]
 [0.3333332  0.3333332  0.33333364]
 [0.3333329  0.33333293 0.3333342 ]
 [0.3333326  0.33333263 0.33333477]
 [0.33333233 0.3333324  0.33333528]]
loss: 1.0986104011535645


Introducing Accuracy

In [14]:
# Softmax confidences: one row per sample, one column per class.
softmax_outputs = np.array([[0.7, 0.2, 0.1],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])
class_targets = np.array([0, 1, 1])

# The predicted class is the column with the highest confidence in each row.
predictions = softmax_outputs.argmax(axis=1)

# One-hot targets are reduced to class indices so they compare directly.
if class_targets.ndim == 2:
    class_targets = class_targets.argmax(axis=1)

# Fraction of samples whose predicted class matches the target.
accuracy = (predictions == class_targets).mean()
accuracy

np.float64(1.0)

In [16]:
# calculating the accuracy of the forward pass
# The predicted class is the highest-confidence column of each output row.
class_predictions = activation2.output.argmax(axis=1)

# One-hot targets are reduced to class indices so they compare directly.
if y.ndim == 2:
    y = y.argmax(axis=1)

# Fraction of samples whose predicted class matches the target.
accuracy = (class_predictions == y).mean()
accuracy

np.float64(0.34)