# Calculating Network Error with Loss

## Categorical Cross-Entropy Loss

In [1]:
import math

<img src='./assets/cross-functions.png' width=700 />

In [2]:
softmax_output = [0.7, 0.1, 0.2]

In [3]:
# Ground truth
target_output = [1, 0, 0]

In [4]:
# Categorical cross-entropy for a single sample:
#   loss = -sum_i(target_i * log(prediction_i))
# Pairing predictions with targets via zip handles any number of classes
# instead of hard-coding exactly three terms.
loss = -sum(
    math.log(predicted) * target
    for predicted, target in zip(softmax_output, target_output)
)

In [5]:
loss

0.35667494393873245

In [6]:
import numpy as np

In [7]:
b = 5.2
np.log(b)

np.float64(1.6486586255873816)

In [8]:
math.e ** np.log(b)

np.float64(5.199999999999999)

In [9]:
# Probabilities for 3 samples
softmax_outputs = np.array([[0.7, 0.1, 0.2],
[0.1, 0.5, 0.4],
[0.02, 0.9, 0.08]])

In [10]:
class_targets = [0, 1, 1] # dog, cat, cat

In [11]:
tuple(zip(class_targets, softmax_outputs))

((0, array([0.7, 0.1, 0.2])),
 (1, array([0.1, 0.5, 0.4])),
 (1, array([0.02, 0.9 , 0.08])))

In [14]:
# Pair each sample's target class index with its predicted distribution and
# show the confidence the model assigned to the correct class.
for targ_idx, distribution in zip(class_targets, softmax_outputs):
    print(
        f'value targ_idx {targ_idx}',
        f'value distribution {distribution}',
        distribution[targ_idx],
        sep='\n',
    )

value targ_idx 0
value distribution [0.7 0.1 0.2]
0.7
value targ_idx 1
value distribution [0.1 0.5 0.4]
0.5
value targ_idx 1
value distribution [0.02 0.9  0.08]
0.9


In [15]:
softmax_outputs[[0, 1, 2], class_targets]

array([0.7, 0.5, 0.9])

In [18]:
softmax_outputs = np.array([[0.7, 0.1, 0.2],
[0.1, 0.5, 0.4],
[0.02, 0.9, 0.08]])

In [19]:
range(len(softmax_outputs))

range(0, 3)

In [20]:
softmax_outputs[range(len(softmax_outputs)), class_targets]

array([0.7, 0.5, 0.9])

In [21]:
-np.log(softmax_outputs[range(len(softmax_outputs)), class_targets])

array([0.35667494, 0.69314718, 0.10536052])

In [22]:
neg_log = -np.log(softmax_outputs[range(len(softmax_outputs)), class_targets])
average_loss = np.mean(neg_log)
print(average_loss)

0.38506088005216804


In [23]:
softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])
class_targets = np.array([[1, 0, 0],
                            [0, 1, 0],
                            [0, 1, 0]])

In [29]:
len(class_targets.shape)

2

In [36]:
softmax_outputs[range(len(softmax_outputs)),class_targets]

array([[0.1 , 0.1 , 0.02],
       [0.7 , 0.5 , 0.02],
       [0.7 , 0.5 , 0.02]])

In [30]:
# Select, for every sample, the confidence predicted for its true class.
if class_targets.ndim == 1:
    # Categorical (sparse) labels: index each row by its class index
    sample_rows = range(len(softmax_outputs))
    correct_confidences = softmax_outputs[sample_rows, class_targets]
    print(correct_confidences)
elif class_targets.ndim == 2:
    # One-hot labels: zero out the wrong classes, then sum each row
    masked_outputs = softmax_outputs * class_targets
    correct_confidences = masked_outputs.sum(axis=1)
    print(correct_confidences)

[0.7 0.5 0.9]


<img src='./assets/explain_loss_1.png' />

<img src='./assets/explain_loss_2.png' />

<img src='./assets/exlpain_loss_3.png' />

In [32]:
# Losses 
neg_log = -np.log(correct_confidences)
neg_log

array([0.35667494, 0.69314718, 0.10536052])

In [33]:
average_loss = np.mean(neg_log)
average_loss

np.float64(0.38506088005216804)

In [34]:
np.e**(-np.inf)

0.0

In [35]:
-np.log(0)

  -np.log(0)


np.float64(inf)

We read it as: the limit of the natural logarithm of x, as x approaches 0 from the positive side, equals negative infinity, i.e. $\lim_{x \to 0^+} \ln(x) = -\infty$ (x can only approach 0 from the positive side because the natural logarithm of a negative value cannot be calculated).

<img src='./assets/error_loss_log.png' />

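To avoid an infinite loss when a confidence is 0, we clip the predictions to the range [1e-7, 1 - 1e-7]. Clipping
the upper end as well will prevent loss from being exactly 0, making it a very small value instead, but won’t make
it a negative value and won’t bias overall loss towards 1. Within our code and using NumPy, we’ll
accomplish that using the `np.clip()` function: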

https://www.geeksforgeeks.org/numpy-clip-in-python/

y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

## The Categorical Cross-Entropy Loss Class

# Common loss class

In [37]:
class Loss:
    """Common parent class for loss functions.

    Subclasses provide ``forward(output, y)``, which returns the per-sample
    losses; ``calculate`` reduces them to a single mean value.
    """

    def calculate(self, output, y):
        """Return the mean data loss for a batch.

        Args:
            output: Model output, passed through to ``forward``.
            y: Ground-truth values, passed through to ``forward``.

        Returns:
            The mean of the per-sample losses returned by ``forward``.
        """
        # Per-sample losses come from the concrete subclass
        per_sample = self.forward(output, y)

        # Reduce to one number: the batch average
        return np.mean(per_sample)

# Cross-entropy loss

In [40]:
class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy loss for softmax outputs.

    Ground-truth labels may be given either as a 1-D sequence of class
    indices or as a 2-D one-hot encoded array.
    """

    def forward(self, y_pred, y_true):
        """Compute the negative log-likelihood of every sample.

        Args:
            y_pred: Softmax output, indexed as ``[sample, class]``.
            y_true: Ground truth, either class indices (1-D) or one-hot
                rows (2-D). Plain Python lists are accepted.

        Returns:
            1-D array holding one loss value per sample.

        Raises:
            ValueError: If ``y_true`` is neither 1-D nor 2-D.
        """
        # Accept lists as well as arrays; lists have no ``.shape``/``.ndim``
        y_true = np.asarray(y_true)

        # Number of samples in a batch
        samples = len(y_pred)

        # Clip data to keep log() away from 0, which would give an infinite loss.
        # Clip both sides to not drag the mean toward any value.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Categorical labels: pick the probability of the target class per row
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif y_true.ndim == 2:
            # One-hot labels: mask out the non-target classes and sum each row
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Fail with a clear message instead of an unbound-variable error below
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {y_true.ndim} dimension(s)"
            )

        # Losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods

In [39]:
loss_function = Loss_CategoricalCrossentropy()
loss = loss_function.calculate(softmax_outputs, class_targets)
print(loss)

0.38506088005216804


## Accuracy Calculation

In [41]:
# Probabilities of 3 samples
softmax_outputs = np.array([[0.7, 0.2, 0.1],
[0.5, 0.1, 0.4],
[0.02, 0.9, 0.08]])
# Target (ground-truth) labels for 3 samples
class_targets = np.array([0, 1, 1])

https://www.geeksforgeeks.org/numpy-argmax-python/

`np.argmax` returns the indices of the maximum values along the given axis of an array.

In [None]:
# Calculate values along second axis ( axis of index 1 )
predictions = np.argmax(softmax_outputs , axis=1) 
predictions

array([0, 0, 1])

In [44]:
# Collapse one-hot targets to class indices so they line up with predictions
if class_targets.ndim == 2:
    class_targets = class_targets.argmax(axis=1)

# Matches are booleans; averaging counts True as 1 and False as 0
matches = class_targets == predictions
accuracy = np.mean(matches)

accuracy

np.float64(0.6666666666666666)