In [13]:
import numpy as np

class CategoricalCrossentropyLoss:
    def __init__(self):
        self.input = None
        self.target = None
        self.output = None

    def categorical_crossentropy(self, y_true, y_pred):
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.sum(y_true * np.log(y_pred))

    def forward(self, y_pred, y_true):
        self.input = y_pred
        self.target = y_true
        self.output = self.categorical_crossentropy(y_true, y_pred)
        return self.output

    def backward(self):
        epsilon = 1e-15
        grad_input = -self.target / (self.input + epsilon)
        return grad_input

# Example usage:
categorical_crossentropy_loss = CategoricalCrossentropyLoss()

# Example true labels (one-hot encoded)
y_true = np.array([0, 1, 0])

# Example predicted probabilities from the model
y_pred = np.array([0.2, 0.7, 0.1])

# Forward pass
loss = categorical_crossentropy_loss.forward(y_pred, y_true)
print("Categorical Crossentropy Loss:", loss)

# Backward pass
grad_input = categorical_crossentropy_loss.backward()
print("Gradient with respect to input:", grad_input)

Categorical Crossentropy Loss: 0.35667494393873245
Gradient with respect to input: [ 0.         -1.42857143  0.        ]


In [20]:

# Cross-entropy loss
class Loss_CategoricalCrossentropy:

    # Forward pass
    def forward(self, y_pred, y_true):

        # Number of samples in a batch
        samples = len(y_pred)


        # Clip data to prevent division by 0
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Probabilities for target values -
        # only if categorical labels
        if len(y_true.shape) == 1:
            correct_confidences = y_pred_clipped[
                range(samples),
                y_true
            ]

        # Mask values - only for one-hot encoded labels
        elif len(y_true.shape) == 2:
            correct_confidences = np.sum(
                y_pred_clipped * y_true,
                axis=1
            )

        # Losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods

    # Backward pass
    def backward(self, y_pred, y_true):

        # Number of samples
        samples = len(y_pred)
        # Number of labels in every sample
        # We'll use the first sample to count them
        labels = len(y_pred[0])

        # If labels are sparse, turn them into one-hot vector
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]

        print(y_true)

        # Calculate gradient
        self.dinputs = -y_true / y_pred

        print(self.dinputs)
        # Normalize gradient
        self.dinputs = self.dinputs / samples

        return self.dinputs

# Example usage:
categorical_crossentropy_loss = Loss_CategoricalCrossentropy()

# Example true labels (one-hot encoded)
y_true = np.array([[0, 1, 0]])

# Example predicted probabilities from the model
y_pred = np.array([[0.2, 0.7, 0.1]])

# Forward pass
loss = categorical_crossentropy_loss.forward(y_pred, y_true)
print("Categorical Crossentropy Loss:", loss)

# Backward pass
grad_input = categorical_crossentropy_loss.backward(y_pred, y_true)
print("Gradient with respect to input:", grad_input)


Categorical Crossentropy Loss: [0.35667494]
[[0 1 0]]
[[ 0.         -1.42857143  0.        ]]
Gradient with respect to input: [[ 0.         -1.42857143  0.        ]]
