# CROSSENTROPY Loss
<img src = "./figs/pytorch_ce_loss.png">

ref : https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html

In [1]:
import torch
# Fix the RNG state so the sampled logits and targets are reproducible.
torch.manual_seed(2023)

C = 4      # number of classes in this toy classification problem
batch = 9  # number of samples in the mini-batch

# Unnormalized logits (NOT probabilities): draw batch*C values, then fold them into (batch, C).
input_x = torch.rand(batch * C).view(batch, C)
# One class index in [0, C) per sample, stored as torch.long for use as a CE-loss target.
target_y = torch.randint(low=0, high=C, size=(batch,), dtype=torch.long)

print(f"input x (logit)\n{input_x}")
print(f"\ntarget y\n{target_y}")

input x (logit)
tensor([[0.4290, 0.7201, 0.9481, 0.4797],
        [0.5414, 0.9906, 0.4086, 0.2183],
        [0.1834, 0.2852, 0.7813, 0.1048],
        [0.6550, 0.8375, 0.1823, 0.5239],
        [0.2432, 0.9644, 0.5034, 0.0320],
        [0.8316, 0.3807, 0.3539, 0.2114],
        [0.9839, 0.6632, 0.7001, 0.0155],
        [0.3840, 0.7968, 0.4917, 0.4324],
        [0.5174, 0.6913, 0.1628, 0.5692]])

target y
tensor([1, 0, 1, 2, 3, 1, 0, 1, 1])


In [2]:
# Convert the unnormalized logits "input_x" into probabilities.
# `dim` is the documented keyword of torch.softmax; dim=1 normalizes across the
# class axis, so every row of the result sums to 1.
input_x_prob = torch.softmax(input_x, dim=1)
print(f"input x (prob)\n{input_x_prob}")

input x (prob)
tensor([[0.1972, 0.2639, 0.3315, 0.2075],
        [0.2400, 0.3761, 0.2102, 0.1737],
        [0.2062, 0.2283, 0.3749, 0.1906],
        [0.2702, 0.3243, 0.1684, 0.2370],
        [0.1936, 0.3983, 0.2512, 0.1568],
        [0.3578, 0.2279, 0.2219, 0.1924],
        [0.3499, 0.2539, 0.2634, 0.1328],
        [0.2139, 0.3233, 0.2383, 0.2245],
        [0.2535, 0.3017, 0.1778, 0.2670]])


### Manual CE Loss ( for-loop )

In [3]:
# Cross-entropy by hand: sum the negative log-probability assigned to each sample's true class.
loss = 0
for sample_prob, true_class in zip(input_x_prob, target_y):
    # The 1e-12 offset keeps log() finite should a probability be exactly 0.
    loss += -torch.log(sample_prob[true_class] + 1e-12)

# Dividing the sum by the number of samples corresponds to CELoss(reduction='mean').
print(f"Total Loss : {loss / len(input_x)}" )
print(f"Simplified : {loss / len(input_x):.4f}" )

Total Loss : 1.4141454696655273
Simplified : 1.4141


### Compare with torch CE Loss

In [4]:
# Reference value: PyTorch's CrossEntropyLoss is fed the unnormalized logits directly.
torch_ce = torch.nn.CrossEntropyLoss()
ce_reference = torch_ce(input_x, target_y).item()  # evaluate once, print twice
print(f'Pytroch Implemented CE :{ce_reference}')
print(f'Simplified :{ce_reference:.4f}')

Pytroch Implemented CE :1.414145588874817
Simplified :1.4141


# BCE Loss
<img src = "./figs/pytorch_bce_loss.png">

ref : https://pytorch.org/docs/stable/generated/torch.nn.BCELoss.html

In [5]:
import torch
# Re-seed so this section is reproducible independently of the cells above.
torch.manual_seed(2023)

batch = 6
# Inputs are sampled uniformly from [0, 1) and used directly as "probabilities".
input_x = torch.rand(batch)
# Binary targets (0. or 1.) stored as torch.float for use with the BCE loss.
target_y = torch.randint(low=0, high=2, size=(batch,), dtype=torch.float)

print(f"input x\n{input_x}")
print(f"\ntarget y\n{target_y}")

input x
tensor([0.4290, 0.7201, 0.9481, 0.4797, 0.5414, 0.9906])

target y
tensor([1., 0., 0., 0., 1., 0.])


### Manual BCE Loss ( for-loop )

In [6]:
# Binary cross-entropy by hand, one sample at a time.
loss = 0
for prob, label in zip(input_x, target_y):
    # Only one of the two terms is non-zero for a hard 0/1 label.
    loss += -label * torch.log(prob + 1e-12)            # active when y = 1
    loss += -(1 - label) * torch.log(1 - prob + 1e-12)  # active when y = 0

# Dividing the sum by the number of samples corresponds to BCELoss(reduction='mean').
print(f"Total Loss : {loss / len(input_x)}" )
print(f"Simplified : {loss / len(input_x):.4f}" )

Total Loss : 1.8351644277572632
Simplified : 1.8352


### Manual BCE Loss ( One-line )

In [7]:
# Same BCE computation as the loop version, expressed with whole-tensor operations.
log_p = torch.log(input_x + 1e-12)          # log-probability of the positive class
log_not_p = torch.log(1-input_x + 1e-12)    # log-probability of the negative class

y_equals_1 = -target_y * log_p              # non-zero only where the target is 1
y_equals_0 = -(1-target_y) * log_not_p      # non-zero only where the target is 0

# torch.mean over the per-sample losses corresponds to BCELoss(reduction='mean').
bce_mean = torch.mean(y_equals_1 + y_equals_0)

print(f"Loss for the 'y = 1' cases : {y_equals_1}")
print(f"Loss for the 'y = 0' cases : {y_equals_0}")
print(f"Total Loss : {bce_mean}")
print(f"Simplified : {bce_mean:.4f}")

Loss for the 'y = 1' cases : tensor([0.8463, 0.0000, 0.0000, 0.0000, 0.6136, 0.0000])
Loss for the 'y = 0' cases : tensor([0.0000, 1.2732, 2.9593, 0.6533, 0.0000, 4.6653])
Total Loss : 1.8351644277572632
Simplified : 1.8352


### Compare with torch BCE Loss

In [8]:
# Reference value: PyTorch's BCELoss evaluated on the same probabilities and targets.
torch_bce = torch.nn.BCELoss()
bce_reference = torch_bce(input_x, target_y).item()  # evaluate once, print twice
print(f'Pytroch Implemented BCE :{bce_reference}')
print(f'Simplified :{bce_reference:.4f}')

Pytroch Implemented BCE :1.8351644277572632
Simplified :1.8352


# Label Smoothing
<img src = "./figs/label_smoothing.png">

ref : https://towardsdatascience.com/label-smoothing-make-your-model-less-over-confident-b12ea6f81a9a

In [9]:
import torch
import torch.nn as nn

class BCELossWithLabelSmoothing(nn.Module):
    """Binary cross-entropy on probabilities with label smoothing applied to the targets.

    Each target ``y`` is replaced by ``y * (1 - alpha) + alpha / 2`` before the
    usual BCE formula is evaluated, so hard labels 0 / 1 become
    ``alpha / 2`` / ``1 - alpha / 2``.

    Args:
        alpha: Smoothing strength (Python number or 0-d tensor). ``0`` leaves
            the targets unchanged; ``1`` maps every target to ``0.5``.
        reduction: ``'mean'`` (default) averages over all elements, ``'sum'``
            adds them up, ``'none'`` returns the element-wise loss.

    Raises:
        ValueError: If ``reduction`` is not one of the three supported values.
    """

    _REDUCTIONS = ('mean', 'sum', 'none')
    _NUM_CLASSES = 2   # binary problem: the smoothing mass is shared by two classes
    _EPS = 1e-12       # keeps log() finite when a probability is exactly 0 or 1

    def __init__(self, alpha, reduction='mean'):
        super(BCELossWithLabelSmoothing, self).__init__()
        if reduction not in self._REDUCTIONS:
            # Previously any unrecognized value silently behaved like 'sum'.
            raise ValueError(
                f"reduction must be one of {self._REDUCTIONS}, got {reduction!r}"
            )
        self.alpha = alpha
        self.reduction = reduction

    def forward(self, output, target):
        """Return the label-smoothed BCE between ``output`` and ``target``.

        Args:
            output: Predicted probabilities (values in [0, 1]).
            target: Targets with the same shape as ``output``.

        Returns:
            A 0-d tensor for ``'mean'`` / ``'sum'``, or a tensor shaped like
            ``output`` for ``'none'``.
        """
        # The class count is fixed at 2 rather than inferred from target.max():
        # a batch containing only zeros would otherwise be smoothed with N = 1.
        smoothed = target * (1 - self.alpha) + self.alpha / self._NUM_CLASSES

        positive_term = -smoothed * torch.log(output + self._EPS)
        negative_term = -(1 - smoothed) * torch.log(1 - output + self._EPS)
        loss = positive_term + negative_term

        if self.reduction == 'mean':
            # Average over every element, not just the first dimension.
            return loss.mean()
        if self.reduction == 'sum':
            return loss.sum()
        return loss

# Baseline: PyTorch's BCELoss on the unsmoothed targets.
torch_bce = torch.nn.BCELoss()
print(f'Torch BCELoss {torch_bce(input_x, target_y):.4f}')

# Sweep the smoothing strength over 0.0, 0.1, ..., 1.0 and report the loss for each value.
alphas = torch.arange(11)/10
for alpha in alphas:
    bce_LS = BCELossWithLabelSmoothing(alpha)
    smoothed_loss = bce_LS(input_x, target_y)
    print(f'Implemented BCELoss with Label smoothing (alpha = {alpha:.1f}) : {smoothed_loss:.4f}')

Torch BCELoss 1.8352
Implemented BCELoss with Label smoothing (alpha = 0.0) : 1.8352
Implemented BCELoss with Label smoothing (alpha = 0.1) : 1.7640
Implemented BCELoss with Label smoothing (alpha = 0.2) : 1.6927
Implemented BCELoss with Label smoothing (alpha = 0.3) : 1.6215
Implemented BCELoss with Label smoothing (alpha = 0.4) : 1.5503
Implemented BCELoss with Label smoothing (alpha = 0.5) : 1.4791
Implemented BCELoss with Label smoothing (alpha = 0.6) : 1.4079
Implemented BCELoss with Label smoothing (alpha = 0.7) : 1.3367
Implemented BCELoss with Label smoothing (alpha = 0.8) : 1.2655
Implemented BCELoss with Label smoothing (alpha = 0.9) : 1.1943
Implemented BCELoss with Label smoothing (alpha = 1.0) : 1.1231


Check how label smoothing changes the target values as `alpha` increases.

In [10]:
# Binary targets take values in {0, 1}, so the smoothing mass alpha is shared by 2 classes.
# The count is set once, outside the loop; deriving it from target_y.max()+1 would
# yield 1 (and therefore wrong smoothed values) whenever the targets are all zero.
N = 2
for alpha in alphas:
    # Hard label 1 -> 1 - alpha/2, hard label 0 -> alpha/2.
    target_smoo = target_y * (1-alpha) + alpha / N
    print(f"alpha : {alpha.item():.1f} | target : {target_smoo}")

alpha : 0.0 | target : tensor([1., 0., 0., 0., 1., 0.])
alpha : 0.1 | target : tensor([0.9500, 0.0500, 0.0500, 0.0500, 0.9500, 0.0500])
alpha : 0.2 | target : tensor([0.9000, 0.1000, 0.1000, 0.1000, 0.9000, 0.1000])
alpha : 0.3 | target : tensor([0.8500, 0.1500, 0.1500, 0.1500, 0.8500, 0.1500])
alpha : 0.4 | target : tensor([0.8000, 0.2000, 0.2000, 0.2000, 0.8000, 0.2000])
alpha : 0.5 | target : tensor([0.7500, 0.2500, 0.2500, 0.2500, 0.7500, 0.2500])
alpha : 0.6 | target : tensor([0.7000, 0.3000, 0.3000, 0.3000, 0.7000, 0.3000])
alpha : 0.7 | target : tensor([0.6500, 0.3500, 0.3500, 0.3500, 0.6500, 0.3500])
alpha : 0.8 | target : tensor([0.6000, 0.4000, 0.4000, 0.4000, 0.6000, 0.4000])
alpha : 0.9 | target : tensor([0.5500, 0.4500, 0.4500, 0.4500, 0.5500, 0.4500])
alpha : 1.0 | target : tensor([0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000])
