In [32]:
# 演示各种loss函数
import torch
import torch.nn.functional as F
import torch.nn as nn

In [33]:
# softmax() function
preds = torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.1, 0.1, 0.1, 0.1]])

exp = torch.exp(preds)
sum_ = torch.sum(exp, dim=1).reshape(-1, 1)
softmax = exp / sum_
print('手动计算softmax:\n', softmax)

softmax_ = F.softmax(preds, dim=1) # dim=1按行计算
print('函数计算softmax:\n', softmax_)

手动计算softmax:
 tensor([[0.2138, 0.2363, 0.2612, 0.2887],
        [0.2500, 0.2500, 0.2500, 0.2500]])
函数计算softmax:
 tensor([[0.2138, 0.2363, 0.2612, 0.2887],
        [0.2500, 0.2500, 0.2500, 0.2500]])


In [34]:
# log_softmax function
preds = torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.1, 0.1, 0.1, 0.1]])

exp = torch.exp(preds)
sum_ = torch.sum(exp, dim=1).reshape(-1, 1)
softmax = exp / sum_
log_softmax = torch.log(softmax) 
print('手动计算log_softmax:\n', log_softmax)

softmax_ = F.log_softmax(preds, dim=1) # dim=1按行计算
print('函数计算log_softmax:\n', softmax_)

手动计算log_softmax:
 tensor([[-1.5425, -1.4425, -1.3425, -1.2425],
        [-1.3863, -1.3863, -1.3863, -1.3863]])
函数计算log_softmax:
 tensor([[-1.5425, -1.4425, -1.3425, -1.2425],
        [-1.3863, -1.3863, -1.3863, -1.3863]])


In [35]:
# NLL: Negative Log Likelihood 负对数似然
preds = torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.1, 0.1, 0.1, 0.1]])
target = torch.tensor([2, 3])

exp = torch.exp(preds)
sum_ = torch.sum(exp, dim=1).reshape(-1, 1)
softmax = exp / sum_
log_softmax = torch.log(softmax) 

one_hot = F.one_hot(target).float() 
nllloss = -torch.sum(one_hot * log_softmax) / target.shape[0]
print('手动计算nll loss: ', nllloss)

Log_Softmax = F.log_softmax(preds, dim=1)  
Nllloss = F.nll_loss(Log_Softmax, target)  
print('函数计算nll loss: ', Nllloss)

手动计算nll loss:  tensor(1.3644)
函数计算nll loss:  tensor(1.3644)


In [36]:
# Cross Entropy Loss，就是NLL Loss
preds = torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.1, 0.1, 0.1, 0.1]])
target = torch.tensor([2, 3])

one_hot = F.one_hot(target).float() # 对标签作 one_hot 编码
print('[1]one_hot编码target:\n', one_hot)
exp = torch.exp(preds)
print('[2]对网络预测preds求指数:\n', exp)
sum_ = torch.sum(exp, dim=1).reshape(-1, 1)  # 按行求和
softmax = exp / sum_  # 计算 softmax()
print('[3]softmax操作:\n', softmax)
log_softmax = torch.log(softmax) # 计算 log_softmax()
print('[4]softmax后取对数:\n', log_softmax)
nllloss = -torch.sum(one_hot * log_softmax) / target.shape[0]  # 标签乘以激活后的数据，求平均值，取反
print("[5]手动使用nllloss计算交叉熵:", nllloss)

print('----------------------------------------------')
# 调用 NLLLoss() 函数计算
Log_Softmax = F.log_softmax(preds, dim=1)  # log_softmax() 激活
Nllloss = F.nll_loss(Log_Softmax, target)  # 无需对标签作 one_hot 编码
print("函数使用Nll loss计算交叉熵:", Nllloss)
# 直接使用交叉熵损失函数 CrossEntropy_Loss()
cross_entropy = F.cross_entropy(preds, target)  # 无需对标签作 one_hot 编码
print('函数交叉熵cross_entropy:', cross_entropy)

[1]one_hot编码target:
 tensor([[0., 0., 1., 0.],
        [0., 0., 0., 1.]])
[2]对网络预测preds求指数:
 tensor([[1.1052, 1.2214, 1.3499, 1.4918],
        [1.1052, 1.1052, 1.1052, 1.1052]])
[3]softmax操作:
 tensor([[0.2138, 0.2363, 0.2612, 0.2887],
        [0.2500, 0.2500, 0.2500, 0.2500]])
[4]softmax后取对数:
 tensor([[-1.5425, -1.4425, -1.3425, -1.2425],
        [-1.3863, -1.3863, -1.3863, -1.3863]])
[5]手动使用nllloss计算交叉熵: tensor(1.3644)
----------------------------------------------
函数使用Nll loss计算交叉熵: tensor(1.3644)
函数交叉熵cross_entropy: tensor(1.3644)


In [37]:
# Label Smoothing：在交叉熵损失CrossEntropy_Loss中，非标签对应位置的预测信息是没有被使用的，
# 而Label Smoothing使用了这种信息
def linear_combination(x, y, epsilon):
    return epsilon * x + (1 - epsilon) * y

def reduce_loss(loss, reduction='mean'):
    return loss.mean() if reduction == 'mean' else loss.sum() if reduction == 'sum' else loss

class LabelSmoothing_CrossEntropy(nn.Module):
    def __init__(self, epsilon: float = 0.1, reduction='mean'):
        super().__init__()
        self.epsilon = epsilon
        self.reduction = reduction

    def forward(self, preds, target):
        n = preds.size()[-1]    
        log_preds = F.log_softmax(preds, dim=-1)    
        print(log_preds)
        # 每一个类别的平均, 即考虑了除指定target之外的其他类别
        # -(-1.5425 + -1.4425 + -1.3425 + -1.2425)/4
        loss = reduce_loss(-log_preds.sum(dim=-1), self.reduction) # dim: -1 代表按照最里面的维度，也就是4个列（四个类别）相加
        nll = F.nll_loss(log_preds, target, reduction=self.reduction)   
        return linear_combination(loss / n, nll, self.epsilon)  # n 样本数

preds = torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.1, 0.1, 0.1, 0.1]])
target = torch.tensor([2, 3])

ls = LabelSmoothing_CrossEntropy()
lsloss = ls(preds, target)
print('Label smoothing损失:', lsloss)

tensor([[-1.5425, -1.4425, -1.3425, -1.2425],
        [-1.3863, -1.3863, -1.3863, -1.3863]])
Label smoothing损失: tensor(1.3669)
