# 1 损失函数

## 1.1 MSE

In [1]:
import torch
from torch.nn import MSELoss #Class

In [2]:
# Two independent random float32 vectors of length 50, standing in for the
# predictions (yhat, drawn first) and the targets (y, drawn second).
yhat = torch.randn(50, dtype=torch.float32)
y = torch.randn(50, dtype=torch.float32)

In [3]:
# MSELoss(reduction="mean") gives the mean squared error (MSE);
# MSELoss(reduction="sum") gives the sum of squared errors (SSE), used here.
criterion = MSELoss(reduction="sum")

In [4]:
# Apply the criterion to the predictions (first argument) and the targets (second argument).
loss = criterion(yhat,y)

In [6]:
# Display the loss computed in the previous cell.
loss

tensor(86.6319)

## 1.2 二分类交叉熵

### 1.2.1 手动实现

$L(w) = -\sum_{i=1}^m(y_i*ln(\sigma_i) + (1-y_i)*ln(1-\sigma_i))$

下面的代码在求和之后还乘以$\frac{1}{m}$，得到的是所有样本的平均损失。

In [7]:
# Synthetic binary-classification data: m samples with 4 features each.
m = 3 * 10**3

# Seed the generator so that X, w and y are reproducible; they must be drawn
# in exactly this order to get the same values.
torch.manual_seed(420)
X = torch.rand(m, 4, dtype=torch.float32)
w = torch.rand(4, 1, dtype=torch.float32)
y = torch.randint(0, 2, (m, 1), dtype=torch.float32)  # labels stored as float32

# Linear scores (m x 1), then the sigmoid of those scores.
zhat = X.mm(w)
sigma = zhat.sigmoid()

In [8]:
# Mean binary cross-entropy written out by hand.
# Python's built-in sum() iterates over the rows of the (m, 1) tensor, so the
# result keeps one dimension of size 1 instead of being a 0-d scalar.
loss = -(1 / m) * sum(
    y * torch.log(sigma) + (1 - y) * torch.log(1 - sigma)
)

In [9]:
# Display the hand-computed loss.
loss

tensor([0.7962])

注意，在写损失函数这样的复杂函数时，除了普通的加减乘除以外的全部计算，都要使用torch中的函数，因为tensor的向量化运算速度远远超过普通Python代码，在下面的测试中甚至快于NumPy。你可以试着比较在样本量为30W（$3\times10^5$）时，以下三种写法（Python原生sum、torch.sum、np.sum）运行的时间差异

In [27]:
import time
import numpy as np

In [23]:
# Sample count for the timing comparison: 300,000.
m = 3 * 10**5

In [41]:
# Random float32 features (m x 4) and weights (4 x 1); no seed is set here,
# so the values differ from run to run.
X = torch.rand(m, 4, dtype=torch.float32)
w = torch.rand(4, 1, dtype=torch.float32)

In [43]:
# One random label per sample, shape (m, 1), stored as float32.
y = torch.randint(0, 2, (m, 1), dtype=torch.float32)

In [44]:
# Display the generated labels.
y

tensor([[0.],
        [1.],
        [1.],
        ...,
        [1.],
        [1.],
        [0.]])

In [24]:
# Build the full data set used by the timing cells: features, weights and
# labels are drawn again (unseeded), then the scores and their sigmoid.
X = torch.rand(m, 4, dtype=torch.float32)
w = torch.rand(4, 1, dtype=torch.float32)
y = torch.randint(0, 2, (m, 1), dtype=torch.float32)
zhat = X.mm(w)
sigma = zhat.sigmoid()

python原生

In [25]:
# Time the loss when the reduction is done with Python's built-in sum().
# perf_counter() is the high-resolution clock meant for measuring short
# durations; time.time() is wall-clock time and can be too coarse for this.
start = time.perf_counter()  # timestamp before the computation
loss1 = -(1/m)*sum(y*torch.log(sigma) + (1-y)*torch.log(1-sigma))
now = time.perf_counter()  # timestamp after the computation
print(now - start)  # elapsed time in seconds

1.0851020812988281


torch

In [38]:
# Time the same loss when the reduction is done with torch.sum().
# perf_counter() is the high-resolution clock meant for measuring short
# durations; time.time() is wall-clock time and can be too coarse for this.
start = time.perf_counter()
loss2 = -(1/m)*torch.sum(y*torch.log(sigma) + (1-y)*torch.log(1-sigma))
now = time.perf_counter()
print(now - start)  # elapsed time in seconds

0.001994609832763672


numpy

In [39]:
# 2种转成Numpy的写法
np_sigma = torch.sigmoid(zhat).numpy() 
np_y = np.array(y)

In [40]:
# Time the same loss computed entirely with NumPy arrays and np.sum().
# perf_counter() is the high-resolution clock meant for measuring short
# durations; time.time() is wall-clock time and can be too coarse for this.
start = time.perf_counter()
loss3 = - (1/m) * np.sum(np_y*np.log(np_sigma) + (1-np_y)*np.log(1-np_sigma))
now = time.perf_counter()
print(now - start)  # elapsed time in seconds

0.003989219665527344


### 1.2.2 交叉熵实现

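PyTorch中共有4种实现：nn中的2个类（nn.BCELoss、nn.BCEWithLogitsLoss），还有functional库中的2个函数（F.binary_cross_entropy、F.binary_cross_entropy_with_logits），推荐nn.BCEWithLogitsLoss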

In [48]:
import torch.nn as nn

In [49]:
# BCELoss is applied to sigma, i.e. the scores after the sigmoid.
criterion = nn.BCELoss(reduction = "mean") # reduction options: "none", "sum", "mean"
loss = criterion(sigma,y)
loss

tensor(0.7608)

In [51]:
# pytorch官方更加推荐，因为内置的sigmoid函数可以让精度问题被缩小
criterion2 = nn.BCEWithLogitsLoss(reduction = "mean")
loss = criterion2(zhat, y)
loss

tensor(0.7608)

In [52]:
from torch.nn import functional as F
# Call the loss function from the functional library directly, without
# instantiating a criterion object; it is given the raw scores zhat.
F.binary_cross_entropy_with_logits(zhat,y)

tensor(0.7608)

In [53]:
# Functional counterpart applied to sigma (the scores after the sigmoid).
F.binary_cross_entropy(sigma,y)

tensor(0.7608)

## 1.3 多分类交叉熵

二分类单样本使用这样的表示：$P(\hat y_i|x_i,w) = P_1^{y_i}*P_0^{1-y_i}$

多分类单样本使用这样的表示：$P(\hat y_i|x_i,w) = P_1^{y_{i(k=1)}}*P_2^{y_{i(k=2)}}*...*P_K^{y_{i(k=K)}}$

由于只有真实类别$j$对应的指数$y_{i(k=j)}$为1，其余类别的指数均为0（对应的因子等于1），所以可以简化为：$P(\hat y_i|x_i,w) = P_j^{y_{i(k=j)}}$

多样本下的损失函数是：$L(w) = - \sum_{i=1}^my_{i(k=j)}ln\sigma_i$  
$ln\sigma_i$是对softmax计算的结果取Ln  
$- \sum_{i=1}^my_{i(k=j)}$是负对数似然函数NLLLoss：Negative Log Likelihood function

In [54]:
import torch
import torch.nn as nn

In [55]:
# Sample count for the multi-class example: 3,000.
m = 3 * 10**3

In [75]:
# Seeded synthetic multi-class data: m samples x 4 features, a 4 x 3 weight
# matrix (one column per class) and one label per sample.
# The labels are created as float32 and cast with .long() where the loss is
# computed.
torch.manual_seed(420)
X = torch.rand(m, 4, dtype=torch.float32)
w = torch.rand(4, 3, dtype=torch.float32)
y = torch.randint(0, 3, (m,), dtype=torch.float32)

In [76]:
# Raw class scores: (m x 4) features times (4 x 3) weights gives (m x 3).
z_hat = X.mm(w)

In [77]:
# Log-softmax taken along dim=1, i.e. across the three class scores of each sample.
logsm = nn.LogSoftmax(dim=1)
log_sigma = logsm(z_hat)
log_sigma

tensor([[-1.1139, -0.8802, -1.3585],
        [-1.0558, -0.8982, -1.4075],
        [-1.0920, -1.0626, -1.1430],
        ...,
        [-1.0519, -0.9180, -1.3805],
        [-1.0945, -1.1219, -1.0798],
        [-1.0276, -0.8891, -1.4649]])

In [78]:
# Display the float32 labels.
y

tensor([2., 2., 2.,  ..., 2., 2., 2.])

In [79]:
criterion = nn.NLLLoss() # instantiate the criterion
# Cross-entropy needs the labels in one-hot form, so floating-point labels are
# not accepted as input; y is therefore cast to integers with .long().
# NLLLoss must be given the log-softmax output (log_sigma) as its input.
criterion(log_sigma, y.long())

tensor(1.1147)

直接用nn.CrossEntropyLoss更快：它内部依次完成LogSoftmax和NLLLoss两步，因此直接输入未经softmax处理的z_hat即可，结果与上面一致

In [80]:
# Instantiate the cross-entropy criterion used in the next cell.
criterion = nn.CrossEntropyLoss()

In [81]:
# The raw scores z_hat are passed in directly (no log-softmax beforehand);
# the float labels are cast to integers with .long().
criterion(z_hat, y.long())

tensor(1.1147)