In [1]:
import numpy as np
import torch
import torch.nn as nn

torch.__version__

'1.1.0'

## L1Loss

In [2]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)

In [3]:
x

tensor([[-0.3346,  0.6671, -0.1157],
        [ 1.4908, -2.3456,  0.4362]])

In [4]:
y

tensor([[ 0.6505,  1.0393, -0.0190],
        [ 0.2215,  1.3490, -0.9015]])

In [5]:
nn.L1Loss()(x, y)

tensor(1.2925)

In [6]:
nn.L1Loss(reduce=False)(x, y)



tensor([[0.9851, 0.3721, 0.0966],
        [1.2693, 3.6946, 1.3377]])

In [7]:
abs(x.numpy() - y.numpy())

array([[0.9850757 , 0.37210232, 0.09662484],
       [1.2692786 , 3.6945539 , 1.3376639 ]], dtype=float32)

In [8]:
abs(x.numpy() - y.numpy()).mean()

1.29255

## MSELoss

In [9]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)

x

tensor([[ 0.4210,  0.4374, -2.2026],
        [-0.7550, -0.8363, -1.0213]])

In [10]:
y

tensor([[ 0.8455,  1.1493,  0.3961],
        [ 0.0829, -0.3617,  1.0289]])

In [11]:
nn.MSELoss(reduce=False)(x, y)

tensor([[0.1802, 0.5068, 6.7533],
        [0.7020, 0.2252, 4.2032]])

In [12]:
nn.MSELoss()(x, y)

tensor(2.0951)

In [13]:
(x.numpy() - y.numpy())**2

array([[0.18021712, 0.5068279 , 6.7533436 ],
       [0.7019822 , 0.22523706, 4.20323   ]], dtype=float32)

In [14]:
((x.numpy() - y.numpy())**2).mean()

2.0951397

## CrossEntropyLoss

In [15]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
x

tensor([[-0.0468,  0.4993,  1.2129,  0.3495],
        [ 1.2116, -0.7870,  0.2877,  0.7849]])

In [16]:
y

tensor([1, 1])

In [17]:
nn.CrossEntropyLoss(reduce=False)(x, y), nn.CrossEntropyLoss()(x, y)

(tensor([1.4999, 2.7803]), tensor(2.1401))

In [18]:
x = x.numpy()
y = y.numpy()

lst = []
for k in range(len(x)):
    lst.append(-np.log(np.exp(x[k][y[k]]) / np.exp(x[k]).sum()))
lst, np.mean(lst)

([1.4999167, 2.780252], 2.1400843)

## NLLLoss

LogSoftmax

In [19]:
x = torch.randn(2, 4)
x

tensor([[-0.3196,  1.0305,  0.8622, -0.4668],
        [-0.7645, -1.2543,  0.6179, -1.8538]])

In [20]:
y = nn.LogSoftmax(dim=1)(x)
y

tensor([[-2.1952, -0.8450, -1.0133, -2.3423],
        [-1.7807, -2.2704, -0.3982, -2.8700]])

In [21]:
x = x.numpy()
lst = []
for k in range(len(x)):
    lst.append(np.log( np.exp(x[k]) / np.exp(x[k]).sum()))
lst

[array([-2.195178  , -0.84504247, -1.013306  , -2.3423107 ], dtype=float32),
 array([-1.7806988 , -2.2704043 , -0.39822772, -2.8699784 ], dtype=float32)]

NLLLoss

In [22]:
x0 = torch.randn(3, 4)
x = nn.LogSoftmax(dim=1)(x0)
x0, x

(tensor([[ 1.5300, -0.2779,  1.6529, -0.2534],
         [ 2.1039, -2.4200,  0.1825,  1.0036],
         [ 0.9671, -1.3041, -0.3630, -1.0224]]),
 tensor([[-0.9013, -2.7092, -0.7784, -2.6847],
         [-0.3988, -4.9227, -2.3202, -1.4991],
         [-0.4084, -2.6796, -1.7385, -2.3978]]))

In [23]:
y = torch.LongTensor(3).random_(4)
y

tensor([3, 2, 2])

In [24]:
nn.NLLLoss()(x, y)

tensor(2.2478)

In [25]:
nn.NLLLoss(reduce=False)(x, y)

tensor([2.6847, 2.3202, 1.7385])

In [26]:
x = x.numpy()
y = y.numpy()

In [27]:
lst = []
for k in range(len(x)):
    lst.append(-x[k][y[k]])

lst, np.mean(lst)

([2.6846929, 2.3202393, 1.7384927], 2.2478085)

## PoissonNLLLoss

In [28]:
x = torch.randn(2, 4)
x

tensor([[ 1.9420, -1.4329, -0.5251, -0.2245],
        [ 1.9118, -0.1430,  1.2402, -0.4719]])

In [29]:
y = torch.randn(2, 4)
y

tensor([[-1.8771,  0.3971, -0.7158, -1.5648],
        [ 0.2422,  1.3819, -2.1111, -0.4077]])

In [30]:
nn.PoissonNLLLoss()(x, y)

tensor(3.2452)

In [31]:
nn.PoissonNLLLoss(reduce=False)(x, y)

tensor([[10.6183,  0.8076,  0.2156,  0.4476],
        [ 6.3018,  1.0644,  6.0746,  0.4314]])

In [32]:
x = x.numpy()
y = y.numpy()

In [33]:
# target∗log(target)−target+0.5∗log(2πtarget)
def sterling_approx(y):
    return y*np.log(y) - y + 0.5*np.log(np.pi*y)

In [34]:
lst = []
for k in range(len(x)):
    lsti = []
    for i in range(len(x[k])):
        lss = np.exp(x[k,i])-y[k,i]*x[k,i] + (sterling_approx(y[k,i]) if y[k,i]>1 else 0)
        lsti.append(lss)
    lst.append(lsti)

In [35]:
np.array(lst)

array([[10.61833382,  0.8075977 ,  0.21558091,  0.44755796],
       [ 6.30180693,  0.86356484,  6.07457256,  0.43143192]])

In [36]:
np.mean(lst)

3.2200558290941568

## KLDivLoss

In [37]:
x = torch.rand(2, 3)
y = torch.rand(2, 3)
x

tensor([[0.8982, 0.6808, 0.6775],
        [0.6976, 0.7954, 0.5107]])

In [38]:
xlog = torch.log(x)
xlog

tensor([[-0.1073, -0.3845, -0.3893],
        [-0.3601, -0.2289, -0.6719]])

In [39]:
y

tensor([[0.2037, 0.0688, 0.8870],
        [0.7722, 0.4348, 0.6336]])

In [40]:
nn.KLDivLoss()(xlog, y)



tensor(-0.0448)

In [41]:
nn.KLDivLoss(reduce=False)(xlog, y)

tensor([[-0.3022, -0.1577,  0.2390],
        [ 0.0784, -0.2626,  0.1365]])

In [42]:
x = x.numpy()
xlog = np.log(x)
y = y.numpy()

In [43]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        # xi is already log 
        lsti.append(y[i][j] * (np.log(y[i][j]) - xlog[i][j]))
    lst.append(lsti)
np.array(lst)

array([[-0.30224314, -0.15767011,  0.23896182],
       [ 0.07840499, -0.26258826,  0.13653126]], dtype=float32)

In [44]:
np.mean(lst)

-0.044767242

## BCELoss

Sigmoid

In [45]:
x = torch.randn(2, 4)
y = nn.Sigmoid()(x)
x

tensor([[ 1.4301, -0.8961, -0.7666,  1.9435],
        [-1.0342, -0.1959,  0.2314,  0.9925]])

In [46]:
y

tensor([[0.8069, 0.2899, 0.3172, 0.8747],
        [0.2623, 0.4512, 0.5576, 0.7296]])

In [47]:
x = x.numpy()

In [48]:
1 / (1 + np.exp(-x))

array([[0.8069127 , 0.28985325, 0.31721136, 0.8747403 ],
       [0.26226112, 0.45117465, 0.55759424, 0.7295732 ]], dtype=float32)

### single label

In [49]:
x0 = torch.randn(3)
x = nn.Sigmoid()(x0)
x

tensor([0.7001, 0.3054, 0.2547])

In [50]:
y = torch.FloatTensor(3).random_(2)
y

tensor([0., 0., 0.])

In [51]:
nn.BCELoss()(x, y)

tensor(0.6209)

In [52]:
nn.BCELoss(reduce=False)(x, y)

tensor([1.2044, 0.3645, 0.2940])

In [53]:
loss = nn.BCELoss(size_average=False)
lss = loss(x, y)
lss



tensor(1.8628)

In [54]:
x = x.numpy()
y = y.numpy()

In [55]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i]) if y[i]==1 else -np.log(1-x[i]))
lst, np.mean(lst)

([1.2043650400665098, 0.3644876386142225, 0.29395562812671666],
 0.6209361022691496)

Equivalently

In [56]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
lst, np.mean(lst)

([1.2043650400665098, 0.3644876386142225, 0.29395562812671666],
 0.6209361022691496)

### multilabel

In [57]:
x0 = torch.randn(3, 2)
x = nn.Sigmoid()(x0)
x

tensor([[0.7537, 0.2714],
        [0.1750, 0.2778],
        [0.5977, 0.1937]])

In [58]:
y = torch.FloatTensor(3, 2).random_(2)
y

tensor([[1., 1.],
        [1., 0.],
        [1., 1.]])

In [59]:
nn.BCELoss()(x, y)

tensor(0.9686)

In [60]:
nn.BCELoss(reduce=False)(x, y)

tensor([[0.2827, 1.3040],
        [1.7431, 0.3255],
        [0.5146, 1.6414]])

In [61]:
x = x.numpy()
y = y.numpy()

In [62]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        lsti.append(-np.log(x[i][j]) if y[i][j]==1 else -np.log(1-x[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

(array([[0.28274369, 1.30402422],
        [1.74308038, 0.32548741],
        [0.51464498, 1.64135098]]), 0.9685552778948606)

Equivalently

In [63]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
np.array(lst), np.mean(lst)

(array([[0.2827437 , 1.3040242 ],
        [1.7430804 , 0.32548746],
        [0.514645  , 1.641351  ]], dtype=float32), 0.9685553)

## BCEWithLogitsLoss

This is just simply adding a sigmoid in front of BCELoss above.

### single label

In [64]:
x = torch.randn(3)
x

tensor([-0.2167,  0.5878, -0.1317])

In [65]:
xs = nn.Sigmoid()(x)
xs

tensor([0.4460, 0.6429, 0.4671])

In [66]:
y = torch.FloatTensor(3).random_(2)
y

tensor([1., 1., 1.])

In [67]:
nn.BCELoss()(xs, y)

tensor(0.6701)

In [68]:
nn.BCEWithLogitsLoss()(x, y)

tensor(0.6701)

### multilabel

In [69]:
x = torch.randn(3, 2)
x

tensor([[ 0.9737, -0.5696],
        [-1.2862, -0.4452],
        [ 0.2491,  1.4940]])

In [70]:
xs = nn.Sigmoid()(x)
xs

tensor([[0.7258, 0.3613],
        [0.2165, 0.3905],
        [0.5619, 0.8167]])

In [71]:
y = torch.FloatTensor(3, 2).random_(2)
y

tensor([[0., 0.],
        [0., 1.],
        [0., 1.]])

In [72]:
nn.BCELoss()(xs, y)

tensor(0.6591)

In [73]:
nn.BCEWithLogitsLoss()(x, y)

tensor(0.6591)

## MarginRankingLoss

In [74]:
x1 = torch.randn(3)
x2 = torch.randn(3)
y = torch.FloatTensor(np.random.choice([1, -1], 3))

x1, x2, y

(tensor([-1.5859,  2.4323, -0.7773]),
 tensor([0.6635, 0.0349, 1.0758]),
 tensor([-1.,  1., -1.]))

In [75]:
nn.MarginRankingLoss(margin=0.1)(x1, x2, y)

tensor(0.)

In [76]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [77]:
lst = []
for i in range(len(x1)):
    lst.append(max(0, -y[i]*(x1[i]-x2[i]) + margin))

lst, np.mean(lst)

([0, 0, 0], 0.0)

## HingeEmbeddingLoss

In [78]:
x = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 3)))

x

tensor([[ 0.7793, -1.8066,  0.6025],
        [ 1.1728,  0.4498, -3.3290]])

In [79]:
y

tensor([[-1., -1., -1.],
        [ 1., -1.,  1.]])

In [80]:
nn.HingeEmbeddingLoss(margin=1)(x, y)

tensor(0.3031)

In [81]:
x = x.numpy()
y = y.numpy()
margin=1

In [82]:
lst=[]

for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        if y[i][j]==1:
            lsti.append(x[i][j])
        else:
            lsti.append(max(0, margin-x[i][j]))
    lst.append(lsti)
np.array(lst)

array([[ 0.22066861,  2.80661166,  0.39751905],
       [ 1.17283928,  0.55023506, -3.3289938 ]])

In [83]:
np.mean(lst)

0.3031466454267502

## MultiLabelMarginLoss

This is a very confusing class. Great reference here: https://blog.csdn.net/zhangxb35/article/details/72464152

### one-sample example

In [84]:
x = torch.randn(1, 4)
y = torch.LongTensor(1, 4).random_(-1, 4)
x

tensor([[ 0.2384,  0.6574, -0.5870,  0.1878]])

In [85]:
y

tensor([[ 3, -1,  2,  3]])

In [86]:
nn.MultiLabelMarginLoss()(x, y)

tensor(0.6863)

In [87]:
x = x.numpy()
y = y.numpy()

In [88]:
lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                print(i, j)
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

0 3
1 3
2 3


In [89]:
lst, np.mean(lst)

([0.6863219626247883], 0.6863219626247883)

### multi-sample example

In [90]:
x = torch.randn(3, 4)
y = torch.LongTensor(3, 4).random_(-1, 4)
x

tensor([[ 1.0659,  0.2566,  2.1796,  0.9650],
        [-0.3558,  0.7200,  1.0814, -0.6410],
        [ 0.4595,  0.7589, -0.3233, -0.2871]])

In [91]:
y

tensor([[ 3,  0, -1,  3],
        [ 1, -1,  1,  1],
        [ 3,  2,  2,  1]])

In [92]:
nn.MultiLabelMarginLoss()(x, y)

tensor(1.0154)

In [93]:
x = x.numpy()
y = y.numpy()

lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([1.2026643753051758, 0.34035441279411316, 1.5032338127493858],
 1.015417533616225)

## SmoothL1Loss

In [94]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)

In [95]:
nn.SmoothL1Loss()(x, y)

tensor(0.9884)

In [96]:
nn.SmoothL1Loss(reduce=False)(x, y)

tensor([[2.3024, 0.9529, 0.2130],
        [1.6443, 0.6012, 0.2163]])

In [97]:
x = x.numpy() 
y = y.numpy()

In [98]:
def smoothl1loss(x, y):
    if abs(x-y)<1: return 1/2*(x-y)**2
    else: return abs(x-y)-1/2

In [99]:
lst = []
for i in range(len(x)):
    lsti=[]
    for j in range(len(x[i])):
        lsti.append(smoothl1loss(x[i][j], y[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

(array([[2.30235314, 0.95294225, 0.21304643],
        [1.64431953, 0.60117257, 0.21632228]]), 0.9883593675831248)

## SoftMarginLoss

In [100]:
x = torch.randn(2, 4)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 4)))
x

tensor([[-0.1455, -1.1173,  1.3942,  0.7111],
        [ 0.9686, -0.5296,  0.6981,  0.7836]])

In [101]:
y

tensor([[-1.,  1.,  1.,  1.],
        [-1.,  1., -1., -1.]])

In [102]:
nn.SoftMarginLoss()(x, y)

tensor(0.8987)

In [103]:
x = x.numpy()
y = y.numpy()

In [104]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm += np.log(1 + np.exp(-y[k][i]*x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([0.6611191283374047, 1.1361835376728326], 0.8986513330051187)

## MultiLabelSoftMarginLoss

In [105]:
x = torch.randn(2, 4)
y = torch.FloatTensor(2, 4).random_(2)
x

tensor([[ 0.9292, -0.6671,  1.3307,  0.0231],
        [ 1.5807,  0.3279, -0.0086, -0.2144]])

In [106]:
y

tensor([[0., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [107]:
nn.MultiLabelSoftMarginLoss()(x, y)

tensor(0.6866)

In [108]:
x = x.numpy()
y = y.numpy()

In [109]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm -= y[k, i]*np.log(np.exp(x[k, i])/(1+np.exp(x[k, i]))) +\
            (1-y[k, i])*np.log(1/(1+np.exp(x[k, i])))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([0.8148622548392127, 0.5583164320116256], 0.6865893434254191)

## CosineEmbeddingLoss

In [110]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([1, -1], 2))

x1

tensor([[ 1.2401,  0.9896, -0.3159],
        [ 0.7875,  1.0491, -0.0405]])

In [111]:
x2

tensor([[-0.5981,  0.0308,  0.3139],
        [-0.4064, -1.7569, -1.0439]])

In [112]:
y

tensor([1., 1.])

In [113]:
nn.CosineEmbeddingLoss(margin=0.1)(x1, x2, y)

tensor(1.7582)

In [114]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [115]:
from scipy.spatial.distance import cosine

def cos(x, y): return 1-cosine(x, y)

In [116]:
lst = []
for k in range(len(x1)):
    if y[k] == 1: lst.append(1-cos(x1[k], x2[k]))
    elif y[k] == -1: lst.append(max(0, cos(x1[k], x2[k])-margin))
lst, np.mean(lst)

([1.7408190965652466, 1.775598406791687], 1.7582087516784668)

## MultiMarginLoss

In [117]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
x

tensor([[-0.7925,  0.9584, -0.7623,  0.9310],
        [-0.9939, -0.4930, -0.5300, -0.2276]])

In [118]:
y

tensor([3, 0])

In [119]:
nn.MultiMarginLoss(margin=0.9, p=2)(x, y)

tensor(0.9325)

In [120]:
x = x.numpy()
y = y.numpy()
p=2
margin=0.9

In [121]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        if i!= y[k]:
            sm += max(0, (margin - x[k, y[k]] + x[k, i])**p)
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([0.5418451297566412, 1.649880607219443], 1.095862868488042)

## TripletMarginLoss

In [122]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
x3 = torch.randn(2, 3)
margin = 0.9
p = 2

x1

tensor([[ 0.0954,  0.2399, -0.7222],
        [ 0.5554,  0.4132,  0.1144]])

In [123]:
nn.TripletMarginLoss(margin=margin, p=p)(x1, x2, x3)

tensor(2.6493)

In [124]:
x1 = x1.numpy()
x2 = x2.numpy()
x3 = x3.numpy()

In [125]:
def d(x1, x2, p):
    return sum((x1-x2)**p)**(1/p)

In [126]:
lst = []
for k in range(len(x1)):
    sm = 0
    for i in range(len(x1[k])):
        sm += max(d(x1[k], x2[k], p)-d(x1[k], x3[k], p)+margin, 0) 
    lst.append(sm/len(x1[k]))

lst, np.mean(lst)

([3.1739143609887397, 2.124598913333924], 2.649256637161332)

## References

- https://pytorch.org/docs/0.4.0/nn.html#loss-functions
- https://blog.csdn.net/zhangxb35/article/details/72464152