In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

## Binary Cross Entropy Loss

In [2]:
# logits: raw, unnormalized scores, standing in for the output of a model.
# Seed the global RNG first so this draw (and the later random draws, when the
# cells are run in order) is reproducible under Restart Kernel -> Run All.
torch.manual_seed(0)
logits = torch.randn(10)
logits

tensor([-0.1090, -0.8097,  0.6572, -0.7774,  0.0305,  0.5466, -1.8099, -1.6507,
        -0.1379, -1.3062])

In [3]:
# map each logit to a probability with the sigmoid function
y = logits.sigmoid()
y

tensor([0.4728, 0.3079, 0.6586, 0.3149, 0.5076, 0.6333, 0.1406, 0.1610, 0.4656,
        0.2131])

In [4]:
# ground-truth labels: ten random values from {0, 1}, stored as floats
target = torch.randint(low=0, high=2, size=(10,), dtype=torch.float32)
target

tensor([0., 0., 0., 0., 1., 1., 0., 0., 0., 1.])

In [5]:
# functional binary cross-entropy on probabilities, averaged over all elements
F.binary_cross_entropy(input=y, target=target, reduction="mean")

tensor(0.6096)

In [6]:
# same loss computed directly from the raw logits (no separate sigmoid step)
F.binary_cross_entropy_with_logits(input=logits, target=target, reduction="mean")

tensor(0.6096)

In [7]:
# module-style binary cross-entropy, evaluated on the probabilities
nn.BCELoss(reduction="mean")(input=y, target=target)

tensor(0.6096)

In [8]:
# module-style binary cross-entropy that takes the raw logits
nn.BCEWithLogitsLoss(reduction="mean")(input=logits, target=target)

tensor(0.6096)

In [9]:
# Binary cross-entropy written out by hand:
#   loss = mean( -t * log(p) - (1 - t) * log(1 - p) )
# where `target` holds the 0/1 labels t and `y` the predicted probabilities p.
# Computed on the whole tensor at once instead of a per-element Python loop.
# The log terms are clamped at -100, as nn.BCELoss does, so a saturated
# probability (exactly 0 or 1) gives a large finite loss instead of inf/nan.
log_p = torch.clamp(torch.log(y), min=-100)
log_not_p = torch.clamp(torch.log(1 - y), min=-100)
loss = (-target * log_p - (1 - target) * log_not_p).mean()

In [10]:
# display the hand-computed binary cross-entropy
loss

tensor(0.6096)

## Categorical Cross Entropy Loss

In [11]:
# raw scores (logits): 10 rows, one per sample, with 5 columns, one per class
X = torch.randn(10, 5)
X

tensor([[ 0.5019,  0.0814,  0.6212,  0.6151,  0.1291],
        [ 0.1274,  0.8079, -1.2378,  1.7719, -0.8113],
        [ 0.6915,  1.3460, -0.4858, -0.3273, -0.4048],
        [-0.0376,  1.2261, -1.4496,  0.2873,  0.1005],
        [ 1.3861, -1.2449,  0.1062, -0.1544,  0.1906],
        [ 0.1728, -0.2751, -0.4401,  0.9695,  0.1078],
        [ 0.9502,  0.6799, -0.8475,  0.1249,  0.2014],
        [ 0.6389, -1.4215,  0.9650, -0.1469,  1.3086],
        [-0.4051,  1.1336,  0.0477,  0.8645,  1.2928],
        [ 0.3530, -1.9006, -1.0765, -0.9703,  0.1882]])


In [12]:
# one random integer class label in [0, 5) for each of the 10 rows
y = torch.randint(low=0, high=5, size=(10,))
y

tensor([1, 4, 3, 2, 0, 3, 1, 1, 3, 4])

In [13]:
# CrossEntropyLoss takes raw logits as input and applies the softmax itself
nn.CrossEntropyLoss(reduction="mean")(input=X, target=y)

tensor(1.9900)

In [14]:
# functional form of the categorical cross-entropy on the same logits and labels
F.cross_entropy(input=X, target=y, reduction="mean")

tensor(1.9900)

In [15]:
# Categorical cross-entropy written out by hand: the mean over samples of
#   -log(softmax(row)[true_class]).
# The loop variables have their own names so the `logits` and `target` tensors
# defined in the binary cross-entropy cells are not overwritten.
loss = 0
for row_logits, class_idx in zip(X, y):
    # subtracting the row maximum avoids overflow in exp(); this is the
    # "stable softmax" trick and leaves the softmax output unchanged
    shifted = row_logits - row_logits.max()
    # log_softmax is numerically more stable than log(softmax(...))
    loss += -F.log_softmax(shifted, dim=0)[class_idx]
loss = loss / len(y)

In [16]:
# display the hand-computed categorical cross-entropy
loss

tensor(1.9900)