In [1]:
from math import e, log
import numpy as np
import torch

  from .autonotebook import tqdm as notebook_tqdm


### CE and BCE
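Common arguments:
+ `reduction`: `'mean'` (default) and `'sum'` return a scalar `()` tensor; `'none'` returns the unreduced losses (shape `(B,)` for CE on `(B, C)` logits)
+ `weight`: manual rescaling weight. For `CrossEntropyLoss` it is the weight of each class; for `BCEWithLogitsLoss` it rescales the loss of each batch element (use `pos_weight` there for per-class weighting of positives)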

In [8]:
# CE: `pd` holds raw logits, here with shape (N, C) = (2, 2).
# CrossEntropyLoss also accepts unbatched (C,) and (N, C, d1, ...) logits.
pd = torch.tensor([[1,2], [3,6]], dtype=torch.float32)
gt1 = torch.tensor([0,1], dtype=torch.long)               # class indices, shape (N,)
gt2 = torch.tensor([[1,0], [0,1]], dtype=torch.float32)   # class probabilities (one-hot here), shape (N, C)
print(torch.nn.CrossEntropyLoss()(pd, gt1))
print(torch.nn.CrossEntropyLoss()(pd, gt2))

print('---math below---')
# Row i is softmax(pd[i]) written out by hand.
softmax_prob = np.array([
    np.array([e**1, e**2]) / (e**1+e**2),
    np.array([e**3, e**6]) / (e**3+e**6),
])
# The default reduction='mean' averages the per-sample losses over the batch,
# so the 0.5 factor is 1/N for N=2 samples. It only coincides with 1/classes
# here because this example also has 2 classes.
batch_weight = 1 / pd.shape[0]
# Sample 0 has target class 0 and sample 1 has target class 1 (see gt1 / gt2).
manual_ce = -log(softmax_prob[0][0])*batch_weight - log(softmax_prob[1][1])*batch_weight
print( manual_ce )

# Fail loudly if the hand-written math ever drifts from what torch computes.
assert abs(manual_ce - torch.nn.CrossEntropyLoss()(pd, gt1).item()) < 1e-5
assert abs(manual_ce - torch.nn.CrossEntropyLoss()(pd, gt2).item()) < 1e-5

tensor(0.6809)
tensor(0.6809)
---math below---
0.6809245195459824


In [40]:
# BCE with logits: the loss is taken element by element and, by default,
# averaged over all elements, so the 1-d and 2-d inputs below give the same value.
# The target must be a float tensor with the same shape as the logits.
pd1 = torch.tensor([-1, 2], dtype=torch.float32)
gt1 = torch.tensor([0, 1], dtype=torch.float32)
pd2 = torch.tensor([[-1, 2]], dtype=torch.float32)
gt2 = torch.tensor([[0, 1]], dtype=torch.float32)
bce_loss = torch.nn.BCEWithLogitsLoss()
for logits, target in ((pd1, gt1), (pd2, gt2)):
    print(bce_loss(logits, target))

print('---math below---')
# sigmoid(x) = 1 / (1 + e**-x), evaluated at the two logits -1 and 2.
sigmoid_prob = np.array([1 / (1 + e**-x) for x in (-1, 2)])
p_first, p_second = sigmoid_prob
# Target 0 contributes -log(1 - p), target 1 contributes -log(p);
# the 0.5 factor averages over the two elements.
print(-(log(1 - p_first) + log(p_second)) * 0.5)

tensor(0.2201)
tensor(0.2201)
---math below---
0.22009484928059772


In [46]:
# CE with soft labels: the target is a probability distribution over the
# classes rather than a single class index.
ce_loss = torch.nn.CrossEntropyLoss()
pd = torch.tensor([[1, 2]], dtype=torch.float32)
gt = torch.tensor([[0.2, 0.8]], dtype=torch.float32)
print(ce_loss(pd, gt))

print('---math below---')
# softmax of the single logit row [1, 2], kept 2-d to mirror `pd`.
exp_logits = np.array([e**1, e**2])
softmax_prob = np.array([exp_logits / (e**1 + e**2)])
p_class0, p_class1 = softmax_prob[0]
# Each class contributes -target_probability * log(softmax probability).
print(-(0.2 * log(p_class0) + 0.8 * log(p_class1)))

tensor(0.5133)
---math below---
0.513261687518223


In [54]:
# BCE with a soft label: the target is a probability (0.8) instead of a hard 0/1.
bce_loss = torch.nn.BCEWithLogitsLoss()
pd = torch.tensor([1], dtype=torch.float32)
gt = torch.tensor([0.8], dtype=torch.float32)
print(bce_loss(pd, gt))

print('---math below---')
# sigmoid(1) = 1 / (1 + e**-1) for the single logit.
sigmoid_prob = np.array([1 / (1 + e**-1)])
p_pos = sigmoid_prob[0]
# The target 0.8 weights -log(p); the remaining 0.2 weights -log(1 - p).
print(-(log(p_pos) * 0.8 + log(1 - p_pos) * 0.2))

tensor(0.5133)
---math below---
0.5132616875182228


### Pretrained weights

In [15]:
from torchvision.models import resnet18
import torch

# NOTE(review): T is not used anywhere in this cell. It is kept because drawing it
# consumes torch RNG state before the models are built, and cells outside this
# view may rely on it -- confirm before removing.
T = torch.randn(1,3,224,224)

# Build resnet18 three ways and print the first 10 values of its first parameter
# so the initial weights can be compared:
#   {}                           -> resnet18()
#   {'weights': None}            -> resnet18(weights=None)
#   {'weights': 'IMAGENET1K_V1'} -> resnet18(weights='IMAGENET1K_V1')
# Per the torchvision docs, weights=None is the default and loads no pretrained
# weights, so only the last variant is expected to print the same values every run.
for weights_kwargs in ({}, {'weights': None}, {'weights': 'IMAGENET1K_V1'}):
    model = resnet18(**weights_kwargs)
    # Only the first named parameter is inspected.
    name, param = next(iter(model.named_parameters()))
    print(f"Layer: {name}, Size: {param.size()}, Values: {param.data.reshape(-1)[:10]}")

Layer: conv1.weight, Size: torch.Size([64, 3, 7, 7]), Values: tensor([ 0.0105,  0.0165, -0.0612,  0.0307,  0.0062,  0.0787, -0.0269,  0.0174,
         0.0621,  0.0029])
Layer: conv1.weight, Size: torch.Size([64, 3, 7, 7]), Values: tensor([-0.0079, -0.0252,  0.0107, -0.0206, -0.0586,  0.0011,  0.0236,  0.0040,
         0.0080,  0.0305])
Layer: conv1.weight, Size: torch.Size([64, 3, 7, 7]), Values: tensor([-0.0104, -0.0061, -0.0018,  0.0748,  0.0566,  0.0171, -0.0127,  0.0111,
         0.0095, -0.1099])
