<a href="https://colab.research.google.com/github/jhlee508/sparta-pytorch/blob/master/week2/week2_sparta_pytorch_2_SoftmaxRegression_LossFunction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Low-Level 구현

In [1]:
import torch
import torch.nn.functional as F

In [2]:
# Pin the global RNG seed so the random tensors drawn later are repeatable on a fresh run.
torch.manual_seed(seed=1)

<torch._C.Generator at 0x7f9e300e6ad0>

In [12]:
# Three raw scores (logits) for a single sample.
# torch.tensor with an explicit dtype is the recommended way to build a tensor from data;
# it yields the same float32 values as the legacy torch.FloatTensor constructor.
z = torch.tensor([1, 2, 3], dtype=torch.float32)

In [13]:
# z holds three scores in a single axis, so dim=0 normalises them together
# into one probability distribution.
hypothesis = F.softmax(input=z, dim=0)
print(hypothesis)

tensor([0.0900, 0.2447, 0.6652])


In [14]:
# Sanity check: the softmax outputs add up to 1.
torch.sum(hypothesis)

tensor(1.)

In [15]:
# Random logits for 3 samples x 5 classes, sampled uniformly from [0, 1).
# requires_grad=True makes autograd track every operation applied to z.
z = torch.rand(size=(3, 5), requires_grad=True)
print(z)

tensor([[0.7570, 0.2346, 0.6471, 0.3556, 0.4452],
        [0.0193, 0.2616, 0.7713, 0.3785, 0.9980],
        [0.9008, 0.4766, 0.1663, 0.8045, 0.6552]], requires_grad=True)


In [16]:
# dim=1 applies softmax independently to each row, so every row
# (one sample's 5 class scores) becomes a distribution that sums to 1.
hypothesis = F.softmax(input=z, dim=1)
print(hypothesis)

tensor([[0.2570, 0.1524, 0.2303, 0.1721, 0.1882],
        [0.1178, 0.1501, 0.2499, 0.1687, 0.3135],
        [0.2615, 0.1711, 0.1254, 0.2375, 0.2045]], grad_fn=<SoftmaxBackward0>)


In [17]:
# One random class index in [0, 5) per sample (3 samples).
# torch.randint returns int64 by default, so no extra .long() cast is needed.
y = torch.randint(5, (3,))
print(y)

tensor([1, 4, 4])


In [22]:
# Insert a new axis at position 1: the label vector of shape (3,) becomes a (3, 1) column matrix.
print(torch.unsqueeze(y, dim=1))

tensor([[1],
        [4],
        [4]])


In [23]:
# Build the one-hot targets: start from an all-zero tensor shaped like `hypothesis`,
# then, along dim 1, write a 1 into the column given by each row's label.
# scatter_ works in place and returns the same tensor, so the calls can be chained.
y_one_hot = torch.zeros_like(hypothesis).scatter_(dim=1, index=y.unsqueeze(1), value=1)
print(y_one_hot)

tensor([[0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.]])


In [24]:
# Cross-entropy by hand: the one-hot mask keeps only -log(p) of the true class,
# the sum over dim 1 collapses each row to that single term, and the mean averages over samples.
cost = torch.mean(torch.sum(y_one_hot * -torch.log(hypothesis), dim=1))
print(cost)

tensor(1.5427, grad_fn=<MeanBackward0>)


# 2. High-Level 구현

In [25]:
import torch
import torch.nn.functional as F
import torch.nn as nn

In [26]:
# Reset the global RNG to seed 1 so the random draw that follows in this section
# is repeatable regardless of how many random numbers were consumed before it.
torch.manual_seed(seed=1)

<torch._C.Generator at 0x7f9e300e6ad0>

## 2-1. F.softmax() + torch.log() = F.log_softmax()

In [27]:
# Low level: draw fresh logits (3 samples x 5 classes), then take softmax and log as two separate steps.
z = torch.rand(size=(3, 5), requires_grad=True)
F.softmax(z, dim=1).log()

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]], grad_fn=<LogBackward0>)

In [28]:
# High level: F.log_softmax performs softmax followed by log in a single call.
F.log_softmax(input=z, dim=1)

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]],
       grad_fn=<LogSoftmaxBackward0>)

## 2-2. F.log_softmax() + F.nll_loss() = F.cross_entropy()

In [29]:
# Low level
# Formula 1: explicit softmax -> log -> one-hot mask -> per-sample sum -> batch mean.
torch.mean(torch.sum(y_one_hot * -torch.log(F.softmax(z, dim=1)), dim=1))

tensor(1.7539, grad_fn=<MeanBackward0>)

In [30]:
# Formula 2: same as formula 1, with log(softmax(.)) replaced by F.log_softmax.
torch.mean(torch.sum(y_one_hot * -F.log_softmax(z, dim=1), dim=1))

tensor(1.7539, grad_fn=<MeanBackward0>)

In [32]:
# Formula 3: F.nll_loss takes the class indices `y` directly, so the one-hot mask,
# the per-sample sum and the batch mean are all handled inside the call.
F.nll_loss(input=F.log_softmax(z, dim=1), target=y)

tensor(1.7539, grad_fn=<NllLossBackward0>)

In [37]:
# High level
# Formula 4 (function): F.cross_entropy takes the raw logits and applies
# log_softmax + nll_loss internally.
F.cross_entropy(input=z, target=y)

tensor(1.7539, grad_fn=<NllLossBackward0>)

In [38]:
# High level
# Formula 5 (class): the nn.Module counterpart of F.cross_entropy.
# reduction="mean" is the default, written out here to make the batch averaging visible.
nn.CrossEntropyLoss(reduction="mean")(z, y)

tensor(1.7539, grad_fn=<NllLossBackward0>)