# 소프트맥스 회귀의 비용 함수 구현하기(로우-레벨)

In [1]:
import torch
import torch.nn.functional as F

In [2]:
# Fix the global RNG seed so the random tensors created in later cells are reproducible.
torch.manual_seed(1)

<torch._C.Generator at 0x113ab4df0>

In [3]:
# Softmax over a 1-D tensor of logits (dim=0 is its only axis).
# torch.tensor with an explicit dtype replaces the legacy type-specific
# torch.FloatTensor constructor and yields the same float32 values.
z = torch.tensor([1, 2, 3], dtype=torch.float32)
hypothesis = torch.softmax(z, dim=0)
print(hypothesis)

tensor([0.0900, 0.2447, 0.6652])


In [4]:
# The softmax outputs add up to 1.
hypothesis.sum()

tensor(1.)

In [6]:
# Create a random matrix of shape (3, 5) that tracks gradients.
z = torch.rand((3, 5), requires_grad=True)
print(z)

tensor([[0.4550, 0.5725, 0.4980, 0.9371, 0.6556],
        [0.3138, 0.1980, 0.4162, 0.2843, 0.3398],
        [0.5239, 0.7981, 0.7718, 0.0112, 0.8100]], requires_grad=True)


In [14]:
# Softmax along dim=1 normalizes each row, so every row sums to 1.
hypothesis = F.softmax(z, dim=1)
print(hypothesis)
row_totals = torch.sum(hypothesis, dim=1)
print(row_totals)

tensor([[0.1664, 0.1871, 0.1737, 0.2695, 0.2033],
        [0.2002, 0.1783, 0.2218, 0.1944, 0.2054],
        [0.1809, 0.2380, 0.2318, 0.1084, 0.2409]], grad_fn=<SoftmaxBackward>)
tensor([1.0000, 1.0000, 1.0000], grad_fn=<SumBackward1>)


In [15]:
# Draw one class label per sample for 3 samples. torch.randint(5, ...) samples
# integers from 0 to 4 (the upper bound 5 is exclusive), i.e. 5 classes.
# torch.randint already returns int64, so no extra .long() cast is needed.
y = torch.randint(5, (3,))
print(y)

tensor([3, 1, 2])


In [21]:
# One-hot encode every sample's label: start from zeros shaped like
# hypothesis, then write a 1 into each row at the column given by its label.
y_one_hot = torch.zeros_like(hypothesis)
label_columns = y.unsqueeze(1)  # (3,) -> (3, 1), one column index per row
y_one_hot.scatter_(dim=1, index=label_columns, value=1)
print(y_one_hot)

tensor([[0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.]])


In [23]:
# Cross-entropy by hand: the one-hot mask keeps only the negative log
# probability of each sample's true class; average those over the samples.
neg_log_prob = -torch.log(hypothesis)
per_sample_loss = (y_one_hot * neg_log_prob).sum(dim=1)
cost = per_sample_loss.mean()
print(cost)

tensor(1.4992, grad_fn=<MeanBackward0>)


# 소프트맥스 회귀의 비용 함수 구현하기(하이-레벨)

In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [35]:
# Fix the global RNG seed again so the random tensor created in the next cell is reproducible.
torch.manual_seed(1)

<torch._C.Generator at 0x113ab4df0>

In [36]:
# Create a random matrix of shape (3, 5) that tracks gradients.
z = torch.rand((3, 5), requires_grad=True)
print(z)

tensor([[0.7576, 0.2793, 0.4031, 0.7347, 0.0293],
        [0.7999, 0.3971, 0.7544, 0.5695, 0.4388],
        [0.6387, 0.5247, 0.6826, 0.3051, 0.4635]], requires_grad=True)


In [38]:
# Applying torch.log to the output of F.softmax gives the same values as
# calling F.log_softmax directly.
softmax_then_log = torch.log(F.softmax(z, dim=1))
print(softmax_then_log)
print(F.log_softmax(z, dim=1))

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]], grad_fn=<LogBackward>)
tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]],
       grad_fn=<LogSoftmaxBackward>)


In [40]:
# y_one_hot is not defined in this section; this prints whatever is already
# bound to that name (presumably the one-hot labels built in the low-level
# section, whose printed values match).
print(y_one_hot)

tensor([[0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.]])


In [42]:
# F.cross_entropy is F.log_softmax followed by F.nll_loss, so the three
# costs below are the same quantity computed three ways.
log_prob = F.log_softmax(z, dim=1)
cost1 = (y_one_hot * -log_prob).sum(dim=1).mean()  # manual one-hot masking
cost2 = F.nll_loss(log_prob, y)                    # log-probabilities + class indices
cost3 = F.cross_entropy(z, y)                      # raw logits + class indices

for label, value in (("cost1 : ", cost1), ("cost2 : ", cost2), ("cost3 : ", cost3)):
    print(label, value)

cost1 :  tensor(1.5430, grad_fn=<MeanBackward0>)
cost2 :  tensor(1.5430, grad_fn=<NllLossBackward>)
cost3 :  tensor(1.5430, grad_fn=<NllLossBackward>)
