# 손실함수
## 출력값을 확률로 표현하기 
- sigmoid 함수 사용 (→ 이진 확률 벡터 → BCE 손실함수 적용)

In [1]:
import torch
import torch.nn as nn

# Fix the global RNG seed so that the randomly initialised layer weights
# (and therefore the printed network output) are reproducible.
torch.manual_seed(70)

class Network(nn.Module):
    """Two-layer fully connected network with sigmoid activations.

    The input goes through ``linear_ih`` (input -> hidden) and then
    ``linear_ho`` (hidden -> output). The same sigmoid module is applied
    after each linear layer, so the final output can be read as a
    probability.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the two linear layers and the shared sigmoid.

        Args:
            input_size: number of input features.
            hidden_size: number of units in the hidden layer.
            output_size: number of output units.
        """
        super().__init__()

        # The input->hidden layer is created first, then hidden->output:
        # the creation order determines which random numbers each layer
        # consumes during weight initialisation.
        self.linear_ih = nn.Linear(input_size, hidden_size)
        self.linear_ho = nn.Linear(hidden_size, output_size)

        # Stateless, so one instance is reused for both layers.
        self.activation_layer = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid(linear_ho(sigmoid(linear_ih(x))))."""
        hidden = self.activation_layer(self.linear_ih(x))
        return self.activation_layer(self.linear_ho(hidden))
    
# One sample with two features (a batch of size 1).
x = torch.Tensor([[0,1]])

# 2 inputs -> 2 hidden units -> 1 output; the weights are randomly initialised.
net = Network(input_size=2, hidden_size=2, output_size=1)
# The network ends with a sigmoid, so y can be interpreted as a probability.
y = net(x)
# .item() extracts the single element of y as a plain Python number.
print(y.item())

0.42035338282585144


## 확률론적 접근
### 엔트로피

In [2]:
# Two discrete distributions over the outcomes of a coin flip (heads, tails).
p = torch.Tensor([0.4, 0.6])  # biased coin: 40% heads, 60% tails
q = torch.Tensor([0.0, 1.0])  # deterministic coin: always tails

def self_information(x):
    """Return the element-wise self-information ``-log(x)`` (natural log)."""
    return torch.neg(torch.log(x))

def entropy(x):
    """Return the entropy ``sum(x * -log(x))`` of the distribution ``x``.

    ``x`` is a tensor of probabilities. The result is a scalar tensor in
    nats, since the natural logarithm is used.
    """
    # log(0) is -inf and 0 * -inf is NaN, so shift every probability by a
    # tiny epsilon before taking the logarithm.
    eps = 1e-30
    shifted = x + eps
    return (shifted * self_information(shifted)).sum()

# Coin that lands heads 40% and tails 60% of the time.
print(entropy(p).numpy().round(4))
# Deterministic coin that always lands tails: no uncertainty, so the
# entropy is 0.
print(entropy(q).numpy().round(4))

0.673
0.0


In [6]:
# torch.tensor infers the dtype from the data: integer input gives an
# integer tensor, so the sum is the integer tensor(6).
torch.sum(torch.tensor([[1,2,3]]))

tensor(6)

In [7]:
# torch.Tensor builds a tensor of the default floating-point type, so the
# same data sums to the float tensor(6.).
torch.sum(torch.Tensor([[1,2,3]]))

tensor(6.)

In [9]:
# torch.log is the element-wise natural logarithm: ln(0.3) is about -1.2040.
torch.log(torch.tensor([0.3]))

tensor([-1.2040])

## KL-divergence

In [10]:
def KL_divergence(q,p):
    """Return the KL divergence of the prediction ``q`` from the target ``p``.

    Computes ``sum(p * log(p) - p * log(q))`` with a tiny epsilon added to
    both distributions so that zero probabilities do not produce
    ``log(0)``.

    Args:
        q: predicted probabilities.
        p: target probabilities.

    Returns:
        A scalar tensor (natural-log units).
    """
    eps = 1e-30
    target = p + eps
    predicted = q + eps
    return (target * torch.log(target) - target * torch.log(predicted)).sum()

# Uniform distribution (fair coin).
u = torch.Tensor([0.5, 0.5])
# The arguments are positional (prediction first, target second), so the
# globals p and q are passed as the *prediction* and u as the target.
print(KL_divergence(p,u))
# q assigns probability 0 to the first outcome; only the 1e-30 epsilon inside
# KL_divergence keeps the result finite, hence the large value.
print(KL_divergence(q,u))

tensor(0.0204)
tensor(33.8456)


In [11]:
# nn.KLDivLoss takes the *log* of the predicted distribution as its first
# argument and the target probabilities as its second.
# reduction='sum' adds up the element-wise terms into one scalar.
loss_function = nn.KLDivLoss(reduction='sum')
# Same epsilon as in KL_divergence, so that log(0) is avoided.
e = 1e-30

# Same quantities as KL_divergence(p, u) and KL_divergence(q, u).
print(loss_function(torch.log(p+e), u+e))
print(loss_function(torch.log(q+e), u+e))

tensor(0.0204)
tensor(33.8456)


In [13]:
# Re-seed so the network gets the same initial weights as in the first cell.
torch.manual_seed(70)

x = torch.Tensor([[0,1]])
# Target class label (class 1), stored as a float tensor.
t = torch.Tensor([1])

net = Network(2,2,1)
y = net(x)

# Rows of the 2x2 identity matrix are the one-hot vectors of the two classes.
one_hot = torch.eye(2)
# tensor([[1., 0.],
#         [0., 1.]])
prob_t = one_hot.index_select(dim=0, index=t.long())
# t.long(): converts the float tensor t into an integer (int64) tensor.
# .index_select(): picks rows (dim=0) by index, here the row at index 1.
prob_y = torch.cat([1-y, y], dim=1)
# y: tensor([[0.4204]], grad_fn=<SigmoidBackward0>)
# 1-y: tensor([[0.5796]], grad_fn=<RsubBackward1>)
# cat with dim=1 concatenates side by side (along columns), giving the
# two-class distribution [P(class 0), P(class 1)].

print(prob_t) # tensor([[0., 1.]])
print(prob_y) # tensor([[0.5796, 0.4204]], grad_fn=<CatBackward0>)

# KL divergence between the one-hot target and the predicted distribution.
loss_function = nn.KLDivLoss(reduction='sum')
print(loss_function(torch.log(prob_y), prob_t))
# tensor(0.8667, grad_fn=<SumBackward0>)

tensor([[0., 1.]])
tensor([[0.5796, 0.4204]], grad_fn=<CatBackward0>)
tensor(0.8667, grad_fn=<SumBackward0>)


In [19]:
1-y

tensor([[0.5796]], grad_fn=<RsubBackward1>)

In [14]:
one_hot

tensor([[1., 0.],
        [0., 1.]])

In [15]:
torch.eye(5)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [16]:
t.long()

tensor([1])

In [17]:
t

tensor([1.])

In [18]:
y

tensor([[0.4204]], grad_fn=<SigmoidBackward0>)

## BCE Loss: 이진크로스엔트로피 손실

In [20]:
# Re-seed so the network gets the same initial weights as in the earlier cells.
torch.manual_seed(70)

x = torch.Tensor([[0,1]])
# Target label (class 1) as a float tensor; BCELoss is given float targets.
t = torch.Tensor([1])

net = Network(2,2,1)
y = net(x)

# Binary cross entropy on the sigmoid output; reduction='sum' adds up the
# per-element losses.
loss_function = nn.BCELoss(reduction='sum')
# y has one row and one column; squeeze(1) drops the second dimension so the
# prediction has the same shape as t.
print(loss_function(y.squeeze(1), t))

tensor(0.8667, grad_fn=<BinaryCrossEntropyBackward0>)


In [29]:
# BCELoss with its default reduction (mean over all elements).
bce_loss = nn.BCELoss()
sigmoid = nn.Sigmoid()

# Random scores for 4 samples, squashed by the sigmoid into probabilities.
probabilities = sigmoid(torch.randn(4,1,requires_grad=True))
# Float 0/1 labels reshaped to (4, 1) to match the shape of probabilities.
targets = torch.tensor([1,0,1,0], dtype=torch.float32).view(4,1)
loss = bce_loss(probabilities, targets)

print(probabilities)
print(loss)

tensor([[0.8104],
        [0.5761],
        [0.5539],
        [0.7081]], grad_fn=<SigmoidBackward0>)
tensor(0.7226, grad_fn=<BinaryCrossEntropyBackward0>)


In [30]:
targets

tensor([[1.],
        [0.],
        [1.],
        [0.]])

In [31]:
# NOTE: every call draws new standard-normal samples, so the values shown
# here are not the scores that produced `probabilities` in the BCE example.
torch.randn(4,1,requires_grad=True)

tensor([[ 0.3460],
        [-0.1633],
        [-0.5894],
        [-1.5458]], requires_grad=True)

## softmax

In [21]:
# Seed the RNG so that z is reproducible.
torch.manual_seed(70)

# Stand-in for the result of a linear combination (the pre-activation
# scores): one sample with ten values drawn uniformly from [0, 1).
z = torch.rand(1,10)

# The dimension to normalise over must be passed explicitly; with dim=1 the
# ten values of each row sum to 1.
y = torch.softmax(z, dim=1)
print(y)

tensor([[0.0986, 0.0914, 0.0897, 0.0831, 0.1241, 0.0729, 0.1315, 0.1243, 0.0711,
         0.1133]])


In [22]:
z

tensor([[0.4350, 0.3591, 0.3411, 0.2644, 0.6656, 0.1341, 0.7236, 0.6668, 0.1091,
         0.5745]])

## Cross Entropy Loss

In [23]:
# CrossEntropyLoss takes the raw scores (it applies log-softmax internally)
# and integer class indices as targets.
loss_function = nn.CrossEntropyLoss(reduction='sum')
# z and t come from earlier cells: z is the (1, 10) score tensor and t the
# float target label, converted here to an integer class index.
print(loss_function(z, t.long()))

tensor(2.3930)


In [25]:
# CrossEntropyLoss with its default reduction (mean over the batch).
ce_loss = nn.CrossEntropyLoss()
# The bare string below is a note, not executed logic. In English:
# "Cross-entropy loss assumes that each input belongs to exactly one class
# and that every class has a unique index."
"""
CEL은 각 입력이 클래스 하나에 속하고 
각 클래스에는 고유한 인덱스가 있다고 가정
"""
# Random scores for 3 samples over 5 classes (example values from one run):
outputs = torch.randn(3,5,requires_grad=True)
# tensor([[ 1.3889,  0.5574, -1.1089,  0.1678, -0.5106],
#         [-0.9303,  0.0338,  0.0661,  1.4811, -0.9629],
#         [-1.9167,  1.0238,  0.6950, -0.7553, -0.0706]], requires_grad=True)

targets = torch.tensor([1,0,3], dtype=torch.int64)
# Three elements, each the index of the correct class for one sample.

loss = ce_loss(outputs, targets)
print(loss)

tensor(2.3407, grad_fn=<NllLossBackward0>)


In [26]:
outputs

tensor([[ 1.3889,  0.5574, -1.1089,  0.1678, -0.5106],
        [-0.9303,  0.0338,  0.0661,  1.4811, -0.9629],
        [-1.9167,  1.0238,  0.6950, -0.7553, -0.0706]], requires_grad=True)

# 추가 내용 (출처: 《파이토치로 배우는 자연어처리》)
## Mean Squared Error (MSE, 평균제곱오차)

In [24]:
# Mean squared error with its default reduction (mean over all elements).
mse_loss = nn.MSELoss()
outputs = torch.randn(3,5,requires_grad=True) # network outputs (predictions)
targets = torch.randn(3,5) # ground-truth target values
loss = mse_loss(outputs, targets) # L(y, y_hat)
print(loss)

tensor(1.7011, grad_fn=<MseLossBackward0>)
