In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [23]:
z = torch.FloatTensor ([1,2,3])
hypothesis = F.softmax(z, dim=0)
print(hypothesis)
hypothesis.sum()

tensor([0.0900, 0.2447, 0.6652])


tensor(1.)

## Cross Entropy Loss

In [24]:
z=torch.rand(3,5, requires_grad = True)
hypothesis = F.softmax(z,dim=1)
print(hypothesis)

tensor([[0.2257, 0.1283, 0.2013, 0.1358, 0.3089],
        [0.2552, 0.2790, 0.1620, 0.1508, 0.1530],
        [0.2246, 0.1294, 0.2676, 0.2339, 0.1445]], grad_fn=<SoftmaxBackward0>)


In [25]:
y = torch.randint(5,(3,)).long()
print(y) # 답 정해주는 거 (인덱스)

tensor([4, 2, 0])


In [26]:
y_one_hot = torch.zeros_like(hypothesis)
y_one_hot.scatter_(1, y.unsqueeze(1), 1) #첫 번째 1 -> dim=1

tensor([[0., 0., 0., 0., 1.],
        [0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0.]])

In [27]:
cost = (y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean()
print(cost)

tensor(1.4960, grad_fn=<MeanBackward0>)


## with torch.nn.functional

In [28]:
print(torch.log(F.softmax(z,dim=1)))
print((y_one_hot * -torch.log(F.softmax(z,dim=1))).sum(dim=1).mean())
# Low level

tensor([[-1.4884, -2.0537, -1.6031, -1.9964, -1.1747],
        [-1.3657, -1.2766, -1.8200, -1.8919, -1.8774],
        [-1.4934, -2.0452, -1.3181, -1.4529, -1.9343]], grad_fn=<LogBackward0>)
tensor(1.4960, grad_fn=<MeanBackward0>)


In [29]:
print(F.log_softmax(z,dim=1))
print(F.nll_loss(F.log_softmax(z,dim=1),y))  # Negative Log Likelihood
print(F.cross_entropy(z,y))
# High level (위랑 똑같은 값 출력)

tensor([[-1.4884, -2.0537, -1.6031, -1.9964, -1.1747],
        [-1.3657, -1.2766, -1.8200, -1.8919, -1.8774],
        [-1.4934, -2.0452, -1.3181, -1.4529, -1.9343]],
       grad_fn=<LogSoftmaxBackward0>)
tensor(1.4960, grad_fn=<NllLossBackward0>)
tensor(1.4960, grad_fn=<NllLossBackward0>)


## Training with Low-level Cross Entropy Loss

In [47]:
x_train = [[1,2,1,1],
           [2,1,3,2],
           [3,1,3,4],
           [4,1,5,5],
           [3,2,5,1],
           [1,3,2,4],
           [1,1,2,1],
           [4,1,2,1]]
           # x_train -> (8,4)
y_train = [2,2,2,1,1,1,0,0]
# y_train ->(8,)
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

w = torch.zeros((4,3), requires_grad = True)
b = torch.zeros(1, requires_grad=True)
optimizer = optim.SGD([w,b],lr = 0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1 ) :
    hypothesis = F.softmax(x_train.matmul(w) + b , dim=1)
    y_one_hot = torch.zeros_like (hypothesis)
    y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
    cost = (y_one_hot * -torch.log(F.softmax(hypothesis,dim=1))).sum(dim=1).mean()

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0 :
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))
    

Epoch    0/1000 Cost: 1.098612
Epoch  100/1000 Cost: 0.975704
Epoch  200/1000 Cost: 0.944895
Epoch  300/1000 Cost: 0.928340
Epoch  400/1000 Cost: 0.915387
Epoch  500/1000 Cost: 0.904186
Epoch  600/1000 Cost: 0.894344
Epoch  700/1000 Cost: 0.885711
Epoch  800/1000 Cost: 0.878166
Epoch  900/1000 Cost: 0.871586
Epoch 1000/1000 Cost: 0.865845


## F.cross_entropy

In [51]:
nb_epochs = 1000
for epoch in range(nb_epochs + 1 ) :
    z = x_train.matmul(w) + b
    cost = F.cross_entropy(z,y_train)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0 :
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

# 왜 위쪽 코드는 여러번 실행해도 cost가 그대로인데 얘는 실행할수록 숫자가 점점 똑같아질까...?

Epoch    0/1000 Cost: 0.724171
Epoch  100/1000 Cost: 0.724171
Epoch  200/1000 Cost: 0.724171
Epoch  300/1000 Cost: 0.724171
Epoch  400/1000 Cost: 0.724171
Epoch  500/1000 Cost: 0.724171
Epoch  600/1000 Cost: 0.724171
Epoch  700/1000 Cost: 0.724171
Epoch  800/1000 Cost: 0.724171
Epoch  900/1000 Cost: 0.724171
Epoch 1000/1000 Cost: 0.724171


## High-level Implementation with nn.Module

In [54]:
class SoftmaxClassifierModel (nn.Module) :
    def __init__(self) :
        super().__init__()
        self.linear = nn.Linear(4,3)

    def forward(self,x) :
        return self.linear(x)

model = SoftmaxClassifierModel()

optimizer = optim.SGD(model.parameters() ,lr = 0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1 ) :
    prediction = model(x_train)
    cost = F.cross_entropy(prediction, y_train)


    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0 :
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))
    
# 얘는 또 할 때마다 그냥 숫자가 다 바뀌는데 cost가 젤 적게 나오긴 하네...

Epoch    0/1000 Cost: 1.474953
Epoch  100/1000 Cost: 0.707645
Epoch  200/1000 Cost: 0.599398
Epoch  300/1000 Cost: 0.528057
Epoch  400/1000 Cost: 0.475295
Epoch  500/1000 Cost: 0.434100
Epoch  600/1000 Cost: 0.400780
Epoch  700/1000 Cost: 0.373120
Epoch  800/1000 Cost: 0.349687
Epoch  900/1000 Cost: 0.329501
Epoch 1000/1000 Cost: 0.311868
