In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
torch.manual_seed(1)

<torch._C.Generator at 0x7f9c5ca614b0>

# Cross Entropy Loss (Low-level)
$$ L = \frac{1}{N} \sum - y \log(\hat{y}) $$
where $\hat{y}$ is the predicted probability and $y$ is the correct probability (0 or 1).

In [7]:
z = torch.rand(3,5,requires_grad= True)
hypothesis = F.softmax(z,dim = 1)
print(hypothesis)
y = torch.randint(5,(3,)).long()
print(y)

tensor([[0.2201, 0.1774, 0.2032, 0.1925, 0.2068],
        [0.2794, 0.1372, 0.2804, 0.1534, 0.1495],
        [0.2203, 0.1962, 0.2866, 0.1599, 0.1370]], grad_fn=<SoftmaxBackward>)
tensor([1, 1, 4])


In [8]:
y_one_hot = torch.zeros_like(hypothesis)
#!!!!!
y_one_hot.scatter_(1,y.unsqueeze(1),1)  #dim = 1, y.unsqueeze(1): (3,) -> (3,1), 1의 값을 뿌리기 
cost = (y_one_hot*(-torch.log(hypothesis))).sum(dim=1).mean()   #차피 1부분만 

# with torch.nn.functional

In [10]:
#softmax,log
print(F.log_softmax(z,dim = 1))
#softmax,log + y -> loss
print(F.nll_loss(F.log_softmax(z,dim=1),y))
#혹은, 간편하게
print(F.cross_entropy(z,y))

tensor([[-1.5135, -1.7293, -1.5936, -1.6478, -1.5760],
        [-1.2752, -1.9861, -1.2714, -1.8746, -1.9003],
        [-1.5129, -1.6286, -1.2497, -1.8329, -1.9878]],
       grad_fn=<LogSoftmaxBackward>)
tensor(1.9011, grad_fn=<NllLossBackward>)
tensor(1.9011, grad_fn=<NllLossBackward>)


# training with F.cross_entropy

In [12]:
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [14]:
W = torch.zeros((4,3),requires_grad = True)
b = torch.zeros(1, requires_grad = True)

optimizer = optim.SGD([W,b], lr = 0.1)
epochs = 1000
for epoch in range(epochs +1):
    z = x_train.matmul(W)+b
    cost = F.cross_entropy(z, y_train)
    #이 함수 안에 softmax, log, y_onehot을 구하여 loss를 구하는 과정이 전부 들어있다.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 ==0:
        print('epoch: {:4d}/{} Cost: {:.6f}'.format(epoch, epochs, cost.item()))
    

epoch:    0/1000 Cost: 1.098612
epoch:  100/1000 Cost: 0.761050
epoch:  200/1000 Cost: 0.689991
epoch:  300/1000 Cost: 0.643229
epoch:  400/1000 Cost: 0.604117
epoch:  500/1000 Cost: 0.568255
epoch:  600/1000 Cost: 0.533922
epoch:  700/1000 Cost: 0.500291
epoch:  800/1000 Cost: 0.466908
epoch:  900/1000 Cost: 0.433507
epoch: 1000/1000 Cost: 0.399962


# High-level Implementation with nn.Module

In [16]:
class SoftmaxClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4,3) #네개의 input을 받아서 3개의 class에 대한 확률값을 output
    def forward(self,x):
        return self.linear(x)    # (m,4) -> (m,3)


In [18]:
model = SoftmaxClassifier()

In [19]:
optimizer = optim.SGD(model.parameters(), lr = 0.1)
epochs = 1000

for epoch in range(epochs+1):
    #H(x)계산
    prediction = model(x_train) #(m,4) -> (m,3)
    #cos 계산
    cost = F.cross_entropy(prediction, y_train)
    
    #cost로 SGD
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 10 ==0:
        print('epoch: {:4d}/{} cost: {:.6f}'.format(epoch, epochs, cost.item()))

epoch:    0/1000 cost: 1.581921
epoch:   10/1000 cost: 1.144623
epoch:   20/1000 cost: 0.992444
epoch:   30/1000 cost: 0.911484
epoch:   40/1000 cost: 0.861131
epoch:   50/1000 cost: 0.825565
epoch:   60/1000 cost: 0.798587
epoch:   70/1000 cost: 0.777095
epoch:   80/1000 cost: 0.759307
epoch:   90/1000 cost: 0.744129
epoch:  100/1000 cost: 0.730856
epoch:  110/1000 cost: 0.719018
epoch:  120/1000 cost: 0.708289
epoch:  130/1000 cost: 0.698438
epoch:  140/1000 cost: 0.689294
epoch:  150/1000 cost: 0.680729
epoch:  160/1000 cost: 0.672645
epoch:  170/1000 cost: 0.664966
epoch:  180/1000 cost: 0.657629
epoch:  190/1000 cost: 0.650587
epoch:  200/1000 cost: 0.643798
epoch:  210/1000 cost: 0.637231
epoch:  220/1000 cost: 0.630856
epoch:  230/1000 cost: 0.624651
epoch:  240/1000 cost: 0.618597
epoch:  250/1000 cost: 0.612676
epoch:  260/1000 cost: 0.606875
epoch:  270/1000 cost: 0.601180
epoch:  280/1000 cost: 0.595582
epoch:  290/1000 cost: 0.590070
epoch:  300/1000 cost: 0.584637
epoch:  