# Lab 06. Softmax Classification

**Import**

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# for reproducibility
torch.manual_seed(1)

<torch._C.Generator at 0x7f6a540bac10>

**Softmax**

In [3]:
z = torch.FloatTensor([1, 2, 3])

In [4]:
hypothesis = F.softmax(z, dim=0)
print(hypothesis)

tensor([0.0900, 0.2447, 0.6652])


In [5]:
hypothesis.sum()

tensor(1.)

**Cross Entropy Loss**(Low-Level)

In [6]:
z = torch.rand(3, 5, requires_grad=True)
hypothesis = F.softmax(z, dim=1)
print(hypothesis)

tensor([[0.2645, 0.1639, 0.1855, 0.2585, 0.1277],
        [0.2430, 0.1624, 0.2322, 0.1930, 0.1694],
        [0.2226, 0.1986, 0.2326, 0.1594, 0.1868]], grad_fn=<SoftmaxBackward0>)


In [7]:
y = torch.randint(5, (3,)).long()
print(y)

tensor([0, 2, 1])


In [8]:
y_one_hot = torch.zeros_like(hypothesis)
y_one_hot.scatter_(1, y.unsqueeze(1), 1)

tensor([[1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 1., 0., 0., 0.]])

In [9]:
cost = (y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean()
print(cost)

tensor(1.4689, grad_fn=<MeanBackward0>)


**Cross Entropy Loss with `torch.nn.functional`**

In [10]:
# low level
torch.log(F.softmax(z, dim=1))

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]], grad_fn=<LogBackward0>)

In [11]:
# high level
F.log_softmax(z, dim=1)

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]],
       grad_fn=<LogSoftmaxBackward0>)

In [12]:
# low level
(y_one_hot * -torch.log(F.softmax(z, dim=1))).sum(dim=1).mean()

tensor(1.4689, grad_fn=<MeanBackward0>)

In [16]:
#high level
F.nll_loss(F.log_softmax(z, dim=1), y) # NLL = Negative Log Liklihood

tensor(1.4689, grad_fn=<NllLossBackward0>)

In [17]:
F.cross_entropy(z, y)

tensor(1.4689, grad_fn=<NllLossBackward0>)

**Training with Low-level Cross Entropy Loss**

In [18]:
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [24]:
# model 초기화
w = torch.zeros((4, 3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# optimizer 설정
optimizer = optim.SGD([w, b], lr=0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    
    # cost 계산 (1)
    hypothesis = F.softmax(x_train.matmul(w)+b, dim=1)
    y_one_hot = torch.zeros_like(hypothesis)
    y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
    cost = (y_one_hot * -torch.log(F.softmax(hypothesis, dim=1))).sum(dim=1).mean()
    
    # cost로 H(x) 계산
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # 100번마다 로그 출력
    if epoch % 100 == 0:
        print(f'Epoch {epoch:4d}/{nb_epochs}  Cost:{cost.item()}')

Epoch    0/1000  Cost:1.0986123085021973
Epoch  100/1000  Cost:0.901534914970398
Epoch  200/1000  Cost:0.839113712310791
Epoch  300/1000  Cost:0.8078263998031616
Epoch  400/1000  Cost:0.7884716391563416
Epoch  500/1000  Cost:0.7748216390609741
Epoch  600/1000  Cost:0.7644491195678711
Epoch  700/1000  Cost:0.7561913132667542
Epoch  800/1000  Cost:0.7493984699249268
Epoch  900/1000  Cost:0.743671178817749
Epoch 1000/1000  Cost:0.7387485504150391


**Training with `F.cross_entropy`**

In [25]:
# model 초기화
w = torch.zeros((4, 3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# optimizer 설정
optimizer = optim.SGD([w, b], lr=0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    
    # cost 계산 (2)
    z = x_train.matmul(w) + b
    cost = F.cross_entropy(z, y_train)
    
    # cost로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # 100번마다 로그 출력
    if epoch % 100 == 0:
        print(f'Epoch {epoch:4d}/{nb_epochs}  Cost:{cost.item()}')

Epoch    0/1000  Cost:1.0986123085021973
Epoch  100/1000  Cost:0.761050283908844
Epoch  200/1000  Cost:0.6899911165237427
Epoch  300/1000  Cost:0.6432289481163025
Epoch  400/1000  Cost:0.6041170358657837
Epoch  500/1000  Cost:0.5682554244995117
Epoch  600/1000  Cost:0.5339219570159912
Epoch  700/1000  Cost:0.500291109085083
Epoch  800/1000  Cost:0.46690812706947327
Epoch  900/1000  Cost:0.43350690603256226
Epoch 1000/1000  Cost:0.399962455034256


**High-level Implemetation with `nn.Module`**

In [26]:
class SoftmaxClassifierModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 3)
        
    def forward(self, x):
        return self.linear(x)

In [27]:
model = SoftmaxClassifierModel()

In [28]:
# set optimizer
optimizer = optim.SGD(model.parameters(), lr=0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    
    # H(x) 계산
    prediction = model(x_train)
    
    # cost 계산
    cost = F.cross_entropy(prediction, y_train)
    
    # cost로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # 100번마다 로그 출력
    if epoch % 100 == 0:
        print(f'Epoch {epoch:4d}/{nb_epochs}  Cost:{cost.item()}')

Epoch    0/1000  Cost:1.8495129346847534
Epoch  100/1000  Cost:0.689894437789917
Epoch  200/1000  Cost:0.6092584729194641
Epoch  300/1000  Cost:0.5512181520462036
Epoch  400/1000  Cost:0.5001410841941833
Epoch  500/1000  Cost:0.4519471526145935
Epoch  600/1000  Cost:0.40505102276802063
Epoch  700/1000  Cost:0.3587331473827362
Epoch  800/1000  Cost:0.3129115402698517
Epoch  900/1000  Cost:0.26952147483825684
Epoch 1000/1000  Cost:0.2419215440750122
