# 9. Softmax Classifier(소프트맥스 분류기)

## 9-1. Softmax Function(소프트맥스 함수)

- Softmax Function(소프트맥스 함수): 머신러닝 모델의 Output(결과)을 확률의 형태로 변환시켜주는 함수
    - 정의: $$ \sigma(z)_j = {e^{z_{j}} \over \Sigma^K_{k=1} e^{z_k}} ,for {j=1,...,K} $$
    - 결과가 나오기 직전, Activation Function(활성화 함수)로 소프트맥스 함수를 사용
    - 무작위 형태의 선형모델 결과값을 소프트맥스 함수를 거치게 하여 확률값으로 변환(값 모두 더할 시 1이됨)
    - 소프트맥스 함수를 통해 확률 부여 가능
    - 소프트맥스 사용 후 Cross-Entropy 레이어를 사용하여 1-HOT Labels(원핫 라벨)를 만들 수 있음

## 9-2. Cross Entropy(손실 함수)

- Cross Entropy: 소프트맥스 함수 사용후 확률화된 결과값을 바탕으로 원핫 라벨을 만들어주는 손실 함수
    - 정의 : $$ L = {1 \over N} \Sigma D(S(w{x_i}+b){y_i}) $$
    - $ D({\hat{Y}}, Y) = -Ylog{\hat{Y}} $
    - 원핫 라벨이 결과로 나옴

In [1]:
# Cross entropy example
# 실제 확률과 유사하게 손실이 나오는 것을 알 수 있음

import numpy as np 
# One hot
# 0: 1 0 0 0
# 1: 0 1 0 0
# 2: 0 0 1 0
Y = np.array([1, 0, 0])

Y_pred1 = np.array([0.7, 0.2, 0.1])
Y_pred2 = np.array([0.1, 0.3, 0.6])
print("loss1 = ", np.sum(-Y * np.log(Y_pred1)))
print("loss2 = ", np.sum(-Y * np.log(Y_pred2)))

loss1 =  0.35667494393873245
loss2 =  2.3025850929940455


In [1]:
# Cross entropy in Pytorch

import torch
import torch.nn as nn
import torchvision
from torch.autograd import Variable
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset, DataLoader

# 1. Softmax + CrossEntropy (LogSoftmax + NLLLoss)
loss = nn.CrossEntropyLoss() # 간단히 모듈 적용

# 2. target is size of nBatch
# each element in target has to have 0 <= value < nClasses(0-2)
# Input is class, not one-hot
Y = Variable(torch.LongTensor([0]), requires_grad=False)
print(Y)

# 3. nput is size of nBatch x nClasses = 1 x 4
# Y_pred are logits (not softmax)
Y_pred1 = Variable(torch.Tensor([2.0, 1.0, 0.1]))
Y_pred2 = Variable(torch.Tensor([0.5, 2.0, 0.3]))

# 4. print(torch.Size(Y_p)
l1 = loss(Y_pred1, Y)
l2 = loss(Y_pred2, Y)

print("PyTorch Loss1 = ", l1.data, "\nPyTorch Loss2 = ", l2.data)

tensor([0])


IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

## 9-3. Exercise 1: CrossEntropyLoss VS NLLLoss

## 9-4. Exercise 2: Build a classifier for Otto Group Product

In [18]:
# Cross entropy in Pytorch
import torch
import torch.nn as nn
import torchvision
from torch.autograd import Variable
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset, DataLoader

# Softmax + CrossEntropy (LogSoftmax + NLLLoss)
loss = nn.CrossEntropyLoss()

# target is of size nBatch
# each element in target has to have 0 <= value < nClasses(0-2)
# Input is class, not one-hot
Y = Variable(torch.LongTensor([0]), requires_grad=False)
print(Y)
# input is of size nBatch x nClasses = 1 x 4
# Y_pred are logits (not softmax)
Y_pred1 = Variable(torch.Tensor([2.0, 1.0, 0.1]))
Y_pred2 = Variable(torch.Tensor([0.5, 2.0, 0.3]))
#print(torch.Size(Y_p)
l1 = loss(Y_pred1, Y)
l2 = loss(Y_pred2, Y)

print("PyTorch Loss1 = ", l1.data, "\nPyTorch Loss2 = ", l2.data)

SyntaxError: invalid syntax (<ipython-input-18-e90dbb653582>, line 23)

In [20]:
# MNIST Softmax 
import torch
import torch.nn as nn
import torchvision
from torch.autograd import Variable
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset, DataLoader

# 0. Dataloader: Training settings
batch_size = 64
train_loader = torch.utils.data.DataLoader( # Train Set
    datasets.MNIST('../data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])), batch_size=batch_size, shuffle=True)
train_loader = torch.utils.data.DataLoader( # Test Set
    datasets.MNIST('../data', train=False, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])), batch_size=batch_size, shuffle=True)

# 1. Neural Network: Design my model by class
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(784, 520)
        self.l2 = nn.Linear(520, 320)
        self.l3 = nn.Linear(320, 240)
        self.l4 = nn.Linear(240, 120)
        self.l4 = nn.Linear(120, 10)
    
    def forward(self, x):
        x = x.view(-1, 784) # Flatten the data (n, 1, 28, 28) -> (n, 784)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        return self.l5(x) # 마지막에는 활성함수 사용 X

model = net()

# 2. Loss Function & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# 3. Training Cycle
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx * len(data), len(train_loader.dataset), 100. * batch_idx / len(train_loader), loss.data[0]))

NameError: name 'datasets' is not defined