### 1. PyTorch로 Softmax Regression 구현

In [1]:
import torch
# Training inputs: 8 samples, 4 features each (float32).
x_train = torch.tensor(
    [
        [1, 2, 1, 1],
        [2, 1, 3, 2],
        [3, 1, 3, 4],
        [4, 1, 5, 5],
        [1, 7, 5, 5],
        [1, 2, 5, 6],
        [1, 6, 6, 6],
        [1, 7, 7, 7],
    ],
    dtype=torch.float32,
)
# One-hot targets over 3 classes, one row per row of x_train (float32).
y_train = torch.tensor(
    [
        [0, 0, 1],
        [0, 0, 1],
        [0, 0, 1],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
    ],
    dtype=torch.float32,
)

In [2]:
# Softmax regression written out by hand: a 4x3 weight matrix W and a 1x3 bias b,
# both zero-initialised, trained with Adam against the one-hot targets in y_train.
W = torch.zeros(4, 3, requires_grad = True)
b = torch.zeros(1, 3, requires_grad = True)

optimizer = torch.optim.Adam([W, b], lr = 0.1)

for epoch in range(3001):
    logits = torch.mm(x_train, W) + b
    # Class probabilities per sample; kept so they can still be inspected after training.
    hypothesis = torch.softmax(logits, dim = 1)
    # Take log-probabilities from log_softmax instead of torch.log(hypothesis):
    # once a probability underflows to 0, log() returns -inf and the one-hot
    # product 0 * -inf becomes NaN, which would poison the cost and its gradients.
    log_prob = torch.log_softmax(logits, dim = 1)
    # Cross entropy: mean over samples of -sum_k y_k * log p_k.
    cost = -torch.mean(torch.sum(y_train * log_prob, dim = 1))

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print("epoch: {}, cost: {:.6f}".format(epoch, cost.item()))

epoch: 0, cost: 1.098612
epoch: 100, cost: 0.279874
epoch: 200, cost: 0.162209
epoch: 300, cost: 0.105263
epoch: 400, cost: 0.074083
epoch: 500, cost: 0.055093
epoch: 600, cost: 0.042634
epoch: 700, cost: 0.034003
epoch: 800, cost: 0.027768
epoch: 900, cost: 0.023111
epoch: 1000, cost: 0.019535
epoch: 1100, cost: 0.016727
epoch: 1200, cost: 0.014479
epoch: 1300, cost: 0.012650
epoch: 1400, cost: 0.011140
epoch: 1500, cost: 0.009879
epoch: 1600, cost: 0.008813
epoch: 1700, cost: 0.007905
epoch: 1800, cost: 0.007124
epoch: 1900, cost: 0.006447
epoch: 2000, cost: 0.005856
epoch: 2100, cost: 0.005338
epoch: 2200, cost: 0.004881
epoch: 2300, cost: 0.004475
epoch: 2400, cost: 0.004113
epoch: 2500, cost: 0.003789
epoch: 2600, cost: 0.003499
epoch: 2700, cost: 0.003236
epoch: 2800, cost: 0.002999
epoch: 2900, cost: 0.002784
epoch: 3000, cost: 0.002588


In [3]:
# Inference only from here on: switch off gradient tracking on the learned parameters.
for param in (W, b):
    param.requires_grad_(False)

# Three unseen samples with the same 4-feature layout as the training data.
x_test = torch.tensor(
    [
        [1, 11, 10, 9],
        [1, 3, 4, 3],
        [1, 1, 0, 1],
    ],
    dtype=torch.float32,
)

# Per-class probabilities for each test sample, then the most likely class index.
test_logits = x_test @ W + b
test_all = torch.softmax(test_logits, dim=1)
print(test_all)
print(test_all.argmax(dim=1))

tensor([[1.0000e+00, 5.5165e-19, 7.0149e-38],
        [1.4799e-02, 7.4294e-01, 2.4226e-01],
        [1.2256e-33, 9.0835e-12, 1.0000e+00]])
tensor([0, 1, 2])


### 2. 조금 더 깔끔하게 Softmax
- 마음에 안 드는 부분 1. [1,0,0], [0,1,0], [0,0,1] 대신 0, 1, 2를 쓰면 안 되나?
- 마음에 안 드는 부분 2. 이렇게 복잡한 함수를 항상 직접 구현해야 하나? 어차피 softmax, cross entropy인데?

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Training inputs: 8 samples, 4 features each (float32).
x_train = torch.tensor(
    [
        [1, 2, 1, 1],
        [2, 1, 3, 2],
        [3, 1, 3, 4],
        [4, 1, 5, 5],
        [1, 7, 5, 5],
        [1, 2, 5, 6],
        [1, 6, 6, 6],
        [1, 7, 7, 7],
    ],
    dtype=torch.float32,
)
# Targets as integer class indices (int64) in place of one-hot rows:
# 2, 1, 0 stand for [0,0,1], [0,1,0], [1,0,0] respectively.
y_train = torch.tensor([2, 2, 2, 1, 1, 1, 0, 0], dtype=torch.int64)

In [5]:
# The same softmax regression, expressed with nn.Linear and F.cross_entropy.
# Seed the RNG first: nn.Linear draws its initial weights at random, so without
# a fixed seed every Restart & Run All prints a different cost trajectory.
torch.manual_seed(0)
model = nn.Linear(4,3)
optimizer = torch.optim.Adam(model.parameters(),lr=1)

for epoch in range(3001):
    # Raw, un-normalised class scores (logits) for every training sample.
    z = model(x_train)
    # Caution: F.cross_entropy combines softmax and cross entropy, so it must be
    # given the raw logits and the integer class indices, not probabilities.
    cost = F.cross_entropy(z, y_train)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print("epoch: {}, cost: {:.6f}".format(epoch, cost.item()))

epoch: 0, cost: 1.831898
epoch: 100, cost: 0.061686
epoch: 200, cost: 0.032587
epoch: 300, cost: 0.020433
epoch: 400, cost: 0.014072
epoch: 500, cost: 0.010327
epoch: 600, cost: 0.007929
epoch: 700, cost: 0.006296
epoch: 800, cost: 0.005129
epoch: 900, cost: 0.004264
epoch: 1000, cost: 0.003603
epoch: 1100, cost: 0.003085
epoch: 1200, cost: 0.002672
epoch: 1300, cost: 0.002335
epoch: 1400, cost: 0.002058
epoch: 1500, cost: 0.001826
epoch: 1600, cost: 0.001631
epoch: 1700, cost: 0.001464
epoch: 1800, cost: 0.001320
epoch: 1900, cost: 0.001195
epoch: 2000, cost: 0.001086
epoch: 2100, cost: 0.000991
epoch: 2200, cost: 0.000907
epoch: 2300, cost: 0.000832
epoch: 2400, cost: 0.000765
epoch: 2500, cost: 0.000705
epoch: 2600, cost: 0.000651
epoch: 2700, cost: 0.000603
epoch: 2800, cost: 0.000559
epoch: 2900, cost: 0.000519
epoch: 3000, cost: 0.000483


### 3. Softmax Regression in Sklearn

sklearn의 LogisticRegression에는 Softmax regression이 함께 구현되어 있음  
⇒ y에 세 종류 이상의 값(클래스)이 있을 경우 softmax(multinomial) regression 실행 (값이 두 종류뿐이면 일반적인 이진 logistic regression)

In [6]:
import numpy as np 
from sklearn.linear_model import LogisticRegression


# Same 8x4 training inputs as the PyTorch sections, as a NumPy array.
x_train = np.array(
    [
        [1, 2, 1, 1],
        [2, 1, 3, 2],
        [3, 1, 3, 4],
        [4, 1, 5, 5],
        [1, 7, 5, 5],
        [1, 2, 5, 6],
        [1, 6, 6, 6],
        [1, 7, 7, 7],
    ]
)
# Integer class labels, one per row of x_train; three distinct values (0, 1, 2)
# make this a multi-class problem.
y_train = np.array([2, 2, 2, 1, 1, 1, 0, 0])

# Create the classifier with its default settings, then fit it to the data.
logistic = LogisticRegression()
logistic.fit(x_train, y_train)

# Predict the class of three unseen samples and print the predicted labels.
pred = logistic.predict(
    [
        [1, 11, 10, 9],
        [1, 3, 4, 3],
        [1, 1, 0, 1],
    ]
)
print(pred)

[0 2 2]
