### **Softmax Function**

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Training inputs: one row per sample, four feature values per row.
x_train = torch.tensor(
    [
        [1, 2, 1, 1],
        [2, 1, 3, 2],
        [3, 1, 3, 4],
        [4, 1, 5, 5],
        [1, 7, 5, 5],
        [1, 2, 5, 8],
        [1, 6, 6, 6],
        [1, 7, 7, 7],
    ],
    dtype=torch.float32,
)

# Training targets: one integer class index (0, 1 or 2) per row of x_train.
y_train = torch.tensor([2, 2, 2, 1, 1, 1, 0, 0], dtype=torch.int64)

In [3]:
class MultiLayerPerceptron(nn.Module):
  """Two-layer perceptron mapping 4 input features to 3 output scores.

  The forward pass returns the raw output of the second linear layer;
  no softmax is applied inside the module.
  """

  def __init__(self):
    super().__init__()
    # Hidden layer: 4 features -> 3 units, followed by a sigmoid.
    self.linear1 = nn.Linear(in_features=4, out_features=3)
    self.activation = nn.Sigmoid()

    # Output layer: 3 hidden units -> 3 scores.
    self.linear2 = nn.Linear(in_features=3, out_features=3)

  def forward(self, x):
    """Return the second linear layer's output for input `x`."""
    hidden = self.activation(self.linear1(x))
    return self.linear2(hidden)

In [17]:
# Instantiate the perceptron, then switch it to training mode;
# train() hands back the module, so `model` is still the network.
model = MultiLayerPerceptron()
model = model.train()

In [18]:
# Adam over all parameters of `model`, with learning rate and betas spelled out.
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
)

In [None]:
# NOTE(review): this rebinds `optimizer`. When the cells run top to bottom it
# replaces the Adam optimizer assigned in the preceding cell, so training below
# would use SGD -- confirm which optimizer is intended.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)  # plain SGD, lr 0.01

In [20]:
epochs = 10000

# Build the loss modules once. They are configured only by `dim` / defaults,
# so re-creating them on every one of the `epochs` iterations is wasted work.
log_softmax = nn.LogSoftmax(dim=1)  # normalise over the class dimension
nll_loss = nn.NLLLoss()

model.train()
for epoch in range(epochs):
  # Every iteration uses the whole of x_train / y_train (no mini-batching).
  logits = model(x_train) # forward propagation

  # Cost = negative log-likelihood of the log-softmax of the logits.
  log_probs = log_softmax(logits)
  cost = nll_loss(log_probs, y_train) # get cost

  optimizer.zero_grad()  # clear gradients left over from the previous step
  cost.backward() # backward propagation
  optimizer.step() # update parameters


In [21]:
# Score the training inputs in evaluation mode, without tracking gradients.
softmax = nn.Softmax(dim=1)  # normalise each row over the class dimension

model.eval()
with torch.no_grad():
  logits = model(x_train)
  probs = softmax(logits)

# Show the raw scores, the softmax probabilities and the highest-probability
# class index for each row.
print('logit\n : {}'.format(logits))
print('predict with softmax\n : {}'.format(probs))
print('predict with argmax\n : {}'.format(torch.argmax(probs, dim=1)))

logit
 : tensor([[-7.6169, -4.7014,  6.2646],
        [-7.7429, -4.3363,  6.0829],
        [-7.8406, -4.2539,  6.0832],
        [-9.8510,  9.6620, -2.5147],
        [-3.5388,  5.8452, -3.5933],
        [-2.7645,  6.7972, -5.6584],
        [ 7.3019, -2.0323, -5.6333],
        [ 7.3408, -2.0607, -5.6406]])
predict with softmax
 : tensor([[9.3608e-07, 1.7280e-05, 9.9998e-01],
        [9.8967e-07, 2.9851e-05, 9.9997e-01],
        [8.9729e-07, 3.2405e-05, 9.9997e-01],
        [3.3542e-09, 9.9999e-01, 5.1487e-06],
        [8.4046e-05, 9.9984e-01, 7.9591e-05],
        [7.0363e-05, 9.9993e-01, 3.8954e-06],
        [9.9991e-01, 8.8345e-05, 2.4115e-06],
        [9.9992e-01, 8.2598e-05, 2.3026e-06]])
predict with argmax
 : tensor([2, 2, 2, 1, 1, 1, 0, 0])


### **MNIST Classifier Model**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Use the CUDA device when torch reports one as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
import torchvision
import torchvision.transforms as transforms

# Arguments shared by both splits: same storage directory, images converted
# with ToTensor(), and download enabled.
mnist_kwargs = dict(
    root="MNIST_data/",
    transform=transforms.ToTensor(),
    download=True,
)

# train=True selects the training split, train=False the test split.
train_dataset = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = torchvision.datasets.MNIST(train=False, **mnist_kwargs)

In [None]:
batch_size = 128  # samples per mini-batch for both loaders

# The training loader shuffles; the test loader is left at its default ordering.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
)

In [None]:
class Model(nn.Module):
  """Three-layer fully connected network: 784 inputs -> 10 output scores.

  Layer widths are 784 -> 784*3 -> 784*2 -> 10. A sigmoid follows the first
  two linear layers; the last layer's output is returned unchanged.
  """

  def __init__(self):
    super().__init__()
    n_inputs = 784
    self.linear1 = nn.Linear(n_inputs, n_inputs * 3)
    self.linear2 = nn.Linear(n_inputs * 3, n_inputs * 2)
    self.linear3 = nn.Linear(n_inputs * 2, 10)

    # One sigmoid module, reused after each of the first two layers.
    self.activation = nn.Sigmoid()

  def forward(self, x):
    """Return the third linear layer's output for input `x`."""
    hidden1 = self.activation(self.linear1(x))
    hidden2 = self.activation(self.linear2(hidden1))
    return self.linear3(hidden2)

In [None]:
# Create the MNIST network, move it to `device` and put it in training mode.
model = Model()
model = model.to(device).train()

In [None]:
# Plain SGD over all parameters of `model`, learning rate 0.1.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

In [None]:
# Cross-entropy between the model's logits and integer class labels,
# averaged over the batch ("mean" is the default reduction, spelled out here).
criterion = nn.CrossEntropyLoss(reduction="mean")

In [None]:
epochs = 15

model.train()
# The number of batches per epoch does not change, so compute it once.
total_batch_num = len(train_dataloader)

for epoch in range(epochs):
  avg_cost = 0.0  # running mean of the batch losses for this epoch

  for b_x, b_y in train_dataloader:
    # Flatten each image to a 784-long vector and move the batch to `device`.
    b_x = b_x.view(-1, 28*28).to(device)
    logits = model(b_x) # forward propagation
    loss = criterion(logits, b_y.to(device)) # get cost

    optimizer.zero_grad()
    loss.backward() # backward propagation
    optimizer.step() # update parameters

    # Accumulate a plain Python float: adding the `loss` tensor itself would
    # chain every batch's autograd history into `avg_cost` for the whole epoch.
    avg_cost += loss.item() / total_batch_num

  print('Epoch :{} / {}, cost : {}'.format(epoch+1, epochs, avg_cost))

Epoch :1 / 15, cost : 2.339632511138916
Epoch :2 / 15, cost : 1.4761817455291748
Epoch :3 / 15, cost : 0.7560228705406189
Epoch :4 / 15, cost : 0.5423470139503479
Epoch :5 / 15, cost : 0.45935434103012085
Epoch :6 / 15, cost : 0.41686299443244934
Epoch :7 / 15, cost : 0.3897917866706848
Epoch :8 / 15, cost : 0.3749294579029083
Epoch :9 / 15, cost : 0.3622021973133087
Epoch :10 / 15, cost : 0.3494408428668976
Epoch :11 / 15, cost : 0.34157881140708923
Epoch :12 / 15, cost : 0.33297863602638245
Epoch :13 / 15, cost : 0.32641586661338806
Epoch :14 / 15, cost : 0.3168748617172241
Epoch :15 / 15, cost : 0.31048041582107544


In [None]:
correct = 0  # number of test samples whose predicted class equals the label
total = 0    # number of test samples seen

model.eval()
# Evaluation needs no gradients, so the whole loop runs under no_grad.
with torch.no_grad():
  for b_x, b_y in test_dataloader:
    # Flatten each image to a 784-long vector and move the batch to `device`.
    b_x = b_x.view(-1, 784).to(device)
    logits = model(b_x)

    # Softmax is monotonic, so the argmax of the logits is already the
    # predicted class; no probabilities need to be computed here.
    predicts = torch.argmax(logits, dim=1)

    total += len(b_y)
    correct += (predicts == b_y.to(device)).sum().item()

# Integer (floor) percentage of correct predictions.
print(f'Accuracy of the network on test images: {100 * correct // total} %')

Accuracy of the network on test images: 91 %
