# Import

In [36]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

In [9]:
# Seed PyTorch's global random number generator so that the random tensors
# created further down come out the same on every fresh run.
SEED = 1
torch.manual_seed(SEED)

<torch._C.Generator at 0x24c9aa31050>

# Softmax

In [32]:
# Softmax of a 3-element vector: first with the library call, then by hand
# from the definition exp(z_i) / sum_j exp(z_j) to show both agree.
z = torch.tensor([1, 2, 3], dtype=torch.float32)

hypothesis = torch.softmax(z, dim=0)

print(hypothesis)
print(hypothesis.sum().item())  # the probabilities sum to 1

# The denominator is the same for every element, so compute it once.
normalizer = torch.exp(z).sum()
for z_i in z:
    print(torch.exp(z_i) / normalizer)

tensor([0.0900, 0.2447, 0.6652])
1.0
tensor(0.0900)
tensor(0.2447)
tensor(0.6652)


# Cross Entropy Loss (Low-level)

In [58]:
# A 3x5 tensor of random values used as logits; softmax along dim=1 turns
# every row into a probability distribution.
z = torch.rand((3, 5), requires_grad=True)
h = torch.softmax(z, dim=1)
print(h)

tensor([[0.2586, 0.1934, 0.2074, 0.1558, 0.1848],
        [0.1437, 0.2074, 0.1835, 0.1895, 0.2759],
        [0.2611, 0.2255, 0.1629, 0.1135, 0.2370]], grad_fn=<SoftmaxBackward0>)


In [113]:
# Draw three random integer labels in [0, 5), one per row of h, then one-hot
# encode them: start from zeros shaped like h and write a 1 into the column
# given by each label.
y = torch.randint(low=0, high=5, size=(3,))
print(y)
y_one_hot = torch.zeros_like(h)
y_one_hot.scatter_(1, y.view(-1, 1), 1)

tensor([4, 2, 4])


tensor([[0., 0., 0., 0., 1.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1.]])

In [115]:
# Cross entropy by hand: the one-hot mask keeps only the negative
# log-probability of the labelled class in each row; sum per row, then
# average over the rows.
neg_log_h = -torch.log(h)
cost = (y_one_hot * neg_log_h).sum(dim=1).mean()
print(cost)

tensor(1.6079, grad_fn=<MeanBackward0>)


# Cross-entropy Loss with torch.nn.functional

In [121]:
# Two-step version: softmax over each row first, then the elementwise log.
F.softmax(z, dim=1).log()

tensor([[-1.3524, -1.6428, -1.5731, -1.8593, -1.6887],
        [-1.9400, -1.5730, -1.6955, -1.6634, -1.2878],
        [-1.3430, -1.4893, -1.8144, -2.1762, -1.4396]], grad_fn=<LogBackward0>)

In [122]:
# Single-call version: log_softmax yields the same values as the two-step
# log(softmax(z)) shown above.
z.log_softmax(dim=1)

tensor([[-1.3524, -1.6428, -1.5731, -1.8593, -1.6887],
        [-1.9400, -1.5730, -1.6955, -1.6634, -1.2878],
        [-1.3430, -1.4893, -1.8144, -2.1762, -1.4396]],
       grad_fn=<LogSoftmaxBackward0>)

In [117]:
# Low level: mask the per-class negative log-probabilities with the one-hot
# labels, sum within each row, and average over the rows.
neg_log_probs = -F.softmax(z, dim=1).log()
(y_one_hot * neg_log_probs).sum(dim=1).mean()

tensor(1.6079, grad_fn=<MeanBackward0>)

In [123]:
# High level: nll_loss takes the log-probabilities and the integer labels y
# directly, so no one-hot encoding is needed.
F.nll_loss(input=F.log_softmax(z, dim=1), target=y)

tensor(1.6079, grad_fn=<NllLossBackward0>)

In [124]:
# cross_entropy works on the raw logits z and the integer labels y; it gives
# the same value as nll_loss applied to log_softmax(z).
F.cross_entropy(input=z, target=y)

tensor(1.6079, grad_fn=<NllLossBackward0>)

# Training with Low-level Cross Entropy Loss

In [143]:
# Toy training set: 8 samples with 4 features each, and one integer class
# label (0, 1 or 2) per sample. Features are stored as float32 and labels as
# int64 (long), the same dtypes FloatTensor / LongTensor would produce.
x_train = torch.tensor(
    [
        [1, 2, 1, 1],
        [2, 1, 3, 2],
        [3, 1, 3, 4],
        [4, 1, 5, 5],
        [1, 7, 5, 5],
        [1, 2, 5, 6],
        [1, 6, 6, 6],
        [1, 7, 7, 7],
    ],
    dtype=torch.float32,
)
y_train = torch.tensor([2, 2, 2, 1, 1, 1, 0, 0], dtype=torch.long)

print(x_train.shape, y_train.shape, sep="\n")

torch.Size([8, 4])
torch.Size([8])


In [144]:
# Trainable parameters of the linear model, all starting at zero:
# W is 4x3 (input features x classes) and b has one entry per class.
# requires_grad=True makes autograd track them.
W = torch.zeros((4, 3), requires_grad=True)
b = torch.zeros((3,), requires_grad=True)

print(W, b, sep="\n")

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], requires_grad=True)
tensor([0., 0., 0.], requires_grad=True)


In [166]:
# Train the softmax classifier with a hand-written (low-level) cross-entropy
# loss, using plain SGD on W and b.
# NOTE: W and b are not re-created in this cell, so running it again continues
# training from their current values. Re-run the cell that initialises W and b
# first to train from scratch.
optimizer = optim.SGD([W, b], lr=1)

nb_epochs = 5000

# The labels never change, so build their one-hot encoding once instead of
# re-allocating it on every epoch.
y_train_one_hot = torch.zeros(x_train.size(0), W.size(1))
y_train_one_hot.scatter_(1, y_train.unsqueeze(1), 1)

for epoch in range(nb_epochs + 1):

    # Class scores (logits) for every training sample.
    z = x_train.matmul(W) + b

    # Use log_softmax rather than log(softmax(z)): once the logits grow large,
    # the softmax of a non-target class can underflow to 0, and the masked
    # product 0 * -log(0) then evaluates to NaN.
    log_h = F.log_softmax(z, dim=1)

    # Mean over samples of the negative log-probability of the true class.
    cost = (y_train_one_hot * -log_h).sum(dim=1).mean()

    # Clear old gradients, backpropagate, then take one SGD step.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print("Epoch: {:4d}/{}\tCost:{}".format(epoch, nb_epochs, cost.item()))

Epoch:    0/5000	Cost:0.0001869349944172427
Epoch:  100/5000	Cost:0.00018672627629712224
Epoch:  200/5000	Cost:0.00018651754362508655
Epoch:  300/5000	Cost:0.00018629392434377223
Epoch:  400/5000	Cost:0.00018605538934934884
Epoch:  500/5000	Cost:0.00018587647355161607
Epoch:  600/5000	Cost:0.00018568267114460468
Epoch:  700/5000	Cost:0.0001854590664152056
Epoch:  800/5000	Cost:0.0001852652640081942
Epoch:  900/5000	Cost:0.00018507146160118282
Epoch: 1000/5000	Cost:0.00018487765919417143
Epoch: 1100/5000	Cost:0.0001846987579483539
Epoch: 1200/5000	Cost:0.00018450497009325773
Epoch: 1300/5000	Cost:0.00018437078688293695
Epoch: 1400/5000	Cost:0.00018414721125736833
Epoch: 1500/5000	Cost:0.00018398321117274463
Epoch: 1600/5000	Cost:0.00018378940876573324
Epoch: 1700/5000	Cost:0.00018359562091063708
Epoch: 1800/5000	Cost:0.0001834465510910377
Epoch: 1900/5000	Cost:0.00018329746671952307
Epoch: 2000/5000	Cost:0.0001831260451581329
Epoch: 2100/5000	Cost:0.00018293225730303675
Epoch: 2200/5000

In [167]:
# Predicted class for the first two training samples: the column index of
# the largest softmax probability in each row.
logits_head = x_train[:2] @ W + b
y_pred = torch.softmax(logits_head, dim=1).max(dim=1).indices

In [168]:
# Show the predictions next to the true labels of the same two samples.
print(y_pred, y_train[:2], sep="\n")

tensor([2, 2])
tensor([2, 2])
