In [1]:
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# for reproducibility
torch.manual_seed(1)

<torch._C.Generator at 0x7fbe95dc7ed0>

#### Cross-entropy Loss with torch.nn.functional

In [4]:
z = torch.rand(3,5, requires_grad=True)
print(z)
hypothesis = F.softmax(z, dim=1)
print(hypothesis)

tensor([[0.4550, 0.5725, 0.4980, 0.9371, 0.6556],
        [0.3138, 0.1980, 0.4162, 0.2843, 0.3398],
        [0.5239, 0.7981, 0.7718, 0.0112, 0.8100]], requires_grad=True)
tensor([[0.1664, 0.1871, 0.1737, 0.2695, 0.2033],
        [0.2002, 0.1783, 0.2218, 0.1944, 0.2054],
        [0.1809, 0.2380, 0.2318, 0.1084, 0.2409]], grad_fn=<SoftmaxBackward0>)


In [11]:
hypothesis.sum()

tensor(3., grad_fn=<SumBackward0>)

In [12]:
hypothesis.sum(-1)

tensor([1., 1., 1.], grad_fn=<SumBackward1>)

In [13]:
hypothesis.sum(0)

tensor([0.5475, 0.6034, 0.6273, 0.5722, 0.6496], grad_fn=<SumBackward1>)

In [14]:
y = torch.randint(5,(3,)).long()
print(y)
y_one_hot = torch.zeros_like(hypothesis)
print(y_one_hot)
y_one_hot.scatter_(1, y.unsqueeze(1), 1)
print(y_one_hot)

tensor([3, 2, 3])
tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])
tensor([[0., 0., 0., 1., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.]])


In [9]:
z

tensor([[0.4550, 0.5725, 0.4980, 0.9371, 0.6556],
        [0.3138, 0.1980, 0.4162, 0.2843, 0.3398],
        [0.5239, 0.7981, 0.7718, 0.0112, 0.8100]], requires_grad=True)

In [8]:
# Low level
torch.log(F.softmax(z, dim=1))

tensor([[-1.7935, -1.6760, -1.7504, -1.3114, -1.5929],
        [-1.6086, -1.7244, -1.5062, -1.6381, -1.5826],
        [-1.7096, -1.4354, -1.4617, -2.2223, -1.4236]], grad_fn=<LogBackward0>)

In [15]:
torch.log(F.softmax(z, dim=1)).sum(dim=1).mean()

tensor(-8.1455, grad_fn=<MeanBackward0>)

In [16]:
# High Level
F.nll_loss(F.log_softmax(z,dim=1), y.long())

tensor(1.6800, grad_fn=<NllLossBackward0>)

In [17]:
# pytorch also has F.cross_entropy that combines F.log_softmax() and F.nll_loss()
F.cross_entropy(z, y)

tensor(1.6800, grad_fn=<NllLossBackward0>)

#### Data

In [19]:
xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.float32)
print(xy)

[[1. 0. 0. ... 0. 1. 0.]
 [1. 0. 0. ... 0. 1. 0.]
 [0. 0. 1. ... 0. 0. 3.]
 ...
 [1. 0. 0. ... 0. 1. 0.]
 [0. 0. 1. ... 0. 0. 6.]
 [0. 1. 1. ... 0. 0. 1.]]


In [20]:
x_train = torch.FloatTensor(xy[:,0:-1])
y_train = torch.LongTensor(xy[:,[-1]]).squeeze()
print(x_train.shape)
print(len(x_train))
print(x_train[:5])

torch.Size([101, 16])
101
tensor([[1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 4., 0., 0., 1.],
        [1., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 0., 4., 1., 0., 1.],
        [0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0.],
        [1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 4., 0., 0., 1.],
        [1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 4., 1., 0., 1.]])


In [21]:
nb_classes = 7
y_one_hot = torch.zeros((len(y_train), nb_classes))
y_one_hot = y_one_hot.scatter(1, y_train.unsqueeze(1), 1)

#### Training with F.cross_entropy

In [22]:
W = torch.zeros((16,7), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# optimizer
optimizer = optim.SGD([W,b], lr=0.1)
nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    z = x_train.matmul(W) + b
    cost = F.cross_entropy(z, y_train)

    # cost
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch    0/1000 Cost: 1.945910
Epoch  100/1000 Cost: 0.471836
Epoch  200/1000 Cost: 0.326327
Epoch  300/1000 Cost: 0.257839
Epoch  400/1000 Cost: 0.215762
Epoch  500/1000 Cost: 0.186603
Epoch  600/1000 Cost: 0.164898
Epoch  700/1000 Cost: 0.147955
Epoch  800/1000 Cost: 0.134279
Epoch  900/1000 Cost: 0.122962
Epoch 1000/1000 Cost: 0.113422
