In [1]:
import torch

## Loss Function

In [2]:
def mse(x_hat, x):
    """Mean squared error between a prediction and its target.

    Args:
        x_hat: predicted tensor, expected shape (batch_size, dim).
        x: target tensor, expected shape (batch_size, dim).

    Returns:
        A scalar tensor: the squared difference averaged over every element.
    """
    squared_error = (x - x_hat).pow(2)

    return squared_error.mean()

In [3]:
# Toy target and an all-zero prediction, both float32 tensors of shape (2, 2).
x = torch.tensor([[1, 1], [2, 2]], dtype=torch.float32)
x_hat = torch.zeros_like(x)
x.shape, x_hat.shape

(torch.Size([2, 2]), torch.Size([2, 2]))

In [4]:
# Hand-written MSE on the toy tensors: the squared errors are 1, 1, 4, 4, so the mean is 2.5.
mse(x_hat, x)

tensor(2.5000)

### Predefined MSE in torch

In [5]:
import torch.nn.functional as F

In [6]:
# Built-in counterpart of the hand-written mse(); the default reduction is 'mean'.
F.mse_loss(x_hat, x)

tensor(2.5000)

In [10]:
# reduction='none' keeps the element-wise squared errors (same shape as the inputs).
F.mse_loss(x_hat, x, reduction='none')

tensor([[1., 1.],
        [4., 4.]])

In [7]:
# reduction='sum' adds up every element-wise squared error: 1 + 1 + 4 + 4 = 10.
F.mse_loss(x_hat, x, reduction='sum')

tensor(10.)

In [11]:
# reduction='mean' averages over all elements: 10 / 4 = 2.5 (same as the default call).
F.mse_loss(x_hat, x, reduction='mean')

tensor(2.5000)

In [12]:
import torch.nn as nn

In [13]:
# Module (class) form of the same loss: instantiate it once, then call it like a function.
mse_loss = nn.MSELoss()

# Gives the same value as F.mse_loss(x_hat, x) on the toy tensors.
mse_loss(x_hat, x)

tensor(2.5000)

## Gradient Descent

In [14]:
# Mark x as a tensor autograd should track (requires_grad=True).
x = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32, requires_grad=True)

In [15]:
# Build a small computation graph on top of x so autograd has something to differentiate.
x1 = x + 2
x2 = x - 2
x3 = x1 * x2  # element-wise (x + 2) * (x - 2) = x**2 - 4
y = x3.sum()  # reduce to a scalar so backward() can be called without arguments

# Show every intermediate; each one carries a grad_fn naming the op that produced it.
x1, x2, x3, y

(tensor([[3., 4.],
         [5., 6.]], grad_fn=<AddBackward0>),
 tensor([[-1.,  0.],
         [ 1.,  2.]], grad_fn=<SubBackward0>),
 tensor([[-3.,  0.],
         [ 5., 12.]], grad_fn=<MulBackward0>),
 tensor(14., grad_fn=<SumBackward0>))

In [16]:
# Run differentiation via autograd: backpropagates from y and fills in x.grad.
y.backward()

In [17]:
# dy/dx = 2x because y = sum(x**2 - 4), hence [[2, 4], [6, 8]].
x.grad

tensor([[2., 4.],
        [6., 8.]])

In [19]:
# The values of x are untouched by backward(); only its .grad attribute was populated.
x

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [22]:
# Demonstrate that a tensor still attached to the autograd graph cannot be
# converted to NumPy directly. The expected RuntimeError is caught and printed
# so that "Restart Kernel -> Run All" does not stop at this cell.
try:
    x3.numpy()
except RuntimeError as err:
    print('RuntimeError:', err)

RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.

In [23]:
# detach() gives a tensor cut off from the autograd graph, which can be converted to NumPy.
x3.detach().numpy()

array([[-3.,  0.],
       [ 5., 12.]], dtype=float32)

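#### TODO: Autograd 추가 공부 필요 — 왜 gradient가 x에 반영되는가?

`x`는 `requires_grad=True`로 만든 leaf tensor이므로, `y.backward()`를 호출하면 autograd가 연산 그래프를 거꾸로 따라가며 계산한 dy/dx를 `x.grad`에 누적한다. 자세한 내용은 아래 튜토리얼 참고.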

https://tutorials.pytorch.kr/beginner/blitz/autograd_tutorial.html

### Gradient by torch

In [26]:
import torch.nn.functional as F

In [25]:
# Fixed 3x3 float32 target that the gradient-descent demo tries to reach.
target = torch.tensor(
    [
        [0.1, 0.2, 0.3],
        [0.4, 0.5, 0.6],
        [0.7, 0.8, 0.9],
    ],
    dtype=torch.float32,
)

In [27]:
# Random starting point with the same shape and dtype as target, tracked by autograd.
x = torch.rand_like(target).requires_grad_(True)

x

tensor([[0.8842, 0.6144, 0.7065],
        [0.0245, 0.7161, 0.8416],
        [0.2346, 0.6684, 0.6371]], requires_grad=True)

In [28]:
# Initial loss between the random x and the target; its grad_fn links it back to x.
loss = F.mse_loss(x, target)

loss

tensor(0.1668, grad_fn=<MseLossBackward0>)

In [30]:
# Plain gradient descent on x until the MSE against target drops below threshold.
threshold = 1e-5
learning_rate = 1.
# Safety cap: without it, a learning rate that fails to shrink the loss
# (e.g. one that makes x oscillate around target) would loop forever.
max_iter = 10000
iter_cnt = 0

while loss > threshold and iter_cnt < max_iter:
    iter_cnt += 1

    loss.backward()  # compute d(loss)/dx and store it in x.grad

    # Gradient step. The subtraction yields a new tensor that is recorded in the graph.
    x = x - learning_rate * x.grad

    # Cut the new x off from the previous graph and make it a fresh tracked
    # tensor, so the next backward() only covers the next loss computation.
    x.detach_()
    x.requires_grad_(True)

    loss = F.mse_loss(x, target)

    print('%d-th Loss: %.4e' % (iter_cnt, loss))
    print(x)

1-th Loss: 6.1035e-02
tensor([[0.5744, 0.4507, 0.5459],
        [0.1728, 0.6307, 0.7462],
        [0.4185, 0.7204, 0.7409]], requires_grad=True)
2-th Loss: 3.6922e-02
tensor([[0.4690, 0.3950, 0.4913],
        [0.2233, 0.6017, 0.7137],
        [0.4810, 0.7381, 0.7763]], requires_grad=True)
3-th Loss: 2.2336e-02
tensor([[0.3870, 0.3517, 0.4488],
        [0.2626, 0.5791, 0.6884],
        [0.5297, 0.7518, 0.8038]], requires_grad=True)
4-th Loss: 1.3512e-02
tensor([[0.3232, 0.3180, 0.4157],
        [0.2931, 0.5615, 0.6688],
        [0.5675, 0.7625, 0.8252]], requires_grad=True)
5-th Loss: 8.1738e-03
tensor([[0.2736, 0.2917, 0.3900],
        [0.3169, 0.5478, 0.6535],
        [0.5970, 0.7709, 0.8418]], requires_grad=True)
6-th Loss: 4.9446e-03
tensor([[0.2350, 0.2714, 0.3700],
        [0.3353, 0.5372, 0.6416],
        [0.6199, 0.7773, 0.8547]], requires_grad=True)
7-th Loss: 2.9912e-03
tensor([[0.2050, 0.2555, 0.3544],
        [0.3497, 0.5289, 0.6324],
        [0.6377, 0.7824, 0.8648]], requi