<a href="https://colab.research.google.com/github/haizzzi/lecture_notes/blob/main/short_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Short Tutorial

## Tensor

In [1]:
import torch

x = torch.Tensor(2, 2)
x = torch.Tensor([[1, 2], [3, 4]])

In [2]:
import numpy as np

x = [[1, 2], [3, 4]]
x = np.array(x)
x = torch.from_numpy(x)

$$x=\begin{bmatrix}
1, 2 \\
3, 4
\end{bmatrix}$$

## Autograd

In [3]:
import torch

x = torch.FloatTensor(2, 2)
y = torch.FloatTensor(2, 2)
y.requires_grad_(True)

z = (x + y) + torch.FloatTensor(2, 2)

In [4]:
import torch

x = torch.FloatTensor(2, 2)
y = torch.FloatTensor(2, 2)
y.requires_grad_(True)

with torch.no_grad():
    z = (x + y) + torch.FloatTensor(2, 2)

## Feed-forward

$$\begin{gathered}
y = xW+ b \\
\text{where }x\in\mathbb{R}^{M\times N},W\in\mathbb{R}^{N\times P}\text{ and }b\in\mathbb{R}^P. \\
\text{Thus, }y\in\mathbb{R}^{M\times P}.
\end{gathered}$$

$$\begin{aligned}
y&=f(x; \theta)\text{ where }\theta=\{W, b\}
\end{aligned}$$

In [5]:
import torch

def linear(x, W, b):
    y = torch.mm(x, W) + b

    return y

x = torch.FloatTensor(16, 10)
W = torch.FloatTensor(10, 5)
b = torch.FloatTensor(5)

y = linear(x, W, b)

## nn.Module



In [44]:
import torch
import torch.nn as nn

class MyLinear(nn.Module):

    def __init__(self, input_size, output_size):
        super().__init__()

        self.W = torch.FloatTensor(input_size, output_size)
        self.b = torch.FloatTensor(output_size)

    def forward(self, x):
        y = torch.mm(x, self.W) + self.b

        return y
x = torch.FloatTensor(16, 10)
linear = MyLinear(10, 5)
y = linear(x)
print(y)

params = [p.size() for p in linear.parameters()]
print(params)


tensor([[-4.9250e+06, -3.6932e-09, -4.9546e+06,  1.9619e-09,  5.9238e-09],
        [ 2.8393e-01, -1.0495e-39,  1.2314e-01, -4.9767e-41, -3.8089e-40],
        [ 2.8393e-01, -1.0494e-39,  1.2314e-01, -4.9666e-41, -3.8072e-40],
        [ 2.8393e-01, -1.0494e-39,  1.2314e-01, -4.9511e-41, -3.8040e-40],
        [ 2.8393e-01, -1.0496e-39,  1.2314e-01, -4.9710e-41, -3.8069e-40],
        [ 2.8393e-01, -1.0496e-39,  1.2314e-01, -4.9818e-41, -3.8066e-40],
        [ 2.8393e-01, -1.0493e-39,  1.2314e-01, -4.9525e-41, -3.8048e-40],
        [ 2.8393e-01, -1.0496e-39,  1.2314e-01, -4.9745e-41, -3.8076e-40],
        [ 2.8393e-01, -1.0494e-39,  1.2314e-01, -4.9642e-41, -3.8074e-40],
        [ 2.8393e-01, -1.0496e-39,  1.2314e-01, -4.9733e-41, -3.8073e-40],
        [ 2.8393e-01, -1.0495e-39,  1.2314e-01, -4.9746e-41, -3.8058e-40],
        [ 2.8393e-01, -1.0496e-39,  1.2314e-01, -4.9696e-41, -3.8070e-40],
        [ 2.8393e-01, -1.0492e-39,  1.2314e-01, -4.9696e-41, -3.8051e-40],
        [ 2.8393e-01, -1.

nn.Module 안에 선언된 텐서가 자동으로 파라미터로 등록되려면 반드시 nn.Parameter 로 감싸야 합니다.

지금 코드에서는

self.W = torch.FloatTensor(input_size, output_size)
self.b = torch.FloatTensor(output_size)


로 선언했기 때문에, linear.parameters()에 포함되지 않습니다. 따라서 params는 빈 리스트가 됩니다.

왜 그런가?

nn.Module은 nn.Parameter 타입인 속성만 학습 가능한 파라미터로 인식합니다.

그냥 torch.FloatTensor(...)를 넣으면 일반 텐서일 뿐, requires_grad=True가 아니고 optimizer가 업데이트도 안 합니다.

참고: http://pytorch.org/docs/master/nn.html?highlight=parameter#parameters

In [45]:
class MyLinear(nn.Module):

    def __init__(self, input_size, output_size):
        super(MyLinear, self).__init__()

        self.W = nn.Parameter(torch.FloatTensor(input_size, output_size), requires_grad=True)
        self.b = nn.Parameter(torch.FloatTensor(output_size), requires_grad=True)

    def forward(self, x):
        y = torch.mm(x, self.W) + self.b

        return y
linear = MyLinear(10, 5)
params = [p.size() for p in linear.parameters()]
print(params)


[torch.Size([10, 5]), torch.Size([5])]


In [18]:
class MyLinear(nn.Module):

    def __init__(self, input_size, output_size):
        super(MyLinear, self).__init__()

        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        y = self.linear(x)

        return y

linear = MyLinear(10, 5)
print(linear)

## Backward (Back-propagation)

In [24]:
objective = 100

x = torch.FloatTensor(16, 10)
linear = MyLinear(10, 5)
y = linear(x)
loss = (objective - y.sum())**2

loss.backward()
print(loss)

tensor(inf, grad_fn=<PowBackward0>)


## train() and eval()

In [25]:
# Training...
linear.eval()
# Do some inference process.
linear.train()
# Restart training, again.

MyLinear(
  (linear): Linear(in_features=10, out_features=5, bias=True)
)

## Linear regression example

$$\mathcal{L}_{\text{MSE}}(\hat{y}, y)=\frac{1}{N}\sum^N_{i=1}{(\hat{y}_i - y_i)^2}$$

$$\begin{gathered}
y=f(x_1, x_2, x_3) = 3x_1 + x_2 - 2x_3 \\
\hat{y}=\tilde{f}(x_1,x_2,x_3;\theta) \\
\hat{\theta}=\underset{\theta\in\Theta}{\text{argmin }}\mathcal{L}(\hat{y},y)
\end{gathered}$$

In [42]:
import random

import torch
import torch.nn as nn

class MyModel(nn.Module):

    def __init__(self, input_size, output_size):
        super(MyModel, self).__init__()

        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        y = self.linear(x)

        return y

def ground_truth(x):
    y = 3 * x[:, 0] + x[:, 1] - 2 * x[:, 2]
    return y.unsqueeze(1)  # (B,1)

def train(model, x, y, optim):
    # initialize gradients in all parameters in module.
    optim.zero_grad()

    # feed-forward
    y_hat = model(x)
    # get error between answer and inferenced.
    loss = ((y - y_hat)**2).mean()

    # back-propagation
    loss.backward()

    # one-step of gradient descent
    optim.step()

    return loss.data

batch_size = 64
n_epochs = 100
n_iter = 100

model = MyModel(3, 1)
optim = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
print(model)

for epoch in range(n_epochs):
    avg_loss = 0

    for i in range(n_iter):
        x = torch.rand(batch_size, 3)
        y = ground_truth(x)  # .data 제거

        loss = train(model, x, y, optim)

        avg_loss += loss
    avg_loss = avg_loss / n_iter

    # simple test sample to check the network.
    x_valid = torch.FloatTensor([[.3, .2, .1]])
    y_valid = ground_truth(x_valid.data)

    model.eval()
    y_hat = model(x_valid)
    model.train()

    print(avg_loss, y_valid.data[0], y_hat.data[0, 0])

    if avg_loss < .001: # finish the training if the loss is smaller than .001.
        break

MyModel(
  (linear): Linear(in_features=3, out_features=1, bias=True)
)
tensor(0.8493) tensor([0.9000]) tensor(0.7928)
tensor(0.4929) tensor([0.9000]) tensor(0.8287)
tensor(0.3552) tensor([0.9000]) tensor(0.8537)
tensor(0.2655) tensor([0.9000]) tensor(0.8805)
tensor(0.1866) tensor([0.9000]) tensor(0.8978)
tensor(0.1347) tensor([0.9000]) tensor(0.9074)
tensor(0.0951) tensor([0.9000]) tensor(0.9176)
tensor(0.0691) tensor([0.9000]) tensor(0.9234)
tensor(0.0508) tensor([0.9000]) tensor(0.9354)
tensor(0.0363) tensor([0.9000]) tensor(0.9334)
tensor(0.0260) tensor([0.9000]) tensor(0.9353)
tensor(0.0193) tensor([0.9000]) tensor(0.9397)
tensor(0.0141) tensor([0.9000]) tensor(0.9386)
tensor(0.0100) tensor([0.9000]) tensor(0.9379)
tensor(0.0074) tensor([0.9000]) tensor(0.9374)
tensor(0.0055) tensor([0.9000]) tensor(0.9371)
tensor(0.0040) tensor([0.9000]) tensor(0.9346)
tensor(0.0030) tensor([0.9000]) tensor(0.9340)
tensor(0.0023) tensor([0.9000]) tensor(0.9319)
tensor(0.0017) tensor([0.9000]) ten

## Use GPU

In [43]:
import torch
import torch.nn as nn

# ----- 공통 설정 -----
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class MyModel(nn.Module):
    def __init__(self, input_size, output_size):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        return self.linear(x)

def ground_truth(x):
    # x: (..., 3)  -> y: (..., 1)
    y = 3 * x[:, 0] + x[:, 1] - 2 * x[:, 2]
    return y.unsqueeze(1)

def train(model, x, y, optim):
    optim.zero_grad()
    y_hat = model(x)
    loss = ((y - y_hat) ** 2).mean()   # 평균으로 변경(권장)
    loss.backward()
    optim.step()
    return loss.item()

# ----- 모델/옵티마이저 -----
model = MyModel(3, 1).to(device)
optim = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)

# ----- 학습 루프 -----
batch_size = 64
n_epochs = 100
n_iter = 100

for epoch in range(n_epochs):
    avg_loss = 0.0

    for i in range(n_iter):
        # 입력과 타깃을 '처음부터' 같은 device로 생성
        x = torch.rand(batch_size, 3, device=device)
        y = ground_truth(x)  # x가 CUDA면 y도 CUDA로 나옵니다.

        loss = train(model, x, y, optim)
        avg_loss += loss

    avg_loss /= n_iter

    # 검증 샘플도 같은 device로
    x_valid = torch.tensor([[0.3, 0.2, 0.1]], dtype=torch.float32, device=device)
    y_valid = ground_truth(x_valid)
    model.eval()
    with torch.no_grad():
        y_hat = model(x_valid)
    model.train()

    print(avg_loss, y_valid[0].item(), y_hat[0, 0].item())

    if avg_loss < 1e-3:
        break

1.383076980113983 0.9000000357627869 0.506269633769989
0.8366517925262451 0.9000000357627869 0.5701144337654114
0.5775570678710937 0.9000000357627869 0.6318530440330505
0.4101344475150108 0.9000000357627869 0.687428891658783
0.28684134036302567 0.9000000357627869 0.7203226685523987
0.20550175063312054 0.9000000357627869 0.7528856992721558
0.1489201507717371 0.9000000357627869 0.7700124382972717
0.10575040176510811 0.9000000357627869 0.7898146510124207
0.07683575615286826 0.9000000357627869 0.8155608177185059
0.05435170091688633 0.9000000357627869 0.8259813785552979
0.03953284310176969 0.9000000357627869 0.8461651802062988
0.028161329794675113 0.9000000357627869 0.8532272577285767
0.019513621255755426 0.9000000357627869 0.8657689094543457
0.014106674948707223 0.9000000357627869 0.8707885146141052
0.010128388414159417 0.9000000357627869 0.8753489255905151
0.00728272351436317 0.9000000357627869 0.8798295259475708
0.005111966333352029 0.9000000357627869 0.8857614398002625
0.003734011105261