# Ch.03. Hello PyTorch

refs) https://github.com/kh-kim/nlp_with_pytorch_examples/blob/master/chapter-03/short_tutorial.ipynb

In [0]:
import torch
import torch.nn as nn

## 3.3.2. Autograd

In [4]:
x = torch.FloatTensor(2,2)
print (x)
y = torch.FloatTensor(2,2)
print (y)
y.requires_grad_(True)

z = (x+y) + torch.FloatTensor(2,2)
print (z)

tensor([[1.2190e-35, 0.0000e+00],
        [1.6675e-43, 5.6052e-44]])
tensor([[1.2183e-35, 0.0000e+00],
        [5.6052e-45, 0.0000e+00]])
tensor([[3.6562e-35, 0.0000e+00],
        [1.7796e-43, 5.6052e-44]], grad_fn=<AddBackward0>)


In [5]:
x = torch.FloatTensor(2,2)
print (x)
y = torch.FloatTensor(2,2)
print (y)
y.requires_grad_(True)

with torch.no_grad() :
    z = (x+y) + torch.FloatTensor(2,2)
    print (z)

tensor([[1.2185e-35, 0.0000e+00],
        [1.5975e-43, 1.3873e-43]])
tensor([[1.2183e-35, 0.0000e+00],
        [5.6052e-45, 0.0000e+00]])
tensor([[3.6553e-35, 0.0000e+00],
        [1.7096e-43, 1.3873e-43]])


## 3.3.3. Feed-Forward

In [12]:
def linear(x, w, b) : return torch.mm(x, w) + b

x = torch.FloatTensor(16, 10)
w = torch.FloatTensor(10, 5)
b = torch.FloatTensor(5)

y = linear(x, w, b)

print (y)

tensor([[-2.7243e-41,  0.0000e+00, -1.3083e-41,  5.6052e-44, -1.3081e-41],
        [-2.6469e-41,  0.0000e+00,  1.6675e-43,  5.6052e-44,  1.6816e-43],
        [-3.8988e-41,  0.0000e+00, -1.3088e-41,  5.6052e-44, -1.3087e-41],
        [-2.5649e-41,  0.0000e+00, -6.8578e-41,  5.6052e-44,  1.6816e-43],
        [-2.6101e-41,  0.0000e+00, -8.3515e-41,  5.6052e-44, -1.2696e-41],
        [-1.3258e-41,  0.0000e+00, -6.8571e-41,  5.6052e-44,  1.6816e-43],
        [-2.5732e-41,  0.0000e+00, -8.3545e-41,  5.6052e-44, -1.2699e-41],
        [-1.2867e-41,  0.0000e+00, -8.1159e-41,  5.6052e-44, -1.2699e-41],
        [-2.6592e-41,  0.0000e+00, -1.3130e-41,  5.6052e-44, -1.3129e-41],
        [-1.3255e-41,  0.0000e+00, -7.4590e-41,  5.6052e-44,  1.6816e-43],
        [-1.3253e-41,  0.0000e+00, -6.8298e-41,  5.6052e-44,  1.6816e-43],
        [-2.6564e-41,  0.0000e+00,  1.6675e-43,  5.6052e-44,  1.6816e-43],
        [-2.6123e-41,  0.0000e+00, -1.2699e-41,  5.6052e-44, -1.2697e-41],
        [-3.8597e-41,  0.

## 3.3.4. nn.Module

In [0]:
import torch.nn as nn

class MyLinear(nn.Module) :
    def __init__(self, input_size, output_size) :
        super().__init__()
        self.w = torch.FloatTensor(input_size, output_size)
        self.b = torch.FloatTensor(output_size)

    def forward(self, x) :
        return torch.mm(x, self.w) + self.b

In [14]:
x = torch.FloatTensor(16, 10)
linear = MyLinear(10, 5)
print (linear(x))

tensor([[ 1.2189e-35,  0.0000e+00,  1.6816e-43,  5.6052e-44,  1.5975e-43],
        [ 1.2189e-35,  0.0000e+00, -6.5389e-41,  5.6052e-44,  1.5975e-43],
        [ 1.2189e-35,  0.0000e+00, -7.7420e-41,  5.6052e-44, -1.0703e-41],
        [ 1.2189e-35,  0.0000e+00, -1.0504e-41,  5.6052e-44, -1.0513e-41],
        [ 1.2189e-35,  0.0000e+00, -7.6256e-41,  5.6052e-44, -1.0705e-41],
        [ 1.2189e-35,  0.0000e+00, -1.0560e-41,  5.6052e-44, -1.0569e-41],
        [ 1.2189e-35,  0.0000e+00, -7.6842e-41,  5.6052e-44, -1.0703e-41],
        [ 1.2189e-35,  0.0000e+00, -6.5724e-41,  5.6052e-44,  1.5975e-43],
        [ 1.2189e-35,  0.0000e+00, -7.6109e-41,  5.6052e-44, -1.0566e-41],
        [ 1.2189e-35,  0.0000e+00, -7.6403e-41,  5.6052e-44, -1.0513e-41],
        [ 1.2189e-35,  0.0000e+00, -7.6107e-41,  5.6052e-44, -1.0567e-41],
        [ 1.2189e-35,  0.0000e+00, -6.5379e-41,  5.6052e-44,  1.5975e-43],
        [ 1.2189e-35,  0.0000e+00, -7.7937e-41,  5.6052e-44, -1.0564e-41],
        [ 1.2189e-35,  0.

위 함수에서 w, b 선언 방식 문제..

`parameters()` 함수를 통해 모듈 내 선언된, 학습이 필요한 parameter 반환

신경망의 학습 parameters는 단순한 tensor가 아니기 때문에, parameters로 등록해야 함

In [15]:
params = [p.size() for p in linear.parameters()]
print (params)

[]


In [0]:
class MyLinear(nn.Module) :
    def __init__(self, input_size, output_size) :
        super(MyLinear, self).__init__()
        self.w = nn.Parameter( torch.FloatTensor(input_size, output_size), requires_grad = True)
        self.b = nn.Parameter (torch.FloatTensor(output_size), requires_grad = True)

    def forward(self, x) :
        return torch.mm(x, self.w) + self.b

In [17]:
linear = MyLinear(10, 5)
print (linear(x))

tensor([[ 1.2187e-35,  0.0000e+00, -1.0696e-41,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00, -1.0696e-41,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00,  4.0638e-44,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00, -1.0636e-41,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00, -1.0634e-41,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00, -1.0637e-41,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00,  4.0638e-44,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00, -1.0826e-41,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00, -1.0636e-41,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00, -1.0692e-41,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00, -1.0637e-41,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00, -1.0829e-41,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.0000e+00,  4.0638e-44,  0.0000e+00,         nan],
        [ 1.2187e-35,  0.

In [18]:
print ([p.size() for p in linear.parameters()])

[torch.Size([10, 5]), torch.Size([5])]


In [21]:
[p for p in linear.parameters()]

[Parameter containing:
 tensor([[2.0143e-36, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00, 3.5733e-43, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 3.5733e-43, 0.0000e+00, 3.6013e-43, 0.0000e+00],
         [2.0143e-36, 0.0000e+00, 2.0143e-36, 0.0000e+00, 3.5733e-43],
         [0.0000e+00, 7.8935e-42, 0.0000e+00, 2.0143e-36, 0.0000e+00],
         [2.0143e-36, 0.0000e+00, 3.5733e-43, 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
         [3.5733e-43, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00, 3.5733e-43, 0.0000e+00]],
        requires_grad=True), Parameter containing:
 tensor([1.2187e-35, 0.0000e+00, 4.0638e-44, 0.0000e+00,        nan],
        requires_grad=True)]

가장 깔끔한 아래 코드를 사용


In [22]:
class MyLinear(nn.Module) : 
    def __init__(self, input_size, output_size) : 
        super(MyLinear, self).__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x) :
        return self.linear(x)

linear = MyLinear(10,5)

linear(x)

tensor([[-2.9471e-01,  9.6675e-02, -8.8960e-02, -6.2746e-02,  2.5093e-01],
        [-2.9471e-01,  9.6673e-02, -8.8963e-02, -6.2747e-02,  2.5093e-01],
        [-2.9471e-01,  9.6673e-02, -8.8961e-02, -6.2748e-02,  2.5093e-01],
        [-2.9471e-01,  9.6674e-02, -8.8962e-02, -6.2744e-02,  2.5093e-01],
        [-2.9471e-01,  9.6674e-02, -8.8962e-02, -6.2747e-02,  2.5093e-01],
        [-2.9471e-01,  9.6674e-02, -8.8962e-02, -6.2744e-02,  2.5093e-01],
        [-2.9471e-01,  9.6673e-02, -8.8961e-02, -6.2748e-02,  2.5093e-01],
        [-2.9471e-01,  9.6673e-02, -8.8963e-02, -6.2747e-02,  2.5093e-01],
        [-2.9471e-01,  9.6674e-02, -8.8962e-02, -6.2747e-02,  2.5093e-01],
        [-2.9471e-01,  9.6673e-02, -8.8963e-02, -6.2746e-02,  2.5093e-01],
        [-2.9471e-01,  9.6674e-02, -8.8962e-02, -6.2746e-02,  2.5093e-01],
        [-2.9471e-01,  9.6673e-02, -8.8963e-02, -6.2747e-02,  2.5093e-01],
        [-2.9471e-01,  9.6672e-02, -8.8962e-02, -6.2746e-02,  2.5093e-01],
        [-2.9471e-01,  9.

In [23]:
[p.size() for p in linear.parameters()]

[torch.Size([5, 10]), torch.Size([5])]

In [25]:
[p for p in linear.parameters()]

[Parameter containing:
 tensor([[ 0.3122,  0.0976,  0.0400,  0.1676, -0.2270,  0.2367,  0.0423,  0.0965,
           0.0543, -0.2136],
         [ 0.2491,  0.1351,  0.1358, -0.2782, -0.1128,  0.2338,  0.1164,  0.2237,
           0.1390, -0.1501],
         [ 0.1598, -0.2440,  0.2555, -0.2672,  0.2168,  0.2072,  0.0357,  0.1776,
           0.2133,  0.2402],
         [-0.0828,  0.0682,  0.2532, -0.0675, -0.1221, -0.2264, -0.2131, -0.1212,
          -0.2197,  0.2352],
         [-0.0745,  0.2733, -0.2222,  0.1824, -0.2569, -0.0230,  0.2870,  0.0790,
          -0.2390,  0.2252]], requires_grad=True), Parameter containing:
 tensor([-0.2947,  0.0967, -0.0890, -0.0627,  0.2509], requires_grad=True)]

In [24]:
linear

MyLinear(
  (linear): Linear(in_features=10, out_features=5, bias=True)
)

## 3.3.5. Back-Propagation

In [0]:
objective = 100

x = torch.FloatTensor(16, 10)
linear = MyLinear(10, 5)
y = linear(x)
loss = (objective - y.sum()) ** 2

loss.backward()


In [27]:
loss

tensor(nan, grad_fn=<PowBackward0>)

## 3.3.6. train() & eval()

- `train()` : default,
- `eval()` : inference mode, dropout, batch-normalization 등 학습과 추론 모드에서 서로 다른 forward() 동작을 하는 모듈들에 대해서 각 상황에 따라 동작

In [28]:
linear.eval()

MyLinear(
  (linear): Linear(in_features=10, out_features=5, bias=True)
)

In [29]:
linear.train()

MyLinear(
  (linear): Linear(in_features=10, out_features=5, bias=True)
)

## 3.3.7. an example of linear regression

In [2]:
import random

class MyModel(nn.Module) : 
    def __init__(self, input_size, output_size) : 
        super(MyModel, self).__init__()

        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x) : 
        return self.linear(x)

#임의의 함수f(x) = 3x1 + 1x2 - 2x3
def ground_truth (x) :
    return 3 * x[:,0] + x[:,1] -2 * x[:, 2]

def train(model, x, y, optim):
    optim.zero_grad()
    y_hat = model(x)
    loss = ((y-y_hat) ** 2).sum() / x.size(0)
    loss.backward()
    optim.step()
    return loss.data

BATCH_SIZE = 1
N_EPOCHS = 1000
N_ITER = 10000

model = MyModel(3, 1)
optim = torch.optim.SGD(model.parameters(), lr = 1e-4, momentum=0.1)

print (model)

from time import time

start = time()

for epoch in range(N_EPOCHS) :
    print (f"time : {time()-start}")
    avg_loss = 0

    for i in range(N_ITER) : 
        x = torch.rand(BATCH_SIZE, 3)
        y = ground_truth(x.data)

        loss = train(model, x, y, optim)

        avg_loss += loss
    avg_loss /= N_ITER

    x_valid = torch.FloatTensor([[.3, .2, .1]])
    y_valid = ground_truth(x_valid.data)

    model.eval()
    y_hat = model(x_valid)

    model.train()
    print (f'avg_loss : {avg_loss} y_valid.data[0] : {y_valid.data[0]} y_hat.data[0,0] : {y_hat.data[0,0]}')

    if avg_loss < 1e-3: break

print (f"total time : {time()-start}")

MyModel(
  (linear): Linear(in_features=3, out_features=1, bias=True)
)
time : 0.00013566017150878906
avg_loss : 1.142147421836853 y_valid.data[0] : 0.9000000357627869 y_hat.data[0,0] : 0.6743265390396118
time : 4.211481094360352
avg_loss : 0.5816097259521484 y_valid.data[0] : 0.9000000357627869 y_hat.data[0,0] : 0.7645766735076904
time : 8.101830244064331
avg_loss : 0.39970049262046814 y_valid.data[0] : 0.9000000357627869 y_hat.data[0,0] : 0.7807509899139404
time : 12.065470933914185
avg_loss : 0.2833881378173828 y_valid.data[0] : 0.9000000357627869 y_hat.data[0,0] : 0.8178048729896545
time : 15.95995283126831
avg_loss : 0.19558845460414886 y_valid.data[0] : 0.9000000357627869 y_hat.data[0,0] : 0.8397765159606934
time : 19.83980679512024
avg_loss : 0.140218585729599 y_valid.data[0] : 0.9000000357627869 y_hat.data[0,0] : 0.8659509420394897
time : 23.79526400566101
avg_loss : 0.09526846557855606 y_valid.data[0] : 0.9000000357627869 y_hat.data[0,0] : 0.8728100061416626
time : 27.70073533