#### Linear regression

Imports

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
# For reproducibility: seed PyTorch's global random number generator.
# manual_seed returns the torch.Generator object, which the notebook echoes
# because the call is the last expression in the cell.
torch.manual_seed(1)

<torch._C.Generator at 0x17a12fbd8d0>

Data

In [5]:
# Toy dataset: three samples with one feature each, so both tensors have shape (3, 1).
# torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor
# constructor and produces the same float32 values.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

In [6]:
# Show the inputs and their shape, one per line.
print(x_train, x_train.shape, sep="\n")

tensor([[1.],
        [2.],
        [3.]])
torch.Size([3, 1])


In [7]:
# Show the targets and their shape, one per line.
print(y_train, y_train.shape, sep="\n")

tensor([[1.],
        [2.],
        [3.]])
torch.Size([3, 1])


$$X = \begin{bmatrix} x^{(1)} \\ x^{(2)} \\ x^{(3)} \end{bmatrix} = \begin{bmatrix} 1 \\ 2 \\ 3 \end{bmatrix}, \quad Y = \begin{bmatrix} y^{(1)} \\ y^{(2)} \\ y^{(3)} \end{bmatrix} = \begin{bmatrix} 1 \\ 2 \\ 3 \end{bmatrix}$$

Weight Initialization

In [8]:
# Weight starts at zero; requires_grad_() flags the leaf tensor for autograd tracking.
W = torch.zeros(1).requires_grad_()
print(W)

tensor([0.], requires_grad=True)


In [9]:
# Bias starts at zero; requires_grad_() flags the leaf tensor for autograd tracking.
b = torch.zeros(1).requires_grad_()
print(b)

tensor([0.], requires_grad=True)


- 현재 직선의 모양
$$y = 0 \cdot x + 0$$

Hypothesis

$$ H(x) = Wx + b $$

In [10]:
# Linear hypothesis H(x) = Wx + b; W and b (shape (1,)) broadcast over the (3, 1) inputs.
hypothesis = W * x_train + b
print(hypothesis)

tensor([[0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)


$$ \text{cost}(W, b) = \frac{1}{m} \sum_{i=1}^{m} \left( H(x^{(i)}) - y^{(i)} \right)^2 $$

In [11]:
# Predictions produced by the current (all-zero) parameters.
print(hypothesis)

tensor([[0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)


In [12]:
# Targets that the predictions are compared against.
print(y_train)

tensor([[1.],
        [2.],
        [3.]])


In [13]:
# Per-sample error: prediction minus target.
print(hypothesis - y_train)

tensor([[-1.],
        [-2.],
        [-3.]], grad_fn=<SubBackward0>)


In [14]:
# Squared per-sample error; the cost below is the mean of these values.
print((hypothesis - y_train) ** 2)

tensor([[1.],
        [4.],
        [9.]], grad_fn=<PowBackward0>)


In [15]:
# Mean squared error between the predictions and the targets.
cost = (hypothesis - y_train).pow(2).mean()
print(cost)

tensor(4.6667, grad_fn=<MeanBackward0>)


Gradient Descent

In [16]:
# Plain SGD over the two trainable tensors; lr is the step size applied by optimizer.step().
optimizer = optim.SGD([W, b], lr=0.01)

In [17]:
optimizer.zero_grad() # reset any gradients already stored on the model variables (W, b)
cost.backward() # differentiate the cost with respect to W and b
optimizer.step() # actually update the values

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


$Cost = \frac{1}{m} \sum (Wx + b - y)^2$
- $$\frac{\partial Cost}{\partial W} = \frac{2}{m} \sum (Wx + b - y) \cdot x$$
- $$\frac{\partial Cost}{\partial b} = \frac{2}{m} \sum (Wx + b - y) \cdot 1$$

- $$W = W - (\text{learning rate} \times W.grad)$$
- $$b = b - (\text{learning rate} \times b.grad)$$

- 새로운 $$W: 0 - (0.01 \times (-9.3333)) = \mathbf{0.0933}$$
- 새로운 $$b: 0 - (0.01 \times (-4.0000)) = \mathbf{0.0400}$$

In [18]:
# Updated parameters and the gradients used for the update, one per line.
print(W, W.grad, b, b.grad, sep="\n")

tensor([0.0933], requires_grad=True)
tensor([-9.3333])
tensor([0.0400], requires_grad=True)
tensor([-4.])


$$H(x) = 0.0933x + 0.0400$$

In [19]:
# Recompute H(x) = Wx + b with the parameters after one SGD step.
hypothesis = W * x_train + b
print(hypothesis)

tensor([[0.1333],
        [0.2267],
        [0.3200]], grad_fn=<AddBackward0>)


In [20]:
# Mean squared error of the updated predictions.
cost = (hypothesis - y_train).pow(2).mean()
print(cost)

tensor(3.6927, grad_fn=<MeanBackward0>)


Training with Full Code

In [21]:
# Data: three (x, y) samples, each tensor of shape (3, 1).
# torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor
# constructor and produces the same float32 values.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

# Model initialisation: weight and bias start at zero and are tracked by autograd.
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# Optimizer setup: plain SGD over the two parameters.
optimizer = optim.SGD([W, b], lr=0.01)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):

    # Compute H(x)
    hypothesis = x_train * W + b

    # Compute the cost (mean squared error)
    cost = torch.mean((hypothesis - y_train) ** 2)

    # Use the cost to improve H(x)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log every 100 epochs. W and b are read after optimizer.step() while cost
    # was computed before it, so the logged cost belongs to the pre-update parameters.
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
            epoch, nb_epochs, W.item(), b.item(), cost.item()
        ))

Epoch    0/1000 W: 0.093, b: 0.040 Cost: 4.666667
Epoch  100/1000 W: 0.873, b: 0.289 Cost: 0.012043
Epoch  200/1000 W: 0.900, b: 0.227 Cost: 0.007442
Epoch  300/1000 W: 0.921, b: 0.179 Cost: 0.004598
Epoch  400/1000 W: 0.938, b: 0.140 Cost: 0.002842
Epoch  500/1000 W: 0.951, b: 0.110 Cost: 0.001756
Epoch  600/1000 W: 0.962, b: 0.087 Cost: 0.001085
Epoch  700/1000 W: 0.970, b: 0.068 Cost: 0.000670
Epoch  800/1000 W: 0.976, b: 0.054 Cost: 0.000414
Epoch  900/1000 W: 0.981, b: 0.042 Cost: 0.000256
Epoch 1000/1000 W: 0.985, b: 0.033 Cost: 0.000158


High-level Implementation with `nn.Module`

- Remember that we had this fake data.

In [4]:
# Same toy dataset as before: three samples with one feature each, shape (3, 1).
# torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor
# constructor and produces the same float32 values.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

- PyTorch의 모든 모델은 제공되는 `nn.Module` 을 inherit 해서 만들게 된다.

In [None]:
class LinearRegressionModel(nn.Module):
    """Linear regression model built from a single ``nn.Linear`` layer.

    Args:
        in_features: Size of each input sample. Defaults to 1.
        out_features: Size of each output sample. Defaults to 1.

    Calling ``LinearRegressionModel()`` with no arguments builds the same
    one-input, one-output layer as ``nn.Linear(1, 1)``.
    """

    def __init__(self, in_features=1, out_features=1):
        super().__init__()
        # Assigning the layer as an attribute registers its weight and bias
        # with the module, so model.parameters() yields them.
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the linear layer's output for ``x``.

        Args:
            x: Input tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``out_features``.
        """
        return self.linear(x)

In [7]:
# Instantiate the model and show its single linear layer.
model = LinearRegressionModel()
print(model.linear)

Linear(in_features=1, out_features=1, bias=True)


Hypothesis

In [8]:
# Calling the model runs its forward(), i.e. the linear layer, on the inputs.
hypothesis = model(x_train)

In [9]:
# Predictions from the freshly initialised model.
print(hypothesis)

tensor([[ 0.2755],
        [ 0.0816],
        [-0.1122]], grad_fn=<AddmmBackward0>)


Cost

In [10]:
# Predictions followed by the targets, one per line.
print(hypothesis, y_train, sep="\n")

tensor([[ 0.2755],
        [ 0.0816],
        [-0.1122]], grad_fn=<AddmmBackward0>)
tensor([[1.],
        [2.],
        [3.]])


In [11]:
# Mean squared error between predictions and targets (mse_loss averages by default).
cost = F.mse_loss(hypothesis, y_train)

In [12]:
# Scalar cost for the current predictions.
print(cost)

tensor(4.6303, grad_fn=<MseLossBackward0>)


Gradient Descent

- W 와 b 를 바꾸어서 cost 줄여보기
- PyTorch 의 `torch.optim` 에 있는 `optimizer` 를 사용하면 된다.

In [None]:
# SGD over every parameter registered on the model (the linear layer's weight and bias).
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [14]:
# One gradient-descent step on the model's parameters.
optimizer.zero_grad()  # reset gradients left over from any previous backward pass
cost.backward()  # compute d(cost)/d(parameter) for each model parameter
optimizer.step()  # apply the SGD update using those gradients

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Training with Full Code

In [15]:
# Data: three (x, y) samples, each tensor of shape (3, 1).
# torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor
# constructor and produces the same float32 values.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
# Model initialisation
model = LinearRegressionModel()
# Optimizer setup
optimizer = optim.SGD(model.parameters(), lr=0.01)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):

    # Compute H(x)
    prediction = model(x_train)

    # Compute the cost (mean squared error)
    cost = F.mse_loss(prediction, y_train)

    # Use the cost to improve H(x)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log every 100 epochs
    if epoch % 100 == 0:
        # Read the learned values by attribute rather than by position in
        # model.parameters(), and keep them in dedicated names so the
        # notebook-level tensors W and b are not rebound to plain floats.
        weight_value = model.linear.weight.item()
        bias_value = model.linear.bias.item()
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
            epoch, nb_epochs, weight_value, bias_value, cost.item()
        ))

Epoch    0/1000 W: -0.784, b: 0.665 Cost: 13.291902
Epoch  100/1000 W: 0.580, b: 0.955 Cost: 0.131521
Epoch  200/1000 W: 0.670, b: 0.751 Cost: 0.081272
Epoch  300/1000 W: 0.740, b: 0.590 Cost: 0.050221
Epoch  400/1000 W: 0.796, b: 0.464 Cost: 0.031034
Epoch  500/1000 W: 0.840, b: 0.365 Cost: 0.019177
Epoch  600/1000 W: 0.874, b: 0.287 Cost: 0.011850
Epoch  700/1000 W: 0.901, b: 0.225 Cost: 0.007323
Epoch  800/1000 W: 0.922, b: 0.177 Cost: 0.004525
Epoch  900/1000 W: 0.939, b: 0.139 Cost: 0.002796
Epoch 1000/1000 W: 0.952, b: 0.109 Cost: 0.001728
