# Linear Regression
- Data definition
- Hypothesis
- Compute loss
- Gradient descent

In [9]:
import torch
from torch import optim
import numpy as np

## Data definition

In [3]:
# 입출력은 x, y 로 구분
x_train = torch.FloatTensor([[1],[2],[3]])
y_train = torch.FloatTensor([[2],[4],[6]])

## Hypothesis
$y = Wx + b$
또는 $H(x) = Wx + b$

In [4]:
# W, b 초기화(0)
W = torch.zeros(1, requires_grad = True) # 학습을 명시
b = torch.zeros(1, requires_grad = True)
hypothesis = x_train * W + b

## compute loss 
### Mean Squared Error (MSE)
$cost(W, b) =  \frac{1}{m} \sum_{i=1}^{m}(H( x^{i})- y^{i})^2)$

In [5]:
cost = torch.mean((hypothesis - y_train) ** 2) # 평균 계산

## Gradient descient

In [11]:
nb_epoches = 1000

for epoch in range(1, nb_epoches + 1):
    hypothesis = x_train * W + b
    cost = torch.mean((hypothesis - y_train) ** 2)
    

In [12]:
optimizer = optim.SGD([W,b], lr=0.01) # torch.optim 라이브러리 사용

optimizer.zero_grad() # gradient 초기화
cost.backward() # gradient 계산
optimizer.step() # 개선

# Deeper Loot at GD
## Simpler Hypothesis Function
$H(x) = Wx$ 로 가정

In [13]:
# 입출력은 x, y 로 구분
x_train = torch.FloatTensor([[1],[2],[3]])
y_train = torch.FloatTensor([[1],[2],[3]])

## cost function: Intuition
W = 1 일 때, cost = 0

In [14]:
cost = torch.mean((hypothesis - y_train) ** 2)

## Gradient Descent : Intuition
Gradient 계산하기
- $\frac{\partial cost}{\partial W} = \nabla W$
- $cost(W) = \frac{1}{m}\sum_{i=1}^{m}(Wx^{i}-y^{i})^{2}$
- $\nabla W = \frac{\partial cost}{\partial W} = \frac{2}{m}\sum_{i=1}^{m}(Wx^{i}-y^{i})x^{i}$
- $ W: = W - \alpha \nabla W $

In [16]:
gradient = 2 * torch.mean((W * x_train - y_train) * x_train)
lr = 0.1
W -= lr * gradient

RuntimeError: a leaf Variable that requires grad is being used in an in-place operation.

In [22]:
# model 초기화
W = torch.zeros(1)
# 학습률 설정
lr = 0.1

nb_epoches = 10

for epoch in range(1, nb_epoches + 1):
    
    hypothesis = x_train * W 
    
    cost = torch.mean((hypothesis - y_train) ** 2)
    gradient = torch.sum((W * x_train - y_train) * x_train)
    
    print('Epoch {:4d}/{}, W: {:.3f}, cost: {:.6f}'.format(
        epoch, nb_epoches, W.item(), cost.item()))

    # cost gradient 로 H(x) 개선
    W -= lr * gradient

Epoch    1/10, W: 0.000, cost: 4.666667
Epoch    2/10, W: 1.400, cost: 0.746666
Epoch    3/10, W: 0.840, cost: 0.119467
Epoch    4/10, W: 1.064, cost: 0.019115
Epoch    5/10, W: 0.974, cost: 0.003058
Epoch    6/10, W: 1.010, cost: 0.000489
Epoch    7/10, W: 0.996, cost: 0.000078
Epoch    8/10, W: 1.002, cost: 0.000013
Epoch    9/10, W: 0.999, cost: 0.000002
Epoch   10/10, W: 1.000, cost: 0.000000


In [23]:
# 모델 초기화
W = torch.zeros(1, requires_grad = True)

# optimizer 설정
optimizer = optim.SGD([W], lr=0.15) 

# batch size
nb_epoches = 10

for epoch in range(1, nb_epoches + 1):
    
    hypothesis = x_train * W 
    
    cost = torch.mean((hypothesis - y_train) ** 2)
    gradient = torch.sum((W * x_train - y_train) * x_train)
    
    print('Epoch {:4d}/{}, W: {:.3f}, cost: {:.6f}'.format(
        epoch, nb_epoches, W.item(), cost.item()))
  
    optimizer.zero_grad() # gradient 초기화
    cost.backward() # gradient 계산
    optimizer.step() # 개선
    

Epoch    1/10, W: 0.000, cost: 4.666667
Epoch    2/10, W: 1.400, cost: 0.746667
Epoch    3/10, W: 0.840, cost: 0.119467
Epoch    4/10, W: 1.064, cost: 0.019115
Epoch    5/10, W: 0.974, cost: 0.003058
Epoch    6/10, W: 1.010, cost: 0.000489
Epoch    7/10, W: 0.996, cost: 0.000078
Epoch    8/10, W: 1.002, cost: 0.000013
Epoch    9/10, W: 0.999, cost: 0.000002
Epoch   10/10, W: 1.000, cost: 0.000000
