In [1]:
import torch
import torch.optim as optim

### 1. Data Definition: vector
$\mathbf{X}_{train}=\begin{pmatrix}1 \\ 2 \\ 3 \end{pmatrix} \ $
$\mathbf{Y}_{train}=\begin{pmatrix}2 \\ 4 \\ 6 \end{pmatrix}$

In [2]:
x_train = torch.tensor([[1],[2],[3]], dtype=torch.float)
y_train = torch.tensor([[2],[4],[6]], dtype=torch.float)
x_train, y_train

(tensor([[1.],
         [2.],
         [3.]]),
 tensor([[2.],
         [4.],
         [6.]]))

### 2. Hypothesis
$\mathbf{y}=\mathbf{W}\mathbf{x}+\mathbf{b}$

In [3]:
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
W, b

(tensor([0.], requires_grad=True), tensor([0.], requires_grad=True))

In [4]:
hypothesis = x_train * W + b
hypothesis

tensor([[0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)

### 3. Loss(Cost) Function
$\frac{1}{m} \sum_{i=1}^{m}(H(x^{(i)})- y^{(i)})^2$

In [5]:
cost = torch.mean(hypothesis - y_train)**2
cost

tensor(16., grad_fn=<PowBackward0>)

### 4. Gradient descent
- optimizer
- backward: gradient 계산
- step(): update

In [6]:
optimizer = optim.SGD([W, b], lr=0.01)

In [7]:
optimizer.zero_grad()
cost.backward()
optimizer.step()
W, b

(tensor([0.1600], requires_grad=True), tensor([0.0800], requires_grad=True))

### Training

In [20]:
# initialize parameter
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# hyperparameter
learning_rate = 0.01
optimizer = optim.SGD([W, b], lr=learning_rate)

epochs = 5000
for epoch in range(1, epochs+1):
    hypothesis = x_train * W + b
    cost = torch.mean((hypothesis - y_train) ** 2)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 200 == 0:
        print(f'Epoch: {epoch}, Loss: {cost}')
print('cost:', cost)
W, b

Epoch: 200, Loss: 0.029910147190093994
Epoch: 400, Loss: 0.011421176604926586
Epoch: 600, Loss: 0.004361126106232405
Epoch: 800, Loss: 0.0016652889316901565
Epoch: 1000, Loss: 0.0006358931423164904
Epoch: 1200, Loss: 0.0002428154111839831
Epoch: 1400, Loss: 9.271795715903863e-05
Epoch: 1600, Loss: 3.540363104548305e-05
Epoch: 1800, Loss: 1.3518902960640844e-05
Epoch: 2000, Loss: 5.162579327588901e-06
Epoch: 2200, Loss: 1.9716496808541706e-06
Epoch: 2400, Loss: 7.533246275670535e-07
Epoch: 2600, Loss: 2.8795551543225884e-07
Epoch: 2800, Loss: 1.1012351563977063e-07
Epoch: 3000, Loss: 4.219759830448311e-08
Epoch: 3200, Loss: 1.6227582833039378e-08
Epoch: 3400, Loss: 6.266361651796615e-09
Epoch: 3600, Loss: 2.4041166835075956e-09
Epoch: 3800, Loss: 9.151979729615789e-10
Epoch: 4000, Loss: 3.440353280037556e-10
Epoch: 4200, Loss: 1.4136958270682953e-10
Epoch: 4400, Loss: 6.861000656499527e-11
Epoch: 4600, Loss: 3.501554601825774e-11
Epoch: 4800, Loss: 2.305947967295996e-11
Epoch: 5000, Los

(tensor([2.0000], requires_grad=True),
 tensor([1.0439e-05], requires_grad=True))

### Prediction

In [21]:
def predict(x, W=W, b=b):
    return torch.tensor([[x]], dtype=torch.float)*W + b

In [25]:
predict(4)

tensor([[8.0000]], grad_fn=<AddBackward0>)