In [1]:
import torch
import torch.optim as optim

### 1. Data Definition: vector
$\mathbf{X}_{train}=\begin{pmatrix}73&80&75 \\ 93&88&93 \\ 89&91&80 \\ & \vdots \end{pmatrix}, \ $ $\mathbf{Y}_{train}=\begin{pmatrix}152 \\ 185 \\ 180 \\ \vdots \end{pmatrix}$

In [26]:
x_train = torch.tensor([[73, 80, 75],[93, 88, 93],[89,91,80],[96,98,100],[73,66,70]], dtype=torch.float)
y_train = torch.tensor([[152],[185],[180], [196],[142]], dtype=torch.float)
x_train, y_train

(tensor([[ 73.,  80.,  75.],
         [ 93.,  88.,  93.],
         [ 89.,  91.,  80.],
         [ 96.,  98., 100.],
         [ 73.,  66.,  70.]]),
 tensor([[152.],
         [185.],
         [180.],
         [196.],
         [142.]]))

### 2. Hypothesis
$\mathbf{y}=\mathbf{W}\mathbf{x}+\mathbf{b}=w_1x_1+w_2x_2+w_3x_3+b$

In [27]:
W = torch.zeros((3, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
W, b

(tensor([[0.],
         [0.],
         [0.]], requires_grad=True),
 tensor([0.], requires_grad=True))

In [28]:
hypothesis = x_train.matmul(W) + b
hypothesis

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)

### 3. Loss(Cost) Function
$\frac{1}{m} \sum_{i=1}^{m}(H(x^{(i)})- y^{(i)})^2$

In [29]:
cost = torch.mean(hypothesis - y_train)**2
cost

tensor(29241., grad_fn=<PowBackward0>)

### 4. Gradient descent
- optimizer
- backward: gradient 계산
- step(): update

In [30]:
optimizer = optim.SGD([W, b], lr=1e-5)

In [31]:
optimizer.zero_grad()
cost.backward()
optimizer.step()
W, b

(tensor([[0.2900],
         [0.2893],
         [0.2859]], requires_grad=True),
 tensor([0.0034], requires_grad=True))

### Training

In [32]:
# initialize parameter
W = torch.zeros((3,1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# hyperparameter
learning_rate = 1e-5
optimizer = optim.SGD([W, b], lr=learning_rate)

epochs = 20
for epoch in range(1, epochs+1):
    hypothesis = x_train.matmul(W) + b
    cost = torch.mean((hypothesis - y_train) ** 2)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()


    print(f'Epoch: {epoch}, Loss: {cost}')
print('cost:', cost)
W, b

Epoch: 1, Loss: 29661.80078125
Epoch: 2, Loss: 9537.6943359375
Epoch: 3, Loss: 3069.5908203125
Epoch: 4, Loss: 990.6702880859375
Epoch: 5, Loss: 322.4819641113281
Epoch: 6, Loss: 107.7170639038086
Epoch: 7, Loss: 38.687400817871094
Epoch: 8, Loss: 16.499046325683594
Epoch: 9, Loss: 9.365655899047852
Epoch: 10, Loss: 7.071104526519775
Epoch: 11, Loss: 6.331867218017578
Epoch: 12, Loss: 6.092532157897949
Epoch: 13, Loss: 6.013822555541992
Epoch: 14, Loss: 5.986774921417236
Epoch: 15, Loss: 5.976314067840576
Epoch: 16, Loss: 5.971213340759277
Epoch: 17, Loss: 5.96779727935791
Epoch: 18, Loss: 5.96496057510376
Epoch: 19, Loss: 5.962291717529297
Epoch: 20, Loss: 5.95969295501709
cost: tensor(5.9597, grad_fn=<MeanBackward0>)


(tensor([[0.6806],
         [0.6785],
         [0.6678]], requires_grad=True),
 tensor([0.0079], requires_grad=True))

### Prediction

In [35]:
def predict(x, W=W, b=b):
    return torch.tensor([x], dtype=torch.float).matmul(W) + b

In [36]:
predict([89, 69, 82])

tensor([[162.1540]], grad_fn=<AddBackward0>)