# (3주차) 9월28일
> 파이토치를 이용하여 회귀모형 학습하기 (2) 

- toc:true
- branch: master
- badges: true
- comments: true
- author: 윤도현

### Import 

In [2]:
import torch 
import numpy as np 

### Data

`-` model: $y_i= w_0+w_1 x_i +\epsilon_i = 2.5 + 4x_i +\epsilon_i, \quad i=1,2,\dots,n$ 

`-` model: ${\bf y}={\bf X}{\bf W} +\boldsymbol{\epsilon}$

- ${\bf y}=\begin{bmatrix} y_1 \\ y_2 \\ \dots \\ y_n\end{bmatrix}, \quad {\bf X}=\begin{bmatrix} 1 & x_1 \\ 1 & x_2 \\ \dots \\ 1 & x_n\end{bmatrix}, \quad {\bf W}=\begin{bmatrix} 2.5 \\ 4 \end{bmatrix}, \quad \boldsymbol{\epsilon}= \begin{bmatrix} \epsilon_1 \\ \dots \\ \epsilon_n\end{bmatrix}$

In [13]:
torch.manual_seed(43052)
n=100
ones= torch.ones(n)
x,_ = torch.randn(n).sort()
X = torch.vstack([ones,x]).T
W = torch.tensor([2.5,4])
ϵ = torch.randn(n)*0.5
y = X@W + ϵ
ytrue = X@W
X

tensor([[ 1.0000, -2.4821],
        [ 1.0000, -2.3621],
        [ 1.0000, -1.9973],
        [ 1.0000, -1.6239],
        [ 1.0000, -1.4792],
        [ 1.0000, -1.4635],
        [ 1.0000, -1.4509],
        [ 1.0000, -1.4435],
        [ 1.0000, -1.3722],
        [ 1.0000, -1.3079],
        [ 1.0000, -1.1904],
        [ 1.0000, -1.1092],
        [ 1.0000, -1.1054],
        [ 1.0000, -1.0875],
        [ 1.0000, -0.9469],
        [ 1.0000, -0.9319],
        [ 1.0000, -0.8643],
        [ 1.0000, -0.7858],
        [ 1.0000, -0.7549],
        [ 1.0000, -0.7421],
        [ 1.0000, -0.6948],
        [ 1.0000, -0.6103],
        [ 1.0000, -0.5830],
        [ 1.0000, -0.5621],
        [ 1.0000, -0.5506],
        [ 1.0000, -0.5058],
        [ 1.0000, -0.4806],
        [ 1.0000, -0.4738],
        [ 1.0000, -0.4710],
        [ 1.0000, -0.4676],
        [ 1.0000, -0.3874],
        [ 1.0000, -0.3719],
        [ 1.0000, -0.3688],
        [ 1.0000, -0.3159],
        [ 1.0000, -0.2775],
        [ 1.0000, -0

### 이전방법요약 

`-` step1: yhat 

`-` step2: loss 

`-` step3: derivation 

`-` step4: update 

### step1: yhat

`-` feedforward 신경망을 설계하는 과정

`-` 이 단계가 잘 완료되었다면, 임의의 ${\bf\hat{W}}$을 넣었을 때 $\bf\hat{y}$를 계산할 수 있어야 함 

#### 방법1: 직접선언 (내가 공식을 알고 있어야 한다)

In [45]:
What=torch.tensor([-5.0,10.0],requires_grad=True)

In [46]:
yhat1=X@What

In [47]:
yhat1

tensor([-29.8211, -28.6215, -24.9730, -21.2394, -19.7919, -19.6354, -19.5093,
        -19.4352, -18.7223, -18.0793, -16.9040, -16.0918, -16.0536, -15.8746,
        -14.4690, -14.3193, -13.6426, -12.8578, -12.5486, -12.4213, -11.9484,
        -11.1034, -10.8296, -10.6210, -10.5064, -10.0578,  -9.8063,  -9.7380,
         -9.7097,  -9.6756,  -8.8736,  -8.7195,  -8.6880,  -8.1592,  -7.7752,
         -7.7716,  -7.7339,  -7.7208,  -7.6677,  -7.1551,  -7.0004,  -6.8163,
         -6.7081,  -6.5655,  -6.4480,  -6.3612,  -6.0566,  -5.6031,  -5.5589,
         -5.2137,  -4.3446,  -4.3165,  -3.8047,  -3.5801,  -3.4793,  -3.4325,
         -2.3545,  -2.3440,  -1.8434,  -1.7799,  -1.5386,  -1.0161,  -0.8103,
          0.4426,   0.5794,   0.9125,   1.1483,   1.4687,   1.4690,   1.5234,
          1.6738,   2.0592,   2.1414,   2.8221,   3.1536,   3.6682,   4.2907,
          4.8037,   4.8531,   4.9414,   5.3757,   5.3926,   5.6973,   6.0239,
          6.1261,   6.5317,   7.2891,   8.4032,   8.4936,   9.27

#### 방법2: torch.nn.Linear() 사용

In [48]:
net = torch.nn.Linear(in_features=2 ,out_features=1, bias=False) 

In [49]:
net.weight.data

tensor([[ 0.4210, -0.3843]])

In [50]:
net.weight.data=torch.tensor([[-5.0,10.0]])

In [51]:
net.weight.data

tensor([[-5., 10.]])

In [52]:
net(X)

tensor([[-29.8211],
        [-28.6215],
        [-24.9730],
        [-21.2394],
        [-19.7919],
        [-19.6354],
        [-19.5093],
        [-19.4352],
        [-18.7223],
        [-18.0793],
        [-16.9040],
        [-16.0918],
        [-16.0536],
        [-15.8746],
        [-14.4690],
        [-14.3193],
        [-13.6426],
        [-12.8578],
        [-12.5486],
        [-12.4213],
        [-11.9484],
        [-11.1034],
        [-10.8296],
        [-10.6210],
        [-10.5064],
        [-10.0578],
        [ -9.8063],
        [ -9.7380],
        [ -9.7097],
        [ -9.6756],
        [ -8.8736],
        [ -8.7195],
        [ -8.6880],
        [ -8.1592],
        [ -7.7752],
        [ -7.7716],
        [ -7.7339],
        [ -7.7208],
        [ -7.6677],
        [ -7.1551],
        [ -7.0004],
        [ -6.8163],
        [ -6.7081],
        [ -6.5655],
        [ -6.4480],
        [ -6.3612],
        [ -6.0566],
        [ -5.6031],
        [ -5.5589],
        [ -5.2137],


In [53]:
yhat2=net(X)

#### 방법3: torch.nn.Linear()사용, bias=True

In [28]:
net = torch.nn.Linear(in_features=1 ,out_features=1, bias=True) 

In [29]:
net.weight.data

tensor([[-0.1737]])

In [30]:
net.weight.data=torch.tensor([[10.0]])

In [33]:
net.bias.data=torch.tensor([-5.0])

In [34]:
net.weight,net.bias

(Parameter containing:
 tensor([[10.]], requires_grad=True),
 Parameter containing:
 tensor([-5.], requires_grad=True))

In [44]:
net(x.reshape(100,1))

tensor([[-29.8211],
        [-28.6215],
        [-24.9730],
        [-21.2394],
        [-19.7919],
        [-19.6354],
        [-19.5093],
        [-19.4352],
        [-18.7223],
        [-18.0793],
        [-16.9040],
        [-16.0918],
        [-16.0536],
        [-15.8746],
        [-14.4690],
        [-14.3193],
        [-13.6426],
        [-12.8578],
        [-12.5486],
        [-12.4213],
        [-11.9484],
        [-11.1034],
        [-10.8296],
        [-10.6210],
        [-10.5064],
        [-10.0578],
        [ -9.8063],
        [ -9.7380],
        [ -9.7097],
        [ -9.6756],
        [ -8.8736],
        [ -8.7195],
        [ -8.6880],
        [ -8.1592],
        [ -7.7752],
        [ -7.7716],
        [ -7.7339],
        [ -7.7208],
        [ -7.6677],
        [ -7.1551],
        [ -7.0004],
        [ -6.8163],
        [ -6.7081],
        [ -6.5655],
        [ -6.4480],
        [ -6.3612],
        [ -6.0566],
        [ -5.6031],
        [ -5.5589],
        [ -5.2137],


#### 

### step2: loss

#### 방법1: 손실함수를 직접정의하는 방법

In [58]:
loss=torch.mean((y-yhat1)**2)
loss

tensor(85.8769, grad_fn=<MeanBackward0>)

In [59]:
loss=torch.mean((y-yhat2)**2)
loss

tensor(176.2661, grad_fn=<MeanBackward0>)

- 176.2661? 이건 잘못된 결과임

In [70]:
loss=torch.mean((y.reshape(100,1)-yhat2)**2)
loss

tensor(85.8769, grad_fn=<MeanBackward0>)

#### 방법2: torch.nn.MSELoss()를 사용하여 손실함수를 정의하는 방법 

In [76]:
lossfn=torch.nn.MSELoss()

In [82]:
loss=lossfn(y,yhat1)
loss

tensor(85.8769, grad_fn=<MseLossBackward>)

In [83]:
loss=lossfn(y.reshape(100,1),yhat2)
loss

tensor(85.8769, grad_fn=<MseLossBackward>)

### [숙제](https://ieilms.jbnu.ac.kr/)

`-` model: $y_i= w_0+w_1 x_{i1}+w_2 x_{i2} +\epsilon_i = 2.5 + 4x_{1i} + -2x_{2i}+\epsilon_i, \quad i=1,2,\dots,n$ 

In [8]:
torch.manual_seed(43052)
n=100
ones= torch.ones(n)
x1,_ = torch.randn(n).sort()
x2,_ = torch.randn(n).sort()
X = torch.vstack([ones,x1,x2]).T
W = torch.tensor([2.5,4,-2])
ϵ = torch.randn(n)*0.5
y = X@W + ϵ
ytrue = X@W

`-` torch.nn.Linear() 를 이용하여 $\bf{\hat{W}}=\begin{bmatrix}1 \\ 1 \\ 1 \end{bmatrix}$ 에 대한 $\hat{y}$를 구하라. 

# Step 1~2 요약


### 방법1: 모델을 직접선언 + loss함수도 직접선언 ###

In [35]:
torch.manual_seed(43052)
n=100
ones= torch.ones(n)
x,_ = torch.randn(n).sort()
X = torch.vstack([ones,x]).T
W = torch.tensor([2.5,4])
ϵ = torch.randn(n)*0.5
y = X@W + ϵ
ytrue = X@W
x

tensor([-2.4821, -2.3621, -1.9973, -1.6239, -1.4792, -1.4635, -1.4509, -1.4435,
        -1.3722, -1.3079, -1.1904, -1.1092, -1.1054, -1.0875, -0.9469, -0.9319,
        -0.8643, -0.7858, -0.7549, -0.7421, -0.6948, -0.6103, -0.5830, -0.5621,
        -0.5506, -0.5058, -0.4806, -0.4738, -0.4710, -0.4676, -0.3874, -0.3719,
        -0.3688, -0.3159, -0.2775, -0.2772, -0.2734, -0.2721, -0.2668, -0.2155,
        -0.2000, -0.1816, -0.1708, -0.1565, -0.1448, -0.1361, -0.1057, -0.0603,
        -0.0559, -0.0214,  0.0655,  0.0684,  0.1195,  0.1420,  0.1521,  0.1568,
         0.2646,  0.2656,  0.3157,  0.3220,  0.3461,  0.3984,  0.4190,  0.5443,
         0.5579,  0.5913,  0.6148,  0.6469,  0.6469,  0.6523,  0.6674,  0.7059,
         0.7141,  0.7822,  0.8154,  0.8668,  0.9291,  0.9804,  0.9853,  0.9941,
         1.0376,  1.0393,  1.0697,  1.1024,  1.1126,  1.1532,  1.2289,  1.3403,
         1.3494,  1.4279,  1.4994,  1.5031,  1.5437,  1.6789,  2.0832,  2.2444,
         2.3935,  2.6056,  2.6057,  2.66

In [36]:
What1=torch.tensor([-5.0,10.0],requires_grad=True)
yhat1=X@What1
loss1=torch.mean((y-yhat1)**2)
loss1

tensor(85.8769, grad_fn=<MeanBackward0>)

### 방법2 모델식을 torch.nn으로 선언 (bias=False) + loss함수 직접선언

In [37]:
net2=torch.nn.Linear(in_features=2, out_features=1, bias=False)     ### in_features는 X의 열의 갯수
net2.weight.data= torch.tensor([[-5.0,10.0]],requires_grad=True)
yhat2=net2(X)
loss2=torch.mean((y.reshape(100,1)-yhat2)**2)
loss2

tensor(85.8769, grad_fn=<MeanBackward0>)

### 방법 3: 모델식을 torch.nn으로 선언(bias=True)+ loss 직접선언

In [45]:
net3=torch.nn.Linear(in_features=1, out_features=1, bias=True)     ### in_features는 X의 열의 갯수
net3.weight.data= torch.tensor([[10.0]])
net3.bias.data = torch.tensor([[-5.0]])
yhat3=net3(x.reshape(100,1))
loss3 = torch.mean((y.reshape(100,1)-yhat3)**2)
loss3

tensor(85.8769, grad_fn=<MeanBackward0>)

### 방법4: 모델식을 직접 선언 + loss함수는 torch.nn.MSELoss()

In [39]:
What4 = torch.tensor([-5.0,10.0],requires_grad=True)
yhat4=X@What4
lossfn= torch.nn.MSELoss()
loss4=lossfn(y,yhat4)
loss4

tensor(85.8769, grad_fn=<MseLossBackward>)

### 방법5: 모델식을 torch.nn으로 선언(bias=False) + loss함수는 torch.nn.MSELoss()

In [40]:
net5= torch.nn.Linear(in_features=2, out_features=1, bias=False)
net5.weight.data= torch.tensor([-5.0,10.0],requires_grad=True)
yhat5= net5(X)
lossfn = torch.nn.MSELoss()
loss5 = lossfn(y,yhat5)
loss5

tensor(85.8769, grad_fn=<MseLossBackward>)

### 방법6: 모델식을 torch.nn으로 선언(bias=True) + loss함수는 torch.nn.MSELoss()

In [47]:
net6=torch.nn.Linear(in_features=1,out_features=1, bias=True)
net6.weight.data=torch.tensor([[10.0]])
net6.bias.data = torch.tensor([[-5.0]])
yhat6= net6(x.reshape(100,1))
lossfn=torch.nn.MSELoss()
loss6=lossfn(y.reshape(100,1),yhat6)
loss6

tensor(85.8769, grad_fn=<MseLossBackward>)

# Step 3: derivation >> 미분하는 과정

#### loss1

In [48]:
loss1.backward()

In [54]:
What1.grad.data

tensor([-13.4225,  11.8893])

#### loss2

In [49]:
loss2.backward()

In [55]:
net2.weight.grad

tensor([[-13.4225,  11.8893]])

#### loss3

In [50]:
loss3.backward()

In [58]:
net3.bias.grad, net3.weight.grad

(tensor([[-13.4225]]), tensor([[11.8893]]))

#### loss4

In [51]:
loss4.backward()

In [59]:
What4.grad.data

tensor([-13.4225,  11.8893])

#### loss5

In [52]:
loss5.backward()

In [61]:
net5.weight.grad

tensor([-13.4225,  11.8893])

#### loss6

In [53]:
loss6.backward()

In [63]:
net6.bias.grad,net6.weight.grad

(tensor([[-13.4225]]), tensor([[11.8893]]))

# Step 4: update

#### loss1

In [69]:
What1.data ## update 전

tensor([-2.3155,  7.6221])

In [70]:
lr=0.1
What1.data = What1.data- lr*What1.grad.data ## update 후 
What1

tensor([-0.9732,  6.4332], requires_grad=True)

#### loss2

In [71]:
net2.weight.data

tensor([[-5., 10.]])

In [72]:
optmz2 = torch.optim.SGD(net2.parameters(),lr=0.1)

In [73]:
optmz2.step() ## update 후

In [74]:
net2.weight.data

tensor([[-3.6577,  8.8111]])

#### loss3

In [76]:
net3.bias.data, net3.weight.data

(tensor([[-5.]]), tensor([[10.]]))

In [77]:
optmz3= torch.optim.SGD(net3.parameters(),lr=0.1)

In [78]:
optmz3.step()

In [79]:
net3.bias.data,net3.weight.data

(tensor([[-3.6577]]), tensor([[8.8111]]))

In [81]:
list(net3.parameters())

[Parameter containing:
 tensor([[8.8111]], requires_grad=True),
 Parameter containing:
 tensor([[-3.6577]], requires_grad=True)]

#### loss4

In [80]:
What4.data

tensor([-5., 10.])

In [82]:
lr=0.1
What4.data = What4.data- lr*What4.grad.data ## update 후 
What4

tensor([-3.6577,  8.8111], requires_grad=True)

#### loss5   ## 최신버전!!

In [83]:
net5.weight.data

tensor([-5., 10.])

In [84]:
optmz5= torch.optim.SGD(net5.parameters(),lr=0.1)
optmz5.step()

In [85]:
net5.weight.data

tensor([-3.6577,  8.8111])

#### loss6

In [86]:
net6.bias.data,net6.weight.data

(tensor([[-5.]]), tensor([[10.]]))

In [87]:
optmz6= torch.optim.SGD(net6.parameters(),lr=0.1)
optmz6.step()

In [88]:
net6.bias.data,net6.weight.data

(tensor([[-3.6577]]), tensor([[8.8111]]))

### step1~4를 반복

In [94]:
net= torch.nn.Linear(in_features=2,out_features=1, bias=False) ##모형 정의
optmz=torch.optim.SGD(net.parameters(),lr=0.1)
mseloss=torch.nn.MSELoss()

for epoc in range(100):
    # step1: yhat
    yhat=net(X)
    # step2: loss
    loss=mseloss(y.reshape(100,1),yhat)
    # step3: derivation
    loss.backward()
    # step4: update
    optmz.step()
    optmz.zero_grad() ## 기울기 초기화를 위해 꼭 외우기

In [93]:
list(net.parameters())

[Parameter containing:
 tensor([[2.4459, 4.0043]], requires_grad=True)]

## 숙제

In [95]:
net= torch.nn.Linear(in_features=2,out_features=1, bias=False) ##모형 정의
optmz=torch.optim.SGD(net.parameters(),lr=0.1)
mseloss=torch.nn.MSELoss()

for epoc in range(100):
    # step1: yhat
    yhat=net(X)
    # step2: loss
    loss=mseloss(y.reshape(100,1),yhat)
    # step3: derivation
    loss.backward()
    # step4: update
    optmz.step()

In [96]:
list(net.parameters())

[Parameter containing:
 tensor([[-0.6880,  0.5531]], requires_grad=True)]