# Lab-04 Multivariable Linear regression and Loading Data

이전에는 Simple Linear regression을 다뤘다면, 이번에는 여러 개의 입력 변수로부터 하나의 예측값을 얻는 Multivariable Linear regression을 해보려고 한다.

In [1]:
import torch
from torch import optim

In [2]:
# Training inputs: five samples (rows), each with three feature values.
# torch.tensor with an explicit dtype is used instead of the legacy
# torch.FloatTensor constructor; the resulting float32 tensor is the same.
x_train = torch.tensor(
    [
        [73, 80, 75],
        [93, 88, 93],
        [89, 91, 90],
        [96, 98, 100],
        [73, 66, 70],
    ],
    dtype=torch.float32,
)

# Regression targets: one value per sample, stored as a (5, 1) column.
y_train = torch.tensor(
    [[152], [185], [180], [196], [142]],
    dtype=torch.float32,
)

### Compute Hypothesis
$$y=Wx+b$$
Simple Linear Regression과 식 자체는 동일하지만, $x$가 scalar 값에서 vector로 바뀐 것이다.

예를 들어 입력 변수가 3개라고 했을 때, $$y=w_1x_1+w_2x_2+w_3x_3+b$$라고 볼 수 있다.


In [5]:
# Trainable parameters, both initialised to zero: a 3x1 weight column (one
# weight per input feature) and a single bias term.
W = torch.zeros(3, 1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# Stochastic gradient descent over exactly these two parameters.
optimizer = optim.SGD(params=[W, b], lr=1e-5)

epochs = 1000
for epoch in range(1, epochs + 1):

    # The hypothesis is expressed as a matrix product, which evaluates the
    # linear model for every training row at once.
    hypothesis = torch.matmul(x_train, W) + b

    # From here on the steps are the same as in simple linear regression:
    # mean squared error followed by one gradient-descent update.
    cost = (y_train - hypothesis).pow(2).mean()

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Only every 100th epoch is reported.
    if epoch % 100 != 0:
        continue
    print(
        'Epoch {:4d}/{}, hypothesis: {}, Cost:{:.6f}'.format(
            epoch, epochs, hypothesis.squeeze().detach(), cost.item()
        )
    )

Epoch  100/1000, hypothesis: tensor([152.7695, 183.6982, 180.9592, 197.0628, 140.1332]), Cost:1.564299
Epoch  200/1000, hypothesis: tensor([152.7277, 183.7271, 180.9466, 197.0518, 140.1727]), Cost:1.498234
Epoch  300/1000, hypothesis: tensor([152.6870, 183.7551, 180.9344, 197.0410, 140.2112]), Cost:1.435647
Epoch  400/1000, hypothesis: tensor([152.6474, 183.7825, 180.9225, 197.0305, 140.2487]), Cost:1.376296
Epoch  500/1000, hypothesis: tensor([152.6089, 183.8091, 180.9109, 197.0202, 140.2852]), Cost:1.320047
Epoch  600/1000, hypothesis: tensor([152.5714, 183.8349, 180.8997, 197.0102, 140.3208]), Cost:1.266736
Epoch  700/1000, hypothesis: tensor([152.5350, 183.8601, 180.8888, 197.0004, 140.3554]), Cost:1.216203
Epoch  800/1000, hypothesis: tensor([152.4995, 183.8846, 180.8781, 196.9908, 140.3891]), Cost:1.168279
Epoch  900/1000, hypothesis: tensor([152.4651, 183.9085, 180.8678, 196.9815, 140.4220]), Cost:1.122853
Epoch 1000/1000, hypothesis: tensor([152.4315, 183.9316, 180.8578, 196.97

### nn.Module을 사용하여 모델을 만들어 보자!

nn.Module은 custom 모델을 만들 때 상속받는 클래스이며, forward를 정의해서 자신만의 모듈을 만들 수 있다.

In [6]:
import torch.nn as nn

class Multi_Linear(nn.Module):
    """Linear regression model built from a single ``nn.Linear`` layer.

    Args:
        in_features: Number of input variables per sample. Defaults to 3,
            the size that was previously hard-coded.
        out_features: Number of predicted values per sample. Defaults to 1,
            the size that was previously hard-coded.
    """

    def __init__(self, in_features: int = 3, out_features: int = 1):
        super().__init__()
        # nn.Linear is provided by the nn library; its arguments are
        # (input size, output size).
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Compute the hypothesis (the model's prediction) for ``x``."""
        return self.linear(x)



### torch.nn.functional library를 이용하여 loss 함수를 불러보자!

In [7]:
import torch.nn.functional as F

# The MSE loss is also built in. Calling it through F makes it easy to swap
# in a different built-in loss function later.
cost = F.mse_loss(input=hypothesis, target=y_train)

### 그렇다면 바꾼 것들을 이용하여 모델을 새로 코딩해보자!

In [9]:
# The hand-made parameters are replaced by the Multi_Linear module.
#   before: W = torch.zeros((3,1), requires_grad = True)
#   before: b = torch.zeros(1,requires_grad = True)
model = Multi_Linear()

# The optimizer now receives the module's parameters.
#   before: optimizer = optim.SGD([W, b], lr = 1e-5)
optimizer = optim.SGD(params=model.parameters(), lr=1e-5)

epochs = 1000
for epoch in range(1, epochs + 1):

    # Forward pass through the module.
    #   before: hypothesis = x_train.matmul(W)+b
    hypothesis = model(x_train)

    # Built-in mean squared error.
    #   before: cost = torch.mean((y_train-hypothesis)**2)
    cost = F.mse_loss(input=hypothesis, target=y_train)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Only every 100th epoch is reported.
    if epoch % 100 != 0:
        continue
    print(
        'Epoch {:4d}/{}, hypothesis: {}, Cost:{:.6f}'.format(
            epoch, epochs, hypothesis.squeeze().detach(), cost.item()
        )
    )

Epoch  100/1000, hypothesis: tensor([154.4220, 182.4948, 181.3660, 198.0656, 137.9721]), Cost:6.899693
Epoch  200/1000, hypothesis: tensor([154.3283, 182.5594, 181.3378, 198.0415, 138.0600]), Cost:6.571748
Epoch  300/1000, hypothesis: tensor([154.2371, 182.6223, 181.3103, 198.0181, 138.1455]), Cost:6.261016
Epoch  400/1000, hypothesis: tensor([154.1485, 182.6835, 181.2836, 197.9951, 138.2288]), Cost:5.966525
Epoch  500/1000, hypothesis: tensor([154.0622, 182.7429, 181.2577, 197.9727, 138.3099]), Cost:5.687458
Epoch  600/1000, hypothesis: tensor([153.9783, 182.8008, 181.2324, 197.9509, 138.3889]), Cost:5.422935
Epoch  700/1000, hypothesis: tensor([153.8967, 182.8571, 181.2079, 197.9296, 138.4659]), Cost:5.172279
Epoch  800/1000, hypothesis: tensor([153.8172, 182.9120, 181.1840, 197.9087, 138.5408]), Cost:4.934695
Epoch  900/1000, hypothesis: tensor([153.7400, 182.9653, 181.1608, 197.8884, 138.6138]), Cost:4.709536
Epoch 1000/1000, hypothesis: tensor([153.6649, 183.0172, 181.1382, 197.86

In [17]:
nb_epochs = 20

# Epochs are numbered 0 through nb_epochs inclusive.
for epoch in range(0, nb_epochs + 1):

    # Forward pass (manual form: x_train.matmul(W) + b).
    hypo = model(x_train)
    # Mean squared error (manual form: torch.mean((hypo - y_train) ** 2)).
    cost = F.mse_loss(input=hypo, target=y_train)

    # Clear the old gradients, backpropagate, then update the parameters.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Progress is printed on every epoch here.
    print(
        "Epoch {:4d}/{} hypothesis: {} Cost : {:.6f}".format(
            epoch, nb_epochs, hypo.squeeze().detach(), cost.item()
        )
    )

Epoch    0/20 hypothesis: tensor([14.7394, 18.4595, 17.8038, 19.7524, 13.9007]) Cost : 24071.289062
Epoch    1/20 hypothesis: tensor([75.3282, 91.2835, 89.5581, 97.8911, 69.4472]) Cost : 7546.069824
Epoch    2/20 hypothesis: tensor([109.2495, 132.0550, 129.7307, 141.6380, 100.5458]) Cost : 2366.290283
Epoch    3/20 hypothesis: tensor([128.2406, 154.8817, 152.2218, 166.1303, 117.9569]) Cost : 742.704346
Epoch    4/20 hypothesis: tensor([138.8729, 167.6615, 164.8137, 179.8425, 127.7049]) Cost : 233.795853
Epoch    5/20 hypothesis: tensor([144.8253, 174.8166, 171.8634, 187.5195, 133.1626]) Cost : 74.279953
Epoch    6/20 hypothesis: tensor([148.1578, 178.8226, 175.8102, 191.8175, 136.2184]) Cost : 24.279486
Epoch    7/20 hypothesis: tensor([150.0233, 181.0656, 178.0198, 194.2237, 137.9293]) Cost : 8.606831
Epoch    8/20 hypothesis: tensor([151.0676, 182.3214, 179.2569, 195.5708, 138.8874]) Cost : 3.693863
Epoch    9/20 hypothesis: tensor([151.6521, 183.0246, 179.9494, 196.3250, 139.4239]) 

In [25]:
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    """Map-style dataset of ``(features, target)`` pairs.

    Args:
        x_data: Optional sequence of feature rows. When omitted together
            with ``y_data``, the built-in five-sample example data is used.
        y_data: Optional sequence of targets, one entry per feature row.

    Raises:
        ValueError: If only one of ``x_data`` / ``y_data`` is given, or if
            the two sequences have different lengths.
    """

    def __init__(self, x_data=None, y_data=None):
        if (x_data is None) != (y_data is None):
            raise ValueError("x_data and y_data must be given together")

        if x_data is None:
            # Default example data; fresh lists are built per instance so
            # instances never share mutable state.
            x_data = [
                [73, 80, 75],
                [93, 88, 93],
                [89, 91, 90],
                [96, 98, 100],
                [73, 66, 70],
            ]
            y_data = [[152], [185], [180], [196], [142]]

        if len(x_data) != len(y_data):
            raise ValueError(
                "x_data and y_data must have the same number of rows "
                "(got {} and {})".format(len(x_data), len(y_data))
            )

        self.x_data = x_data
        self.y_data = y_data

    def __len__(self):
        """Return the number of samples."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a pair of float32 tensors ``(x, y)``."""
        # torch.tensor always treats its argument as data, whereas the legacy
        # torch.FloatTensor constructor reads a bare integer as a size.
        x = torch.tensor(self.x_data[idx], dtype=torch.float32)
        y = torch.tensor(self.y_data[idx], dtype=torch.float32)

        return x, y


dataset = CustomDataset()

In [22]:
dataset

<__main__.CustomDataset at 0x7f0e340fe810>

In [23]:
from torch.utils.data import DataLoader

# Serve the dataset in mini-batches of two samples, with shuffling enabled.
dataloader = DataLoader(dataset=dataset, batch_size=2, shuffle=True)

## 함수 다 만들고서 한 모델

In [24]:
nb_epochs = 100

# Epochs are numbered 0 through nb_epochs inclusive.
for epoch in range(nb_epochs + 1):
    # Batches get their own names so that the full-dataset x_train / y_train
    # tensors defined earlier are not overwritten by the last mini-batch.
    for x_batch, y_batch in dataloader:
        pred = model(x_batch)

        cost = F.mse_loss(pred, y_batch)

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # Squeeze only the feature axis. A bare squeeze() would also drop the
        # batch axis of a single-sample batch, which then prints as a plain
        # number instead of a tensor.
        print("Epoch {:4d}/{} hypothesis: {} Cost : {:.6f}".
              format(epoch, nb_epochs, pred.squeeze(dim=1).detach(), cost.item()))

Epoch    0/100 hypothesis: tensor([185.3281, 195.9916]) Cost : 0.053858
Epoch    0/100 hypothesis: tensor([142.7910, 150.4188]) Cost : 1.562913
Epoch    0/100 hypothesis: 180.38885498046875 Cost : 0.151208
Epoch    1/100 hypothesis: tensor([195.8915, 150.4081]) Cost : 1.272889
Epoch    1/100 hypothesis: tensor([185.5946, 143.0548]) Cost : 0.733121
Epoch    1/100 hypothesis: 180.2103729248047 Cost : 0.044257
Epoch    2/100 hypothesis: tensor([195.7915, 142.7082]) Cost : 0.272525
Epoch    2/100 hypothesis: tensor([185.0610, 180.0301]) Cost : 0.002316
Epoch    2/100 hypothesis: 150.24664306640625 Cost : 3.074260
Epoch    3/100 hypothesis: tensor([196.4660, 143.1865]) Cost : 0.812405
Epoch    3/100 hypothesis: tensor([185.4161, 150.5629]) Cost : 1.119143
Epoch    3/100 hypothesis: 180.5736083984375 Cost : 0.329027
Epoch    4/100 hypothesis: tensor([185.3280, 142.8507]) Cost : 0.415632
Epoch    4/100 hypothesis: tensor([180.0540, 150.2871]) Cost : 1.468428
Epoch    4/100 hypothesis: 196.100