## Linear Regression
### y = Wx + b

In [1]:
import torch

In [2]:
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[2], [4], [6]])

In [3]:
########### 모델 정의 ###########
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
hypothesis = x_train * W + b

In [5]:
########### cost (loss) ###########
# e.g., MSE (mean squared error)
cost = torch.mean((hypothesis - y_train) ** 2)

In [6]:
########### optimizer 설정 ###########
optimizer = torch.optim.SGD([W, b], lr=0.01)

########### 아래 셋은 항상 같이 다니는 것! ###########
optimizer.zero_grad() # gradient 초기화
cost.backward() # gradient 계산
optimizer.step() # update

########### 다음을 계산 ###########
# gradient = 2 * torch.mean((W * x_train + b - y_train ) * x_train)
# W -= 0.01 * gradient

In [7]:
print(W, b, x_train * W + b)

tensor([0.1867], requires_grad=True) tensor([0.0800], requires_grad=True) tensor([[0.2667],
        [0.4533],
        [0.6400]], grad_fn=<AddBackward0>)


### Full training

In [8]:
import torch

########### 한번 ###########
# 데이터 정의
# hypothesis (parameters) 정의
# optimizer 정의

x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[2], [4], [6]])

W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optimizer = torch.optim.SGD([W, b], lr=0.01)

In [9]:
########### 반복 ###########
# hypothesis 계산 (예측)
# cost 계산
# optimizer 로 학습

n_epoch = 1000
for epoch in range(n_epoch):
    hypothesis = x_train * W + b
    cost = torch.mean((hypothesis - y_train) ** 2)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

In [10]:
print(W, b, x_train * W + b)

tensor([1.9708], requires_grad=True) tensor([0.0664], requires_grad=True) tensor([[2.0372],
        [4.0080],
        [5.9788]], grad_fn=<AddBackward0>)


### Multivariate LR

In [46]:
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70],])
y_train = torch.FloatTensor([152, 185, 180, 196, 142])

W = torch.zeros(3, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optimizer = torch.optim.SGD([W, b], lr=1e-5) # 0.00001

print(x_train.shape)
print(W.shape)
print(x_train.matmul(W).shape)

In [48]:
n_epoch = 20
for epoch in range(n_epoch):
    hypothesis = x_train.matmul(W) + b
    cost = torch.mean((hypothesis - y_train) ** 2)
    
    print(hypothesis.squeeze().detach(), cost.item())

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

tensor([0., 0., 0., 0., 0.]) 29661.80078125
tensor([67.2578, 80.8397, 79.6523, 86.7394, 61.6605]) 9298.5205078125
tensor([104.9128, 126.0990, 124.2466, 135.3015,  96.1821]) 2915.71240234375
tensor([125.9942, 151.4381, 149.2133, 162.4896, 115.5097]) 915.04052734375
tensor([137.7967, 165.6247, 163.1911, 177.7112, 126.3307]) 287.93609619140625
tensor([144.4044, 173.5674, 171.0168, 186.2332, 132.3891]) 91.3710708618164
tensor([148.1035, 178.0143, 175.3980, 191.0042, 135.7812]) 29.758249282836914
tensor([150.1744, 180.5042, 177.8509, 193.6753, 137.6805]) 10.445266723632812
tensor([151.3336, 181.8983, 179.2240, 195.1707, 138.7440]) 4.391237258911133
tensor([151.9824, 182.6789, 179.9928, 196.0079, 139.3396]) 2.493121385574341
tensor([152.3454, 183.1161, 180.4231, 196.4765, 139.6732]) 1.8976876735687256
tensor([152.5485, 183.3609, 180.6640, 196.7389, 139.8602]) 1.7105515003204346
tensor([152.6620, 183.4982, 180.7988, 196.8857, 139.9651]) 1.6514164209365845
tensor([152.7253, 183.5752, 180.8742,

In [37]:
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70],])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

W = torch.zeros((3,1), requires_grad=True) # W = torch.zeros(3, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optimizer = torch.optim.SGD([W, b], lr=1e-5) # 0.00001

print(x_train.shape)
print(W.shape)
print(x_train.matmul(W).shape)

In [44]:
n_epoch = 20
for epoch in range(n_epoch):
    hypothesis = x_train.matmul(W) + b
    cost = torch.mean((hypothesis - y_train) ** 2)
    
    print(hypothesis.squeeze().detach(), cost.item())

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

tensor([152.7948, 183.6807, 180.9668, 197.0694, 140.1094]) 1.6049751043319702
tensor([152.7944, 183.6810, 180.9666, 197.0693, 140.1098]) 1.6042795181274414
tensor([152.7939, 183.6814, 180.9665, 197.0692, 140.1102]) 1.603571891784668
tensor([152.7935, 183.6817, 180.9664, 197.0691, 140.1106]) 1.602870225906372
tensor([152.7931, 183.6819, 180.9663, 197.0690, 140.1110]) 1.602174162864685
tensor([152.7926, 183.6822, 180.9661, 197.0688, 140.1114]) 1.6014728546142578
tensor([152.7922, 183.6825, 180.9660, 197.0687, 140.1118]) 1.60076105594635
tensor([152.7918, 183.6828, 180.9659, 197.0686, 140.1122]) 1.6000869274139404
tensor([152.7913, 183.6831, 180.9657, 197.0685, 140.1126]) 1.599369764328003
tensor([152.7909, 183.6834, 180.9656, 197.0684, 140.1130]) 1.5986852645874023
tensor([152.7905, 183.6837, 180.9655, 197.0683, 140.1134]) 1.5979793071746826
tensor([152.7901, 183.6840, 180.9653, 197.0682, 140.1138]) 1.5972920656204224
tensor([152.7896, 183.6843, 180.9652, 197.0681, 140.1143]) 1.596589565

### Cautions for Broadcasting

In [53]:
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70],])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

# W = torch.zeros((3,1), requires_grad=True) 
W = torch.zeros(3, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

prediction = x_train.matmul(W) + b
diff = prediction - y_train

print(x_train.shape)
print(W.shape)
print(y_train.shape)
print(prediction.shape)
print(diff.shape) # broadcasting is not error for code..

torch.Size([5, 3])
torch.Size([3])
torch.Size([5, 1])
torch.Size([5])
torch.Size([5, 5])


## nn.Module & pre-defined loss functions

In [54]:
import torch.nn as nn

In [55]:
class MLR(nn.Module): # nn.Module 상속
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 1) # (input, output)
        
    def forward(self, x):
        return self.linear(x)

In [56]:
import torch.nn.functional as F

In [62]:
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70],])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

model = MLR()

In [64]:
for param in model.parameters():
    print(param.data)
    
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5) # 0.00001

tensor([[0.4473, 0.4564, 0.2259]])
tensor([0.1456])


In [65]:
print(prediction.shape)

torch.Size([5, 1])


In [61]:
n_epoch = 20
for epoch in range(n_epoch):
    prediction = model(x_train)
    cost = F.mse_loss(prediction, y_train)
    
    print(prediction.squeeze().detach(), cost.item())

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

tensor([32.9862, 34.3992, 36.5326, 40.4243, 24.4337]) 19090.681640625
tensor([ 86.9277,  99.2354, 100.4156, 109.9913,  73.8879]) 5992.08056640625
tensor([117.1271, 135.5352, 136.1811, 148.9391, 101.5760]) 1886.3616943359375
tensor([134.0340, 155.8585, 156.2047, 170.7445, 117.0781]) 599.4320678710938
tensor([143.4991, 167.2371, 167.4151, 182.9524, 125.7577]) 196.0438995361328
tensor([148.7976, 173.6080, 173.6912, 189.7870, 130.6176]) 69.59921264648438
tensor([151.7635, 177.1752, 177.2047, 193.6132, 133.3389]) 29.9615478515625
tensor([153.4235, 179.1727, 179.1717, 195.7553, 134.8631]) 17.53302001953125
tensor([154.3523, 180.2915, 180.2728, 196.9544, 135.7169]) 13.633204460144043
tensor([154.8717, 180.9182, 180.8890, 197.6256, 136.1954]) 12.406759262084961
tensor([155.1619, 181.2695, 181.2339, 198.0013, 136.4639]) 12.018228530883789
tensor([155.3239, 181.4665, 181.4268, 198.2115, 136.6147]) 11.892332077026367
tensor([155.4140, 181.5772, 181.5346, 198.3290, 136.6996]) 11.848773002624512
te