In [6]:
# 1. Gradient descent implemented with plain NumPy
#
# Fits y = W * x + b to six points lying on the line y = 3x by minimising the
# mean squared error with hand-derived gradients.

import numpy as np

# Local, seeded generator: the run is reproducible after Restart & Run All and
# the global NumPy RNG state is left untouched.
rng = np.random.default_rng(0)

xy = np.array([
    [1., 2., 3., 4., 5., 6.],
    [3., 6., 9., 12., 15., 18.],
])

x_train = xy[0]
y_train = xy[1]

# One-element arrays (not Python floats) so the in-place `-=` updates below
# modify these same objects.
beta_gd = rng.random(1)
bias = rng.random(1)

learning_rate = 0.01
n_data = len(x_train)
n_epochs = 1000

for i in range(n_epochs):
    hypothesis = x_train * beta_gd + bias
    residual = hypothesis - y_train  # computed once, reused for cost and both gradients
    cost = np.sum(residual ** 2) / n_data

    # d(cost)/dW = mean(2 * residual * x),  d(cost)/db = mean(2 * residual)
    gradient_w = np.sum(2 * residual * x_train) / n_data
    gradient_b = np.sum(2 * residual) / n_data

    beta_gd -= learning_rate * gradient_w
    bias -= learning_rate * gradient_b

    if i % 100 == 0:
        # `cost` was measured before this step's update; W and b are the updated values.
        # `.item()` extracts the scalar explicitly: float() on an array with
        # ndim > 0 is deprecated in recent NumPy releases.
        print('Epoch ({:10d}/{:10d}) cost: {:10f}, W: {:10f}, b:{:10f}'.format(
            i, n_epochs, cost, beta_gd.item(), bias.item()))

print('W: {:10f}'.format(beta_gd.item()))
print('b: {:10f}'.format(bias.item()))
print('result : ')
print(x_train * beta_gd + bias)

Epoch (         0/      1000) cost: 127.179154, W:   0.881821, b:  0.639048
Epoch (       100/      1000) cost:   0.107849, W:   2.825771, b:  0.745910
Epoch (       200/      1000) cost:   0.051910, W:   2.879124, b:  0.517492
Epoch (       300/      1000) cost:   0.024985, W:   2.916140, b:  0.359022
Epoch (       400/      1000) cost:   0.012026, W:   2.941820, b:  0.249079
Epoch (       500/      1000) cost:   0.005788, W:   2.959636, b:  0.172804
Epoch (       600/      1000) cost:   0.002786, W:   2.971997, b:  0.119887
Epoch (       700/      1000) cost:   0.001341, W:   2.980572, b:  0.083174
Epoch (       800/      1000) cost:   0.000645, W:   2.986522, b:  0.057704
Epoch (       900/      1000) cost:   0.000311, W:   2.990649, b:  0.040033
W:   2.993489
b:   0.027876
result : 
[ 3.02136459  6.01485338  9.00834216 12.00183094 14.99531973 17.98880851]


In [2]:
# 2. Gradient descent implemented with PyTorch autograd
# Reference: https://machinelearningmastery.com/implementing-gradient-descent-in-pytorch/
import torch

# Local, seeded generator: reproducible initial values without touching the
# global torch RNG state.
gen = torch.Generator().manual_seed(0)

x = torch.tensor([1., 2., 3., 4., 5., 6.])
y = torch.tensor([3., 6., 9., 12., 15., 18.])

# requires_grad=True makes autograd record the operations applied to w and b
# and store d(loss)/dw and d(loss)/db in their .grad attribute after backward().
w = torch.rand(1, generator=gen, requires_grad=True)
b = torch.rand(1, generator=gen, requires_grad=True)
learning_rate = 0.01
n_epochs = 1001

for epoch in range(n_epochs):
    y_hat = x * w + b
    loss = ((y_hat - y) ** 2).mean()

    loss.backward()

    # Update in place under no_grad so that w and b remain leaf tensors.
    # Rebinding them (`w = w - lr * w.grad`) would turn them into non-leaf
    # results whose history chains back through every previous epoch, so each
    # backward() would traverse an ever-growing graph and .grad would only be
    # available via retain_grad().
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad

    # .grad accumulates across backward() calls; reset it for the next epoch.
    w.grad.zero_()
    b.grad.zero_()

    if epoch % 100 == 0:
        # `loss` was measured before this step's update; W and b are the updated values.
        print(f'Epoch ({str(epoch).rjust(4)}/{n_epochs - 1}) cost: {round(loss.item(),6)}, W: {round(w.item(),7)}, b:{round(b.item(),7)}')

print('W: {:10f}'.format(w.item()))
print('b: {:10f}'.format(b.item()))
print('result : ')
print(x * w + b)
    

Epoch (   0/1000) cost: 86.153435, W: 1.127606, b:1.0885795
Epoch ( 100/1000) cost: 0.195346, W: 2.7655146, b:1.0038788
Epoch ( 200/1000) cost: 0.094024, W: 2.8373203, b:0.6964635
Epoch ( 300/1000) cost: 0.045256, W: 2.8871374, b:0.4831872
Epoch ( 400/1000) cost: 0.021782, W: 2.921699, b:0.335222
Epoch ( 500/1000) cost: 0.010484, W: 2.945677, b:0.2325676
Epoch ( 600/1000) cost: 0.005046, W: 2.962312, b:0.1613492
Epoch ( 700/1000) cost: 0.002429, W: 2.9738533, b:0.1119396
Epoch ( 800/1000) cost: 0.001169, W: 2.9818602, b:0.0776607
Epoch ( 900/1000) cost: 0.000563, W: 2.9874151, b:0.0538786
Epoch (1000/1000) cost: 0.000271, W: 2.9912691, b:0.0373794
W:   2.991269
b:   0.037379
result : 
tensor([ 3.0286,  6.0199,  9.0112, 12.0025, 14.9937, 17.9850],
       grad_fn=<AddBackward0>)


In [27]:
# 3. Neural Network 로 gradient_descent 구현

# 클래스 정의하기
# 신경망 모델을 nn.Module 의 하위클래스로 정의하고,
# __init__ 에서 신경망 계층들을 초기화합니다. 
# nn.Module 을 상속받은 모든 클래스는 forward 메소드에
# 입력 데이터에 대한 연산들을 구현합니다.
import torch
import torch.nn as nn
import torch.optim as optim

# 신경망 정의
class MyNeuralNetwork(nn.Module):
    """Minimal network: a single linear layer with one input and one output feature."""

    def __init__(self):
        super().__init__()
        # nn.Linear includes a bias term by default and creates its own weight
        # and bias parameters, so no manual initialisation is done here.
        # (They could be overwritten with custom nn.Parameter values if a
        # specific starting point were required.)
        self.layer = nn.Linear(1, 1)

    def forward(self, input):
        """Apply the linear layer to ``input`` and return the result."""
        return self.layer(input)
    
model = MyNeuralNetwork().to("cpu")

print(f"model : {model}")
print(f"weight : {model.layer.weight}")
print(f"bias : {model.layer.bias}")

# Column tensors of shape (6, 1): the model's Linear layer takes one feature per row.
# Named `inputs` (not `input`) so the built-in input() is not shadowed for the
# rest of the kernel session.
inputs = torch.tensor([1., 2., 3., 4., 5., 6.]).view(-1, 1)
y = torch.tensor([3., 6., 9., 12., 15., 18.]).view(-1, 1)

optimizer = optim.SGD(model.parameters(), lr=0.01)

epochs = 1001
loss_fn = nn.MSELoss()
for epoch in range(epochs):
    optimizer.zero_grad()            # clear gradients accumulated by the previous step
    loss = loss_fn(model(inputs), y)
    loss.backward()                  # compute gradients of the loss w.r.t. weight and bias
    optimizer.step()                 # SGD step: param -= lr * param.grad for every model parameter

    if epoch % 100 == 0:
        # `loss` was measured before this step's update; W and b are the updated values.
        print(f'Epoch ({str(epoch).rjust(4)}/{epochs - 1}) cost: {round(loss.item(),6)}, W: {round(model.layer.weight.item(),7)}, b:{round(model.layer.bias.item(),7)}')

print('W: {:10f}'.format(model.layer.weight.item()))
print('b: {:10f}'.format(model.layer.bias.item()))
print('result : ')
# Inference only: no autograd graph is needed for the final predictions.
with torch.no_grad():
    print(model(inputs))



model : MyNeuralNetwork(
  (layer): Linear(in_features=1, out_features=1, bias=True)
)
weight : Parameter containing:
tensor([[0.6738]], requires_grad=True)
bias : Parameter containing:
tensor([-0.4946], requires_grad=True)
Epoch (   0/1000) cost: 90.370064, W: 1.4140239, b:-0.3218891
Epoch ( 100/1000) cost: 0.000198, W: 2.9925377, b:0.0319481
Epoch ( 200/1000) cost: 9.5e-05, W: 2.9948227, b:0.0221648
Epoch ( 300/1000) cost: 4.6e-05, W: 2.996408, b:0.0153775
Epoch ( 400/1000) cost: 2.2e-05, W: 2.997508, b:0.0106685
Epoch ( 500/1000) cost: 1.1e-05, W: 2.998271, b:0.0074019
Epoch ( 600/1000) cost: 5e-06, W: 2.9988005, b:0.0051356
Epoch ( 700/1000) cost: 2e-06, W: 2.9991677, b:0.0035634
Epoch ( 800/1000) cost: 1e-06, W: 2.9994223, b:0.0024726
Epoch ( 900/1000) cost: 1e-06, W: 2.999599, b:0.0017157
Epoch (1000/1000) cost: 0.0, W: 2.9997218, b:0.0011905
W:   2.999722
b:   0.001190
result : 
tensor([[ 3.0009],
        [ 6.0006],
        [ 9.0004],
        [12.0001],
        [14.9998],
      