In [1]:
# 1. numpy 로 gradient descent 구현

import numpy as np

# Training data: row 0 holds the inputs x, row 1 the targets y (generated from y = 3x).
xy = np.array([
    [1., 2., 3., 4., 5., 6.],      # x
    [3., 6., 9., 12., 15., 18.],   # y = 3x
])

x_train = xy[0]
y_train = xy[1]

# Linear model y = beta_gd * x + bias. Both parameters are shape-(1,) arrays
# initialised with uniform random values in [0, 1).
beta_gd = np.random.rand(1)
bias = np.random.rand(1)

learning_rate = 0.01
n_data = len(x_train)
n_epochs = 1000

for i in range(n_epochs):
    # Forward pass and mean-squared-error cost.
    hypothesis = x_train * beta_gd + bias
    residual = hypothesis - y_train  # shared by the cost and both gradients
    cost = np.sum((residual ** 2) / n_data)

    # Analytic gradients of the MSE cost with respect to the slope and the bias.
    gradient_w = np.sum(residual * 2 * x_train) / n_data
    gradient_b = np.sum(residual * 2) / n_data

    # Gradient-descent step (in place, so both parameters stay shape-(1,) arrays).
    beta_gd -= learning_rate * gradient_w
    bias -= learning_rate * gradient_b

    if i % 100 == 0:
        # .item() extracts the Python scalar; calling float() on an array with
        # ndim > 0 is deprecated since NumPy 1.25.
        print('Epoch ({:10d}/{:10d}) cost: {:10f}, W: {:10f}, b:{:10f}'.format(
            i, n_epochs, cost, beta_gd.item(), bias.item()))

print('W: {:10f}'.format(beta_gd.item()))
print('b: {:10f}'.format(bias.item()))
print('result : ')
print(x_train * beta_gd + bias)

Epoch (         0/      1000) cost: 102.904395, W:   1.070070, b:  0.681684
Epoch (       100/      1000) cost:   0.107595, W:   2.825976, b:  0.745032
Epoch (       200/      1000) cost:   0.051788, W:   2.879267, b:  0.516883
Epoch (       300/      1000) cost:   0.024926, W:   2.916239, b:  0.358599
Epoch (       400/      1000) cost:   0.011998, W:   2.941889, b:  0.248786
Epoch (       500/      1000) cost:   0.005775, W:   2.959684, b:  0.172601
Epoch (       600/      1000) cost:   0.002779, W:   2.972030, b:  0.119746
Epoch (       700/      1000) cost:   0.001338, W:   2.980595, b:  0.083076
Epoch (       800/      1000) cost:   0.000644, W:   2.986537, b:  0.057636
Epoch (       900/      1000) cost:   0.000310, W:   2.990660, b:  0.039986
W:   2.993496
b:   0.027843
result : 
[ 3.02133943  6.01483589  9.00833234 12.00182879 14.99532524 17.98882169]


In [2]:
# 2. pytorch auto_grad 로 구현
# 참조 사이트 : https://machinelearningmastery.com/implementing-gradient-descent-in-pytorch/
import torch

# Training data generated from y = 3x.
x = torch.tensor([1., 2., 3., 4., 5., 6.])
y = torch.tensor([3., 6., 9., 12., 15., 18.])

# requires_grad=True makes autograd record operations on w and b so that
# loss.backward() stores d(loss)/dw and d(loss)/db in w.grad and b.grad.
# w and b are leaf tensors and must stay leaf tensors for the whole run.
w = torch.rand(1, requires_grad=True)
b = torch.rand(1, requires_grad=True)
learning_rate = 0.01

for epoch in range(1001):
    y_hat = x * w + b
    loss = ((y_hat - y) ** 2).mean()

    loss.backward()

    # Update the parameters in place with gradient tracking disabled.
    # Rebinding (w = w - lr * w.grad) would replace w with a non-leaf tensor
    # whose graph chains back through every previous epoch, so each backward()
    # would traverse the whole training history.
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad

    # backward() accumulates into .grad, so reset it before the next epoch.
    w.grad.zero_()
    b.grad.zero_()

    if epoch % 100 == 0:
        print(f'Epoch ({str(epoch).rjust(4)}/1000) cost: {round(float(loss),6)}, W: {round(float(w),7)}, b:{round(float(b),7)}')

print('W: {:10f}'.format(float(w)))
print('b: {:10f}'.format(float(b)))
print('result : ')
print(x * w + b)
    

Epoch (   0/1000) cost: 52.476868, W: 1.5300145, b:0.8875664
Epoch ( 100/1000) cost: 0.127115, W: 2.8108475, b:0.8097996
Epoch ( 200/1000) cost: 0.061183, W: 2.8687713, b:0.5618165
Epoch ( 300/1000) cost: 0.029449, W: 2.9089572, b:0.3897727
Epoch ( 400/1000) cost: 0.014174, W: 2.936837, b:0.2704135
Epoch ( 500/1000) cost: 0.006822, W: 2.9561794, b:0.1876054
Epoch ( 600/1000) cost: 0.003284, W: 2.9695985, b:0.1301552
Epoch ( 700/1000) cost: 0.00158, W: 2.9789081, b:0.090298
Epoch ( 800/1000) cost: 0.000761, W: 2.9853673, b:0.0626462
Epoch ( 900/1000) cost: 0.000366, W: 2.9898481, b:0.0434624
Epoch (1000/1000) cost: 0.000176, W: 2.9929569, b:0.030153
W:   2.992957
b:   0.030153
result : 
tensor([ 3.0231,  6.0161,  9.0090, 12.0020, 14.9949, 17.9879],
       grad_fn=<AddBackward0>)


In [3]:
# 3. Neural Network 로 gradient_descent 구현

# 클래스 정의하기
# 신경망 모델을 nn.Module 의 하위클래스로 정의하고,
# __init__ 에서 신경망 계층들을 초기화합니다. 
# nn.Module 을 상속받은 모든 클래스는 forward 메소드에
# 입력 데이터에 대한 연산들을 구현합니다.
import torch
import torch.nn as nn
import torch.optim as optim

# 신경망 정의
class MyNeuralNetwork(nn.Module):
    """Linear regression expressed as a network with one ``nn.Linear(1, 1)`` layer."""

    def __init__(self):
        super().__init__()
        # One input feature, one output feature, with a bias term (the default).
        self.layer = nn.Linear(1, 1)

        # Optional manual initialisation, left disabled. The original author
        # notes that training gives the same result with or without it.
        # weight = torch.rand(1).view(1,1) # initialise as a 2-D matrix
        # bias = torch.rand(1).view(1,1)
        # self.layer.weight = nn.Parameter(weight)
        # self.layer.bias = nn.Parameter(bias)

    def forward(self, input):
        """Apply the linear layer to ``input`` and return the result."""
        return self.layer(input)
    
# Build the model on the CPU and show its randomly initialised parameters.
model = MyNeuralNetwork().to("cpu")

print(f"model : {model}")
print(f"weight : {model.layer.weight}")
print(f"bias : {model.layer.bias}")

# Inputs and targets as (6, 1) column tensors; targets follow y = 3x.
# NOTE: the name `input` shadows the Python builtin of the same name in this notebook.
input = torch.tensor([[1.], [2.], [3.], [4.], [5.], [6.]])
y = torch.tensor([[3.], [6.], [9.], [12.], [15.], [18.]])

# PyTorch's built-in mean squared error loss.
loss_fn = nn.MSELoss()

# Plain SGD over all model parameters with a learning rate of 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)

epochs = 1001
for epoch in range(epochs):

    # Clear the gradients left over from the previous epoch.
    optimizer.zero_grad()

    prediction = model(input)
    loss = loss_fn(prediction, y)

    # Compute the gradients of the loss with respect to the weight and the bias.
    loss.backward()

    # Subtract learning_rate * gradient from every parameter returned by
    # model.parameters().
    optimizer.step()

    if epoch % 100 == 0:
        print(f'Epoch ({str(epoch).rjust(4)}/1000) cost: {round(float(loss),6)}, W: {round(float(model.layer.weight),7)}, b:{round(float(model.layer.bias),7)}')

print('W: {:10f}'.format(float(model.layer.weight)))
print('b: {:10f}'.format(float(model.layer.bias)))
print('result : ')
print(model(input))



model : MyNeuralNetwork(
  (layer): Linear(in_features=1, out_features=1, bias=True)
)
weight : Parameter containing:
tensor([[0.3685]], requires_grad=True)
bias : Parameter containing:
tensor([0.0889], requires_grad=True)
Epoch (   0/1000) cost: 103.39817, W: 1.1604834, b:0.2713251
Epoch ( 100/1000) cost: 0.041226, W: 2.8922796, b:0.4611719
Epoch ( 200/1000) cost: 0.019843, W: 2.9252665, b:0.3199482
Epoch ( 300/1000) cost: 0.009551, W: 2.9481521, b:0.2219715
Epoch ( 400/1000) cost: 0.004597, W: 2.9640293, b:0.1539977
Epoch ( 500/1000) cost: 0.002213, W: 2.9750445, b:0.1068392
Epoch ( 600/1000) cost: 0.001065, W: 2.9826868, b:0.0741222
Epoch ( 700/1000) cost: 0.000513, W: 2.9879885, b:0.0514237
Epoch ( 800/1000) cost: 0.000247, W: 2.9916668, b:0.0356764
Epoch ( 900/1000) cost: 0.000119, W: 2.9942186, b:0.0247512
Epoch (1000/1000) cost: 5.7e-05, W: 2.9959891, b:0.0171719
W:   2.995989
b:   0.017172
result : 
tensor([[ 3.0132],
        [ 6.0092],
        [ 9.0051],
        [12.0011],
   