# 선형 회귀 모델의 학습에서 다양한 옵티마이저를 적용해보기

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

### Boston data loader 

In [2]:
boston_dataset = load_boston()
x_data = boston_dataset.data   # 학습 데이터 
y_data = boston_dataset.target # 라벨 데이터 

# 데이터 스케일 
scaler = StandardScaler()
x_data = scaler.fit_transform(x_data)

# 데이터 분할 
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.1, random_state=42)
print("x_train len >> ", len(x_train))
print("x_test len >> ", len(x_test))
print("y_train len >> ", len(y_train))
print("y_test len >> ", len(y_test))
print(x_train)

x_train len >>  455
x_test len >>  51
y_train len >>  455
y_test len >>  51
[[-0.41620579  0.370669   -1.13908197 ... -1.64394538  0.38984838
  -1.13023008]
 [-0.2770909  -0.48772236 -0.43725801 ...  1.17646583  0.44105193
   0.85042548]
 [-0.40721363 -0.48772236 -0.37597609 ...  1.13022958  0.42657898
  -0.69849483]
 ...
 [-0.41877066  2.94584308 -1.3316823  ... -0.67298414  0.37570436
  -0.93398678]
 [ 0.87825441 -0.48772236  1.01599907 ...  0.80657583 -2.70626713
   1.48821619]
 [-0.39389588 -0.48772236 -0.37597609 ...  1.13022958 -3.13442533
  -0.28358043]]



    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

### 모델 생성 및 하이퍼파라미터 설정

In [3]:
# 하이퍼 파라미터 설정 
input_dim = x_data.shape[1] # 13
output_dim = 1
lr = 0.000005
epochs = 2000

# 모델 생성 
model = nn.Linear(input_dim, output_dim)

### 다양한 옵티마이저 설정 


In [4]:
optimizers = {"SGD" : optim.SGD(model.parameters(), lr=lr),
              "Momentum" : optim.SGD(model.parameters(), lr=lr, momentum=0.9),
              "Adagrad" : optim.Adagrad(model.parameters(), lr=lr),
              "RMSprop" : optim.RMSprop(model.parameters(), lr=lr),
              "Adam" : optim.Adam(model.parameters(), lr=lr)}

### 모델 학습

In [5]:
for optimizer_name, optimizer in optimizers.items() : 
    print(optimizer_name, optimizer)
    
    
#     criterion = nn.MSELoss()
#     optimizer.zero_grad()
    
#     for epoch in range(epochs) : 
#         inputs = torch.tensor(x_train, dtype=torch.float32)
#         labels = torch.tensor(y_train, dtype=torch.float32) 
# #         print(inputs)
# #         print(labels)
        
#         # Forward pass
#         outputs = model(inputs)
#         loss = criterion(outputs, labels)
        
#         # Backward and optimize 
#         loss.backward()
#         optimizer.step()
        
#         # print progress 
#         if (epoch + 1) % 100 == 0 :
#             print(f"{optimizer_name} - EPOCH [{epoch+1}/{epochs}, Loss : {loss.item():.4f}]")
        

SGD SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 5e-06
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)
Momentum SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 5e-06
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
Adagrad Adagrad (
Parameter Group 0
    eps: 1e-10
    foreach: None
    initial_accumulator_value: 0
    lr: 5e-06
    lr_decay: 0
    maximize: False
    weight_decay: 0
)
RMSprop RMSprop (
Parameter Group 0
    alpha: 0.99
    centered: False
    differentiable: False
    eps: 1e-08
    foreach: None
    lr: 5e-06
    maximize: False
    momentum: 0
    weight_decay: 0
)
Adam Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 5e-06
    maximize: False
    weight_decay: 0
)
