In [8]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

In [11]:
import numpy as np
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2,:2]])
target = raw_df.values[1::2,2]

In [12]:
feature_names = ['CRIM','ZN','INDUS','CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD','TAX','PTRATIO', 'B','LSTAT']
df = pd.DataFrame(data, columns=feature_names)
df["TARGET"] = target
df.tail()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,TARGET
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.12,76.7,2.2875,1.0,273.0,21.0,396.9,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.9,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0
505,0.04741,0.0,11.93,0.0,0.573,6.03,80.8,2.505,1.0,273.0,21.0,396.9,7.88,11.9


In [13]:
scaler = StandardScaler()
scaler.fit(df.values[:,:-1])
df.values[:,:-1] = scaler.transform(df.values[:,:-1]).round(4)

df.tail()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,TARGET
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.12,76.7,2.2875,1.0,273.0,21.0,396.9,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.9,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0
505,0.04741,0.0,11.93,0.0,0.573,6.03,80.8,2.505,1.0,273.0,21.0,396.9,7.88,11.9


In [14]:
# 학습코드 구현
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [15]:
data = torch.from_numpy(df.values).float()

x = data[:, -1:]
y = data[:, :-1]

In [16]:
# 하이퍼 파라미터 설정
n_epochs = 200000
learning_rate = 1e-4
print_interval = 10000

In [20]:
# 심층신경망 정의
# 4개의 선형계층과 비선형 함수를 갖도록 정의
# 비선형 함수는 ReLU 사용
# 각 선형계층들은 각각 다른 가중치 파라미터를 가지므로 다른 객체로 선언
# 비선형 함수는 학습되는 파라미터를 가지지 않으므로 모든 계층에서 동일하게 동작하기 때문에 재활용
# x라는 샘플 개수 곱하기 입력 차원(batch_size, input_size) 크기의 2차원 텐서가 주어지면 최종적으로 샘플 개수 곱하기 출력 차원(batch_size, output_dim) 크기의 2차원 텐서를 뱉어냄
# 마지막 계층에는 활성함수(비선형 함수)를 씌우지 않음

class MyModel(nn.Module):
    def __init__(self,input_dim,output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim

        super().__init__()

        # 4개의 선형계층 구성 (각각 다른 객체로 구성)
        self.linear1 = nn.Linear(input_dim,3)
        self.linear2 = nn.Linear(3,3)
        self.linear3 = nn.Linear(3,3)
        self.linear4 = nn.Linear(3,output_dim)
        self.act = nn.ReLU()

    def forward(self,x):
        h = self.act(sl.linear1(x))
        h = self.act(slef.linear2(x))
        h = self.act(slef.linear3(x))
        y = self.linear(4) # 일반 선형회귀
       
        return y        

In [21]:
model = MyModel(x.size(-1),y.size(-1))
print(model)

MyModel(
  (linear1): Linear(in_features=1, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=3, bias=True)
  (linear3): Linear(in_features=3, out_features=3, bias=True)
  (linear4): Linear(in_features=3, out_features=13, bias=True)
  (act): ReLU()
)


In [24]:
# 입력 텐서를 받아 순차적으로 앞으로 하나씩 계산해 나가는 것에 불과한 아주 간단한 모델 구조
# 위에서 정의한 MyModel과 같은 구조를 nn.Sequential 클래스를 활용하여 똑같이 정의 가능
# 위의 모델과 차이점 : 리키렐루 사용, 하나의 리키렐루를 재활용 하지 않고 매번 새로운 객체를 넣어줌

model = nn.Sequential(
    nn.Linear(x.size(-1),3),
    nn.LeakyReLU(),
    nn.Linear(3,3),
    nn.LeakyReLU(),
    nn.Linear(3,3),
    nn.LeakyReLU(),
    nn.Linear(3,y.size(-1)),
)

print(model)

Sequential(
  (0): Linear(in_features=1, out_features=3, bias=True)
  (1): LeakyReLU(negative_slope=0.01)
  (2): Linear(in_features=3, out_features=3, bias=True)
  (3): LeakyReLU(negative_slope=0.01)
  (4): Linear(in_features=3, out_features=3, bias=True)
  (5): LeakyReLU(negative_slope=0.01)
  (6): Linear(in_features=3, out_features=13, bias=True)
)


In [26]:
# 모델의 가중치 파라미터들을 옵티마이저에 등록
optimizer = optim.SGD(model.parameters(),
                      lr = learning_rate)

In [29]:
for i in range(n_epochs):
    y_hat = model(x)
    loss = F.mse_loss(y_hat,y)

    optimizer.zero_grad()
    loss.backward()

    optimizer.step()

    if (i+1) % print_interval == 0:
        print('Epoch %d: loss=%.4e' % (i+1, loss))

Consider using tensor.detach() first. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/generated/python_variable_methods.cpp:836.)
  print('Epoch %d: loss=%.4e' % (i+1, loss))


Epoch 10000: loss=2.7492e+03
Epoch 20000: loss=2.7329e+03
Epoch 30000: loss=2.7253e+03
Epoch 40000: loss=2.7238e+03
Epoch 50000: loss=2.7219e+03
Epoch 60000: loss=2.7166e+03
Epoch 70000: loss=2.7115e+03
Epoch 80000: loss=2.7101e+03
Epoch 90000: loss=2.7084e+03
Epoch 100000: loss=1.9939e+03
Epoch 110000: loss=1.9161e+03
Epoch 120000: loss=1.9014e+03
Epoch 130000: loss=1.8959e+03
Epoch 140000: loss=1.8946e+03
Epoch 150000: loss=1.8940e+03
Epoch 160000: loss=1.8936e+03
Epoch 170000: loss=1.8934e+03
Epoch 180000: loss=1.8929e+03
Epoch 190000: loss=1.8927e+03
Epoch 200000: loss=1.8926e+03
