In [1]:
import pandas as pd
import torch
import torch.nn as nn
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Load the seaborn "mpg" dataset and drop every row that contains a missing value.
# The result is assigned rather than mutated with inplace=True so that re-running
# this cell always starts from a freshly loaded frame.
df = sns.load_dataset('mpg').dropna()

# Split features from the target: 'mpg' is the regression target and 'name' is
# dropped from the feature set.
inputs = df.drop(['mpg', 'name'], axis=1)
# One-hot encode the remaining non-numeric columns. dtype=float is explicit because
# pandas >= 2.0 emits bool dummy columns by default; a frame mixing bool and numeric
# columns yields an object array from `.values`, which torch.tensor() rejects.
inputs = pd.get_dummies(inputs, dtype=float)
outputs = df['mpg']

# Hold out 20% of the rows for evaluation; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=0.2, random_state=42)

X_train.shape

(313, 9)

In [2]:
# Show the training features as a raw NumPy array.
X_train.to_numpy()

array([[  6., 225., 110., ...,   0.,   0.,   1.],
       [  4., 140.,  92., ...,   0.,   0.,   1.],
       [  6., 171.,  97., ...,   0.,   0.,   1.],
       ...,
       [  4., 151.,  85., ...,   0.,   0.,   1.],
       [  4.,  98.,  65., ...,   0.,   0.,   1.],
       [  8., 400., 150., ...,   0.,   0.,   1.]])

In [3]:
# Convert the train/test splits to float32 tensors.
# to_numpy(dtype='float32') casts every column up front, so the conversion also works
# when the feature frame mixes bool dummy columns with numeric columns (a plain
# `.values` would then be an object array that torch.tensor() cannot consume).
X_train_tensor = torch.tensor(X_train.to_numpy(dtype='float32'))
y_train_tensor = torch.tensor(y_train.to_numpy(dtype='float32'))
X_test_tensor = torch.tensor(X_test.to_numpy(dtype='float32'))
y_test_tensor = torch.tensor(y_test.to_numpy(dtype='float32'))

# Define the model
class MultivariateLinearRegression(nn.Module):
    """Linear regression over several input features, backed by one ``nn.Linear`` layer.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of values predicted per sample.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.linear = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)

# Seed PyTorch's RNG before the model is built so the random weight initialisation
# (and therefore the training run) is repeatable after a kernel restart.
torch.manual_seed(42)

# Create the model: one input per feature column, a single regression output.
model = MultivariateLinearRegression(X_train_tensor.shape[1], 1)

# Define the loss function (mean squared error) and the optimizer (Adam).
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [4]:
# Train the model with full-batch gradient steps.
num_epochs = 5000
for epoch in range(num_epochs):
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()

    # Forward pass; the (N, 1) predictions are flattened to (N,) to match the targets.
    y_pred = model(X_train_tensor)
    loss = criterion(y_pred.flatten(), y_train_tensor)

    # Backward pass and parameter update.
    loss.backward()
    optimizer.step()

    # Report the training loss once every 1000 epochs.
    if not (epoch + 1) % 1000:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate on the held-out split; no gradients are tracked here.
with torch.no_grad():
    y_pred_test = model(X_test_tensor)
    r2 = r2_score(y_test, y_pred_test.flatten())
    print(f'R^2 Score: {r2:.4f}')

Epoch [1000/5000], Loss: 41.2031
Epoch [2000/5000], Loss: 15.2679
Epoch [3000/5000], Loss: 12.1595
Epoch [4000/5000], Loss: 11.7283
Epoch [5000/5000], Loss: 11.5145
R^2 Score: 0.7771
