In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="whitegrid")
%matplotlib inline

In [2]:
df = sns.load_dataset("diamonds")
df

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.20,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75
...,...,...,...,...,...,...,...,...,...,...
53935,0.72,Ideal,D,SI1,60.8,57.0,2757,5.75,5.76,3.50
53936,0.72,Good,D,SI1,63.1,55.0,2757,5.69,5.75,3.61
53937,0.70,Very Good,D,SI1,62.8,60.0,2757,5.66,5.68,3.56
53938,0.86,Premium,H,SI2,61.0,58.0,2757,6.15,6.12,3.74


In [3]:
label_name = "price"

In [4]:
X_raw = df.drop(label_name, axis=1)
y = df[label_name]

X_raw.shape, y.shape

((53940, 9), (53940,))

In [5]:
X = pd.get_dummies(X_raw)
X

Unnamed: 0,carat,depth,table,x,y,z,cut_Ideal,cut_Premium,cut_Very Good,cut_Good,...,color_I,color_J,clarity_IF,clarity_VVS1,clarity_VVS2,clarity_VS1,clarity_VS2,clarity_SI1,clarity_SI2,clarity_I1
0,0.23,61.5,55.0,3.95,3.98,2.43,1,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,0.21,59.8,61.0,3.89,3.84,2.31,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,0.23,56.9,65.0,4.05,4.07,2.31,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
3,0.29,62.4,58.0,4.20,4.23,2.63,0,1,0,0,...,1,0,0,0,0,0,1,0,0,0
4,0.31,63.3,58.0,4.34,4.35,2.75,0,0,0,1,...,0,1,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53935,0.72,60.8,57.0,5.75,5.76,3.50,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
53936,0.72,63.1,55.0,5.69,5.75,3.61,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
53937,0.70,62.8,60.0,5.66,5.68,3.56,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0
53938,0.86,61.0,58.0,6.15,6.12,3.74,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0


In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim

scaler = StandardScaler()
X = scaler.fit_transform(X)

# train, test 데이터로 나누기
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# PyTorch 모델을 위한 데이터 변환
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train.values.reshape(-1, 1))
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test.values.reshape(-1, 1))


X_train.shape, y_train.shape, X_test.shape, y_test.shape

(torch.Size([43152, 26]),
 torch.Size([43152, 1]),
 torch.Size([10788, 26]),
 torch.Size([10788, 1]))

* [예제로 배우는 파이토치(PyTorch) — 파이토치 한국어 튜토리얼 (PyTorch tutorials in Korean)](https://tutorials.pytorch.kr/beginner/pytorch_with_examples.html)
* [Linear — PyTorch documentation](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html)

In [7]:
# Multivariate Linear Regression 모델 구현
class MultivariateLinearRegression(nn.Module):
    def __init__(self, input_size, output_size):
        super(MultivariateLinearRegression, self).__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        out = self.linear(x)
        return out

# 모델 인스턴스 생성
model = MultivariateLinearRegression(input_size=X_train.shape[1], output_size=1)

# 손실 함수 및 최적화 알고리즘 설정
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# 모델 학습하기
num_epochs = 1000
for epoch in range(num_epochs):
    # 순전파 계산
    y_pred = model(X_train)

    # 손실 계산
    loss = criterion(y_pred, y_train)

    # 역전파 계산 및 가중치 갱신
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # 100 epoch마다 로그 출력
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}]')

# 테스트 데이터에 대한 예측 결과 출력
with torch.no_grad():
    y_pred_test = model(X_test)
    print(f'Test Loss: {criterion(y_pred_test, y_test).item():.4f}')

Epoch [100/1000], Loss: 2440188.0000]
Epoch [200/1000], Loss: 1959769.7500]
Epoch [300/1000], Loss: 1842775.3750]
Epoch [400/1000], Loss: 1752777.7500]
Epoch [500/1000], Loss: 1678621.7500]
Epoch [600/1000], Loss: 1617262.7500]
Epoch [700/1000], Loss: 1566396.1250]
Epoch [800/1000], Loss: 1524144.5000]
Epoch [900/1000], Loss: 1488972.7500]
Epoch [1000/1000], Loss: 1459623.1250]
Test Loss: 1458685.0000


In [8]:
y_test.shape, y_pred_test.shape

(torch.Size([10788, 1]), torch.Size([10788, 1]))

In [9]:
from sklearn.metrics import r2_score

r2_score(y_test, y_pred_test)

0.9082404672840824