In [1]:
import torch
import pandas as pd
import numpy as np

In [26]:
data = pd.read_csv("stock.csv")
data

Unnamed: 0,Open,High,Low,Volume,Close
0,828.659973,833.450012,828.349976,1247700,831.659973
1,823.020020,828.070007,821.655029,1597800,828.070007
2,819.929993,824.400024,818.979980,1281700,824.159973
3,819.359985,823.000000,818.469971,1304000,818.979980
4,819.000000,823.000000,816.000000,1053600,820.450012
...,...,...,...,...,...
727,599.992707,604.832763,562.192568,147100,567.002574
728,558.712504,568.452595,558.712504,7900,567.162558
729,566.892592,567.002574,556.932537,10800,556.972503
730,561.202549,566.432590,558.672539,41200,559.992565


In [27]:
size = len(data)
size

732

In [28]:
price = data["Close"].values.astype(float)

# 데이터 스케일링

스케일링은 특성들이 서로 다른 범위를 가지고 있을 때, 동일한 범위의 값을 가질 수 있게 스케일링 하는 것을 의미.

>예시
* x1은 0 ~ 1 사이의 값을 가지고
* x2는 1000 ~ 100000 사이의 값을 가지며
* y는 100000 ~ 10000000 사이의 값을 가진다면
>

x1의 값은 y의 결과치에 영향을 주지 못함. 따라서 같은 범위 내의 값들로 변환시킬 필요가 있음.

* 정규화 Normalization
  - 이상치에 민감.
  - 분류보다 회귀에 적합.
  - 최대값 1, 최솟값 0으로 크기를 제한.
  - 즉 값이 0~1 사이의 값이 나온다.

* 표준화 Standardization
  - 이상치에 민감.
  - 회귀보다 분류에 적합.
  - 평균을 0, 분산을 1로 스케일링.
  - 최댓값 및 최솟값의 크기를 제한하지 않음.

In [64]:
from sklearn.preprocessing import RobustScaler

# Scaling - 로버스틱 방식(이상치 영향 적음)
scaler_x = RobustScaler()
price = scaler_x.fit_transform(price.reshape(-1, 1))

# 훈련 - 학습 데이터 분리

In [65]:
from sklearn.model_selection import train_test_split

X, y = [], []

seq_length = 10

for i in range(len(price) - seq_length):
  X.append(price[i:i+seq_length])
  y.append(price[i+seq_length])

X = np.array(X)
y = np.array(y)

In [66]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [67]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# LSTM 모델 정의

In [68]:
import torch.nn as nn

# LSTM 모델 정의
class StockPredictor(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(StockPredictor, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.linear(out[:, -1, :])
        return out


In [69]:
input_dim = 1
hidden_dim = 64
num_layers = 2

model = StockPredictor(input_dim, hidden_dim, num_layers)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 훈련

In [70]:
num_epochs = 100
for epoch in range(num_epochs):
    outputs = model(X_train)
    optimizer.zero_grad()
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch: {epoch+1}, Loss: {loss.item()}')


Epoch: 10, Loss: 0.1150452196598053
Epoch: 20, Loss: 0.07292639464139938
Epoch: 30, Loss: 0.053822215646505356
Epoch: 40, Loss: 0.030254926532506943
Epoch: 50, Loss: 0.0064279185608029366
Epoch: 60, Loss: 0.005744013004004955
Epoch: 70, Loss: 0.004311870317906141
Epoch: 80, Loss: 0.0037505992222577333
Epoch: 90, Loss: 0.003430562559515238
Epoch: 100, Loss: 0.003161706728860736


# 검증 진행

In [72]:
from sklearn.metrics import mean_squared_error

# 테스트
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    mse = mean_squared_error(y_test, test_outputs)
    print(f'Mean Squared Error on Test Data: {mse}')

predicted_prices = scaler_x.inverse_transform(test_outputs.numpy())
actual_prices = scaler_x.inverse_transform(y_test.numpy())

for i in range(len(predicted_prices)):
    print(f"Actual Price: {actual_prices[i][0]:.2f}, Predicted Price: {predicted_prices[i][0]:.2f}")

Mean Squared Error on Test Data: 0.0033427339512854815
Actual Price: -0.11, Predicted Price: -0.16
Actual Price: -0.92, Predicted Price: -0.73
Actual Price: -0.72, Predicted Price: -0.71
Actual Price: 0.74, Predicted Price: 0.76
Actual Price: -0.74, Predicted Price: -0.69
Actual Price: -0.74, Predicted Price: -0.69
Actual Price: 0.70, Predicted Price: 0.62
Actual Price: 0.61, Predicted Price: 0.64
Actual Price: 0.37, Predicted Price: 0.07
Actual Price: -0.52, Predicted Price: -0.63
Actual Price: -0.67, Predicted Price: -0.71
Actual Price: 0.79, Predicted Price: 0.74
Actual Price: -0.61, Predicted Price: -0.21
Actual Price: 0.57, Predicted Price: 0.61
Actual Price: -0.52, Predicted Price: -0.53
Actual Price: -0.68, Predicted Price: -0.67
Actual Price: -0.73, Predicted Price: -0.71
Actual Price: -0.72, Predicted Price: -0.72
Actual Price: 0.16, Predicted Price: 0.23
Actual Price: -0.74, Predicted Price: -0.72
Actual Price: 0.61, Predicted Price: 0.66
Actual Price: 0.65, Predicted Price: 