In [1]:
import torch
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv("stock.csv")
data

Unnamed: 0,Open,High,Low,Volume,Close
0,828.659973,833.450012,828.349976,1247700,831.659973
1,823.020020,828.070007,821.655029,1597800,828.070007
2,819.929993,824.400024,818.979980,1281700,824.159973
3,819.359985,823.000000,818.469971,1304000,818.979980
4,819.000000,823.000000,816.000000,1053600,820.450012
...,...,...,...,...,...
727,599.992707,604.832763,562.192568,147100,567.002574
728,558.712504,568.452595,558.712504,7900,567.162558
729,566.892592,567.002574,556.932537,10800,556.972503
730,561.202549,566.432590,558.672539,41200,559.992565


In [3]:
size = len(data)
size

732

In [4]:
price = data["Close"].values.astype(float)

# 데이터 스케일링

스케일링은 특성들이 서로 다른 범위를 가지고 있을 때, 동일한 범위의 값을 가질 수 있게 스케일링 하는 것을 의미.

>예시
* x1은 0 ~ 1 사이의 값을 가지고
* x2는 1000 ~ 100000 사이의 값을 가지며
* y는 100000 ~ 10000000 사이의 값을 가진다면
>

x1의 값은 y의 결과치에 영향을 주지 못함. 따라서 같은 범위 내의 값들로 변환시킬 필요가 있음.

* 정규화 Normalization
  - 이상치에 민감.
  - 분류보다 회귀에 적합.
  - 최대값 1, 최솟값 0으로 크기를 제한.
  - 즉 값이 0~1 사이의 값이 나온다.

* 표준화 Standardization
  - 이상치에 민감.
  - 회귀보다 분류에 적합.
  - 평균을 0, 분산을 1로 스케일링.
  - 최댓값 및 최솟값의 크기를 제한하지 않음.

In [5]:
from sklearn.preprocessing import MinMaxScaler

# Scaling - 최대 / 최소 설정
scaler_x = MinMaxScaler()
price = scaler_x.fit_transform(price.reshape(-1, 1))

# 훈련 - 학습 데이터 분리

In [6]:
from sklearn.model_selection import train_test_split

X, y = [], []

seq_length = 10

for i in range(len(price) - seq_length):
  X.append(price[i:i+seq_length])
  y.append(price[i+seq_length])

X = np.array(X)
y = np.array(y)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [8]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# LSTM 모델 정의

In [9]:
import torch.nn as nn

# LSTM 모델 정의
class StockPredictor(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(StockPredictor, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.linear(out[:, -1, :])
        return out


In [10]:
input_dim = 1
hidden_dim = 64
num_layers = 2

model = StockPredictor(input_dim, hidden_dim, num_layers)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 훈련

In [11]:
num_epochs = 100
for epoch in range(num_epochs):
    outputs = model(X_train)
    optimizer.zero_grad()
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch: {epoch+1}, Loss: {loss.item()}')


Epoch: 10, Loss: 0.11253084242343903
Epoch: 20, Loss: 0.07794826477766037
Epoch: 30, Loss: 0.05177939310669899
Epoch: 40, Loss: 0.023042693734169006
Epoch: 50, Loss: 0.005160838831216097
Epoch: 60, Loss: 0.006082016509026289
Epoch: 70, Loss: 0.003860107623040676
Epoch: 80, Loss: 0.0035770798567682505
Epoch: 90, Loss: 0.0030869892798364162
Epoch: 100, Loss: 0.0028889260720461607


# 검증 진행

In [12]:
from sklearn.metrics import mean_squared_error

# 테스트
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    mse = mean_squared_error(y_test, test_outputs)
    print(f'Mean Squared Error on Test Data: {mse}')

predicted_prices = scaler_x.inverse_transform(test_outputs.numpy())
actual_prices = scaler_x.inverse_transform(y_test.numpy())

data = {
    "Actual": [round(val[0], 3) for val in actual_prices],
    "Predicted": [round(val[0], 3) for val in predicted_prices]
}

result_frame = pd.DataFrame(data)
result_frame["Error"] = result_frame["Actual"] - result_frame["Predicted"]
result_frame

Mean Squared Error on Test Data: 0.003112873062491417


Unnamed: 0,Actual,Predicted,Error
0,645.440002,638.596985,6.843018
1,506.902008,535.690002,-28.787994
2,541.081970,540.307007,0.774963
3,790.799988,794.296021,-3.496033
4,536.512024,543.143005,-6.630981
...,...,...,...
212,718.270020,683.216980,35.053040
213,731.250000,724.952026,6.297974
214,566.072021,564.825989,1.246033
215,501.791992,533.398010,-31.606018
