In [None]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
df = pd.read_csv('./total_data.csv', encoding = 'cp949')
df.head()
df = df[df['종목명'] == 'KB금융']

In [None]:
df_np = df['종가'].values
print(len(df_np))

987


In [50]:
# data split
# input_dim = 50
# forecast_dim = 3

window_size = 50
horizon_size = 3
lr = 0.001
num_epochs = 2000

In [None]:
from sklearn.preprocessing import MinMaxScaler
train_x = []
train_y = []

mm = MinMaxScaler()
fitted = mm.fit(df_np.reshape(-1,1))
out = mm.transform(df_np.reshape(-1,1))
df_np = out.reshape(-1)
for i in range(len(df_np) - window_size - horizon_size):
  train_x.append(df_np[i:i+window_size]) # len == window_size
  train_y.append(df_np[i+window_size: i+window_size + horizon_size]) # len == horizon_size

x_tensor = torch.FloatTensor(train_x)
y_tensor = torch.FloatTensor(train_y)
print(f'src shape : {x_tensor.shape}')
print(f'label shape : {y_tensor.shape}')

src shape : torch.Size([934, 50])
label shape : torch.Size([934, 3])


In [None]:
class Transformer(nn.Module):
  def __init__(self,d_model, n_head, num_enc):
    super(Transformer, self).__init__()

    # encoder layer parameter
    self.d_model = d_model
    self.n_head = n_head
    self.num_enc = num_enc

    self.encoderBlock = nn.TransformerEncoderLayer(
      d_model = self.d_model,
      nhead = self.n_head,
      batch_first = True
    )

    self.encoder = nn.TransformerEncoder(
        encoder_layer = self.encoderBlock,
        num_layers = self.num_enc
    )
    
    self.decoder = nn.Linear(d_model, d_model//2)
    self.fc = nn.Linear(d_model//2, 3)
    self.relu = nn.ReLU()

  def forward(self, x):
      # encoder
    out = self.encoder(x)
      # encoder output == decoder input
      # encoder output shape = encoder input shape

      # decoder
    out = self.decoder(out)
    out = self.relu(out)
    out = self.fc(out)

      # forecast horizon which has a length of 3
    return out

In [44]:
class sMAPE(nn.Module):
  def __init__(self):
    super(sMAPE, self).__init__()
  
  def forward(self, src,tgt):
    # src shape =  (700,1,3)
    # tgt shape = (700,1,3)
    tot = 0
    s = src.view(-1,3)
    t = tgt.view(-1,3)

    up = torch.abs(s - t) # shape ( 700 , 3)
    down =torch.abs(s) + torch.abs(t)

    tot = torch.sum( up / down)
    return 200 * tot / 3

In [None]:
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

In [None]:
x_tensor = x_tensor.view(-1,1,window_size).to(device)
y_tensor = y_tensor.view(-1,1,horizon_size).to(device)
print(f'src shape : {x_tensor.shape}')
print(f'label shape : {y_tensor.shape}')

src shape : torch.Size([934, 1, 50])
label shape : torch.Size([934, 1, 3])


In [51]:
# model init
model = Transformer(window_size,2,2).to(device)
optimizer = optim.Adam(model.parameters(), lr = lr)
criterion = sMAPE()

In [None]:
# train_test_split
train_x = x_tensor[:700]
train_y = y_tensor[:700]
test_x = x_tensor[700:]
test_y = y_tensor[700:]

In [52]:
for epoch in range(num_epochs+1):
  optimizer.zero_grad()
  pred = model(train_x)
  loss = criterion(pred, train_y)
  loss.backward()

  optimizer.step()
  if epoch % 100 == 0:
    print(f'Epoch : {epoch} , Loss : {loss.item()}')

Epoch : 0 , Loss : 134891.609375
Epoch : 100 , Loss : 51858.82421875
Epoch : 200 , Loss : 6823.18310546875
Epoch : 300 , Loss : 5581.8740234375
Epoch : 400 , Loss : 4735.8330078125
Epoch : 500 , Loss : 4566.5869140625
Epoch : 600 , Loss : 3713.9169921875
Epoch : 700 , Loss : 4058.033203125
Epoch : 800 , Loss : 3298.237060546875
Epoch : 900 , Loss : 3268.35205078125
Epoch : 1000 , Loss : 3040.9814453125
Epoch : 1100 , Loss : 2817.36962890625
Epoch : 1200 , Loss : 2706.423828125
Epoch : 1300 , Loss : 2808.43896484375
Epoch : 1400 , Loss : 2544.36962890625
Epoch : 1500 , Loss : 2466.831787109375
Epoch : 1600 , Loss : 2401.7685546875
Epoch : 1700 , Loss : 2190.896484375
Epoch : 1800 , Loss : 2355.681396484375
Epoch : 1900 , Loss : 2133.4501953125
Epoch : 2000 , Loss : 2102.663818359375


In [53]:
model.eval()
y_pred = model(test_x)
tot_loss = criterion(y_pred, test_y)
print(f'Loss : {tot_loss.item():.4f}')

Loss : 2950.2622
