In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
data = pd.read_csv('./.data/kospi_1year_data.csv',
                   encoding='euc-kr', usecols=[1])
print(data)

         현재지수
0    2,232.56
1    2,226.60
2    2,234.79
3    2,195.44
4    2,190.66
..        ...
242  2,208.88
243  2,210.34
244  2,195.50
245  2,162.84
246  2,079.04

[247 rows x 1 columns]


In [6]:
data = data.replace('[,]', '', regex=True).astype(float)

In [7]:
scaler = MinMaxScaler()
scaler.fit(data)
data_normalized = scaler.transform(data)

print(data_normalized)

[[0.90297589]
 [0.88630643]
 [0.90921296]
 [0.79915534]
 [0.78578621]
 [0.75381775]
 [0.74366504]
 [0.71622755]
 [0.63693573]
 [0.63878167]
 [0.69214633]
 [0.66761761]
 [0.687951  ]
 [0.74509146]
 [0.75454495]
 [0.74931476]
 [0.74786038]
 [0.76962018]
 [0.77540974]
 [0.65768865]
 [0.6687084 ]
 [0.65981429]
 [0.61081278]
 [0.64596968]
 [0.72319181]
 [0.74808413]
 [0.82105499]
 [0.83017285]
 [0.83878727]
 [0.84155619]
 [0.84983498]
 [0.8801253 ]
 [0.88026515]
 [0.90546512]
 [0.93183979]
 [0.94792191]
 [0.94025843]
 [0.85042233]
 [0.85707893]
 [0.85847737]
 [0.86927337]
 [0.81478995]
 [0.7853387 ]
 [0.75404151]
 [0.85786206]
 [0.82194999]
 [0.8475695 ]
 [0.8016166 ]
 [0.74755272]
 [0.72243665]
 [0.53784192]
 [0.55470717]
 [0.47351345]
 [0.48142865]
 [0.51202663]
 [0.44185266]
 [0.40859764]
 [0.40834592]
 [0.42384069]
 [0.43393746]
 [0.41919785]
 [0.37925826]
 [0.37618169]
 [0.38910332]
 [0.31775466]
 [0.36105051]
 [0.36927337]
 [0.44230016]
 [0.4398389 ]
 [0.44582424]
 [0.45483023]
 [0.53

In [38]:
def create_inout_sequences(input_data, tw):
    inout_seq = []
    label = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        inout_seq.append(train_seq)
        _label = input_data[i+tw:i+tw+1]
        label.append(_label)
    return inout_seq, label


train_size = 20
train_set = data_normalized[:-train_size]
test_set = data_normalized[-train_size:]

train_set, label = create_inout_sequences(train_set, 20)

train_set = torch.Tensor(train_set)
label = torch.Tensor(label)
test_set = torch.Tensor(test_set)

# train_set = train_set.to(device)
# label = label.to(device)
# test_set = test_set.to(device)

print(train_set.shape)

torch.Size([207, 20, 1])


In [55]:
class Stock(nn.Module):
    def __init__(self, input_size=1, hidden_size=128, output_size=1):
        super(Stock, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, output_size)
        self.hidden_state = [torch.zeros(1, 1, self.hidden_size),
                             torch.zeros(1, 1, self.hidden_size)]

    def forward(self, input_data):
        lstm_out, self.hidden_state = self.lstm(
            input_data.view(len(input_data), 1, -1), self.hidden_state)
        predictions = self.linear(lstm_out.view(len(input_data), -1))
        return predictions[-1]

In [56]:
model = Stock().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
print(model)

Stock(
  (lstm): LSTM(1, 128)
  (linear): Linear(in_features=128, out_features=1, bias=True)
)


In [58]:
epochs = 100

model.train()
for epoch in range(epochs):
    for i, seq in enumerate(train_set):
        _label = label[i]
        seq = seq.to(device)
        _label = _label.to(device)

        optimizer.zero_grad()
        model.hidden_state = [torch.zeros(1, 1, model.hidden_size),
                              torch.zeros(1, 1, model.hidden_size)]
        model.hidden_state[0] = model.hidden_state[0].to(device)
        model.hidden_state[1] = model.hidden_state[1].to(device)

        y_pred = model(seq)
        loss = criterion(y_pred, _label)
        loss.backward()
        optimizer.step()

        if i % 50 == 0 and (epoch+1) % 20 == 0:
            print("[{}/{}], epoch: [{}/{}], loss:{}"
                  .format(i, len(train_set), epoch+1, epochs, loss.item()))

  return F.mse_loss(input, target, reduction=self.reduction)


[0/207], epoch: [20/100], loss:0.0043453434482216835
[50/207], epoch: [20/100], loss:9.175501327263191e-05
[100/207], epoch: [20/100], loss:0.0007533509051427245
[150/207], epoch: [20/100], loss:0.002453601686283946
[200/207], epoch: [20/100], loss:0.0029449707362800837
[0/207], epoch: [40/100], loss:0.005310308653861284
[50/207], epoch: [40/100], loss:0.00010409277456346899
[100/207], epoch: [40/100], loss:0.00011969469778705388
[150/207], epoch: [40/100], loss:0.0026539929676800966
[200/207], epoch: [40/100], loss:0.0006957473233342171
[0/207], epoch: [60/100], loss:0.003947725053876638
[50/207], epoch: [60/100], loss:0.00010685963206924498
[100/207], epoch: [60/100], loss:0.004673877265304327
[150/207], epoch: [60/100], loss:0.0017128509934991598
[200/207], epoch: [60/100], loss:8.32198395528394e-07
[0/207], epoch: [80/100], loss:0.0007843570201657712
[50/207], epoch: [80/100], loss:0.0005239153397269547
[100/207], epoch: [80/100], loss:0.001238870550878346
[150/207], epoch: [80/100

In [148]:
model.eval()

test_set = test_set.to(device)
train_set = train_set.to(device)

seq = model(train_set[0])

actual_pred = scaler.inverse_transform(np.array([[seq.item()]]))
actual_pred

array([[2158.17892246]])