In [0]:
!pip install torch torchvision
!pip install -U finance-datareader


Collecting finance-datareader
  Downloading https://files.pythonhosted.org/packages/2a/46/4e398780dfb1af6dba7b4ee8b440829b65e94b2726f1d2cc13015d41d172/finance_datareader-0.9.6-py3-none-any.whl
Collecting requests-file
  Downloading https://files.pythonhosted.org/packages/77/86/cdb5e8eaed90796aa83a6d9f75cfbd37af553c47a291cd47bc410ef9bdb2/requests_file-1.5.1-py2.py3-none-any.whl
Installing collected packages: requests-file, finance-datareader
Successfully installed finance-datareader-0.9.6 requests-file-1.5.1


In [0]:
import FinanceDataReader as fdr
import datetime
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import numpy as np
import argparse
import time
from copy import deepcopy # Add Deepcopy for args
from sklearn.metrics import mean_absolute_error
import FinanceDataReader as fdr

In [0]:
class StockDataset(Dataset):
    
    def __init__(self, symbol, x_frames, y_frames, start, end):
        
        self.symbol = symbol
        self.x_frames = x_frames
        self.y_frames = y_frames
        
        self.start = datetime.datetime(*start)
        self.end = datetime.datetime(*end)

        self.data = fdr.DataReader(self.symbol, self.start, self.end)
        print(self.data.isna().sum())
        
    def __len__(self):
        return len(self.data) - (self.x_frames + self.y_frames) + 1
    
    def __getitem__(self, idx):
        idx += self.x_frames
        data = self.data.iloc[idx-self.x_frames:idx+self.y_frames]
        data = data[['High', 'Low', 'Open', 'Close', 'Change', 'Volume']]
        data = data.apply(lambda x: (x+1) / (x[self.x_frames-1]+1))
        data = data.values
        X = data[:self.x_frames]
        y = data[self.x_frames:]
        
        return X, y

In [0]:
class TestDataset(Dataset):
    
    def __init__(self, symbol, x_frames, y_frames):
        
        self.symbol = symbol
        self.x_frames = x_frames
        self.y_frames = y_frames
        
        self.start = datetime.datetime.now() - datetime.timedelta(days=10)
        self.end = datetime.date.today()
        print(self.start)
        print(self.end)
        self.data = fdr.DataReader(self.symbol, self.start, self.end)
        self.data = self.data.tail(6)
        print(self.data)
        print(self.data.isna().sum())
        
    def __len__(self):
        return len(self.data) - (self.x_frames + self.y_frames) + 1
    
    def __getitem__(self, idx):
        idx += self.x_frames
        data = self.data.iloc[idx-self.x_frames:idx+self.y_frames]
        data = data[['High', 'Low', 'Open', 'Close', 'Change', 'Volume']]
        #data = data.apply(lambda x: (x+1) / (x[self.x_frames-1]+1))
        data = data.values
        X = data[:self.x_frames]
        y = data[self.x_frames:]
        
        return X, y

In [0]:
class LSTM(nn.Module):
    
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size):
        super(LSTM, self).__init__()
        self.input_dim = input_dim 
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers

        self.batch_size = batch_size
        
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
        self.hidden = self.init_hidden()
        self.regressor = self.make_regressor()
        
    def init_hidden(self):
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))
    
    def make_regressor(self):
        layers = []
        
        layers.append(nn.Linear(self.hidden_dim, self.hidden_dim // 2))
        layers.append(nn.ReLU())
        layers.append(nn.Linear(self.hidden_dim // 2, self.output_dim))
        regressor = nn.Sequential(*layers)
        return regressor
    
    def forward(self, x):
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        y_pred = self.regressor(lstm_out[-1].view(self.batch_size, -1))
        return y_pred

In [0]:
def metric(y_pred, y_true):
    perc_y_pred = np.exp(y_pred.cpu().detach().numpy())
    perc_y_true = np.exp(y_true.cpu().detach().numpy())
    mae = mean_absolute_error(perc_y_true, perc_y_pred, multioutput='raw_values')
    return mae*100

In [0]:

#trainloader = torch.cat(trainloader).view(len(trainloader), batch_size, -1)
def train(model, trainStock, optimizer, loss_fn,batch_size,device):
    trainloader = DataLoader(trainStock, batch_size, shuffle=True, drop_last=True)

    model.train()
    model.zero_grad()
    optimizer.zero_grad()

    train_acc = 0.0
    train_loss = 0.0
    for i, (X, y) in enumerate(trainloader):

        X = X.transpose(0, 1).float().to(device)
        y_true = y[:, :, 3].float().to(device)

        model.zero_grad()
        optimizer.zero_grad()
        model.hidden = [hidden.to(device) for hidden in model.init_hidden()]

        y_pred = model(X)
        loss = loss_fn(y_pred.view(-1), y_true.view(-1))
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_acc += metric(y_pred, y_true)[0]

    train_loss = train_loss / len(trainloader)
    train_acc = train_acc / len(trainloader)
    return model, train_loss, train_acc

In [0]:
def validate(model, valStock, loss_fn,batch_size,device):
    valloader = DataLoader(valStock, batch_size, shuffle=False, drop_last=True)
    model.eval()

    val_acc = 0.0
    val_loss = 0.0
    with torch.no_grad():
        for i, (X, y) in enumerate(valloader):

            X = X.transpose(0, 1).float().to(device)
            y_true = y[:, :, 3].float().to(device)
            model.hidden = [hidden.to(device) for hidden in model.init_hidden()]

            y_pred = model(X)
            loss = loss_fn(y_pred.view(-1), y_true.view(-1))

            val_loss += loss.item()
            val_acc += metric(y_pred, y_true)[0]

    val_loss = val_loss / len(valloader)
    val_acc = val_acc / len(valloader)
    return val_loss, val_acc


In [0]:
def test(model, testStock, loss_fn,batch_size,device):
    testloader = DataLoader(testStock, batch_size, shuffle=False, drop_last=True)
    model.eval()

    test_acc = 0.0
    with torch.no_grad():
        for i, (X, y) in enumerate(testloader):

            X = X.transpose(0, 1).float().to(device)
            y_true = y[:, :, 3].float().to(device)
            model.hidden = [hidden.to(device) for hidden in model.init_hidden()]

            y_pred = model(X)
            test_acc += metric(y_pred, y_true)[0]

    test_acc = test_acc / len(testloader)
    return test_acc

In [0]:
stock = StockDataset('001040',3,3,(2010,5,1),(2016,5,20))
valstock = StockDataset('001040',3,3,(2017,5,1),(2018,5,20))
teststock = StockDataset('001040',3,3,(2019,5,1),(2020,5,20))

Open      0
High      0
Low       0
Close     0
Volume    0
Change    0
dtype: int64
Open      0
High      0
Low       0
Close     0
Volume    0
Change    0
dtype: int64
Open      0
High      0
Low       0
Close     0
Volume    0
Change    0
dtype: int64


In [0]:
batch_size = 1
input_dim = 6
hidden_dim = 50
output_dim = 3
num_layers = 1
epoch = 5

model = LSTM(input_dim,hidden_dim,output_dim,num_layers,batch_size)
loss_fn = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

'''
for i in range(5):
    model, tl = train(model,stock,optimizer,loss_fn,batch_size,device)
    vl = validate(model,valstock,loss_fn,batch_size,device)
    print(tl,vl)
ta = test(model,teststock,loss_fn,batch_size,device)
print(tl,vl,ta)
'''

'\nfor i in range(5):\n    model, tl = train(model,stock,optimizer,loss_fn,batch_size,device)\n    vl = validate(model,valstock,loss_fn,batch_size,device)\n    print(tl,vl)\nta = test(model,teststock,loss_fn,batch_size,device)\nprint(tl,vl,ta)\n'

In [0]:
#학습
for e in range(epoch):  # loop over the dataset multiple times
        ts = time.time()
        model, train_loss, train_acc = train(model, stock ,optimizer, loss_fn, batch_size,device)
        val_loss, val_acc = validate(model, valstock, loss_fn, batch_size,device)
        te = time.time()
        
        print('Epoch {}, Acc(train/val): {:2.2f}/{:2.2f}, Loss(train/val) {:2.5f}/{:2.5f}. Took {:2.2f} sec'.format(e, train_acc, val_acc, train_loss, val_loss, te-ts))
        
test_acc = test(model, teststock, loss_fn, batch_size, device ) 

Epoch 0, Acc(train/val): 5.66/5.33, Loss(train/val) 0.00133/0.00070. Took 18.41 sec
Epoch 1, Acc(train/val): 5.44/4.04, Loss(train/val) 0.00124/0.00060. Took 18.29 sec
Epoch 2, Acc(train/val): 5.24/3.48, Loss(train/val) 0.00120/0.00056. Took 18.32 sec
Epoch 3, Acc(train/val): 5.34/3.47, Loss(train/val) 0.00120/0.00055. Took 18.34 sec
Epoch 4, Acc(train/val): 5.39/3.47, Loss(train/val) 0.00121/0.00060. Took 18.45 sec


In [0]:
print(test_acc)

4.471289086529589


In [0]:
testset = TestDataset('005930',3,3)

2020-05-11 02:06:53.767845
2020-05-21
             Open   High    Low  Close    Volume    Change
Date                                                      
2020-05-14  47750  48100  47650  48000  19305974 -0.011329
2020-05-15  48400  48450  47700  47850  18463118 -0.003125
2020-05-18  47950  49100  47600  48800  20481981  0.019854
2020-05-19  50100  50500  49700  50300  25168295  0.030738
2020-05-20  50000  50200  49800  50000  14896899 -0.005964
2020-05-21  50300  50400  49900  50000   6860796  0.000000
Open      0
High      0
Low       0
Close     0
Volume    0
Change    0
dtype: int64


In [0]:
model.eval()
plr = DataLoader(testset,batch_size,shuffle=True)


In [0]:

for (X,y) in plr:
    print(X.shape)
    X = X.transpose(0, 1).float().to(device)
    y_true = y[:, :, 3].float().to(device)
    model.hidden = [hidden.to(device) for hidden in model.init_hidden()]
    print(X.shape)
    y_pred = model(X)
    print(y_pred,y_true)
    
print(y_pred[0][2]*y_true[0][2])

torch.Size([1, 3, 6])
torch.Size([3, 1, 6])
tensor([[0.9998, 1.0084, 1.0094]], device='cuda:0', grad_fn=<AddmmBackward>) tensor([[50300., 50000., 50000.]], device='cuda:0')
tensor(50468.1211, device='cuda:0', grad_fn=<MulBackward0>)


In [0]:
'''from google.colab import drive
drive.mount('/content/gdrive')
modelPath = '/content/gdrive/My Drive/LSTM_batch_1_model.pt'
torch.save(model.state_dict(), modelPath)'''