In [1]:
!pip install torch torchvision
!pip install -U finance-datareader


Requirement already up-to-date: finance-datareader in /usr/local/lib/python3.6/dist-packages (0.9.6)


In [0]:
import FinanceDataReader as fdr
import datetime
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import numpy as np
import argparse
import time
from copy import deepcopy # Add Deepcopy for args
from sklearn.metrics import mean_absolute_error
import FinanceDataReader as fdr

In [0]:
class StockDataset(Dataset):
    
    def __init__(self, symbol, x_frames, y_frames, start, end):
        
        self.symbol = symbol
        self.x_frames = x_frames
        self.y_frames = y_frames
        
        self.start = datetime.datetime(*start)
        self.end = datetime.datetime(*end)

        self.data = fdr.DataReader(self.symbol, self.start, self.end)
        print(self.data.isna().sum())
        
    def __len__(self):
        return len(self.data) - (self.x_frames + self.y_frames) + 1
    
    def __getitem__(self, idx):
        idx += self.x_frames
        data = self.data.iloc[idx-self.x_frames:idx+self.y_frames]
        data = data[['High', 'Low', 'Open', 'Close', 'Change', 'Volume']]
        data = data.apply(lambda x: (x+1) / (x[self.x_frames-1]+1))
        data = data.values
        X = data[:self.x_frames]
        y = data[self.x_frames:]
        
        return X, y

In [0]:
class TestDataset(Dataset):
    
    def __init__(self, symbol, x_frames, y_frames):
        
        self.symbol = symbol
        self.x_frames = x_frames
        self.y_frames = y_frames
        
        self.start = datetime.datetime.now() - datetime.timedelta(days=10)
        self.end = datetime.date.today()
        print(self.start)
        print(self.end)
        self.data = fdr.DataReader(self.symbol, self.start, self.end)
        self.data = self.data.tail(6)
        print(self.data)
        print(self.data.isna().sum())
        
    def __len__(self):
        return len(self.data) - (self.x_frames + self.y_frames) + 1
    
    def __getitem__(self, idx):
        idx += self.x_frames
        data = self.data.iloc[idx-self.x_frames:idx+self.y_frames]
        data = data[['High', 'Low', 'Open', 'Close', 'Change', 'Volume']]
        #data = data.apply(lambda x: (x+1) / (x[self.x_frames-1]+1))
        data = data.values
        X = data[:self.x_frames]
        y = data[self.x_frames:]
        
        return X, y

In [0]:
class LSTM(nn.Module):
    
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size):
        super(LSTM, self).__init__()
        self.input_dim = input_dim 
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers

        self.batch_size = batch_size
        
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
        self.hidden = self.init_hidden()
        self.regressor = self.make_regressor()
        
    def init_hidden(self):
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))
    
    def make_regressor(self):
        layers = []
        
        layers.append(nn.Linear(self.hidden_dim, self.hidden_dim // 2))
        layers.append(nn.ReLU())
        layers.append(nn.Linear(self.hidden_dim // 2, self.output_dim))
        regressor = nn.Sequential(*layers)
        return regressor
    
    def forward(self, x):
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        y_pred = self.regressor(lstm_out[-1].view(self.batch_size, -1))
        return y_pred

In [0]:

#trainloader = torch.cat(trainloader).view(len(trainloader), batch_size, -1)
def train(model, trainStock, optimizer, loss_fn,batch_size,device):
    trainloader = DataLoader(trainStock, batch_size, shuffle=True, drop_last=True)

    model.train()
    model.zero_grad()
    optimizer.zero_grad()

    train_loss = 0.0
    for i, (X, y) in enumerate(trainloader):

        X = X.transpose(0, 1).float().to(device)
        y_true = y[:, :, 3].float().to(device)
        #print(torch.max(X[:, :, 3]), torch.max(y_true))

        model.zero_grad()
        optimizer.zero_grad()
        model.hidden = [hidden.to(device) for hidden in model.init_hidden()]

        y_pred = model(X)
        
        loss = loss_fn(y_pred.view(-1), y_true.view(-1))
        train_loss += loss.item()
        loss.backward()
        optimizer.step()

    train_loss = train_loss / len(trainloader)
    return model, train_loss

In [0]:
def validate(model, valStock, loss_fn,batch_size,device):
    valloader = DataLoader(valStock, batch_size, shuffle=False, drop_last=True)
    model.eval()

    val_acc = 0.0
    val_loss = 0.0
    with torch.no_grad():
        for i, (X, y) in enumerate(valloader):

            X = X.transpose(0, 1).float().to(device)
            y_true = y[:, :, 3].float().to(device)
            model.hidden = [hidden.to(device) for hidden in model.init_hidden()]

            y_pred = model(X)
            loss = loss_fn(y_pred.view(-1), y_true.view(-1))

            val_loss += loss.item()

    val_loss = val_loss / len(valloader)
    return val_loss


In [0]:
def test(model, testStock, loss_fn,batch_size,device):
    testloader = DataLoader(testStock, batch_size, shuffle=False, drop_last=True)
    model.eval()

    test_acc = 0.0
    with torch.no_grad():
        for i, (X, y) in enumerate(testloader):

            X = X.transpose(0, 1).float().to(device)
            y_true = y[:, :, 3].float().to(device)
            model.hidden = [hidden.to(device) for hidden in model.init_hidden()]

            y_pred = model(X)
            test_acc += loss_fn(y_pred, y_true)

    test_acc = test_acc / len(testloader)
    return test_acc

In [14]:
stock = StockDataset('005930',3,3,(2010,5,1),(2016,5,20))
valstock = StockDataset('005930',3,3,(2017,5,1),(2018,5,20))
teststock = StockDataset('005930',3,3,(2019,5,1),(2020,5,20))

Open      0
High      0
Low       0
Close     0
Volume    0
Change    0
dtype: int64
Open      0
High      0
Low       0
Close     0
Volume    0
Change    0
dtype: int64
Open      0
High      0
Low       0
Close     0
Volume    0
Change    0
dtype: int64


In [20]:
batch_size = 1
input_dim = 6
hidden_dim = 50
output_dim = 3
num_layers = 1

model = LSTM(input_dim,hidden_dim,output_dim,num_layers,batch_size)
loss_fn = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model.to(device)
for i in range(5):
    model, tl = train(model,stock,optimizer,loss_fn,batch_size,device)
    vl = validate(model,valstock,loss_fn,batch_size,device)
    print(tl,vl)

ta = test(model,teststock,loss_fn,batch_size,device)
print(tl,vl,ta)

0.004328358084011837 0.0006262275970448173
0.0012233729799368531 0.000597141644480871
0.0010077941842779176 0.0006804529313194507
0.0009652380185455503 0.0006695814957929543
0.0008382914666227327 0.0006003089172772889
0.0008382914666227327 0.0006003089172772889 tensor(0.0007, device='cuda:0')


In [16]:
testset = TestDataset('005930',3,3)

2020-05-10 11:45:47.027765
2020-05-20
             Open   High    Low  Close    Volume    Change
Date                                                      
2020-05-13  47250  48550  47200  48550  20223277  0.013570
2020-05-14  47750  48100  47650  48000  19305974 -0.011329
2020-05-15  48400  48450  47700  47850  18463118 -0.003125
2020-05-18  47950  49100  47600  48800  20481981  0.019854
2020-05-19  50100  50500  49700  50300  25168295  0.030738
2020-05-20  50000  50200  49800  50000  12855032 -0.005964
Open      0
High      0
Low       0
Close     0
Volume    0
Change    0
dtype: int64


In [0]:
model.eval()
plr = DataLoader(testset,batch_size,shuffle=True)


In [22]:

for (X,y) in plr:
    print(X.shape)
    X = X.transpose(0, 1).float().to(device)
    y_true = y[:, :, 3].float().to(device)
    model.hidden = [hidden.to(device) for hidden in model.init_hidden()]
    print(X.shape)
    y_pred = model(X)
    print(y_pred,y_true)
    
print(y_pred[0][2]*y_true[0][2])

torch.Size([1, 3, 6])
torch.Size([3, 1, 6])
tensor([[1.0226, 1.0097, 1.0143]], device='cuda:0', grad_fn=<AddmmBackward>) tensor([[48800., 50300., 50000.]], device='cuda:0')
tensor(50712.9375, device='cuda:0', grad_fn=<MulBackward0>)
