In [1]:
!mkdir /root/.kaggle
!mv kaggle.json /root/.kaggle

!rm -r sample_data
!kaggle competitions download -c jpx-tokyo-stock-exchange-prediction
!unzip ./jpx-tokyo-stock-exchange-prediction.zip -d jpx-tokyo-stock-exchange-prediction

In [2]:
import torch
import numpy as np
import pandas as pd

from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

In [102]:
class TSDataset(Dataset):
  def __init__(self, df, seq_len=128, padding_token=0):
    self.df = df
    self.indices = []
    self.seq_len = seq_len
    self.padding_token = padding_token
    
    #Creating indices
    start = 0
    for _ in range(-(len(self.df) // -self.seq_len)):
      self.indices.append((start, start+self.seq_len))
      start+=self.seq_len
    
    #fixing non-perfect intervals, --in place
    idx = 0
    while idx<len(self.indices):
      start, end = self.indices[idx]
      intervals = self.df[start:end]['SecuritiesCode'].value_counts(sort=False).values
      if len(intervals) != 1:
        self.indices = self.indices[:idx] + [(start, start+intervals[0]), (start+intervals[0], end)] + self.indices[idx+1:]
        idx+=1
      idx+=1
  
  def __len__(self):
    return len(self.indices)
  
  def __getitem__(self, idx):
    start, end = self.indices[idx]

    target = self.df[start:end]['Target'].values[-1]
    sequence = np.expand_dims(self.df[start:end]['Close'].values, 1)
    if sequence.shape[0] != self.seq_len:
     sequence = np.pad(sequence, pad_width=[(self.seq_len-sequence.shape[0], 0), (0, 0)], constant_values=self.padding_token, mode='constant')

    #careful here padding_mask shape shouldn't be the same as sequence's
    padding_mask = (sequence == self.padding_token)
    
    return {'sequence':sequence,
            'mask':padding_mask,
            'target':target}

In [4]:
class time2vec(nn.Module):
  def __init__(self, in_features, out_features):
    super().__init__()
    self.w_linear = nn.Parameter(data=torch.rand(in_features, 1))
    self.b_linear = nn.Parameter(data=torch.rand(1))
    self.w_function = nn.Parameter(data=torch.rand(in_features, out_features-1))
    self.b_function = nn.Parameter(data=torch.rand(out_features-1))

    #maybe a bit more straightforward
    #self.linear_params = nn.Linear(in_features, 1, bias=True)
    #self.function_params = nn.Linear(in_features, out_features-1, bias=True)

    #initialize params?
    #nn.init.kaiming_normal_(self.w_linear)
    #nn.init.kaiming_normal_(self.b_linear)
    #nn.init.kaiming_normal_(self.w_function)
    #nn.init.kaiming_normal_(self.b_function)

  def forward(self, x):
    linear_out = torch.matmul(x, self.w_linear)+self.b_linear
    func_out = torch.sin(torch.matmul(x, self.w_function)+self.b_function)
    return torch.concat((linear_out, func_out), dim=-1)

In [233]:
class TSTransformer(nn.Module):
  def __init__(self, in_features, time_features=1, mlp_dim=1024, enc_layers=2, enc_heads=2):
    super().__init__()
    self.time2vec = time2vec(in_features, time_features)
    self.encoder_layer = nn.TransformerEncoderLayer(d_model=in_features+time_features, nhead=enc_heads, 
                                                    dropout=0, activation=F.gelu, batch_first=True, 
                                                    norm_first=True)
    self.encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=enc_layers)

    self.mlp = nn.Linear(in_features+time_features, mlp_dim)
    self.regressor = nn.Linear(mlp_dim, 1)

  def forward(self, x, padding_mask):
    time_embeddings = self.time2vec(x)
    x = torch.concat((x, time_embeddings), dim=-1)
    x = self.encoder(src=x, src_key_padding_mask=padding_mask)

    x = F.relu(self.mlp(x))
    x = self.regressor(x)

    return x[:, -1, :] #returning only last seq element

In [6]:
seq_len = 128

padding_token = 0.0
missing_token = -1.0


dframe = pd.read_csv('jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv', parse_dates=['Date'])

stock_list = dframe.SecuritiesCode.unique()
dframe_1 = dframe.drop(['Open', 'High', 'Low', 'Volume', 'RowId', 'AdjustmentFactor', 'ExpectedDividend', 'SupervisionFlag'], axis=1)
dframe_1 = dframe_1[~dframe_1['Close'].isnull()] #Getting rid of null values for this experiment
dframe_1 = dframe_1.sort_values(['SecuritiesCode', 'Date'], ascending=[True, True]).reset_index(drop=True)

In [221]:
dset = TSDataset(dframe_1, seq_len=128)
dloader = DataLoader(dset, batch_size=128, shuffle=False, num_workers=1)

train_batch = next(iter(dloader))
train_batch['sequence'].shape, train_batch['mask'].shape, train_batch['target'].shape

(torch.Size([128, 128, 1]), torch.Size([128, 128, 1]), torch.Size([128]))

In [264]:
####Overfitting a single batch

#May be a good idea to normalize the data
#consider another criterion
#consider modifying model params
#consider encoding the vector dates instead of the sequence
model = TSTransformer(in_features=1)
crit = nn.MSELoss()
optim = torch.optim.Adam(model.parameters(), lr = 1e-4)
for epoch in range(10):
  out = model(train_batch['sequence'].float(), padding_mask=train_batch['mask'].squeeze(-1).float())
  loss = crit(out.squeeze(-1), train_batch['target'].float())
  loss.backward()
  optim.step()
  print(loss)

tensor(958358.9375, grad_fn=<MseLossBackward0>)
tensor(848255.7500, grad_fn=<MseLossBackward0>)
tensor(748055.8750, grad_fn=<MseLossBackward0>)
tensor(654654.2500, grad_fn=<MseLossBackward0>)
tensor(567155.5000, grad_fn=<MseLossBackward0>)
tensor(485306.2188, grad_fn=<MseLossBackward0>)
tensor(409115.7500, grad_fn=<MseLossBackward0>)
tensor(338707.1250, grad_fn=<MseLossBackward0>)
tensor(274258.9062, grad_fn=<MseLossBackward0>)
tensor(215993.0312, grad_fn=<MseLossBackward0>)


In [312]:
optim = torch.optim.Adam(model.parameters(), lr = 1e-9)
for epoch in range(5):
  out = model(train_batch['sequence'].float(), padding_mask=train_batch['mask'].squeeze(-1).float())
  loss = crit(out.squeeze(-1), train_batch['target'].float())
  loss.backward()
  optim.step()
  print(loss)

tensor(0.0028, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MseLossBackward0>)


In [313]:
out.squeeze(-1)

tensor([-0.0198, -0.0041, -0.0135, -0.0249, -0.0224, -0.0251, -0.0256, -0.0186,
        -0.0211, -0.0842, -0.0843, -0.0449, -0.0438, -0.0449, -0.0442, -0.0443,
        -0.0431, -0.0426, -0.0434, -0.0778, -0.0824, -0.0200, -0.0188, -0.0090,
        -0.0070, -0.0244, -0.0314, -0.0322, -0.0287, -0.0267, -0.0958, -0.0741,
        -0.0396, -0.0938, -0.0938, -0.0402, -0.0395, -0.0425, -0.0431, -0.0414,
        -0.0426, -0.0409, -0.0419, -0.0776, -0.0833, -0.0169, -0.0143, -0.0145,
        -0.0094, -0.0107, -0.0195, -0.0106, -0.0096, -0.0154, -0.0840, -0.0803,
        -0.0348, -0.0346, -0.0349, -0.0372, -0.0354, -0.0336, -0.0327, -0.0359,
        -0.0914, -0.0951, -0.0153,  0.0022, -0.0231, -0.0304, -0.0290, -0.0286,
        -0.0198, -0.0154, -0.0275, -0.0952, -0.0459, -0.0452, -0.0463, -0.0455,
        -0.0460, -0.0444, -0.0243, -0.0188,  0.0088, -0.0409, -0.0870, -0.0244,
        -0.0240, -0.0309, -0.0364, -0.0385, -0.0354, -0.0283, -0.0315, -0.0958,
        -0.0447,  0.0374,  0.0415,  0.05

In [314]:
train_batch['target'].float()

tensor([ 0.0096,  0.0144,  0.0056,  0.0132, -0.0142, -0.0362,  0.0033,  0.0031,
         0.0000,  0.0316, -0.0092, -0.0269,  0.0124,  0.0028, -0.0154, -0.0454,
        -0.0209, -0.0048,  0.0186, -0.0560,  0.0108, -0.0032,  0.0079, -0.0066,
         0.0025,  0.0246, -0.0013, -0.0124,  0.0142, -0.0475,  0.0135,  0.0027,
         0.0103,  0.0327,  0.0029,  0.0006, -0.0150, -0.0042,  0.0145,  0.0020,
        -0.0097, -0.0044, -0.0067,  0.0326,  0.0140, -0.0015,  0.0193,  0.0028,
        -0.0052,  0.0040,  0.0045,  0.0188,  0.0000, -0.0087,  0.0266,  0.0010,
         0.0129,  0.0000, -0.0144,  0.0038, -0.0010,  0.0155, -0.0055,  0.0010,
         0.0191,  0.0055,  0.0000, -0.0086,  0.0295,  0.0326, -0.0125,  0.0024,
        -0.0078, -0.0088,  0.0048, -0.0195, -0.0025,  0.0000, -0.0101,  0.0461,
        -0.0059,  0.0182, -0.0061, -0.0031, -0.0226,  0.0193,  0.0069, -0.0211,
        -0.0200,  0.0031, -0.0082,  0.0105,  0.0020, -0.0085, -0.0144,  0.0274,
         0.0039, -0.0379, -0.0119,  0.00