In [None]:
!mkdir /root/.kaggle
!mv kaggle.json /root/.kaggle

!rm -r sample_data
!kaggle competitions download -c jpx-tokyo-stock-exchange-prediction
!unzip ./jpx-tokyo-stock-exchange-prediction.zip -d jpx-tokyo-stock-exchange-prediction

In [2]:
import torch
import numpy as np
import pandas as pd

from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

In [186]:
class TSDataset(Dataset):
  def __init__(self, df, seq_len=128, padding_token=0):
    self.df = df
    self.indices = []
    self.seq_len = seq_len
    self.padding_token = padding_token
    
    #Creating indices
    start = 0
    for _ in range(-(len(self.df) // -self.seq_len)):
      self.indices.append((start, start+self.seq_len))
      start+=self.seq_len
    
    #fixing non-perfect intervals, --in place
    idx = 0
    while idx<len(self.indices):
      start, end = self.indices[idx]
      intervals = self.df[start:end]['SecuritiesCode'].value_counts(sort=False).values
      if len(intervals) != 1:
        self.indices = self.indices[:idx] + [(start, start+intervals[0]), (start+intervals[0], end)] + self.indices[idx+1:]
        idx+=1
      idx+=1
  
  def __len__(self):
    return len(self.indices)
  
  def __getitem__(self, idx):
    start, end = self.indices[idx]

    target = self.df[start:end]['Target'].values[-1]
    sequence = np.expand_dims(self.df[start:end]['Close'].values, 1)
    if sequence.shape[0] != self.seq_len:
      sequence = np.pad(sequence, pad_width=[(0, self.seq_len-sequence.shape[0]), (0, 0)], constant_values=self.padding_token, mode='constant')
    
    return sequence, target

In [218]:
class time2vec(nn.Module):
  def __init__(self, in_features, out_features):
    super().__init__()
    self.w_linear = nn.Parameter(data=torch.rand(in_features, 1))
    self.b_linear = nn.Parameter(data=torch.rand(1))
    self.w_function = nn.Parameter(data=torch.rand(in_features, out_features-1))
    self.b_function = nn.Parameter(data=torch.rand(out_features-1))

    #maybe a bit more straightforward
    #self.linear_params = nn.Linear(in_features, 1, bias=True)
    #self.function_params = nn.Linear(in_features, out_features-1, bias=True)

    #initialize params?
    #nn.init.kaiming_normal_(self.w_linear)
    #nn.init.kaiming_normal_(self.b_linear)
    #nn.init.kaiming_normal_(self.w_function)
    #nn.init.kaiming_normal_(self.b_function)

  def forward(self, x):
    linear_out = torch.matmul(x, self.w_linear)+self.b_linear
    func_out = torch.sin(torch.matmul(x, self.w_function)+self.b_function)
    return torch.concat((linear_out, func_out), dim=-1)

In [219]:
class TSTransformer(nn.Module):
  def __init__(self, in_features, time_features=1, mlp_dim=1024, enc_layers=2, enc_heads=2):
    super().__init__()
    self.time2vec = time2vec(in_features, time_features)
    self.encoder_layer = nn.TransformerEncoderLayer(d_model=in_features+time_features, nhead=enc_heads, 
                                                    dropout=0, activation=F.gelu, batch_first=True, norm_first=False)
    self.encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=enc_layers)

    self.mlp = nn.Linear(in_features+time_features, mlp_dim)
    self.regressor = nn.Linear(mlp_dim, 1)

  def forward(self, x):
    time_embeddings = self.time2vec(x)
    x = torch.concat((x, time_embeddings), dim=-1)
    x = self.encoder(x)

    x = F.relu(self.mlp(x))
    x = self.regressor(x)

    return x[:, -1, :] #returning only last seq element

In [86]:
seq_len = 128

padding_token = 0.0
missing_token = -1.0


dframe = pd.read_csv('jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv', parse_dates=['Date'])

stock_list = dframe.SecuritiesCode.unique()
dframe_1 = dframe.drop(['Open', 'High', 'Low', 'Volume', 'RowId', 'AdjustmentFactor', 'ExpectedDividend', 'SupervisionFlag'], axis=1)
dframe_1 = dframe_1[~dframe_1['Close'].isnull()] #Getting rid of null values for this experiment
dframe_1 = dframe_1.sort_values(['SecuritiesCode', 'Date'], ascending=[True, True]).reset_index(drop=True)

In [36]:
####Testing dataset ideas ----------
start = 0
indices = []
for _ in range(-(len(dframe_1) // -seq_len)):
  indices.append((start, start+seq_len))
  start+= seq_len


#Creates new idx array
new_idxs = []
for pos, elemen in enumerate(indices):
  start, end = elemen
  intervals = dframe_1[start:end]['SecuritiesCode'].value_counts(sort=False).values #False keeps the OG order(according to github)
  if len(intervals) != 1:
    new_idxs.extend([(start, start+intervals[0]), (start+intervals[0], end)])
  else:
    new_idxs.append((start, end))

#Works in place
idx = 0
while idx<len(indices):
  start, end = indices[idx]
  intervals = dframe_1[start:end]['SecuritiesCode'].value_counts(sort=False).values
  if len(intervals)!=1:
    indices = indices[:idx] + [(start, start+intervals[0]), (start+intervals[0], end)] + indices[idx+1:]
    idx+=1
  idx+=1

In [220]:
dset = TSDataset(dframe_1, seq_len=128)
dloader = DataLoader(dset, batch_size=64, shuffle=False, num_workers=1)

train_batch, target_batch = next(iter(dloader))
train_batch.shape, target_batch.shape

(torch.Size([64, 128, 1]), torch.Size([64]))

In [221]:
#Overfitting a single batch
model = TSTransformer(in_features = 1)

crit = nn.MSELoss()
optim = torch.optim.Adam(model.parameters(), lr = 1e-4)

for epoch in range(10):
  out = model(train_batch.float())
  loss = crit(out.squeeze(-1), target_batch.float())
  loss.backward()
  optim.step()
  print(loss)

tensor(0.0972, grad_fn=<MseLossBackward0>)
tensor(0.0803, grad_fn=<MseLossBackward0>)
tensor(0.0653, grad_fn=<MseLossBackward0>)
tensor(0.0520, grad_fn=<MseLossBackward0>)
tensor(0.0401, grad_fn=<MseLossBackward0>)
tensor(0.0297, grad_fn=<MseLossBackward0>)
tensor(0.0209, grad_fn=<MseLossBackward0>)
tensor(0.0136, grad_fn=<MseLossBackward0>)
tensor(0.0080, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MseLossBackward0>)


In [222]:
optim = torch.optim.Adam(model.parameters(), lr = 1e-5)

for epoch in range(10):
  out = model(train_batch.float())
  loss = crit(out.squeeze(-1), target_batch.float())
  loss.backward()
  optim.step()
  print(loss)

tensor(0.0013, grad_fn=<MseLossBackward0>)
tensor(0.0011, grad_fn=<MseLossBackward0>)
tensor(0.0009, grad_fn=<MseLossBackward0>)
tensor(0.0008, grad_fn=<MseLossBackward0>)
tensor(0.0006, grad_fn=<MseLossBackward0>)
tensor(0.0005, grad_fn=<MseLossBackward0>)
tensor(0.0004, grad_fn=<MseLossBackward0>)
tensor(0.0004, grad_fn=<MseLossBackward0>)
tensor(0.0003, grad_fn=<MseLossBackward0>)
tensor(0.0003, grad_fn=<MseLossBackward0>)


In [223]:
out.squeeze(-1)

tensor([-0.0029, -0.0029, -0.0029, -0.0029, -0.0029, -0.0029, -0.0029, -0.0029,
        -0.0029,  0.0002,  0.0002, -0.0029, -0.0029, -0.0029, -0.0029, -0.0029,
        -0.0029, -0.0029, -0.0029,  0.0002,  0.0002, -0.0029, -0.0029, -0.0029,
        -0.0029, -0.0029, -0.0029, -0.0029, -0.0029, -0.0029,  0.0002,  0.0002,
        -0.0029,  0.0002,  0.0002, -0.0029, -0.0029, -0.0029, -0.0029, -0.0029,
        -0.0029, -0.0029, -0.0029,  0.0002,  0.0002, -0.0029, -0.0029, -0.0029,
        -0.0029, -0.0029, -0.0029, -0.0029, -0.0029, -0.0029,  0.0002,  0.0002,
        -0.0029, -0.0029, -0.0029, -0.0029, -0.0029, -0.0029, -0.0029, -0.0029],
       grad_fn=<SqueezeBackward1>)

In [224]:
target_batch

tensor([ 0.0096,  0.0144,  0.0056,  0.0132, -0.0142, -0.0362,  0.0033,  0.0031,
         0.0000,  0.0316, -0.0092, -0.0269,  0.0124,  0.0028, -0.0154, -0.0454,
        -0.0209, -0.0048,  0.0186, -0.0560,  0.0108, -0.0032,  0.0079, -0.0066,
         0.0025,  0.0246, -0.0013, -0.0124,  0.0142, -0.0475,  0.0135,  0.0027,
         0.0103,  0.0327,  0.0029,  0.0006, -0.0150, -0.0042,  0.0145,  0.0020,
        -0.0097, -0.0044, -0.0067,  0.0326,  0.0140, -0.0015,  0.0193,  0.0028,
        -0.0052,  0.0040,  0.0045,  0.0188,  0.0000, -0.0087,  0.0266,  0.0010,
         0.0129,  0.0000, -0.0144,  0.0038, -0.0010,  0.0155, -0.0055,  0.0010],
       dtype=torch.float64)