In [1]:
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn

import pytorch_lightning as pl
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import torchmetrics as TM
pl.utilities.seed.seed_everything(seed=42)
import numpy as np
import pandas as pd

import sys, os
source_path = os.path.join(os.getcwd(), os.pardir, 'src')
sys.path.append(source_path)
source_path = os.path.join(os.getcwd(), os.pardir, 'preprocessing')
sys.path.append(source_path)

from dl import NeuralNetwork, Trainer
from preprocess import (
    show_df, 
    date_features, 
    preprocess, 
    ToTorch, 
    get_loader, 
    ts_split,
    cont_cat_split
)

Global seed set to 42
Global seed set to 42


##  Get Data and train a Neural Network

In [2]:
ROOT_PATH = 'c:/Users/gilbe/Documents/TokyoData'


'/kaggle/input/jpx-tokyo-stock-exchange-prediction/train_files/financials.csv'
'/train_files/trades.csv'

train_df = pd.read_csv(f'{ROOT_PATH}/train_files/stock_prices.csv')
train_df['Date'] = pd.to_datetime(train_df['Date']) 
train_df.set_index('Date', inplace=True)
# train_df = date_features(train_df)

train_options = pd.read_csv(f'{ROOT_PATH}/train_files/options.csv', low_memory=False)
train_financials = pd.read_csv(f'{ROOT_PATH}/train_files/financials.csv', low_memory=False)
train_trades = pd.read_csv(f'{ROOT_PATH}/train_files/trades.csv', low_memory=False)


import matplotlib.pyplot as plt

df_1301 = train_df[train_df['SecuritiesCode'] == 1301].drop(['SecuritiesCode', 'Volume'], axis=1)

df_1301 = date_features(df_1301)

cont, cat = cont_cat_split(df_1301, 'int64')
df_train_cat, df_val_cat = ts_split(cat)
df_train, df_val = ts_split(cont)


xtrain, ytrain = preprocess(df_train, 'Target', 1, continous_cols=['Close'])
xval, yval = preprocess(df_val, 'Target', 1, continous_cols=['Close'])


print('xtrain.shape:', xtrain.shape)
print(xtrain[:5])
print()
print('ytrain.shape:', ytrain.shape)
print(ytrain[:5]
print('df_train_cat.shape:', df_train_cat.shape)
print(df_train_cat.head())

""" xtrain and df_train_cat have different shapes!!!!!"""

xtrain.shape: (900, 1)
[[-0.00145879]
 [ 0.00073046]
 [ 0.00291971]
 [-0.0010917 ]
 [-0.00510018]]

ytrain.shape: (901, 1)
df_train_cat.shape: (901, 3)
            day_of_year  month  day_of_week
Date                                       
2017-01-04            4      1            4
2017-01-05            5      1            5
2017-01-06            6      1            6
2017-01-10           10      1           10
2017-01-11           11      1           11


' xtrain and df_train_cat have different shapes!!!!!'

### Train the model

In [3]:
import torch
from sklearn.impute import SimpleImputer


imp = SimpleImputer(missing_values=np.nan, strategy='mean')
batch_size = 512
train_dataloader = get_loader(x=xtrain, y=ytrain, batch_size=batch_size, x_cat=df_train_cat.to_numpy())
val_dataloader = get_loader(x=xval, y=yval, batch_size=batch_size, x_cat=df_val_cat.to_numpy())


cat_features = 3 
embedding_dim = 10
# cat_features = cat_features * embedding_dim
# print('in_features:', xtrain.shape[1] + cat_features)

model = NeuralNetwork(
    in_features=xtrain.shape[1], 
    units=2000,
    out_features=1, 
    categorical_dim=cat_features,
    no_embedding=len(df_train_cat), 
    emb_dim=embedding_dim
)

print(model)

trainer = Trainer(model, lr=3e-7)
trainer.fit_epochs(train_dataloader, val_dataloader, use_cyclic_lr=True, x_cat=True, epochs=50)


NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (embedding): Embedding(901, 10)
  (emb_input): Linear(in_features=10, out_features=2000, bias=True)
  (emb_output): Linear(in_features=2000, out_features=1, bias=True)
  (cont_input): Linear(in_features=1, out_features=2000, bias=True)
  (hidden_layer): Linear(in_features=2003, out_features=2003, bias=True)
  (output_layer): Linear(in_features=2003, out_features=1, bias=True)
)
Using cpu-device
Epoch: <<< 0 >>>
Train-Loss: 0.001410624710842967 [0/2]

  Variable._execution_engine.run_backward(



Train-Loss: 0.0004964582622051239 [1/2]
Val-Loss: 9.502199172973633 [1/1]
Epoch: <<< 1 >>>
Train-Loss: 9.50171184539795 [0/2]
Train-Loss: 9.515542984008789 [1/2]
Val-Loss: 4.173070907592773 [1/1]
Epoch: <<< 2 >>>
Train-Loss: 4.171909332275391 [0/2]
Train-Loss: 4.126032829284668 [1/2]
Val-Loss: 0.10173272341489792 [1/1]
Epoch: <<< 3 >>>
Train-Loss: 0.10225516557693481 [0/2]
Train-Loss: 0.10854484140872955 [1/2]
Val-Loss: 1.2115488052368164 [1/1]
Epoch: <<< 4 >>>
Train-Loss: 1.2131855487823486 [0/2]
Train-Loss: 1.2163621187210083 [1/2]
Val-Loss: 0.6355776786804199 [1/1]
Epoch: <<< 5 >>>
Train-Loss: 0.6365172266960144 [0/2]
Train-Loss: 0.6316795945167542 [1/2]
Val-Loss: 0.006435192655771971 [1/1]
Epoch: <<< 6 >>>
Train-Loss: 0.0064803361892700195 [0/2]
Train-Loss: 0.007263641804456711 [1/2]
Val-Loss: 0.4949086010456085 [1/1]
Epoch: <<< 7 >>>
Train-Loss: 0.4950636327266693 [0/2]
Train-Loss: 0.49819087982177734 [1/2]
Val-Loss: 0.4497569799423218 [1/1]
Epoch: <<< 8 >>>
Train-Loss: 0.4492644