# Data Loading

In [1]:
# Location of the normalized USD/JPY M5 pickle, relative to the notebook's
# working directory. Forward slashes are used because they resolve on both
# Windows and POSIX; the earlier backslash-separated form only worked on Windows.
PKL_PATH = '../data/normalized/dukascopy-usdjpy-m5-2020-01-01-2024-12-31_normalized.pkl'

# Windowing constants.
# NOTE(review): none of the cells visible in this notebook reference these three
# names (the dataset cell passes literal values instead) — confirm which is intended.
SEQUENCE_LENGTH = 24
HORIZON = 1
STRIDE = 1

# Columns routed through the Time2Vec embedding (they come first in the model input).
TIME_COLS = [
    'timestamp',
]

# Columns fed to the model as regular features, in this order.
FEATURES_COLS = [
    # Basic Data
    'close_log_return',
    'log_volume',
    'spread',

    # Other
    'ret_mean_5',
    'ret_mean_10',

    # TA
    'rsi_14',
    'ema_21',
    'sma_50',
    'atr_14',

    'bb_upper',
    'bb_lower',
    'bb_mavg',
    'bb_width',

    'donchian_upper',
    'donchian_lower',
    'donchian_mid',

    'stoch_k',
    'stoch_d',

    'macd',
    'macd_signal',
    'macd_diff',
]

# Label column(s) used as the training target.
TARGET_COLS = ['train_label']

In [2]:
import pandas as pd
import numpy as np

In [3]:
# NOTE: pd.read_pickle unpickles arbitrary objects; only point PKL_PATH at trusted files.
df = pd.read_pickle(PKL_PATH)

# Keep rows whose timestamp falls in 2022 or later and, in the same step,
# restrict the frame to the time, feature and target columns.
df = df.loc[
    pd.to_datetime(df['timestamp']).dt.year >= 2022,
    TIME_COLS + FEATURES_COLS + TARGET_COLS,
]

# Per-column NaN tally, reported after the shape and date-range summary.
nan_counts = df.isna().sum()

print(f"Data shape after filtering: {df.shape}")
print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
print(f"NaN counts in features and targets:\n{nan_counts}")

Data shape after filtering: (224940, 23)
Date range: 2022-01-02 22:00:00+00:00 to 2024-12-30 22:55:00+00:00
NaN counts in features and targets:
timestamp           0
close_log_return    0
log_volume          0
spread              0
ret_mean_5          0
ret_mean_10         0
rsi_14              0
ema_21              0
sma_50              0
atr_14              0
bb_upper            0
bb_lower            0
bb_mavg             0
bb_width            0
donchian_upper      0
donchian_lower      0
donchian_mid        0
stoch_k             0
stoch_d             0
macd                0
macd_signal         0
macd_diff           0
train_label         0
dtype: int64


In [4]:
# Preview the first rows of the filtered frame.
df.head()

Unnamed: 0,timestamp,close_log_return,log_volume,spread,ret_mean_5,ret_mean_10,rsi_14,ema_21,sma_50,atr_14,...,bb_width,donchian_upper,donchian_lower,donchian_mid,stoch_k,stoch_d,macd,macd_signal,macd_diff,train_label
150277,2022-01-02 22:00:00+00:00,-0.216908,-2.726675,10.065136,0.236488,0.301347,0.585331,-0.701586,-0.701989,-0.929855,...,-0.64161,-0.704677,-0.698356,-0.701536,0.603346,0.994202,0.101068,0.02383,0.264045,1
150278,2022-01-02 22:05:00+00:00,-0.808892,-2.402975,7.882981,-0.198408,0.078574,0.460073,-0.701652,-0.701945,-0.892176,...,-0.642099,-0.704677,-0.698356,-0.701536,-0.976002,0.235216,0.064605,0.032766,0.113237,2
150279,2022-01-02 22:10:00+00:00,0.085336,-2.299184,7.72446,-0.192755,0.030824,0.473348,-0.701695,-0.701937,-0.897142,...,-0.645187,-0.704677,-0.698356,-0.701536,-0.81257,-0.425334,0.040386,0.034778,0.0255,2
150280,2022-01-02 22:15:00+00:00,0.400206,-1.505381,4.246432,-0.164493,0.158132,0.530212,-0.701655,-0.701939,-0.858876,...,-0.650972,-0.704677,-0.698356,-0.701536,-0.065454,-0.665343,0.044895,0.037344,0.032519,1
150281,2022-01-02 22:20:00+00:00,-0.04062,-2.866749,4.412698,-0.260494,0.046732,0.524495,-0.701626,-0.701945,-0.865246,...,-0.65816,-0.704677,-0.698356,-0.701536,-0.135496,-0.363715,0.04546,0.039517,0.027532,2


## Time2Vec

In [5]:
import torch
import torch.nn as nn

class Time2Vec(nn.Module):
    """Time2Vec-style embedding: a learned linear term plus learned sine terms.

    Args:
        input_dim: number of input channels the parameters are created for.
        kernel_size: number of sine components learned per input channel.

    Forward input is ``x`` of shape (B, T, input_dim); the output has shape
    (B, T, input_dim + input_dim * kernel_size), linear part first.
    """

    def __init__(self, input_dim: int, kernel_size: int = 1):
        super().__init__()
        self.input_dim = input_dim
        self.k = kernel_size

        # Parameters are created in the order wb, bb, wa, ba so that a seeded
        # run draws exactly the same initial values as before.
        linear_shape = (1, 1, input_dim)
        periodic_shape = (1, input_dim, kernel_size)

        # Slope / offset of the linear ("trend") term, one pair per channel.
        self.wb = nn.Parameter(torch.rand(*linear_shape))
        self.bb = nn.Parameter(torch.rand(*linear_shape))

        # Frequency / phase of every sine component, per channel.
        self.wa = nn.Parameter(torch.rand(*periodic_shape))
        self.ba = nn.Parameter(torch.rand(*periodic_shape))

    def forward(self, x):
        """Embed ``x`` and return the linear and sine parts concatenated on the last axis."""
        batch, steps = x.shape[0], x.shape[1]

        # Linear part keeps the input layout: (B, T, input_dim).
        linear_part = self.wb * x + self.bb

        # A trailing axis lets the (1, input_dim, k) parameters broadcast
        # against the input, giving (B, T, input_dim, k).
        phase = x.unsqueeze(-1) * self.wa + self.ba

        # Collapse the channel and component axes into one: (B, T, input_dim * k).
        sine_part = torch.sin(phase).view(batch, steps, -1)

        return torch.cat([linear_part, sine_part], dim=-1)

In [6]:
# One input channel per entry in TIME_COLS, one sine component each.
t2v = Time2Vec(len(TIME_COLS), 1)

In [7]:
# Random smoke-test batch.
# NOTE(review): the last dimension is 5 while t2v was built with
# input_dim == len(TIME_COLS) == 1; the call below only works because the
# size-1 parameters broadcast across all 5 channels — confirm this is intended.
X = torch.rand(64,30,5)

In [8]:
# Shape check: the last axis holds the linear part followed by the sine part.
t2v(X).shape

torch.Size([64, 30, 10])

### Multihead Attention

# Model Building

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

In [23]:
import torch
import torch.nn as nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class Time2VecTransformer(nn.Module):
    """Transformer encoder over Time2Vec-embedded time channels plus raw features.

    The input tensor is expected as (B, T, time_dim + feature_dim) with the
    time channels first. The time channels are embedded with ``Time2Vec``,
    concatenated with the remaining feature channels, projected to ``d_model``
    and passed through a stack of ``TransformerEncoderLayer``s. A final linear
    layer maps every timestep to ``n_classes`` logits.

    Args:
        time_dim: number of leading channels treated as time inputs.
        feature_dim: number of remaining (non-time) channels.
        n_classes: size of the output logit vector.
        d_model: width of the transformer.
        nhead: number of attention heads.
        dim_feedforward: hidden width of each encoder layer's feed-forward block.
        num_layers: number of stacked encoder layers.
        kernel_size: number of sine components per time channel in Time2Vec.
        dropout: dropout probability of each encoder layer. Defaults to 0.1,
            the value that was previously hard-coded.
    """

    def __init__(self, time_dim, feature_dim, n_classes, d_model, nhead, dim_feedforward, num_layers, kernel_size=1, dropout=0.1):
        super().__init__()
        self.time_dim = time_dim
        self.feature_dim = feature_dim
        self.n_classes = n_classes
        self.d_model = d_model

        # Time2Vec embedding of the time channels.
        self.time2vec = Time2Vec(time_dim, kernel_size)

        # Time2Vec emits one linear plus `kernel_size` sine values per time channel;
        # the projection consumes that together with the raw feature channels.
        time2vec_output_dim = time_dim + time_dim * kernel_size
        self.input_proj = nn.Linear(time2vec_output_dim + feature_dim, d_model)

        # Transformer layers (batch-first layout to match the (B, T, C) input).
        encoder_layer = TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True,
            dropout=dropout,
        )
        self.encoder = TransformerEncoder(encoder_layer, num_layers)
        self.fc_out = nn.Linear(d_model, n_classes)

    def forward(self, X):
        """Return per-timestep logits of shape (B, T, n_classes)."""
        X_time = X[:, :, :self.time_dim]   # (B, T, time_dim)
        X_other = X[:, :, self.time_dim:]  # (B, T, feature_dim)
        X_time = self.time2vec(X_time)     # (B, T, time_dim * (1 + k))
        X = torch.cat([X_time, X_other], dim=-1)  # (B, T, feature_dim + time_dim * (1 + k))
        X = self.input_proj(X)             # (B, T, d_model)

        X = self.encoder(X)                # (B, T, d_model)
        logits = self.fc_out(X)            # (B, T, n_classes)
        return logits

In [24]:
# Small model instance: one time channel, every FEATURES_COLS entry as a feature, 3 output classes.
model = Time2VecTransformer(time_dim=1, feature_dim=len(FEATURES_COLS), n_classes=3, d_model=64, nhead=4, dim_feedforward=128, num_layers=2, kernel_size=1)

In [25]:
# Random batch whose channel count matches the model input (time columns + feature columns).
X = torch.rand(64, 30, len(TIME_COLS)+len(FEATURES_COLS))

## Experimenting

In [14]:
import os
import sys
# Put the notebook's parent directory on sys.path (once) so that packages
# located there can be imported from this notebook.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [15]:
from dataset.dataset import ForexDataset
from torch.utils.data import DataLoader

In [16]:
# Reload the complete pickle. This rebinds `df`, so the filtered,
# column-restricted frame built earlier in the notebook is no longer reachable
# under that name.
df = pd.read_pickle(PKL_PATH)
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume,spread,log_volume,close_delta,close_return,...,dow_sin,dow_cos,dom,dom_sin,dom_cos,month,month_sin,month_cos,label,train_label
0,2020-01-02 02:55:00+00:00,108.675,108.679,108.675,108.6785,103450.000763,-0.376685,-1.382631,0.004,3.7e-05,...,0.433884,-0.900969,2,0.394356,0.918958,1,0.5,0.866025,0,1
1,2020-01-02 03:00:00+00:00,108.68,108.6895,108.68,108.683,218760.001183,-0.366104,-0.946493,0.0045,4.1e-05,...,0.433884,-0.900969,2,0.394356,0.918958,1,0.5,0.866025,0,1
2,2020-01-02 03:05:00+00:00,108.682,108.682,108.6765,108.6785,162950.000763,-0.398451,-1.118023,-0.0045,-4.1e-05,...,0.433884,-0.900969,2,0.394356,0.918958,1,0.5,0.866025,0,1
3,2020-01-02 03:10:00+00:00,108.68,108.682,108.6725,108.6765,108110.00061,-0.389706,-1.356971,-0.002,-1.8e-05,...,0.433884,-0.900969,2,0.394356,0.918958,1,0.5,0.866025,0,1
4,2020-01-02 03:15:00+00:00,108.676,108.6915,108.676,108.6895,222470.000625,-0.385972,-0.936699,0.013,0.00012,...,0.433884,-0.900969,2,0.394356,0.918958,1,0.5,0.866025,0,1


In [17]:

# Convert the datetime column to float seconds since the Unix epoch
# (int64 nanoseconds / 1e9).
# The dtype guard makes the cell safe to re-run: without it, a second execution
# would cast the already-converted float seconds back to int64 and divide by
# 1e9 again, silently corrupting the column.
if pd.api.types.is_datetime64_any_dtype(df['timestamp']):
    df['timestamp'] = df['timestamp'].astype('int64') / 1e9

In [18]:
# Build the windowed dataset over the time + feature columns with the label column as target.
# NOTE(review): sequence_length=30 and stride=1 are literals here, while the config cell
# defines SEQUENCE_LENGTH = 24 and STRIDE = 1 — confirm which sequence length is intended.
fx_dataset = ForexDataset(data=df, features=TIME_COLS+FEATURES_COLS, target=TARGET_COLS, sequence_length=30, stride=1)

done initializing dataset


In [19]:
# Shuffled mini-batches of 64 samples drawn from the dataset.
fx_loader = DataLoader(fx_dataset, batch_size=64, shuffle=True)

In [20]:
# Pull a single batch; each batch unpacks into three items.
# NOTE(review): the third item `i` is presumably a sample index — verify against ForexDataset.
X, y, i = next(iter(fx_loader))

In [21]:
# Fresh model instance for the real-data forward pass; the arguments match
# the instance created earlier in the notebook.
model = Time2VecTransformer(
    time_dim=1, 
    feature_dim=len(FEATURES_COLS), 
    n_classes=3, 
    d_model=64, 
    nhead=4, 
    dim_feedforward=128, 
    num_layers=2, 
    kernel_size=1
)

In [28]:
# Forward pass on the real batch; the model returns logits for every timestep.
# NOTE(review): this displays the whole logits tensor — `model(X).shape` would be a more compact check.
model(X)

tensor([[[-0.4685,  0.5035,  0.2673],
         [-0.3613,  0.7350,  0.2658],
         [-1.0205,  1.1776, -0.1523],
         ...,
         [-0.0595,  0.9498,  0.5834],
         [-0.2833,  0.5300,  0.1420],
         [-0.5355,  1.2652,  0.2004]],

        [[-0.4603,  0.7197,  0.1848],
         [-0.5856,  0.7584, -0.0427],
         [-0.8650,  1.2960,  0.0788],
         ...,
         [-0.3942,  0.9250, -0.0863],
         [-0.3308,  0.3503,  0.1332],
         [-0.6479,  1.0631, -0.2753]],

        [[-0.4614,  0.9829,  0.2222],
         [-0.6588,  0.1768,  0.1839],
         [-0.6562,  0.7980,  0.0377],
         ...,
         [-0.7243,  0.9977, -0.1412],
         [-0.1574,  0.4756,  0.2405],
         [-0.9331,  0.8093, -0.2368]],

        ...,

        [[-0.3041,  0.7973, -0.0404],
         [-0.3749,  0.9591,  0.2850],
         [-0.6224,  0.8823, -0.0219],
         ...,
         [-0.2866,  0.8105,  0.2487],
         [-0.2599,  0.6175,  0.2487],
         [-0.0791,  0.6475,  0.4271]],

        [[

In [30]:
# Inspect the index of the reloaded frame.
df.index

RangeIndex(start=0, stop=375217, step=1)

# Setup

In [None]:
import lightning as L

In [None]:
class Time2VecTransformerModule(L.LightningModule):
    """Lightning wrapper that trains ``Time2VecTransformer`` with cross-entropy.

    Args:
        n_time: number of leading time channels in the input.
        n_features: number of non-time feature channels.
        n_classes: number of target classes.
        n_kernel: number of sine components per time channel (Time2Vec).
        d_model: transformer width.
        nhead: number of attention heads.
        num_layers: number of encoder layers.
        dim_feedforward: hidden width of each encoder layer's feed-forward block.
        dropout: stored in ``hparams`` only. It is not forwarded to the model,
            because doing so requires ``Time2VecTransformer`` to accept a
            ``dropout`` keyword.
    """

    def __init__(self, n_time=1, n_features=1, n_classes=3, n_kernel=1, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.1):
        super().__init__()
        self.save_hyperparameters()

        # The hparams keys are the __init__ argument names, so the head count is
        # `nhead` (there is no `n_head` key). `dim_feedforward` is a required
        # argument of Time2VecTransformer and must be passed explicitly.
        self.model = Time2VecTransformer(
            time_dim=self.hparams.n_time,
            feature_dim=self.hparams.n_features,
            n_classes=self.hparams.n_classes,
            d_model=self.hparams.d_model,
            nhead=self.hparams.nhead,
            dim_feedforward=self.hparams.dim_feedforward,
            num_layers=self.hparams.num_layers,
            kernel_size=self.hparams.n_kernel,
        )

        self.criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

    @staticmethod
    def _logits_for_loss(output, labels):
        """Reduce model output to (N, n_classes) logits matching flattened ``labels``.

        The model emits one logit vector per timestep, (B, T, n_classes), which
        CrossEntropyLoss cannot pair with a flat label vector directly.

        * One label per sequence (B labels): use the last timestep's logits.
          NOTE(review): assumes the label describes the step following the
          window — confirm against the dataset.
        * One label per timestep (B * T labels): flatten batch and time axes.
        """
        if output.dim() != 3:
            return output
        batch, steps, n_classes = output.shape
        n_labels = labels.numel()
        if n_labels == batch:
            return output[:, -1, :]
        if n_labels == batch * steps:
            return output.reshape(batch * steps, n_classes)
        raise ValueError(
            f"Cannot align {n_labels} labels with model output of shape {tuple(output.shape)}"
        )

    def forward(self, x, labels=None):
        """Run the model and return ``(loss, output)``.

        ``output`` is the raw model output. ``loss`` is 0 when ``labels`` is
        None, otherwise the label-smoothed cross-entropy.
        """
        output = self.model(x)
        loss = 0
        if labels is not None:
            labels = labels.reshape(-1).long()
            loss = self.criterion(self._logits_for_loss(output, labels), labels)
        return loss, output

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss; the third batch item is unused."""
        x, y, _ = batch
        loss, _ = self(x, y)
        self.log('train_loss', loss, prog_bar=True, logger=True)
        return {'loss': loss}

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss."""
        x, y, _ = batch
        loss, _ = self(x, y)
        self.log('val_loss', loss, prog_bar=True, logger=True)
        return {'loss': loss}

    def test_step(self, batch, batch_idx):
        """Compute and log the test loss."""
        x, y, _ = batch
        loss, _ = self(x, y)
        self.log('test_loss', loss, prog_bar=True, logger=True)
        return {'loss': loss}

    def configure_optimizers(self):
        """Adam (lr 1e-3, weight decay 1e-5) with the LR halved every 5 scheduler steps."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3, weight_decay=1e-5)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
        return [optimizer], [scheduler]