In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

import os

In [2]:
from pathlib import Path

# Configurable parts
SYMBOL = "USDJPY"
SAMPLE_TYPE = "time"
MINUTES = 15

START_DATE = "20240101"
END_DATE = "20241231"

# Build base name
RESAMPLED_NAME = f"{SYMBOL}-{MINUTES}m-{START_DATE}-{END_DATE}"

# Base directories
BASE_DIR = Path("../data")
RESAMPLED_DIR = BASE_DIR / "resampled"
PROCESSED_DIR = BASE_DIR / "processed"
EVENTS_DIR = BASE_DIR / "events"

# Final paths
RESAMPLED_FILE_PATH = RESAMPLED_DIR / f"{RESAMPLED_NAME}.pkl"
PROCESSED_FILE_PATH = PROCESSED_DIR / f"{RESAMPLED_NAME}_FEATURE.pkl"

In [3]:
EVENT_NAME = "Z-SCORE-W100-2Z"
EVENT_PATH = EVENTS_DIR / f"{RESAMPLED_NAME}_{EVENT_NAME}.pkl"

In [4]:
DIRECTION_LABEL_FILE_PATH = BASE_DIR / 'direction_labels' / 'USDJPY-15m-20240101-20241231-EMA_CROSS_8_34-TB.pkl'

In [5]:
df = pd.read_pickle(PROCESSED_FILE_PATH)
labels = pd.read_pickle(DIRECTION_LABEL_FILE_PATH)

In [6]:
print(df.head())
print(labels.head())

                         open      high      low     close        volume  \
timestamp                                                                  
2024-02-15 18:00:00  1.076250  1.076450  1.07606  1.076245  5.488560e+12   
2024-02-15 18:15:00  1.076250  1.076490  1.07611  1.076455  5.812690e+12   
2024-02-15 18:30:00  1.076460  1.076555  1.07624  1.076475  4.481140e+12   
2024-02-15 18:45:00  1.076480  1.076760  1.07634  1.076700  3.894920e+12   
2024-02-15 19:00:00  1.076705  1.077240  1.07666  1.076985  4.145560e+12   

                       spread    return  log_return  fd_return  log_fd_return  \
timestamp                                                                       
2024-02-15 18:00:00  0.000025  0.000000    0.000000   0.154042       0.010528   
2024-02-15 18:15:00  0.000024  0.000210    0.000195   0.154228       0.010701   
2024-02-15 18:30:00  0.000025  0.000020    0.000019   0.154186       0.010661   
2024-02-15 18:45:00  0.000025  0.000225    0.000209   0.154375

In [7]:
val_split = 0.1
test_split = 0.1

In [8]:
n_test = int(len(labels) * test_split)
n_test

53

In [9]:
n_val = int(len(labels) * val_split)
n_val

53

## Splitting Y

In [10]:
train_labels = labels[:-n_test-n_val]
val_labels = labels[-n_test-n_val:-n_test]
test_labels = labels[-n_test:]

In [11]:
print('Train Size:', train_labels.shape[0])
print('Validation Size:', val_labels.shape[0])
print('Test Size:', test_labels.shape[0])

Train Size: 428
Validation Size: 53
Test Size: 53


## Splitting X

In [12]:
train_labels.index[-1]

Timestamp('2024-11-13 09:30:00')

In [13]:
train_features = df.loc[:train_labels.index[-1]]
val_features = df.loc[train_labels.index[-1]:val_labels.index[-1]]
test_features = df.loc[val_labels.index[-1]:test_labels.index[-1]]

## Create Dataset

In [14]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [15]:
from dataset.direction_dataset import DirectionDataset

In [35]:
full_dataset = DirectionDataset(
    data=df,
    events=labels,
    sequence_length=60,
    features_cols=FEATURES_COLS,
    target_col='bin_class',
)

----- Start Creating Sequences -----
----- End Creating Sequences -----


In [16]:
train_features.columns

Index(['open', 'high', 'low', 'close', 'volume', 'spread', 'return',
       'log_return', 'fd_return', 'log_fd_return', 'SMA_5', 'SMA_15', 'SMA_30',
       'SMA_60', 'EMA_5', 'EMA_15', 'EMA_30', 'EMA_60', 'MACD_12_26_9',
       'MACDh_12_26_9', 'MACDs_12_26_9', 'RSI_5', 'RSI_14', 'RSI_20', 'ADX_5',
       'ADXR_5_2', 'DMP_5', 'DMN_5', 'ADX_15', 'ADXR_15_2', 'DMP_15', 'DMN_15',
       'ADX_30', 'ADXR_30_2', 'DMP_30', 'DMN_30', 'ADX_60', 'ADXR_60_2',
       'DMP_60', 'DMN_60', 'ATRr_5', 'ATRr_10', 'ATRr_15', 'ATRr_20',
       'BBL_5_2.0_2.0', 'BBM_5_2.0_2.0', 'BBU_5_2.0_2.0', 'BBB_5_2.0_2.0',
       'BBP_5_2.0_2.0', 'BBL_14_2.0_2.0', 'BBM_14_2.0_2.0', 'BBU_14_2.0_2.0',
       'BBB_14_2.0_2.0', 'BBP_14_2.0_2.0'],
      dtype='object')

In [17]:
train_labels.columns

Index(['t1', 'trgt', 'ret', 'bin', 'bin_class'], dtype='object')

In [18]:
FEATURES_COLS = [
    'log_fd_return'
]

In [26]:
train_dataset = DirectionDataset(
    data=train_features,
    events=train_labels,
    sequence_length=60,
    features_cols=FEATURES_COLS,
    target_col='bin_class',
)
val_dataset = DirectionDataset(
    data=val_features,
    events=val_labels,
    sequence_length=60,
    features_cols=FEATURES_COLS,
    target_col='bin_class',
)

----- Start Creating Sequences -----
----- End Creating Sequences -----
----- Start Creating Sequences -----
----- End Creating Sequences -----


## Load Model

In [20]:
from models.classification.simple_transformer_model import SimpleTransformerModule

In [21]:
model = SimpleTransformerModule(
    n_features=len(FEATURES_COLS),
    output_size=3,
    num_layers=2,
    d_model=64,
    nhead=4,
    dim_feedforward=256,
    dropout=0.4,
    label_smoothing=0.0,
    pool="mean",
    use_class_weights=False,
)

## Train Model

In [22]:
from lightning.pytorch import Trainer
from torch.utils.data import DataLoader

In [32]:
train_dataloader = DataLoader(train_dataset, batch_size=64)
val_dataloader = DataLoader(val_dataset, batch_size=64)

In [33]:
trainer = Trainer(
    accelerator='mps',
    max_epochs=100
)


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [34]:
trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)


  | Name      | Type                   | Params | Mode 
-------------------------------------------------------------
0 | model     | SimpleTransformerModel | 100 K  | train
1 | train_acc | MulticlassAccuracy     | 0      | train
2 | val_acc   | MulticlassAccuracy     | 0      | train
-------------------------------------------------------------
100 K     Trainable params
0         Non-trainable params
100 K     Total params
0.401     Total estimated model params size (MB)
29        Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                           | 0/? [00:00<?, ?it/s]

/Users/yoyo/personal/fxml/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=13` in the `DataLoader` to improve performance.
/Users/yoyo/personal/fxml/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=13` in the `DataLoader` to improve performance.
/Users/yoyo/personal/fxml/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                  | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

Validation: |                                                | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
