In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

import os

In [5]:
from pathlib import Path

# --- Run parameters: edit these to point the notebook at another dataset ---
SYMBOL = "USDJPY"
SAMPLE_TYPE = "time"
MINUTES = 15

START_DATE = "20240101"
END_DATE = "20241231"

# File stem shared by the paths below: <symbol>-<minutes>m-<start>-<end>
RESAMPLED_NAME = f"{SYMBOL}-{MINUTES}m-{START_DATE}-{END_DATE}"

# --- Directory layout, relative to the notebook's working directory ---
BASE_DIR = Path("../data")
RESAMPLED_DIR = BASE_DIR.joinpath("resampled")
PROCESSED_DIR = BASE_DIR.joinpath("processed")
EVENTS_DIR = BASE_DIR.joinpath("events")

# --- Pickle paths built from the stem ---
RESAMPLED_FILE_PATH = RESAMPLED_DIR.joinpath(f"{RESAMPLED_NAME}.pkl")
PROCESSED_FILE_PATH = PROCESSED_DIR.joinpath(f"{RESAMPLED_NAME}_FEATURE.pkl")

In [13]:
# Direction-label pickle for the configured run.
# The stem comes from RESAMPLED_NAME (defined in the configuration cell) rather
# than a hard-coded copy, so changing SYMBOL / MINUTES / the date range moves
# this path together with the feature path instead of silently pairing new
# features with the previous run's labels.
# NOTE(review): the "-Z-SCORE-W100-1Z-TB" suffix presumably encodes the
# labelling settings -- confirm against the notebook that writes this file.
DIRECTION_LABEL_FILE_PATH = BASE_DIR / 'direction_labels' / f'{RESAMPLED_NAME}-Z-SCORE-W100-1Z-TB.pkl'

In [9]:
from torch.utils.data import Dataset, DataLoader
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

In [14]:
# Read both pickled inputs in a fixed order: the feature frame first, then the
# direction labels.
# NOTE: unpickling can execute arbitrary code, so only load files written by
# this project's own pipeline.
df, labels = (
    pd.read_pickle(pickle_path)
    for pickle_path in (PROCESSED_FILE_PATH, DIRECTION_LABEL_FILE_PATH)
)

In [12]:
# Print the feature frame's (rows, columns), then show its first five rows as
# the cell's rich output.
print((len(df.index), len(df.columns)))
df.head(n=5)

(21758, 54)


Unnamed: 0_level_0,open,high,low,close,volume,spread,return,log_return,fd_return,log_fd_return,...,BBL_5_2.0_2.0,BBM_5_2.0_2.0,BBU_5_2.0_2.0,BBB_5_2.0_2.0,BBP_5_2.0_2.0,BBL_14_2.0_2.0,BBM_14_2.0_2.0,BBU_14_2.0_2.0,BBB_14_2.0_2.0,BBP_14_2.0_2.0
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-02-15 18:00:00,1.07625,1.07645,1.07606,1.076245,5488560000000.0,2.5e-05,0.0,0.0,0.154042,0.010528,...,1.075866,1.076116,1.076366,0.046462,0.758008,1.074885,1.076427,1.07797,0.286603,0.44096
2024-02-15 18:15:00,1.07625,1.07649,1.07611,1.076455,5812690000000.0,2.4e-05,0.00021,0.000195,0.154228,0.010701,...,1.075844,1.076197,1.07655,0.065626,0.8653,1.074947,1.076345,1.077742,0.259652,0.539487
2024-02-15 18:30:00,1.07646,1.076555,1.07624,1.076475,4481140000000.0,2.5e-05,2e-05,1.9e-05,0.154186,0.010661,...,1.076039,1.076309,1.076579,0.050115,0.807754,1.075268,1.076221,1.077174,0.177042,0.63327
2024-02-15 18:45:00,1.07648,1.07676,1.07634,1.0767,3894920000000.0,2.5e-05,0.000225,0.000209,0.154375,0.010837,...,1.076085,1.076424,1.076763,0.063023,0.906841,1.075544,1.076153,1.076763,0.113251,0.948645
2024-02-15 19:00:00,1.076705,1.07724,1.07666,1.076985,4145560000000.0,2e-05,0.000285,0.000265,0.154586,0.011033,...,1.076068,1.076572,1.077076,0.093551,0.910071,1.075464,1.076183,1.076902,0.133621,1.057567


In [15]:
# Print the label frame's (rows, columns), then show its first five rows as
# the cell's rich output.
print((len(labels.index), len(labels.columns)))
labels.head(n=5)

(3157, 5)


Unnamed: 0_level_0,t1,trgt,ret,bin,bin_class
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-02-16 18:45:00,2024-02-16 19:45:00,0.000585,-0.000751,-1.0,0.0
2024-02-16 19:00:00,2024-02-16 19:45:00,0.000581,-0.000793,-1.0,0.0
2024-02-16 19:15:00,2024-02-16 19:45:00,0.000566,-0.000714,-1.0,0.0
2024-02-16 19:30:00,2024-02-16 20:00:00,0.00055,-0.000603,-1.0,0.0
2024-02-18 22:00:00,2024-02-18 23:15:00,0.000451,0.000469,1.0,2.0
