In [1]:
import sys, os
sys.path.insert(0, os.path.abspath(".."))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tsf.data.loader import DataLoader
from tsf.data.feature_engineer import FeatureEngineer
# from tsf.data.scaler import FeatureScaler
from tsf.data.window import WindowGenerator

In [2]:
# Load the hourly BTC/USDT CSV (per its file name) from the parent directory
# using the loader's "tick" preset.
dataset = DataLoader.from_csv("../btc_usdt_1h.csv", preset="tick")

# Plain-text summary first, then a rich preview restricted to the feature and
# label columns (the last expression is what the notebook renders).
print(dataset)
dataset.df[dataset.feature_cols + dataset.label_cols].head()

Dataset(shape=(8805, 13), features=5, labels=0, time_col='open_time_iso')


Unnamed: 0,open,high,low,close,volume
0,101420.0,101558.83,101237.14,101442.42,313.78863
1,101442.41,102196.97,101424.01,102134.48,1458.80477
2,102134.48,102155.28,101775.88,101831.15,728.33298
3,101831.16,101931.73,101722.31,101733.82,216.32352
4,101733.83,102850.0,101733.82,102366.17,1788.48948


In [3]:
# Display the dataset's feature view; the saved output lists the
# open/high/low/close/volume columns (appears to be a DataFrame — TODO confirm).
dataset.get_features()

Unnamed: 0,open,high,low,close,volume
0,101420.00,101558.83,101237.14,101442.42,313.78863
1,101442.41,102196.97,101424.01,102134.48,1458.80477
2,102134.48,102155.28,101775.88,101831.15,728.33298
3,101831.16,101931.73,101722.31,101733.82,216.32352
4,101733.83,102850.00,101733.82,102366.17,1788.48948
...,...,...,...,...,...
8800,87977.44,88050.17,87333.08,87588.26,1300.81662
8801,87588.26,87844.01,87093.79,87131.99,779.27901
8802,87132.00,88019.88,87061.23,87781.35,863.99314
8803,87781.35,87800.00,87294.11,87585.77,553.66943


In [4]:
# Print the dataset summary again; the saved output is identical to the one
# printed right after loading.
print(dataset)

Dataset(shape=(8805, 13), features=5, labels=0, time_col='open_time_iso')


In [5]:
# Indicator specs handed to FeatureEngineer. Entry order is kept deliberately:
# the printed feature list follows it. Group labels assume the usual
# technical-analysis meaning of each indicator name — TODO confirm against
# the FeatureEngineer implementation.
feature_config = [
    # Oscillator
    dict(name="RSI", period=14),
    # Adaptive average and typical price
    dict(name="KAMA", period=30),
    dict(name="HLC3"),
    # Moving averages
    dict(name="EMA", period=12),
    dict(name="TEMA", period=12),
    dict(name="SWMA"),
    # Volatility
    dict(name="ATR", period=14),
    dict(name="BBANDS", period=20),
    # Volume-based
    dict(name="OBV"),
    dict(name="MFI", period=14),
    # Label: log return with shift=-1, i.e. forward-looking
    dict(name="log_return", shift=-1),
]

engineer = FeatureEngineer(feature_config)
# apply() works on `dataset` itself (nothing is reassigned here, yet the
# printed shape and column lists change afterwards).
# NOTE(review): re-running this cell without reloading the data may therefore
# not be idempotent — confirm.
engineer.apply(dataset)

# Summary, column lists, then a rich preview of features + label.
print(dataset)
print(f"Features: {dataset.feature_cols}")
print(f"Labels:   {dataset.label_cols}")
dataset.df[dataset.feature_cols + dataset.label_cols].head()

Dataset(shape=(8771, 26), features=17, labels=1, time_col='open_time_iso')
Features: ['open', 'high', 'low', 'close', 'volume', 'rsi', 'kama', 'hlc3', 'ema', 'tema', 'swma', 'atr', 'bb_upper', 'bb_middle', 'bb_lower', 'obv', 'mfi']
Labels:   ['log_return']


Unnamed: 0,open,high,low,close,volume,rsi,kama,hlc3,ema,tema,swma,atr,bb_upper,bb_middle,bb_lower,obv,mfi,log_return
0,104535.44,104942.48,104500.46,104676.72,767.27443,62.028656,104983.472253,104706.553333,104531.035198,104906.279755,104813.86,652.839549,105952.397907,104070.2615,102188.125093,16973.524551,66.51318,7.9e-05
1,104676.73,104722.1,104350.01,104684.99,993.28252,62.106154,104967.60193,104585.7,104554.720552,104838.980463,104683.683333,632.78601,105987.256747,104154.7925,102322.328253,17966.807071,64.704436,-0.008895
2,104684.98,104836.36,103625.78,103757.99,1498.30185,49.829559,104932.652527,104073.376667,104432.146621,104419.142891,104502.808333,674.057009,105927.187098,104207.4915,102487.795902,16468.505221,62.441803,-0.000983
3,103758.0,104145.9,103333.0,103656.02,1555.92886,48.689427,104884.693735,103711.64,104312.742526,104076.20817,104203.116667,683.974365,105901.847831,104233.9385,102566.029169,14912.576361,56.994365,0.002428
4,103656.02,104138.99,103484.51,103908.02,1256.73983,51.634632,104841.942035,103843.84,104250.477522,103933.213947,103903.505,681.867625,105873.848698,104271.1395,102668.430302,16169.316191,47.590886,0.015082


In [None]:
# Build the train/test splitter over the engineered dataset: a 14-day training
# span of hourly rows, a 1-day test span, advancing one day per split.
# NOTE(review): the saved output below reports mode=expanding and a 7-day step,
# so it was produced before these arguments were edited — re-run this cell and
# the cells after it to refresh the outputs.
window = WindowGenerator(
    dataset=dataset,
    mode="sliding",
    train_window="14d",
    test_window="1d",
    step="1d",
)

print(window)
print(f"Window summary: {window.summary()}")

WindowGenerator(mode=expanding, train=14 days 00:00:00, test=1 days 00:00:00, step=7 days 00:00:00)
Window summary: {'mode': 'expanding', 'train_window': '14 days 00:00:00', 'test_window': '1 days 00:00:00', 'step': '7 days 00:00:00', 'start': '2024-12-16 09:00:00', 'end': '2025-12-16 19:00:00', 'n_splits': 51}


In [7]:
# Walk every (train, test) split and print the time span each part covers.
# The column lookups are hoisted out of the f-strings: nesting the same quote
# character inside an f-string is only valid on Python 3.12+, so this form
# also runs on older interpreters. Series.min()/.max() replace the builtin
# min()/max(), which would iterate the column element by element.
for fold_idx, (train_ds, test_ds) in enumerate(window.get_splits()):
    train_times = train_ds.df["open_time_iso"]
    test_times = test_ds.df["open_time_iso"]

    print(f"{fold_idx + 1} split:")
    print(f"train start: {train_times.min()}, end: {train_times.max()}")
    print(f"test start: {test_times.min()}, end: {test_times.max()}")
    print()

1 split:
train start: 2024-12-16 09:00:00+00:00, end: 2024-12-30 08:00:00+00:00
test start: 2024-12-30 09:00:00+00:00, end: 2024-12-31 08:00:00+00:00

2 split:
train start: 2024-12-16 09:00:00+00:00, end: 2025-01-06 08:00:00+00:00
test start: 2025-01-06 09:00:00+00:00, end: 2025-01-07 08:00:00+00:00

3 split:
train start: 2024-12-16 09:00:00+00:00, end: 2025-01-13 08:00:00+00:00
test start: 2025-01-13 09:00:00+00:00, end: 2025-01-14 08:00:00+00:00

4 split:
train start: 2024-12-16 09:00:00+00:00, end: 2025-01-20 08:00:00+00:00
test start: 2025-01-20 09:00:00+00:00, end: 2025-01-21 08:00:00+00:00

5 split:
train start: 2024-12-16 09:00:00+00:00, end: 2025-01-27 08:00:00+00:00
test start: 2025-01-27 09:00:00+00:00, end: 2025-01-28 08:00:00+00:00

6 split:
train start: 2024-12-16 09:00:00+00:00, end: 2025-02-03 08:00:00+00:00
test start: 2025-02-03 09:00:00+00:00, end: 2025-02-04 08:00:00+00:00

7 split:
train start: 2024-12-16 09:00:00+00:00, end: 2025-02-10 08:00:00+00:00
test start: 20