In [1]:
import sys
import warnings
from pathlib import Path

import numpy as np

# Make `from src...` imports resolvable. The notebook is expected to run from
# ./notebooks, so the project root is the parent of the working directory.
PROJECT_ROOT = Path.cwd().parent  # notebook is in ./notebooks
project_root_str = str(PROJECT_ROOT)
if project_root_str not in sys.path:
    # Prepend so the project's own `src` package wins over any other `src` on the path
    sys.path.insert(0, project_root_str)

from src.dataloader.DataLoader import DataLoader

# Load the BTC/USDT candle CSV stored in the project root and let DataLoader
# wrap it using its 'tick' data type.
csv_path = PROJECT_ROOT.joinpath('btc_usdt_1h.csv')
wrapper = DataLoader.load_wrap(
    str(csv_path),
    data_type='tick',
)

# Show which wrapper class DataLoader produced
type(wrapper)

src.datawrapper.TickWrapper.TickWrapper

In [3]:
# Inspect the loaded data: bind whatever get_dataframe() returns to `df` and
# preview up to the first 30 rows.
# NOTE(review): the saved output below lists only 10 rows and an `open_time_iso`
# column, while later cells show `open_time` as integer epoch milliseconds, and two
# cells carry the execution count 3 - the saved output looks stale; re-run from a
# fresh kernel before trusting it.
df = wrapper.get_dataframe()
df.head(30)

Unnamed: 0,open_time_iso,open,high,low,close,volume
0,2024-12-15 00:00:00+00:00,101420.0,101558.83,101237.14,101442.42,313.78863
1,2024-12-15 01:00:00+00:00,101442.41,102196.97,101424.01,102134.48,1458.80477
2,2024-12-15 02:00:00+00:00,102134.48,102155.28,101775.88,101831.15,728.33298
3,2024-12-15 03:00:00+00:00,101831.16,101931.73,101722.31,101733.82,216.32352
4,2024-12-15 04:00:00+00:00,101733.83,102850.0,101733.82,102366.17,1788.48948
5,2024-12-15 05:00:00+00:00,102366.17,102511.81,102107.92,102159.29,485.39491
6,2024-12-15 06:00:00+00:00,102159.29,102169.9,101759.8,101759.8,417.41142
7,2024-12-15 07:00:00+00:00,101759.81,101941.18,101534.01,101732.09,399.71077
8,2024-12-15 08:00:00+00:00,101732.09,102015.16,101581.07,102015.15,434.85636
9,2024-12-15 09:00:00+00:00,102015.15,102270.95,101919.6,102264.34,361.99787


In [3]:
# Features configured by DataLoader for tick data.
# In the saved output these are volume, open, high, low and close (no timestamp column).
wrapper.get_features().head()

Unnamed: 0,volume,open,high,low,close
0,313.78863,101420.0,101558.83,101237.14,101442.42
1,1458.80477,101442.41,102196.97,101424.01,102134.48
2,728.33298,102134.48,102155.28,101775.88,101831.15
3,216.32352,101831.16,101931.73,101722.31,101733.82
4,1788.48948,101733.83,102850.0,101733.82,102366.17


In [4]:
# Preview the wrapper's dataframe before any derived columns are added; in the
# saved output `open_time` is an integer epoch timestamp in milliseconds.
wrapper.get_dataframe().head()

Unnamed: 0,open_time,open,high,low,close,volume
0,1734220800000,101420.0,101558.83,101237.14,101442.42,313.78863
1,1734224400000,101442.41,102196.97,101424.01,102134.48,1458.80477
2,1734228000000,102134.48,102155.28,101775.88,101831.15,728.33298
3,1734231600000,101831.16,101931.73,101722.31,101733.82,216.32352
4,1734235200000,101733.83,102850.0,101733.82,102366.17,1788.48948


In [5]:
# Add the `log_return` column to the wrapper's dataframe.
wrapper.compute_log_return()

# Sanity check on the new column. A per-bar log return on hourly candles should
# sit very close to 0. Values clustered around ln(2) ~= 0.693 are what
# log(1 + close / open) produces (for example log1p applied to the price ratio
# instead of to the simple return), which silently shifts every value by ~ln(2).
# A warning is used instead of an assert so the rest of the notebook still runs.
# NOTE(review): verify the formula inside compute_log_return() if this fires.
MAX_PLAUSIBLE_MEDIAN_ABS_LOG_RETURN = 0.1  # far above any realistic hourly median move
median_abs_log_return = wrapper.get_dataframe()['log_return'].abs().median()
if median_abs_log_return > MAX_PLAUSIBLE_MEDIAN_ABS_LOG_RETURN:
    warnings.warn(
        f'log_return looks mis-scaled: median |log_return| = '
        f'{median_abs_log_return:.6f}; a value near ln(2) = {np.log(2):.6f} '
        f'suggests log(1 + close/open) was computed instead of log(close/open)'
    )

wrapper.get_dataframe().head()

Unnamed: 0,open_time,open,high,low,close,volume,log_return
0,1734220800000,101420.0,101558.83,101237.14,101442.42,313.78863,0.693258
1,1734224400000,101442.41,102196.97,101424.01,102134.48,1458.80477,0.696553
2,1734228000000,102134.48,102155.28,101775.88,101831.15,728.33298,0.691661
3,1734231600000,101831.16,101931.73,101722.31,101733.82,216.32352,0.692669
4,1734235200000,101733.83,102850.0,101733.82,102366.17,1788.48948,0.69625


In [6]:
# Column dtypes after compute_log_return(): the saved output shows `open_time`
# as int64 and every price, volume and log_return column as float64.
wrapper.get_dataframe().dtypes

open_time       int64
open          float64
high          float64
low           float64
close         float64
volume        float64
log_return    float64
dtype: object

In [7]:
from src.transformer.TechIndicGenerator import TechIndicGenerator
# One generator instance, reused by every compute_* indicator call in the cells below.
tech_indic_generator = TechIndicGenerator()

In [8]:
# Add a 14-period RSI; it shows up as the `rsi` column in the preview.
# In the saved output the first remaining `open_time` is 14 hours later than
# before this call and the index restarts at 0, so the warm-up rows appear to be
# dropped from the wrapper's dataframe.
# NOTE(review): if so, re-running this cell would presumably trim rows again - confirm.
tech_indic_generator.compute_RSI(wrapper, period=14)
wrapper.get_dataframe().head()

Unnamed: 0,open_time,open,high,low,close,volume,log_return,rsi
0,1734271200000,102784.65,103061.88,102622.99,102994.37,988.69088,0.694167,69.668788
1,1734274800000,102994.37,103039.99,102704.0,102704.01,704.04111,0.691737,64.552397
2,1734278400000,102704.0,103490.0,102583.65,103127.08,1092.30198,0.695205,68.215136
3,1734282000000,103127.08,103368.19,102978.93,103164.0,493.45924,0.693326,68.520823
4,1734285600000,103164.01,103358.89,102984.58,103204.0,381.5582,0.693341,68.870137


In [9]:
# Add the `swma` column using the generator's default parameters.
# In the saved output the first remaining `open_time` moves 3 hours forward
# relative to the previous cell (rows appear to be trimmed again).
tech_indic_generator.compute_SWMA(wrapper)
wrapper.get_dataframe().head()

Unnamed: 0,open_time,open,high,low,close,volume,log_return,rsi,swma
0,1734282000000,103127.08,103368.19,102978.93,103164.0,493.45924,0.693326,68.520823,102970.091667
1,1734285600000,103164.01,103358.89,102984.58,103204.0,381.5582,0.693341,68.870137,103081.695
2,1734289200000,103203.99,103455.0,103178.47,103252.95,474.6008,0.693384,69.318823,103186.005
3,1734292800000,103252.96,103404.16,103066.67,103144.83,421.52166,0.692623,67.021012,103203.788333
4,1734296400000,103144.83,103197.67,102634.25,102859.06,733.51731,0.691761,61.242558,103143.103333


In [10]:
# Add the `ema` column using the generator's default parameters.
# In the saved output the first remaining `open_time` moves 11 hours forward
# relative to the previous cell.
tech_indic_generator.compute_EMA(wrapper)
wrapper.get_dataframe().head()

Unnamed: 0,open_time,open,high,low,close,volume,log_return,rsi,swma,ema
0,1734321600000,104582.28,104805.22,104469.89,104686.15,650.79766,0.693644,66.882798,104758.89,104016.403333
1,1734325200000,104686.15,105064.0,104482.46,105028.0,781.51923,0.694779,69.249969,104691.696667,104172.03359
2,1734328800000,105028.01,105373.76,104956.0,105149.98,971.06116,0.693728,70.072006,104860.093333,104322.486884
3,1734332400000,105149.98,105420.76,104900.0,104992.79,679.89461,0.692399,67.565372,105005.816667,104425.61044
4,1734336000000,104992.79,105016.85,104505.22,104535.44,713.40126,0.690967,60.755479,104974.83,104442.507295


In [11]:
# Compute TEMA with default parameters; the result appears as the `tema` column
# in the preview cell that follows the KAMA call.
tech_indic_generator.compute_TEMA(wrapper)


In [12]:
# Compute KAMA with default parameters; the result appears as the `kama` column
# in the preview cell below.
tech_indic_generator.compute_KAMA(wrapper)

In [13]:
# Preview after the TEMA and KAMA calls: `tema` and `kama` are present, and in
# the saved output the first remaining `open_time` is 63 hours later than after
# the EMA cell (the two calls combined).
wrapper.get_dataframe().head()

Unnamed: 0,open_time,open,high,low,close,volume,log_return,rsi,swma,ema,tema,kama
0,1734548400000,104805.44,104805.44,102817.82,102817.82,4875.44811,0.68362,34.507879,104324.63,104311.942574,103902.048481,104691.604072
1,1734552000000,102817.82,102997.77,100303.72,100846.67,10402.4441,0.683515,26.439725,103459.411667,103778.823716,102654.405052,104244.609808
2,1734555600000,100846.68,101745.45,100481.81,101010.55,3022.31056,0.693959,27.948043,102191.42,103352.935452,101841.655707,103970.468517
3,1734559200000,101010.54,101560.89,100648.99,101152.45,1684.68673,0.693849,29.299838,101280.785,103014.399229,101342.920294,103685.562852
4,1734562800000,101152.45,101203.45,100000.0,100204.01,2820.1337,0.688448,25.813821,100896.113333,102582.031655,100644.768994,103219.480375


In [14]:
# Add Bollinger Bands with default parameters: `bb_upper`, `bb_middle`, `bb_lower`.
# In the saved output the first remaining `open_time` moves 19 hours forward
# relative to the previous preview.
tech_indic_generator.compute_BBANDS(wrapper)
wrapper.get_dataframe().head()

Unnamed: 0,open_time,open,high,low,close,volume,log_return,rsi,swma,ema,tema,kama,bb_upper,bb_middle,bb_lower
0,1734616800000,101952.44,102800.11,100694.07,100942.41,3192.18817,0.688181,39.61844,101878.88,101701.174594,101826.476323,102061.397788,102833.497596,101319.306,99805.114404
1,1734620400000,100942.41,101063.38,99500.0,100679.86,4543.00113,0.691846,38.037143,101409.593333,101544.049271,101426.516518,102010.294838,102583.442958,101212.408,99841.373042
2,1734624000000,100679.86,101200.0,99830.0,100716.3,2127.60244,0.693328,38.404608,100985.546667,101416.70323,101154.153506,101951.329862,102585.033678,101205.8895,99826.745322
3,1734627600000,100716.3,100748.87,97500.0,98855.86,5111.98146,0.683868,28.961302,100431.765,101022.727348,100229.474994,101698.213426,102816.441356,101098.155,99379.868644
4,1734631200000,98855.86,98952.73,96900.0,98932.73,5024.54216,0.693536,29.730142,99792.818333,100701.189295,99608.612139,101481.842264,102946.878144,100987.169,99027.459856


In [15]:
# Add the `atr` column using the generator's default parameters.
# In the saved output the first remaining `open_time` moves 14 hours forward
# relative to the previous cell.
tech_indic_generator.compute_ATR(wrapper)
wrapper.get_dataframe().head()

Unnamed: 0,open_time,open,high,low,close,volume,log_return,rsi,swma,ema,tema,kama,bb_upper,bb_middle,bb_lower,atr
0,1734667200000,97106.02,97838.98,96839.4,97304.07,1275.17255,0.694166,38.292522,97183.546667,97895.111026,96749.509496,99324.953492,103449.346769,99161.69,94874.033231,1659.291429
1,1734670800000,97304.07,97491.95,96672.47,96734.32,1339.46566,0.690215,35.922003,97197.848333,97716.527792,96618.876532,99243.784245,103137.870108,98907.6065,94677.342892,1599.304898
2,1734674400000,96734.33,97303.99,96374.03,97281.37,1663.60632,0.695971,39.77696,97077.361667,97649.580439,96763.382609,99180.128956,102614.624576,98651.208,94687.791424,1551.494548
3,1734678000000,97281.38,98233.0,97276.5,97832.53,2593.15424,0.695976,43.467134,97194.663333,97677.726525,97103.732734,99157.577554,101990.119221,98420.335,94850.550779,1508.994938
4,1734681600000,97832.52,97864.91,95300.0,95473.74,3437.65104,0.681019,33.894874,97072.643333,97338.651675,96431.791319,98978.96369,101487.237696,98094.622,94702.006304,1584.417442


In [16]:
# Add the `mfi` column using the generator's default parameters.
# In the saved output the first remaining `open_time` moves 14 hours forward
# relative to the previous cell.
tech_indic_generator.compute_MFI(wrapper)
wrapper.get_dataframe().head()

Unnamed: 0,open_time,open,high,low,close,volume,log_return,rsi,swma,ema,tema,kama,bb_upper,bb_middle,bb_lower,atr,mfi
0,1734717600000,97206.6,97308.39,96602.69,96993.58,1216.44814,0.692044,49.319393,96975.726667,96272.236058,96793.841861,97284.482848,98997.307688,96249.1795,93501.051312,1521.413264,54.144663
1,1734721200000,96992.26,97748.0,96840.0,97116.0,1339.74895,0.693785,49.941288,97040.721667,96402.045895,97069.063101,97278.656872,98953.497991,96231.8865,93510.275009,1477.598031,57.060066
2,1734724800000,97116.0,97232.0,96422.06,96451.88,1354.12601,0.689722,46.600529,96979.605,96409.712681,96991.148595,97252.190052,98876.08146,96192.0695,93508.05754,1429.908172,53.810035
3,1734728400000,96451.88,96647.36,95393.84,96623.29,1604.6054,0.694035,47.575285,96792.105,96442.57073,96989.320745,97234.384278,98897.386501,96207.2345,93517.082499,1417.309017,49.188151
4,1734732000000,96623.29,97513.0,96451.03,97463.93,978.34197,0.697488,52.185072,96788.378333,96599.702925,97307.087602,97239.207053,98872.032201,96198.8015,93525.570799,1391.927658,54.339158


In [17]:
# Earliest remaining timestamp (epoch milliseconds) after all indicator calls;
# in the saved output it equals the `open_time` of the first row shown after MFI.
wrapper.get_dataframe()['open_time'].min()

np.int64(1734717600000)

In [18]:
# Import WindowScope, not just Scope
from src.transformer.Scope import WindowScope

# 1. Define Training Scope (Sliding Window)
# `open_time` is an int64 epoch timestamp in milliseconds and the candles are one
# hour apart, so sizes expressed in this column's units must be scaled from hours
# to milliseconds. The bare values 100 / 10 would describe a 100 ms window advanced
# by 10 ms, which can never contain more than one hourly bar.
# NOTE(review): assumes WindowScope measures window_size/step_size in the units of
# `column` (the row_id-based sanity checks further down rely on the same reading)
# - confirm against src/transformer/Scope.py.
MS_PER_HOUR = 3_600_000
training_window_scope = WindowScope(
    wrapper=wrapper,
    parameters={
        'column': 'open_time',
        'window_size': 100 * MS_PER_HOUR,  # 100 hourly bars
        'step_size': 10 * MS_PER_HOUR,     # slide forward 10 bars at a time
    }
)

# 2. Define Testing Scope (if you have a TestingWindowScope class)
# Note: You need to import TestingWindowScope as well if used
# from src.transformer.Scope import TestingWindowScope
# testing_window_scope = TestingWindowScope(training_window_scope=training_window_scope, ...)

# Scope sanity tests
This section runs a few lightweight, assert-based checks for `WindowScope` and `TestingWindowScope` to make sure shifting, bounds checks, and train/test alignment behave as expected.

In [19]:
import math
import pandas as pd
from src.transformer.Scope import WindowScope, TestingWindowScope

df = wrapper.get_dataframe().copy()

# Give every row a 0..n-1 position so the window checks below are purely
# row-based and deterministic. Skipped when the column already exists, which
# keeps this cell safe to re-run.
if 'row_id' not in df.columns:
    df = df.assign(row_id=np.arange(len(df), dtype=int))
    wrapper.set_dataframe(df)

# Largest row_id; used as the upper bound for both scopes
end_value = len(wrapper.get_dataframe()) - 1

assert 'row_id' in wrapper.get_dataframe().columns
assert wrapper.get_dataframe()['row_id'].iloc[[0, -1]].tolist() == [0, end_value]

# --- WindowScope: shift + bounds ---
train = WindowScope(
    wrapper=wrapper,
    parameters={
        'column': 'row_id',
        'start_value': 0,
        'window_size': 100,
        'step_size': 10,
        # keep end_value aligned to the max row_id
        'end_value': end_value,
    },
)

# Initial window covers [0, 100) in row_id units and is inside the data
assert train.current_state() == (0, 100)
assert train.is_in_scope() is True

# One shift advances the start by step_size and keeps the window size
train.shift()
assert train.start_value == 10
assert train.current_state() == (10, 110)

# is_in_scope holds while start + window_size <= end_value, so the valid starts
# are 0, step, 2*step, ... up to end_value - window_size.
expected_windows = (end_value - train.window_size - 0) // train.step_size + 1

# Count the windows actually produced from a fresh state
train.reset_state()
count = 0
while train.is_in_scope():
    train.shift()
    count += 1

assert count == expected_windows, (count, expected_windows)

# --- TestingWindowScope: aligned right after training window ---
train.reset_state()
test = TestingWindowScope(
    training_window_scope=train,
    parameters={
        'testing_window_size': 20,
        'column': 'row_id',
        'end_value': end_value,
    },
)

# The test window starts exactly where the training window ends
assert test.start_value == train.start_value + train.window_size
assert test.window_size == 20
assert test.is_in_scope() is True

# After shifting the training window, the test window should re-sync when test.shift() is called
train.shift()  # train now starts at 10
test.shift()
assert test.start_value == train.start_value + train.window_size
assert test.start_value >= train.start_value + train.window_size  # never overlaps training rows
assert test.start_value == 110
assert test.current_state() == (110, 130)

print('Scope tests: OK')

Scope tests: OK


In [20]:
import math
import pandas as pd
from src.transformer.Scope import WindowScope, TestingWindowScope

df = wrapper.get_dataframe().copy()

# Upper bound shared by both scopes: the largest row_id in the wrapper's dataframe
last_row_id = len(wrapper.get_dataframe()) - 1

# --- WindowScope: shift + bounds ---
# Training window of 250 rows, advanced 50 rows per step (row_id units).
train = WindowScope(
    wrapper=wrapper,
    parameters={
        'column': 'row_id',
        'start_value': 0,
        'window_size': 250,
        'step_size': 50,
        # keep end_value aligned to the max row_id
        'end_value': last_row_id,
    },
)

# --- TestingWindowScope: aligned right after training window ---
train.reset_state()
test = TestingWindowScope(
    training_window_scope=train,
    parameters={
        'testing_window_size': 20,
        'column': 'row_id',
        'end_value': last_row_id,
    },
)

# Walk forward while BOTH windows fit inside the data. Checking only the
# training scope would still emit a final fold whose 20-row test window runs
# past end_value (the training window can end within 20 rows of the last row).
# test.shift() is called after train.shift() so the test window re-syncs to the
# new end of the training window.
while train.is_in_scope() and test.is_in_scope():
    print(f'Training Window: {train.current_state()}')
    print(f'Testing Window: {test.current_state()}')
    train.shift()
    test.shift()

Training Window: (0, 250)
Testing Window: (250, 270)
Training Window: (50, 300)
Testing Window: (300, 320)
Training Window: (100, 350)
Testing Window: (350, 370)
Training Window: (150, 400)
Testing Window: (400, 420)
Training Window: (200, 450)
Testing Window: (450, 470)
Training Window: (250, 500)
Testing Window: (500, 520)
Training Window: (300, 550)
Testing Window: (550, 570)
Training Window: (350, 600)
Testing Window: (600, 620)
Training Window: (400, 650)
Testing Window: (650, 670)
Training Window: (450, 700)
Testing Window: (700, 720)
Training Window: (500, 750)
Testing Window: (750, 770)
Training Window: (550, 800)
Testing Window: (800, 820)
Training Window: (600, 850)
Testing Window: (850, 870)
Training Window: (650, 900)
Testing Window: (900, 920)
Training Window: (700, 950)
Testing Window: (950, 970)
Training Window: (750, 1000)
Testing Window: (1000, 1020)
Training Window: (800, 1050)
Testing Window: (1050, 1070)
Training Window: (850, 1100)
Testing Window: (1100, 1120)
Trai