In [8]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

import os

In [9]:
from pathlib import Path

# ---------------------------------------------------------------------------
# Settings a reader may tune. Every value below ends up in a file name.
# ---------------------------------------------------------------------------
SYMBOL = "USDJPY"
SAMPLE_TYPE = "time"              # "dollar" picks the dollar naming; any other value picks the minutes naming
MINUTES = 1                       # rendered as "<MINUTES>m" in the non-dollar name
DOLLAR_THRESHOLD = "115009542m"   # only used when SAMPLE_TYPE == "dollar"
EVENT_NAME = "CUSUM"
EVENT_THRESHOLD = 2.52e-04        # rendered with the ".2e" format in the events file name

START_DATE = "20210101"
END_DATE = "20241231"

# Stem shared by every artefact derived from the same resampled dataset.
RESAMPLED_NAME = (
    f"{SYMBOL}-{DOLLAR_THRESHOLD}-dollar-{START_DATE}-{END_DATE}"
    if SAMPLE_TYPE == "dollar"
    else f"{SYMBOL}-{MINUTES}m-{START_DATE}-{END_DATE}"
)

# Directory layout: one sub-folder of BASE_DIR per artefact kind.
BASE_DIR = Path("../data")
RESAMPLED_DIR = BASE_DIR.joinpath("resampled")
PROCESSED_DIR = BASE_DIR.joinpath("processed")
NORMALIZED_DIR = BASE_DIR.joinpath("normalized")
SCALER_DIR = BASE_DIR.joinpath("scalers")
EVENTS_DIR = BASE_DIR.joinpath("events")

# Concrete artefact locations, all keyed on RESAMPLED_NAME.
PROCESSED_FILE_PATH = PROCESSED_DIR.joinpath(f"{RESAMPLED_NAME}-processed.pkl")
NORMALIZED_FILE_PATH = NORMALIZED_DIR.joinpath(f"{RESAMPLED_NAME}-normalized.pkl")
STD_SCALER_PATH = SCALER_DIR.joinpath(f"{RESAMPLED_NAME}_standard_scaler.pkl")
MINMAX_SCALER_PATH = SCALER_DIR.joinpath(f"{RESAMPLED_NAME}_minmax_scaler.pkl")
EVENT_FILE_PATH = EVENTS_DIR.joinpath(
    f"{RESAMPLED_NAME}_{EVENT_NAME}_{EVENT_THRESHOLD:.2e}.pkl"
)

In [10]:
# Load the pickled object stored at NORMALIZED_FILE_PATH into `df`.
# NOTE(review): unpickling can execute arbitrary code — only load files
# produced by a trusted pipeline.
df = pd.read_pickle(NORMALIZED_FILE_PATH)

In [11]:
# Display the column labels of the loaded frame.
df.columns

Index(['open', 'high', 'low', 'close', 'volume', 'spread', 'close_delta',
       'close_return', 'close_log_return', 'ret_mean_5', 'ret_mean_10',
       'ret_mean_15', 'ret_mean_20', 'log_volume', 'ema5', 'ema5_slope',
       'ema20', 'ema20_slope', 'ema100', 'ema100_slope', 'atr14', 'atr20',
       'atr50', 'vol_adj_return', 'close_to_atr', 'adx14', 'plus_di14',
       'minus_di14', 'bb_upper', 'bb_lower', 'bb_mavg', 'bb_width',
       'bb_position', 'donchian_upper', 'donchian_lower', 'donchian_mid',
       'donchian_width', 'stoch_k', 'stoch_d', 'rsi14', 'macd', 'macd_signal',
       'macd_diff', 'unix_time', 'hour', 'dow', 'dom', 'month', 'hour_sin',
       'hour_cos', 'dow_sin', 'dow_cos', 'dom_sin', 'dom_cos', 'month_sin',
       'month_cos'],
      dtype='object')

In [6]:
# Per-column mean of `df`.
# NOTE(review): the saved output of this cell is line-for-line identical to the
# saved output of the `df.std()` cell, and its execution count (6) is lower than
# the cells above it — the output looks stale. Restart the kernel and Run All
# before relying on these numbers.
df.mean()

open                1.675033e+01
high                1.675294e+01
low                 1.674764e+01
close               1.675032e+01
volume              4.804410e+11
spread              1.000000e+00
close_delta         2.215723e-02
close_return        1.595155e-04
close_log_return    1.000000e+00
ret_mean_5          1.000000e+00
ret_mean_10         1.000000e+00
ret_mean_15         1.000000e+00
ret_mean_20         1.000000e+00
log_volume          1.000000e+00
ema5                1.000000e+00
ema5_slope          1.000000e+00
ema20               1.000000e+00
ema20_slope         1.000000e+00
ema100              1.000000e+00
ema100_slope        1.000000e+00
atr14               1.000000e+00
atr20               1.000000e+00
atr50               1.000000e+00
vol_adj_return      1.000000e+00
close_to_atr        1.000000e+00
adx14               1.210136e-01
plus_di14           9.559819e-02
minus_di14          1.016198e-01
bb_upper            1.000000e+00
bb_lower            1.000000e+00
bb_mavg   

In [7]:
# Per-column standard deviation of `df` (pandas default: sample std, ddof=1).
# NOTE(review): this cell's execution count (7) is out of sequence with the
# cells above it, and its saved output is identical to the one shown under
# `df.mean()` — re-run from a fresh kernel to confirm both outputs.
df.std()

open                1.675033e+01
high                1.675294e+01
low                 1.674764e+01
close               1.675032e+01
volume              4.804410e+11
spread              1.000000e+00
close_delta         2.215723e-02
close_return        1.595155e-04
close_log_return    1.000000e+00
ret_mean_5          1.000000e+00
ret_mean_10         1.000000e+00
ret_mean_15         1.000000e+00
ret_mean_20         1.000000e+00
log_volume          1.000000e+00
ema5                1.000000e+00
ema5_slope          1.000000e+00
ema20               1.000000e+00
ema20_slope         1.000000e+00
ema100              1.000000e+00
ema100_slope        1.000000e+00
atr14               1.000000e+00
atr20               1.000000e+00
atr50               1.000000e+00
vol_adj_return      1.000000e+00
close_to_atr        1.000000e+00
adx14               1.210136e-01
plus_di14           9.559819e-02
minus_di14          1.016198e-01
bb_upper            1.000000e+00
bb_lower            1.000000e+00
bb_mavg   