In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

import os

In [2]:
from pathlib import Path

# User-tunable inputs: together these select which dataset files are used.
# NOTE(review): the sample rows displayed further down show prices around
# 1.07-1.10 -- confirm that SYMBOL matches the files actually on disk.
SYMBOL = "USDJPY"
MINUTES = 15
START_DATE = "20240101"
END_DATE = "20241231"

# Shared file-name stem built from the inputs above.
RESAMPLED_NAME = f"{SYMBOL}-{MINUTES}m-{START_DATE}-{END_DATE}"

# Directory layout (relative path, resolved against the working directory).
BASE_DIR = Path("../data")
RESAMPLED_DIR = BASE_DIR.joinpath("resampled")
PROCESSED_DIR = BASE_DIR.joinpath("processed")

# Concrete pickle locations derived from the stem.
RESAMPLED_FILE_PATH = RESAMPLED_DIR.joinpath(f"{RESAMPLED_NAME}.pkl")
RETURN_FILE_PATH = PROCESSED_DIR.joinpath(f"{RESAMPLED_NAME}_RETURN.pkl")
TA_FILE_PATH = PROCESSED_DIR.joinpath(f"{RESAMPLED_NAME}_TA.pkl")
MERGED_FILE_PATH = PROCESSED_DIR.joinpath(f"{RESAMPLED_NAME}_FEATURE.pkl")

In [3]:
# Load the two previously saved tables (return features and TA features).
# NOTE(review): unpickling can execute arbitrary code -- only read pickle
# files that were produced by a trusted step of this pipeline.
df_ret = pd.read_pickle(RETURN_FILE_PATH)
df_ta = pd.read_pickle(TA_FILE_PATH)

# Report (rows, columns) of each table, one per line.
print(df_ret.shape, df_ta.shape, sep="\n")

(21758, 10)
(24789, 50)


In [4]:
# Preview the first five rows of the returns table.
df_ret.head(n=5)

Unnamed: 0_level_0,open,high,low,close,volume,spread,return,log_return,fd_return,log_fd_return
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-02-15 18:00:00,1.07625,1.07645,1.07606,1.076245,5488560000000.0,2.5e-05,0.0,0.0,0.154042,0.010528
2024-02-15 18:15:00,1.07625,1.07649,1.07611,1.076455,5812690000000.0,2.4e-05,0.00021,0.000195,0.154228,0.010701
2024-02-15 18:30:00,1.07646,1.076555,1.07624,1.076475,4481140000000.0,2.5e-05,2e-05,1.9e-05,0.154186,0.010661
2024-02-15 18:45:00,1.07648,1.07676,1.07634,1.0767,3894920000000.0,2.5e-05,0.000225,0.000209,0.154375,0.010837
2024-02-15 19:00:00,1.076705,1.07724,1.07666,1.076985,4145560000000.0,2e-05,0.000285,0.000265,0.154586,0.011033


In [5]:
# Preview the first five rows of the technical-indicator table.
df_ta.head(n=5)

Unnamed: 0_level_0,open,high,low,close,volume,spread,SMA_5,SMA_15,SMA_30,SMA_60,...,BBL_5_2.0_2.0,BBM_5_2.0_2.0,BBU_5_2.0_2.0,BBB_5_2.0_2.0,BBP_5_2.0_2.0,BBL_14_2.0_2.0,BBM_14_2.0_2.0,BBU_14_2.0_2.0,BBB_14_2.0_2.0,BBP_14_2.0_2.0
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-03 04:15:00,1.096075,1.09621,1.09604,1.09605,2065040000000.0,2.7e-05,1.095897,1.095344,1.094809,1.094993,...,1.095333,1.095897,1.096461,0.102982,0.635568,1.094477,1.095372,1.096266,0.163327,0.879094
2024-01-03 04:30:00,1.096045,1.096185,1.09595,1.09599,1837770000000.0,2.9e-05,1.096026,1.095413,1.09487,1.094985,...,1.095903,1.096026,1.096149,0.022361,0.353109,1.094526,1.095444,1.096363,0.167666,0.797119
2024-01-03 04:45:00,1.095995,1.096255,1.095895,1.09624,1551680000000.0,2.9e-05,1.096089,1.095497,1.09495,1.095001,...,1.095923,1.096089,1.096255,0.030322,0.954325,1.094549,1.095528,1.096506,0.178648,0.864052
2024-01-03 05:00:00,1.09625,1.096285,1.096045,1.096095,1926770000000.0,2.8e-05,1.096089,1.095565,1.095017,1.095026,...,1.095923,1.096089,1.096255,0.030322,0.518053,1.094555,1.095574,1.096593,0.186083,0.755594
2024-01-03 05:15:00,1.0961,1.096245,1.09588,1.095885,1791080000000.0,2.8e-05,1.096052,1.095595,1.095076,1.095037,...,1.095817,1.096052,1.096287,0.04285,0.14442,1.094625,1.095627,1.096628,0.182813,0.628917


In [6]:
# Move the `timestamp` index of each table into an ordinary column so the
# tables can be merged on it.
#
# The guard makes this cell safe to re-run: calling reset_index() a second
# time would add a positional `index` column to both frames, and a merge that
# joins on all shared columns would then also join on that column and pair
# up unrelated rows.
if "timestamp" not in df_ret.columns:
    df_ret = df_ret.reset_index()
if "timestamp" not in df_ta.columns:
    df_ta = df_ta.reset_index()

In [7]:
# Join the return features with the technical-indicator features on the bar
# timestamp only.
#
# Without an explicit `on=`, pandas joins on every column the two frames have
# in common (timestamp plus the open/high/low/close/volume/spread floats), so
# any tiny float mismatch between the two files would silently drop rows.
overlap_cols = [
    col for col in df_ta.columns
    if col in df_ret.columns and col != "timestamp"
]
df_merge = pd.merge(
    df_ret,
    # Keep a single copy of the shared columns (the one from df_ret), so the
    # result has the same column layout as before, without _x/_y suffixes.
    df_ta.drop(columns=overlap_cols),
    on="timestamp",
    how="inner",            # only bars present in both tables
    validate="one_to_one",  # fail loudly if a timestamp is duplicated
)
df_merge.head()

Unnamed: 0,timestamp,open,high,low,close,volume,spread,return,log_return,fd_return,...,BBL_5_2.0_2.0,BBM_5_2.0_2.0,BBU_5_2.0_2.0,BBB_5_2.0_2.0,BBP_5_2.0_2.0,BBL_14_2.0_2.0,BBM_14_2.0_2.0,BBU_14_2.0_2.0,BBB_14_2.0_2.0,BBP_14_2.0_2.0
0,2024-02-15 18:00:00,1.07625,1.07645,1.07606,1.076245,5488560000000.0,2.5e-05,0.0,0.0,0.154042,...,1.075866,1.076116,1.076366,0.046462,0.758008,1.074885,1.076427,1.07797,0.286603,0.44096
1,2024-02-15 18:15:00,1.07625,1.07649,1.07611,1.076455,5812690000000.0,2.4e-05,0.00021,0.000195,0.154228,...,1.075844,1.076197,1.07655,0.065626,0.8653,1.074947,1.076345,1.077742,0.259652,0.539487
2,2024-02-15 18:30:00,1.07646,1.076555,1.07624,1.076475,4481140000000.0,2.5e-05,2e-05,1.9e-05,0.154186,...,1.076039,1.076309,1.076579,0.050115,0.807754,1.075268,1.076221,1.077174,0.177042,0.63327
3,2024-02-15 18:45:00,1.07648,1.07676,1.07634,1.0767,3894920000000.0,2.5e-05,0.000225,0.000209,0.154375,...,1.076085,1.076424,1.076763,0.063023,0.906841,1.075544,1.076153,1.076763,0.113251,0.948645
4,2024-02-15 19:00:00,1.076705,1.07724,1.07666,1.076985,4145560000000.0,2e-05,0.000285,0.000265,0.154586,...,1.076068,1.076572,1.077076,0.093551,0.910071,1.075464,1.076183,1.076902,0.133621,1.057567


In [8]:
# Restore `timestamp` as the index of the merged table.
df_merge = df_merge.set_index('timestamp')

In [9]:
# Persist the merged feature table to its configured pickle path.
df_merge.to_pickle(path=MERGED_FILE_PATH)