In [1]:
import torch
import os
import shutil
import pandas as pd
import numpy as np
import talib

# --- 제공해주신 코드의 클래스들을 임포트합니다 ---

# 모델 관련
from models.model import (GATree, GATreePop, FEATURE_NUM, FEATURE_COMPARISON_MAP, ALL_FEATURES,
                   COL_NODE_TYPE, NODE_TYPE_UNUSED)

# 변이 연산자 관련
from evolution.Mutation.base import BaseMutation
from evolution.Mutation.chain import ChainMutation
from evolution.Mutation.node_param import NodeParamMutation
from evolution.Mutation.reinitialize_node import ReinitializeNodeMutation
from evolution.Mutation.add_node import AddNodeMutation
from evolution.Mutation.delete_node import DeleteNodeMutation
from evolution.Mutation.add_subtree import AddSubtreeMutation
from evolution.Mutation.delete_subtree import DeleteSubtreeMutation

import data.data_download as data_dd
# import data.feature_generator as data_fe
# import data.candlestick_patterns as data_cp

import data.ta_lib_feature_generator as talib_feat
import data.merge_dataset as data_md

In [2]:
# One-off download of 1-minute BTCUSDT candles, kept commented out so that a normal
# run reads the cached CSV instead of re-downloading.
# The third argument is 365*5 — presumably a look-back in days (~5 years); confirm
# against data_dd.fetch_historical_data.
# NOTE(review): the output path below contains 'GA_Tree/GA_tree/dataset', whereas the
# load cell reads from 'GA_Tree/dataset' — confirm which location is intended.
# df = data_dd.fetch_historical_data('BTCUSDT', '1m', 365*5)
# df.to_csv('/Users/yoon-seunggyu/Documents/GA_Tree/GA_tree/dataset/btc_1m_long_t.csv')

In [4]:
# Load the cached 1-minute BTCUSDT candles from disk.
# (Alternative kept for reference: fetch a fresh sample instead of reading the cache.)
# df = data_dd.fetch_historical_data('BTCUSDT', '1m', 14)

# The CSV location defaults to the original local path but can be overridden through
# the GA_TREE_BTC_CSV environment variable, so the notebook also runs on machines
# where that user-specific directory does not exist.
BTC_1M_CSV_PATH = os.environ.get(
    'GA_TREE_BTC_CSV',
    '/Users/yoon-seunggyu/Documents/GA_Tree/dataset/btc_1m_long_t.csv',
)

# Only these columns are kept and handed to the feature-generation step.
OHLCV_COLUMNS = ['Open', 'High', 'Low', 'Close', 'Volume', 'Close time']

# usecols skips parsing of columns that would be dropped anyway; the explicit
# [OHLCV_COLUMNS] selection fixes the column order (usecols alone keeps file order),
# and .copy() yields an independent frame so the assignment below cannot touch a view.
df = pd.read_csv(BTC_1M_CSV_PATH, usecols=OHLCV_COLUMNS)[OHLCV_COLUMNS].copy()

# Parse the candle close timestamps into datetime64 values.
df['Close time'] = pd.to_datetime(df['Close time'])

In [5]:
# Generate features for every requested timeframe as configured in the YAML file.
# The call returns the combined feature frame and the list of added column names.
feature_timeframes = ['5m', '30m', '1h', '4h', '1d']

final_dataframe, added_cols = data_md.run_feature_generation_from_yaml(
    df=df,
    timestamp_col='Close time',
    target_timeframes=feature_timeframes,
    yaml_config_path='data/feature_config.yaml',
)

'data/feature_config.yaml' 에서 피처 생성 설정을 로드합니다.

YAML 설정에 기반하여 Multi-Timeframe 피처 생성을 시작합니다.
기준 시간 단위가 '5m'으로 설정되었습니다.


  time_deltas = [pd.to_timedelta(tf.replace('m', 'T')) for tf in target_timeframes]
  base_df = source_df.resample(base_timeframe.replace('m', 'T')).agg(agg_rules)
  for timeframe in sorted(list(set(target_timeframes)), key=lambda x: pd.to_timedelta(x.replace('m', 'T'))):
  df_resampled = source_df.resample(timeframe.replace('m', 'T')).agg(agg_rules).dropna()


--- '5m' 시간 단위 피처 계산 시작 ---
  - calculate_price_change_features ({}): 6개 컬럼 생성 완료
  - calculate_ma ({'window': 20}): 1개 컬럼 생성 완료
  - calculate_ma ({'window': 50}): 1개 컬럼 생성 완료
  - calculate_ma ({'window': 200}): 1개 컬럼 생성 완료
  - calculate_ema ({'window': 20}): 1개 컬럼 생성 완료
  - calculate_ema ({'window': 50}): 1개 컬럼 생성 완료
  - calculate_ema ({'window': 200}): 1개 컬럼 생성 완료
  - calculate_dema ({'window': 20}): 1개 컬럼 생성 완료
  - calculate_dema ({'window': 50}): 1개 컬럼 생성 완료
  - calculate_dema ({'window': 200}): 1개 컬럼 생성 완료
  - calculate_tema ({'window': 20}): 1개 컬럼 생성 완료
  - calculate_tema ({'window': 50}): 1개 컬럼 생성 완료
  - calculate_tema ({'window': 200}): 1개 컬럼 생성 완료
  - calculate_trima ({'window': 20}): 1개 컬럼 생성 완료
  - calculate_trima ({'window': 50}): 1개 컬럼 생성 완료
  - calculate_trima ({'window': 200}): 1개 컬럼 생성 완료
  - calculate_vwma ({'window': 20}): 1개 컬럼 생성 완료
  - calculate_vwma ({'window': 50}): 1개 컬럼 생성 완료
  - calculate_vwma ({'window': 200}): 1개 컬럼 생성 완료
  - calculate_adx ({'window': 7}): 3

  df_resampled = source_df.resample(timeframe.replace('m', 'T')).agg(agg_rules).dropna()


  - calculate_trima ({'window': 10}): 1개 컬럼 생성 완료
  - calculate_trima ({'window': 20}): 1개 컬럼 생성 완료
  - calculate_trima ({'window': 50}): 1개 컬럼 생성 완료
  - calculate_vwma ({'window': 10}): 1개 컬럼 생성 완료
  - calculate_vwma ({'window': 20}): 1개 컬럼 생성 완료
  - calculate_vwma ({'window': 50}): 1개 컬럼 생성 완료
  - calculate_adx ({'window': 7}): 3개 컬럼 생성 완료
  - calculate_adx ({'window': 14}): 3개 컬럼 생성 완료
  - calculate_adx ({'window': 28}): 3개 컬럼 생성 완료
  - calculate_macd ({'short_window': 8, 'long_window': 17, 'signal_window': 9}): 3개 컬럼 생성 완료
  - calculate_macd ({'short_window': 12, 'long_window': 26, 'signal_window': 9}): 3개 컬럼 생성 완료
  - calculate_macd ({'short_window': 5, 'long_window': 35, 'signal_window': 5}): 3개 컬럼 생성 완료
  - calculate_ichimoku ({'short_window': 9, 'mid_window': 26, 'long_window': 52}): 5개 컬럼 생성 완료
  - calculate_sar ({'acceleration': 0.02, 'maximum': 0.2}): 1개 컬럼 생성 완료
  - calculate_rsi ({'window': 9}): 1개 컬럼 생성 완료
  - calculate_rsi ({'window': 14}): 1개 컬럼 생성 완료
  - calculate_rsi 

  sorted_timeframes = sorted(features_by_timeframe.keys(), key=lambda x: pd.to_timedelta(x.replace('m', 'T')))


'5m' 피처가 직접 통합되었습니다.
'30m' 피처가 'merge_asof'로 통합되었습니다.
'1h' 피처가 'merge_asof'로 통합되었습니다.
'4h' 피처가 'merge_asof'로 통합되었습니다.
'1d' 피처가 'merge_asof'로 통합되었습니다.

최종 피처 생성 및 통합이 완료되었습니다.


In [7]:
# Display the feature column names returned by run_feature_generation_from_yaml.
added_cols

['%D_14_3_1d',
 '%D_14_3_1h',
 '%D_14_3_30m',
 '%D_14_3_4h',
 '%D_14_3_5m',
 '%D_14_5_1h',
 '%D_21_5_30m',
 '%D_21_5_4h',
 '%D_21_7_1d',
 '%D_21_7_1h',
 '%D_28_7_4h',
 '%D_5_3_5m',
 '%D_8_3_30m',
 '%D_8_3_5m',
 '%D_9_3_1d',
 '%K_14_3_1d',
 '%K_14_3_1h',
 '%K_14_3_30m',
 '%K_14_3_4h',
 '%K_14_3_5m',
 '%K_14_5_1h',
 '%K_21_5_30m',
 '%K_21_5_4h',
 '%K_21_7_1d',
 '%K_21_7_1h',
 '%K_28_7_4h',
 '%K_5_3_5m',
 '%K_8_3_30m',
 '%K_8_3_5m',
 '%K_9_3_1d',
 '3BlackCrows_1d',
 '3BlackCrows_1h',
 '3BlackCrows_30m',
 '3BlackCrows_4h',
 '3BlackCrows_5m',
 '3StarsInSouth_1d',
 '3StarsInSouth_1h',
 '3StarsInSouth_30m',
 '3StarsInSouth_4h',
 '3StarsInSouth_5m',
 '3WhiteSoldiers_1d',
 '3WhiteSoldiers_1h',
 '3WhiteSoldiers_30m',
 '3WhiteSoldiers_4h',
 '3WhiteSoldiers_5m',
 'ADOSC_2_8_1d',
 'ADOSC_2_8_1h',
 'ADOSC_2_8_30m',
 'ADOSC_2_8_4h',
 'ADOSC_2_8_5m',
 'ADOSC_3_10_1d',
 'ADOSC_3_10_1h',
 'ADOSC_3_10_30m',
 'ADOSC_3_10_4h',
 'ADOSC_3_10_5m',
 'ADOSC_5_20_1d',
 'ADOSC_5_20_1h',
 'ADOSC_5_20_30m',
 'ADOSC

In [8]:
# Display the combined feature frame returned by run_feature_generation_from_yaml.
final_dataframe

Unnamed: 0,Close time,Open,High,Low,Close,Volume,close_change_pct_5m,body_size_5m,upper_wick_size_5m,lower_wick_size_5m,...,BullishDojiStar_1d,BearishDojiStar_1d,BullishTasukiGap_1d,BearishTasukiGap_1d,BullishXSideGap3Methods_1d,BearishXSideGap3Methods_1d,BullishSpinningTop_1d,BearishSpinningTop_1d,BullishRise3Methods_1d,BearishFall3Methods_1d
0,2022-03-20 00:00:00,42201.13,42246.28,42172.15,42227.71,103.34117,0.062984,26.58,18.57,28.98,...,False,False,False,False,False,False,False,False,False,False
1,2022-03-20 00:05:00,42227.70,42277.77,42176.00,42214.71,226.16678,-0.030785,12.99,50.07,38.71,...,False,False,False,False,False,False,False,False,False,False
2,2022-03-20 00:10:00,42214.70,42296.26,42159.30,42271.80,198.15169,0.135237,57.10,24.46,55.40,...,False,False,False,False,False,False,False,False,False,False
3,2022-03-20 00:15:00,42271.79,42281.57,42210.00,42248.00,164.42392,-0.056302,23.79,9.78,38.00,...,False,False,False,False,False,False,False,False,False,False
4,2022-03-20 00:20:00,42247.99,42248.06,42179.85,42179.86,63.64370,-0.161286,68.13,0.07,0.01,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
336651,2025-05-31 23:35:00,104660.65,104685.87,104635.30,104658.79,14.59027,-0.001777,1.86,25.22,23.49,...,False,False,False,False,False,False,False,False,False,False
336652,2025-05-31 23:40:00,104658.80,104668.48,104619.04,104649.82,15.24798,-0.008571,8.98,9.68,30.78,...,False,False,False,False,False,False,False,False,False,False
336653,2025-05-31 23:45:00,104649.82,104654.77,104608.51,104654.76,22.17350,0.004721,4.94,0.01,41.31,...,False,False,False,False,False,False,False,False,False,False
336654,2025-05-31 23:50:00,104654.77,104654.77,104615.67,104615.68,7.40238,-0.037342,39.09,0.00,0.01,...,False,False,False,False,False,False,False,False,False,False


In [6]:
# Display the combined feature frame returned by run_feature_generation_from_yaml.
# NOTE(review): this repeats the preceding display cell and carries an earlier
# execution count (In [6] after In [8]) — looks like a leftover from out-of-order
# execution; confirm whether it can be removed.
final_dataframe

Unnamed: 0,Close time,Open,High,Low,Close,Volume,close_change_pct_5m,body_size_5m,upper_wick_size_5m,lower_wick_size_5m,...,BullishDojiStar_1d,BearishDojiStar_1d,BullishTasukiGap_1d,BearishTasukiGap_1d,BullishXSideGap3Methods_1d,BearishXSideGap3Methods_1d,BullishSpinningTop_1d,BearishSpinningTop_1d,BullishRise3Methods_1d,BearishFall3Methods_1d
0,2022-03-20 00:00:00,42201.13,42246.28,42172.15,42227.71,103.34117,0.062984,26.58,18.57,28.98,...,False,False,False,False,False,False,False,False,False,False
1,2022-03-20 00:05:00,42227.70,42277.77,42176.00,42214.71,226.16678,-0.030785,12.99,50.07,38.71,...,False,False,False,False,False,False,False,False,False,False
2,2022-03-20 00:10:00,42214.70,42296.26,42159.30,42271.80,198.15169,0.135237,57.10,24.46,55.40,...,False,False,False,False,False,False,False,False,False,False
3,2022-03-20 00:15:00,42271.79,42281.57,42210.00,42248.00,164.42392,-0.056302,23.79,9.78,38.00,...,False,False,False,False,False,False,False,False,False,False
4,2022-03-20 00:20:00,42247.99,42248.06,42179.85,42179.86,63.64370,-0.161286,68.13,0.07,0.01,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
336651,2025-05-31 23:35:00,104660.65,104685.87,104635.30,104658.79,14.59027,-0.001777,1.86,25.22,23.49,...,False,False,False,False,False,False,False,False,False,False
336652,2025-05-31 23:40:00,104658.80,104668.48,104619.04,104649.82,15.24798,-0.008571,8.98,9.68,30.78,...,False,False,False,False,False,False,False,False,False,False
336653,2025-05-31 23:45:00,104649.82,104654.77,104608.51,104654.76,22.17350,0.004721,4.94,0.01,41.31,...,False,False,False,False,False,False,False,False,False,False
336654,2025-05-31 23:50:00,104654.77,104654.77,104615.67,104615.68,7.40238,-0.037342,39.09,0.00,0.01,...,False,False,False,False,False,False,False,False,False,False
