In [1]:
import torch
import os
import shutil
import pandas as pd
import numpy as np
import talib

# --- 제공해주신 코드의 클래스들을 임포트합니다 ---

# 모델 관련
from models.model import (GATree, GATreePop, FEATURE_NUM, FEATURE_COMPARISON_MAP, ALL_FEATURES,
                   COL_NODE_TYPE, NODE_TYPE_UNUSED)

# 변이 연산자 관련
from evolution.Mutation.base import BaseMutation
from evolution.Mutation.chain import ChainMutation
from evolution.Mutation.node_param import NodeParamMutation
from evolution.Mutation.reinitialize_node import ReinitializeNodeMutation
from evolution.Mutation.add_node import AddNodeMutation
from evolution.Mutation.delete_node import DeleteNodeMutation
from evolution.Mutation.add_subtree import AddSubtreeMutation
from evolution.Mutation.delete_subtree import DeleteSubtreeMutation

# import data.data_download as data_dd
# import data.feature_generator as data_fe
# import data.candlestick_patterns as data_cp

import data.ta_lib_feature_generator as talib_feat

In [2]:
# df = data_dd.fetch_historical_data('BTCUSDT', '1m', 14)

df = pd.read_csv('dataset/btc_1m_t.csv')
df['Close time'] = pd.to_datetime(df['Close time'])
df = df[['Open', 'High', 'Low', 'Close', 'Volume', 'Close time']]

In [None]:
# # ==============================================================================
# # Feature Generation
# # ==============================================================================
# print("Generating technical analysis features...")

# # 1. 추세 (Trend) 지표
# print("- Calculating Trend Indicators...")
# df, price_col = talib_feat.calculate_price_change_features(df)
# df, ma_col = talib_feat.calculate_ma(df, window=14)
# df, ema_col = talib_feat.calculate_ema(df, window=24)
# df, vwma_col = talib_feat.calculate_vwma(df, window=5) # TA-Lib 미지원
# df, macd_cols = talib_feat.calculate_macd(df, short_window=12, long_window=26, signal_window=9)
# df, adx_cols = talib_feat.calculate_adx(df, window=14)
# df, ichimoku_cols = talib_feat.calculate_ichimoku(df) # TA-Lib 미지원
# # --- 추가된 추세 지표 ---
# df, _ = talib_feat.calculate_dema(df, window=30)
# df, _ = talib_feat.calculate_tema(df, window=30)
# df, _ = talib_feat.calculate_sar(df, acceleration=0.02, maximum=0.2)
# df, _ = talib_feat.calculate_trima(df, window=30)

# # 2. 모멘텀 (Momentum) 지표
# print("- Calculating Momentum Indicators...")
# df, rsi_col = talib_feat.calculate_rsi(df, window=14)
# df, stoch_cols = talib_feat.calculate_stochastic_oscillator(df, k_window=14, d_window=3)
# df, williams_r_col = talib_feat.calculate_williams_r(df, window=14)
# df, cci_col = talib_feat.calculate_cci(df, window=20)
# # --- 추가된 모멘텀 지표 ---
# df, _ = talib_feat.calculate_stochrsi(df, window=14, k_window=5, d_window=3)
# df, _ = talib_feat.calculate_ppo(df, fast_window=12, slow_window=26)
# df, _ = talib_feat.calculate_roc(df, window=10)
# df, _ = talib_feat.calculate_ultosc(df, window1=7, window2=14, window3=28)
# df, _ = talib_feat.calculate_mom(df, window=10)


# # 3. 변동성 (Volatility) 지표
# print("- Calculating Volatility Indicators...")
# df, bb_cols = talib_feat.calculate_bollinger_bands(df, window=20, num_std=2)
# df, atr_col = talib_feat.calculate_atr(df, window=14)
# # --- 추가된 변동성 지표 ---
# df, _ = talib_feat.calculate_natr(df, window=14)


# # 4. 거래량 (Volume) 지표
# print("- Calculating Volume Indicators...")
# df, obv_col = talib_feat.calculate_obv(df)
# df, cmf_col = talib_feat.calculate_cmf(df, window=20) # TA-Lib 미지원
# # --- 추가된 거래량 지표 ---
# df, _ = talib_feat.calculate_ad(df)
# df, _ = talib_feat.calculate_adosc(df, fast_window=3, slow_window=10)


# # 5. 주기 (Cycle) 지표
# print("- Calculating Cycle Indicators...")
# # --- 추가된 주기 지표 ---
# df, _ = talib_feat.calculate_ht_dcperiod(df)
# df, _ = talib_feat.calculate_ht_trendmode(df)
# df, _ = talib_feat.calculate_ht_sine(df)


# # 6. 지지 (Support) 및 저항 (Resistance) 지표
# print("- Calculating Support & Resistance Indicators...")
# df, sr_cols = talib_feat.calculate_support_resistance(df, window=14)


# # 7. 캔들스틱 패턴 (함수 내부에 신규 패턴이 이미 추가됨)
# print("- Calculating Candlestick Patterns...")
# df, candle_cols = talib_feat.calculate_all_candlestick_patterns(df)


# # ==============================================================================
# # 최종 데이터 처리
# # ==============================================================================
# print("\nFinalizing data...")
# # 결측치 처리 (TA-Lib 계산 시 초반 데이터는 NaN이 됨)
# initial_rows = len(df)
# df.dropna(inplace=True)
# df.reset_index(drop=True, inplace=True)
# final_rows = len(df)

# print(f"Feature generation complete. Dropped {initial_rows - final_rows} rows due to NaN values.")
# print(f"Final DataFrame shape: {df.shape}")

In [9]:
import pandas as pd
import numpy as np
# 제공된 피처 생성기 코드를 'talib_feat.py'로 저장했다고 가정하고 임포트합니다.
# 실제 환경에서는 파일명과 경로에 맞게 수정해야 합니다.
# import ta_lib_feature_generator as talib_feat

def generate_multi_timeframe_features(
    df: pd.DataFrame,
    timestamp_col: str,
    target_timeframes: list,
    feature_params: dict
) -> (pd.DataFrame, list):
    """
    서로 다른 시간 단위의 기술적 분석 지표를 생성하고 기준 데이터프레임에 통합합니다.
    (이전 답변과 동일한 함수 코드)
    """
    # --- 0. 입력 유효성 검사 및 초기 설정 ---
    if not target_timeframes:
        raise ValueError("`target_timeframes` 리스트는 비어 있을 수 없습니다.")
        
    if timestamp_col not in df.columns:
        raise ValueError(f"'{timestamp_col}' 컬럼이 데이터프레임에 존재하지 않습니다.")

    source_df = df.copy()
    
    source_df[timestamp_col] = pd.to_datetime(source_df[timestamp_col])
    source_df.set_index(timestamp_col, inplace=True)
    
    all_new_columns = []

    # --- 1. 기준 시간 단위 결정 및 기준 데이터프레임 생성 ---
    time_deltas = [pd.to_timedelta(tf.replace('m', 'T')) for tf in target_timeframes]
    base_timeframe = target_timeframes[np.argmin(time_deltas)]
    
    print(f"기준 시간 단위가 '{base_timeframe}'으로 설정되었습니다.")

    agg_rules = {
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last',
        'Volume': 'sum'
    }
    base_df = source_df.resample(base_timeframe.replace('m', 'T')).agg(agg_rules)
    base_df.dropna(inplace=True)

    # --- 2. 시간 단위별 피처 계산 및 통합 ---
    features_by_timeframe = {}

    for timeframe in sorted(list(set(target_timeframes)), key=lambda x: pd.to_timedelta(x.replace('m', 'T'))):
        if timeframe not in feature_params:
            print(f"'{timeframe}'에 대한 피처 설정이 없어 건너뜁니다.")
            continue
            
        print(f"--- '{timeframe}' 시간 단위 피처 계산 시작 ---")

        df_resampled = source_df.resample(timeframe.replace('m', 'T')).agg(agg_rules).dropna()
        
        for func_name, params_list in feature_params[timeframe].items():
            try:
                feature_function = getattr(talib_feat, func_name)
            except AttributeError:
                print(f"경고: '{func_name}' 함수를 찾을 수 없습니다. 건너뜁니다.")
                continue

            for params in params_list:
                df_with_feature, new_cols = feature_function(df_resampled.copy(), **params)
                
                rename_dict = {col: f"{col}_{timeframe}" for col in new_cols}
                df_with_feature.rename(columns=rename_dict, inplace=True)
                
                renamed_cols = list(rename_dict.values())
                
                if timeframe not in features_by_timeframe:
                    features_by_timeframe[timeframe] = df_with_feature[renamed_cols]
                else:
                    features_by_timeframe[timeframe] = features_by_timeframe[timeframe].join(df_with_feature[renamed_cols])

                all_new_columns.extend(renamed_cols)
                print(f"  - {func_name} ({params}): {len(renamed_cols)}개 컬럼 생성 완료")

    # --- 3. 피처 병합 ---
    print("\n--- 모든 피처를 기준 데이터프레임에 병합합니다 ---")
    final_df = base_df.copy()

    sorted_timeframes = sorted(features_by_timeframe.keys(), key=lambda x: pd.to_timedelta(x.replace('m', 'T')))

    for timeframe in sorted_timeframes:
        feature_df = features_by_timeframe[timeframe]
        if timeframe == base_timeframe:
            final_df = final_df.join(feature_df)
            print(f"'{timeframe}' 피처가 직접 통합되었습니다.")
        else:
            final_df = pd.merge_asof(
                left=final_df,
                right=feature_df,
                left_index=True,
                right_index=True,
                direction='backward'
            )
            print(f"'{timeframe}' 피처가 'merge_asof'로 통합되었습니다.")

    final_df.dropna(inplace=True)
    final_df.reset_index(inplace=True)

    print("\n최종 피처 생성 및 통합이 완료되었습니다.")
    return final_df, sorted(list(set(all_new_columns)))


# ==============================================================================
#                      복잡하고 풍부한 함수 실행 예시
# ==============================================================================
if __name__ == '__main__':
    # 1. 가상 데이터 생성 (1분봉, 15일치 데이터)
    print("1. 가상 1분봉 데이터 생성...")
    time_index = pd.to_datetime(pd.date_range(start='2024-07-15 00:00', periods=15*24*60*200, freq='1T'))
    data_size = len(time_index)
    data = {
        'Timestamp': time_index,
        'Open': np.random.uniform(-0.5, 0.5, data_size).cumsum() + 2000,
        'Close': np.random.uniform(-0.5, 0.5, data_size).cumsum() + 2000,
        'Volume': np.random.uniform(10, 100, data_size)
    }
    data['High'] = np.maximum(data['Open'], data['Close']) + np.random.uniform(0, 2, data_size)
    data['Low'] = np.minimum(data['Open'], data['Close']) - np.random.uniform(0, 2, data_size)
    
    source_df = pd.DataFrame(data)
    print(f"생성된 원본 데이터 Shape: {source_df.shape}")

    # 2. 복잡한 피처 생성 규칙 정의
    feature_generation_params = {
        # -- 단기(5분봉) 지표: 빠른 반응성 지표들 --
        '5m': {
            'calculate_ema': [{'window': 12}],
            'calculate_rsi': [{'window': 14}],
            'calculate_stochastic_oscillator': [{'k_window': 14, 'd_window': 3}],
        },
        # -- 중단기(15분봉) 지표 --
        '15m': {
            'calculate_ma': [{'window': 20}, {'window': 50}], # 두 개의 다른 SMA 계산
            'calculate_cci': [{'window': 20}],
        },
        # -- 중기(1시간봉) 지표: 추세 및 변동성 중심 --
        '1h': {
            'calculate_macd': [{'short_window': 12, 'long_window': 26, 'signal_window': 9}],
            'calculate_bollinger_bands': [{'window': 20, 'num_std': 2}],
            'calculate_adx': [{'window': 14}],
        },
        # -- 장기(4시간봉) 지표: 긴 호흡의 추세 및 거래량 --
        '4h': {
            'calculate_ma': [{'window': 120}], # 긴 기간의 SMA
            'calculate_atr': [{'window': 14}],
            'calculate_obv': [{}], # 인자가 없는 함수는 빈 dict 전달
        },
        # -- 초장기(일봉) 지표: 지지/저항 및 종합 패턴 --
        '1d': {
            'calculate_support_resistance': [{'window': 14}],
            'calculate_all_candlestick_patterns': [{}], # 모든 캔들 패턴 생성
        }
    }

    # 3. Multi-Timeframe 피처 생성 함수 호출
    print("\n2. Multi-Timeframe 피처 생성 시작...")
    # target_timeframes_list = ['5m', '15m', '1h', '4h', '1d']
    target_timeframes_list = ['5m', '15m', '1h']
    final_dataframe, added_cols = generate_multi_timeframe_features(
        df=df,
        timestamp_col='Close time',
        target_timeframes=target_timeframes_list,
        feature_params=feature_generation_params
    )
    
    # 4. 결과 확인
    print("\n3. 최종 결과 확인...")
    print(f"최종 데이터프레임 Shape: {final_dataframe.shape}")
    print(f"총 {len(added_cols)}개의 피처가 추가되었습니다.")
    # 추가된 컬럼 중 일부만 출력
    print("추가된 컬럼 목록 (일부):", added_cols[:5], "...", added_cols[-5:])
    
    # 각 시간 단위별 대표 피처들을 선정하여 병합 결과 확인
    print("\n각 시간 단위별 대표 피처 병합 결과 샘플 (마지막 15개 행):")
    display_cols = [
        'Timestamp',
        'Close',
        'RSI_14_5m',                   # 5분봉 대표
        'SMA_50_15m',                  # 15분봉 대표
        'MACD_12_26_9_1h',             # 1시간봉 대표
        # 'ATR_14_4h',                   # 4시간봉 대표
        # 'Support_MA_14_1d',            # 일봉 대표
        # 'Hammers_1d'                   # 일봉 캔들 패턴 대표
    ]
    # display_cols에 있는 컬럼만 필터링
    display_cols_exist = [col for col in display_cols if col in final_dataframe.columns]
    
    # 소수점 2자리까지만 표시하도록 설정
    pd.set_option('display.float_format', lambda x: '%.2f' % x)
    
    print(final_dataframe[display_cols_exist].tail(15).to_string())

1. 가상 1분봉 데이터 생성...


  time_index = pd.to_datetime(pd.date_range(start='2024-07-15 00:00', periods=15*24*60*200, freq='1T'))


생성된 원본 데이터 Shape: (4320000, 6)

2. Multi-Timeframe 피처 생성 시작...
기준 시간 단위가 '5m'으로 설정되었습니다.
--- '5m' 시간 단위 피처 계산 시작 ---
  - calculate_ema ({'window': 12}): 1개 컬럼 생성 완료
  - calculate_rsi ({'window': 14}): 1개 컬럼 생성 완료
  - calculate_stochastic_oscillator ({'k_window': 14, 'd_window': 3}): 2개 컬럼 생성 완료
--- '15m' 시간 단위 피처 계산 시작 ---
  - calculate_ma ({'window': 20}): 1개 컬럼 생성 완료
  - calculate_ma ({'window': 50}): 1개 컬럼 생성 완료
  - calculate_cci ({'window': 20}): 1개 컬럼 생성 완료
--- '1h' 시간 단위 피처 계산 시작 ---
  - calculate_macd ({'short_window': 12, 'long_window': 26, 'signal_window': 9}): 3개 컬럼 생성 완료
  - calculate_bollinger_bands ({'window': 20, 'num_std': 2}): 4개 컬럼 생성 완료
  - calculate_adx ({'window': 14}): 3개 컬럼 생성 완료

--- 모든 피처를 기준 데이터프레임에 병합합니다 ---
'5m' 피처가 직접 통합되었습니다.
'15m' 피처가 'merge_asof'로 통합되었습니다.
'1h' 피처가 'merge_asof'로 통합되었습니다.

최종 피처 생성 및 통합이 완료되었습니다.

3. 최종 결과 확인...
최종 데이터프레임 Shape: (3685, 23)
총 17개의 피처가 추가되었습니다.
추가된 컬럼 목록 (일부): ['%D_14_3_5m', '%K_14_3_5m', 'ADX_14_1h', 'BB_Lower_20_2_1h', 'BB

  time_deltas = [pd.to_timedelta(tf.replace('m', 'T')) for tf in target_timeframes]
  base_df = source_df.resample(base_timeframe.replace('m', 'T')).agg(agg_rules)
  for timeframe in sorted(list(set(target_timeframes)), key=lambda x: pd.to_timedelta(x.replace('m', 'T'))):
  df_resampled = source_df.resample(timeframe.replace('m', 'T')).agg(agg_rules).dropna()
  df_resampled = source_df.resample(timeframe.replace('m', 'T')).agg(agg_rules).dropna()
  sorted_timeframes = sorted(features_by_timeframe.keys(), key=lambda x: pd.to_timedelta(x.replace('m', 'T')))


In [10]:
added_cols

['%D_14_3_5m',
 '%K_14_3_5m',
 'ADX_14_1h',
 'BB_Lower_20_2_1h',
 'BB_Mid_20_2_1h',
 'BB_Upper_20_2_1h',
 'BB_Width_20_2_1h',
 'CCI_20_15m',
 'DI_minus_14_1h',
 'DI_plus_14_1h',
 'EMA_12_5m',
 'MACD_12_26_1h',
 'MACD_Hist_12_26_9_1h',
 'MACD_Signal_12_26_9_1h',
 'RSI_14_5m',
 'SMA_20_15m',
 'SMA_50_15m']

In [11]:
final_dataframe

Unnamed: 0,Close time,Open,High,Low,Close,Volume,EMA_12_5m,RSI_14_5m,%K_14_3_5m,%D_14_3_5m,...,MACD_12_26_1h,MACD_Signal_12_26_9_1h,MACD_Hist_12_26_9_1h,BB_Upper_20_2_1h,BB_Mid_20_2_1h,BB_Lower_20_2_1h,BB_Width_20_2_1h,ADX_14_1h,DI_plus_14_1h,DI_minus_14_1h
0,2025-07-11 09:00:00,117964.75,117986.78,117909.26,117926.29,59.61,117924.39,54.26,72.98,75.52,...,1923.46,1828.03,95.42,119833.06,115462.37,111091.69,0.08,63.07,44.81,6.07
1,2025-07-11 09:05:00,117926.30,118000.00,117883.74,117987.98,68.20,117934.17,57.05,72.28,73.71,...,1923.46,1828.03,95.42,119833.06,115462.37,111091.69,0.08,63.07,44.81,6.07
2,2025-07-11 09:10:00,117987.99,118033.16,117987.98,118033.16,111.12,117949.40,59.02,74.51,73.26,...,1923.46,1828.03,95.42,119833.06,115462.37,111091.69,0.08,63.07,44.81,6.07
3,2025-07-11 09:15:00,118033.16,118226.54,118033.15,118134.46,97.98,117977.87,63.11,79.88,75.56,...,1923.46,1828.03,95.42,119833.06,115462.37,111091.69,0.08,63.07,44.81,6.07
4,2025-07-11 09:20:00,118134.45,118232.32,118134.45,118173.46,65.55,118007.96,64.57,83.73,79.37,...,1923.46,1828.03,95.42,119833.06,115462.37,111091.69,0.08,63.07,44.81,6.07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3680,2025-07-24 03:40:00,118965.37,119039.64,118933.09,118933.10,40.07,119022.92,47.39,36.21,64.20,...,57.58,-21.00,78.58,119208.87,118356.81,117504.74,0.01,20.24,17.11,23.14
3681,2025-07-24 03:45:00,118933.09,118933.09,118680.98,118680.98,68.85,118970.31,37.35,12.59,38.32,...,57.58,-21.00,78.58,119208.87,118356.81,117504.74,0.01,20.24,17.11,23.14
3682,2025-07-24 03:50:00,118680.98,118709.97,118622.70,118622.70,30.50,118916.83,35.47,3.39,17.40,...,57.58,-21.00,78.58,119208.87,118356.81,117504.74,0.01,20.24,17.11,23.14
3683,2025-07-24 03:55:00,118622.71,118622.71,118466.71,118488.92,133.50,118851.00,31.56,0.92,5.63,...,57.58,-21.00,78.58,119208.87,118356.81,117504.74,0.01,20.24,17.11,23.14


In [32]:
df

Unnamed: 0,Open,High,Low,Close,Volume,Close time,close_change_pct,body_size,upper_wick_size,lower_wick_size,...,BullishDojiStar,BearishDojiStar,BullishTasukiGap,BearishTasukiGap,BullishXSideGap3Methods,BearishXSideGap3Methods,BullishSpinningTop,BearishSpinningTop,BullishRise3Methods,BearishFall3Methods
0,111102.16,111102.16,111084.25,111084.36,3.31,2025-07-10 01:27:59.999,-0.02,17.80,0.00,0.11,...,False,False,False,False,False,False,False,False,False,False
1,111084.36,111084.37,111084.26,111084.26,1.90,2025-07-10 01:28:59.999,-0.00,0.10,0.01,0.00,...,False,False,False,False,False,False,False,False,False,False
2,111084.26,111096.05,111084.26,111096.04,3.83,2025-07-10 01:29:59.999,0.01,11.78,0.01,0.00,...,False,False,False,False,False,False,False,False,False,False
3,111096.04,111131.12,111096.04,111111.63,6.43,2025-07-10 01:30:59.999,0.01,15.59,19.49,0.00,...,False,False,False,False,False,False,False,False,False,False
4,111111.63,111122.00,111106.29,111122.00,3.38,2025-07-10 01:31:59.999,0.01,10.37,0.00,5.34,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20285,119128.04,119152.37,119088.00,119152.37,25.59,2025-07-24 03:32:59.999,0.02,24.33,0.00,40.04,...,False,False,False,False,False,False,False,False,False,False
20286,119152.37,119207.26,119152.36,119167.33,19.08,2025-07-24 03:33:59.999,0.01,14.96,39.93,0.01,...,False,False,False,False,False,False,False,False,False,False
20287,119167.33,119167.33,119124.51,119137.87,13.81,2025-07-24 03:34:59.999,-0.02,29.46,0.00,13.36,...,False,False,False,False,False,False,False,False,False,False
20288,119137.88,119137.88,119064.23,119064.23,13.58,2025-07-24 03:35:59.999,-0.06,73.65,0.00,0.00,...,False,False,False,False,False,False,False,False,False,False
