In [1]:
# ============================================================================
# 기본 라이브러리
# ============================================================================
import os
import warnings
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from collections import Counter

# ============================================================================
# 데이터 전처리 및 Feature Engineering
# ============================================================================
import pandas_ta as ta
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.feature_selection import (
    SelectKBest, RFE, 
    mutual_info_classif, mutual_info_regression
)

# ============================================================================
# 시계열 분석
# ============================================================================
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.vector_ar.var_model import VAR

# ============================================================================
# Scikit-learn ML 모델
# ============================================================================
# 선형 모델
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

# 트리 기반 모델
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.svm import SVC, SVR

# 앙상블 모델
from sklearn.ensemble import (
    RandomForestClassifier, RandomForestRegressor,
    AdaBoostClassifier,
    ExtraTreesClassifier, ExtraTreesRegressor,
    BaggingClassifier, BaggingRegressor,
    GradientBoostingClassifier, GradientBoostingRegressor,
    StackingClassifier, StackingRegressor,
    VotingClassifier, VotingRegressor
)

# ============================================================================
# Gradient Boosting 라이브러리
# ============================================================================
from lightgbm import LGBMClassifier, LGBMRegressor, early_stopping
from xgboost import XGBClassifier, XGBRegressor
from catboost import CatBoostClassifier, CatBoostRegressor

# ============================================================================
# TabNet 
# ============================================================================
try:
    from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
    TABNET_AVAILABLE = True
except ImportError:
    TABNET_AVAILABLE = False
    print("Warning: pytorch-tabnet not installed. TabNet models will be skipped.")

# ============================================================================
# PyTorch (Optional)
# ============================================================================
try:
    import torch
    import torch.nn as nn
    PYTORCH_AVAILABLE = True
except ImportError:
    PYTORCH_AVAILABLE = False
    print("Warning: PyTorch not installed. Some models may not work.")

# ============================================================================
# Scikit-learn 평가 지표
# ============================================================================
# 분류 지표
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
)

# 회귀 지표
from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, r2_score, 
    mean_absolute_percentage_error
)

# ============================================================================
# TensorFlow/Keras 딥러닝
# ============================================================================
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
    # 기본 레이어
    Input, Dense, Flatten, Dropout, 
    
    # RNN 레이어
    LSTM, GRU, SimpleRNN, Bidirectional,
    
    # CNN 레이어
    Conv1D, MaxPooling1D, AveragePooling1D,
    GlobalAveragePooling1D, GlobalMaxPooling1D,
    
    # 정규화 레이어
    BatchNormalization, LayerNormalization,
    
    # Attention 레이어
    Attention, MultiHeadAttention,
    
    # 유틸리티 레이어
    Concatenate, Add, Multiply, Lambda,
    Reshape, Permute, RepeatVector, TimeDistributed,
    Activation
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam


# Silence all warnings for cleaner notebook output (one call is enough; the
# original issued the identical call twice).
# NOTE(review): this also hides pandas / scikit-learn deprecation warnings.
warnings.filterwarnings('ignore')



# ============================================================================ 
# 1. 날짜 파싱 및 CSV 로드 함수
# ============================================================================ 
def standardize_date_column(df, file_name):
    """Detect the date column and convert it to tz-naive midnight timestamps.

    The first column whose name contains "date" (case-insensitive) is renamed
    to ``'date'``, parsed to datetimes, converted to UTC, stripped of its
    timezone and truncated to midnight.  Rows whose date cannot be parsed are
    dropped.  The caller's frame is never modified.

    Args:
        df: Input frame, typically fresh from ``pd.read_csv``.
        file_name: File name or full path of the source CSV.  Only its base
            name is inspected: ``eth_onchain.csv`` is parsed with the
            two-digit-year format ``%y-%m-%d``.

    Returns:
        A new DataFrame with a normalised ``'date'`` column, or ``df`` itself
        (unchanged) when no date-like column exists.
    """
    date_cols = [col for col in df.columns if 'date' in str(col).lower()]
    if not date_cols:
        print("[Warning] 날짜 컬럼을 찾을 수 없습니다.")
        return df
    date_col = date_cols[0]

    # Work on a copy so the caller's DataFrame is not mutated.
    if date_col != 'date':
        df = df.rename(columns={date_col: 'date'})
    else:
        df = df.copy()

    raw = df['date']
    # Callers pass a joined path (e.g. './macro_data/eth_onchain.csv'), so the
    # comparison has to be made on the base name, not on the whole string.
    if os.path.basename(str(file_name)) == 'eth_onchain.csv':
        parsed = pd.to_datetime(raw, format='%y-%m-%d', errors='coerce', utc=True)
        # Values that do not follow the two-digit-year layout get a second
        # chance with generic parsing instead of being dropped outright.
        unparsed = parsed.isna() & raw.notna()
        if unparsed.any():
            fallback = pd.to_datetime(raw[unparsed], errors='coerce', utc=True)
            parsed = parsed.fillna(fallback)
    else:
        parsed = pd.to_datetime(raw, errors='coerce', utc=True)

    # Strip the timezone BEFORE truncating to midnight; doing it the other way
    # round leaves tz-aware inputs at a non-midnight UTC time that can never
    # match the daily merge key.
    df['date'] = parsed.dt.tz_convert(None).dt.normalize()
    df = df.dropna(subset=['date'])
    return df

def load_and_standardize_data(filepath):
    """Read the CSV at *filepath* and return it with a standardised date column.

    The path is forwarded to ``standardize_date_column`` as its ``file_name``
    argument.
    """
    return standardize_date_column(pd.read_csv(filepath), filepath)
# ============================================================================ 
# 2. 데이터 로딩
# ============================================================================ 
# Directory that holds every raw CSV used by this notebook.
DATA_DIR = './macro_data'


def load_from_macro_data(filename):
    """Load *filename* from ``DATA_DIR`` with its date column standardised."""
    csv_path = os.path.join(DATA_DIR, filename)
    return load_and_standardize_data(csv_path)

# Load each raw dataset from DATA_DIR; every frame comes back with its date
# column standardised by load_and_standardize_data.
macro_df = load_from_macro_data('macro_crypto_data.csv')
news_df = load_from_macro_data('news_data.csv')
eth_onchain_df = load_from_macro_data('eth_onchain.csv')
fear_greed_df = load_from_macro_data('fear_greed.csv')
usdt_eth_mcap_df = load_from_macro_data('usdt_eth_mcap.csv')
aave_tvl_df = load_from_macro_data('aave_eth_tvl.csv')
lido_tvl_df = load_from_macro_data('lido_eth_tvl.csv')
makerdao_tvl_df = load_from_macro_data('makerdao_eth_tvl.csv')
eth_chain_tvl_df = load_from_macro_data('eth_chain_tvl.csv')
eth_funding_df = load_from_macro_data('eth_funding_rate.csv')
sp500_df = load_from_macro_data('SP500.csv')
vix_df = load_from_macro_data('VIX.csv')
gold_df = load_from_macro_data('GOLD.csv')
dxy_df = load_from_macro_data('DXY.csv')

# ============================================================================ 
# 3. 기준 날짜 설정 (Lido TVL 시작일 기준)
# ============================================================================ 
# Modelling window: training begins on train_start_date; the 200 days before
# it are kept as warm-up history for lookback-based features.
train_start_date = pd.to_datetime('2020-12-19')
lookback_start_date = train_start_date - pd.Timedelta(days=200)
end_date = pd.to_datetime('2025-10-06')

# ============================================================================ 
# 4. 뉴스 감성 피처 생성 
# ============================================================================ 
def create_sentiment_features(news_df):
    """Aggregate per-article sentiment labels into one feature row per date.

    ``news_df`` must provide a ``date`` column and a numeric ``label`` column;
    the code counts ``label == 1`` as positive and ``label == -1`` as
    negative.  All rolling / differencing features are computed over the rows
    of the aggregated table (one row per date present in ``news_df``), and
    every remaining NaN is replaced with 0.

    Source: "Cryptocurrency Price Prediction Model Based on Sentiment Analysis" (2024)

    Returns:
        DataFrame with ``date`` followed by 25 sentiment feature columns.
    """
    # ----- per-day summary statistics of the label column -----
    sentiment_agg = (
        news_df.groupby('date')
        .agg(
            sentiment_mean=('label', 'mean'),
            sentiment_std=('label', 'std'),
            news_count=('label', 'count'),
            positive_ratio=('label', lambda s: s.eq(1).sum() / len(s)),
            negative_ratio=('label', lambda s: s.eq(-1).sum() / len(s)),
            extreme_positive_count=('label', lambda s: s.eq(1).sum()),
            extreme_negative_count=('label', lambda s: s.eq(-1).sum()),
            sentiment_sum=('label', 'sum'),
        )
        .reset_index()
        .fillna(0)
    )

    pos_share = sentiment_agg['positive_ratio']
    neg_share = sentiment_agg['negative_ratio']
    daily_mean = sentiment_agg['sentiment_mean']
    daily_count = sentiment_agg['news_count']

    # ----- same-day derived ratios -----
    sentiment_agg['sentiment_polarity'] = pos_share - neg_share
    sentiment_agg['sentiment_intensity'] = pos_share + neg_share
    sentiment_agg['sentiment_disagreement'] = pos_share * neg_share
    # The small epsilon keeps the ratio finite on days without negative news.
    sentiment_agg['bull_bear_ratio'] = pos_share / (neg_share + 1e-10)
    sentiment_agg['weighted_sentiment'] = daily_mean * np.log1p(daily_count)
    extreme_total = (
        sentiment_agg['extreme_positive_count'] + sentiment_agg['extreme_negative_count']
    )
    sentiment_agg['extremity_index'] = extreme_total / (daily_count + 1e-10)

    # ----- rolling mean / standard deviation of the daily mean sentiment -----
    for span in (3, 7, 14):
        mean_window = daily_mean.rolling(window=span, min_periods=1)
        sentiment_agg[f'sentiment_ma{span}'] = mean_window.mean()
        sentiment_agg[f'sentiment_volatility_{span}'] = mean_window.std()

    # ----- first / second differences and news-volume dynamics -----
    trend = daily_mean.diff()
    sentiment_agg['sentiment_trend'] = trend
    sentiment_agg['sentiment_acceleration'] = trend.diff()
    sentiment_agg['news_volume_change'] = daily_count.pct_change()
    for span in (7, 14):
        sentiment_agg[f'news_volume_ma{span}'] = (
            daily_count.rolling(window=span, min_periods=1).mean()
        )

    feature_count = sentiment_agg.shape[1] - 1
    print(f"✓ 감성 지표 생성 완료: {feature_count}개 (date 제외)")

    # Leading NaNs from diff / pct_change / std are reported as 0.
    return sentiment_agg.fillna(0)


# Build the per-date sentiment feature table from the raw news rows.
sentiment_features = create_sentiment_features(news_df)



# ============================================================================ 
# 5. 데이터 병합
# ============================================================================ 
def add_prefix(df, prefix):
    """Prefix every column except 'date' with ``prefix + '_'``.

    The frame is relabelled in place and the same object is returned.
    """
    relabelled = []
    for name in df.columns:
        if name == 'date':
            relabelled.append(name)
        else:
            relabelled.append(prefix + '_' + name)
    df.columns = relabelled
    return df

# Tag every auxiliary dataset's columns with a short source prefix so the
# column names stay distinguishable after the merge on 'date'.
(
    eth_onchain_df, fear_greed_df, usdt_eth_mcap_df, aave_tvl_df,
    lido_tvl_df, makerdao_tvl_df, eth_chain_tvl_df, eth_funding_df,
    sp500_df, vix_df, gold_df, dxy_df,
) = [
    add_prefix(frame, tag)
    for frame, tag in [
        (eth_onchain_df, 'eth'),
        (fear_greed_df, 'fg'),
        (usdt_eth_mcap_df, 'usdt'),
        (aave_tvl_df, 'aave'),
        (lido_tvl_df, 'lido'),
        (makerdao_tvl_df, 'makerdao'),
        (eth_chain_tvl_df, 'chain'),
        (eth_funding_df, 'funding'),
        (sp500_df, 'sp500'),
        (vix_df, 'vix'),
        (gold_df, 'gold'),
        (dxy_df, 'dxy'),
    ]
]

# Daily calendar spine covering the lookback window through the end date.
date_range = pd.date_range(start=lookback_start_date, end=end_date, freq='D')
df_merged = pd.DataFrame({'date': date_range})

dataframes_to_merge = [
    macro_df,
    sentiment_features,
    eth_onchain_df,
    fear_greed_df,
    usdt_eth_mcap_df,
    aave_tvl_df,
    lido_tvl_df,
    makerdao_tvl_df,
    eth_chain_tvl_df,
    eth_funding_df,
    sp500_df,
    vix_df,
    gold_df,
    dxy_df,
]

# 1. Left-join every source onto the spine so each calendar day is kept.
for df_to_merge in dataframes_to_merge:
    df_merged = df_merged.merge(df_to_merge, on='date', how='left')

# 2. Zero-fill sentiment features (a day without news is treated as neutral).
# Columns are picked by substring match on their names.
sentiment_cols = [
    col
    for col in df_merged.columns
    if any(
        key in col
        for key in ['sentiment', 'news', 'ext', 'bull_bear', 'positive', 'negative', 'extreme']
    )
]

print(f"\n감성 지표 결측 처리:")
for col in sentiment_cols:
    missing_before = df_merged[col].isnull().sum()
    if missing_before <= 0:
        continue
    df_merged[col] = df_merged[col].fillna(0)
    print(f"  {col}: {missing_before}개 → 0 (데이터 없음 = 중립)")

# 3. Forward-fill external variables (never back-fill: that would pull
#    future values into earlier rows).
external_cols = [col for col in df_merged.columns 
                if any(x in col for x in ['eth_', 'fg_', 'usdt_', 'aave_', 'lido_', 
                                         'makerdao_', 'chain_', 'funding_',
                                         'sp500_', 'vix_', 'gold_', 'dxy_'])]

print(f"\n외부 변수 FFill 처리:")
missing_before = df_merged[external_cols].isnull().sum().sum()
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` keyword
# is deprecated in recent pandas releases.
df_merged[external_cols] = df_merged[external_cols].ffill()
missing_after = df_merged[external_cols].isnull().sum().sum()
print(f"  {missing_before:,} → {missing_after:,}개 (FFill)")

# 4. Keep only rows on or after the lookback start date.
print(f"\nLookback 기간 제거:")
before = len(df_merged)
in_window = df_merged['date'] >= lookback_start_date
df_merged = df_merged.loc[in_window].reset_index(drop=True)
print(f"  {before} → {len(df_merged)}행")

# 5. External values still missing after the forward fill (nothing earlier to
#    carry forward) are set to 0.
remaining_missing = df_merged[external_cols].isnull().sum().sum()
if remaining_missing > 0:
    print(f"\n초기 결측치 처리:")
    print(f"  남은 결측: {remaining_missing}개 → 0")
    df_merged[external_cols] = df_merged[external_cols].fillna(0)

# 6. Drop columns that are entirely missing throughout the lookback period.
lookback_df = df_merged.loc[df_merged['date'] < train_start_date]
all_missing = lookback_df.isnull().all()
cols_to_drop = [
    col for col in lookback_df.columns
    if col != 'date' and all_missing[col]
]

if cols_to_drop:
    print(f"\nLookback 기간 완전 결측 컬럼 제거:")
    print(f"  {cols_to_drop}")
    df_merged = df_merged.drop(columns=cols_to_drop)

# Final summary of the merged table.
print(f"\n✓ 최종 데이터: {df_merged.shape}")
print(f"  날짜: {df_merged['date'].min().date()} ~ {df_merged['date'].max().date()}")
print(f"  결측: {df_merged.isnull().sum().sum()}개")

2025-10-22 01:24:09.805784: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-10-22 01:24:09.805835: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-10-22 01:24:09.807025: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-10-22 01:24:09.814033: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


✓ 감성 지표 생성 완료: 25개 (date 제외)

감성 지표 결측 처리:
  sentiment_mean: 39개 → 0 (데이터 없음 = 중립)
  sentiment_std: 39개 → 0 (데이터 없음 = 중립)
  news_count: 39개 → 0 (데이터 없음 = 중립)
  positive_ratio: 39개 → 0 (데이터 없음 = 중립)
  negative_ratio: 39개 → 0 (데이터 없음 = 중립)
  extreme_positive_count: 39개 → 0 (데이터 없음 = 중립)
  extreme_negative_count: 39개 → 0 (데이터 없음 = 중립)
  sentiment_sum: 39개 → 0 (데이터 없음 = 중립)
  sentiment_polarity: 39개 → 0 (데이터 없음 = 중립)
  sentiment_intensity: 39개 → 0 (데이터 없음 = 중립)
  sentiment_disagreement: 39개 → 0 (데이터 없음 = 중립)
  bull_bear_ratio: 39개 → 0 (데이터 없음 = 중립)
  weighted_sentiment: 39개 → 0 (데이터 없음 = 중립)
  extremity_index: 39개 → 0 (데이터 없음 = 중립)
  sentiment_ma3: 39개 → 0 (데이터 없음 = 중립)
  sentiment_volatility_3: 39개 → 0 (데이터 없음 = 중립)
  sentiment_ma7: 39개 → 0 (데이터 없음 = 중립)
  sentiment_volatility_7: 39개 → 0 (데이터 없음 = 중립)
  sentiment_ma14: 39개 → 0 (데이터 없음 = 중립)
  sentiment_volatility_14: 39개 → 0 (데이터 없음 = 중립)
  sentiment_trend: 39개 → 0 (데이터 없음 = 중립)
  sentiment_acceleration: 39개 → 0 (데이터 없음 = 중립)
  news_volum

In [2]:
def add_indicator_to_df(df_ta, indicator):
    """Copy an indicator result into ``df_ta`` in place.

    ``None`` and empty results are ignored.  A DataFrame contributes each of
    its columns under their own names; a Series is stored under its ``name``
    (or ``'Unnamed'`` when the name is falsy).  Any other type is ignored.
    """
    if indicator is None:
        return

    if isinstance(indicator, pd.DataFrame):
        if not indicator.empty:
            for column in indicator.columns:
                df_ta[column] = indicator[column]
        return

    if isinstance(indicator, pd.Series) and not indicator.empty:
        df_ta[indicator.name or 'Unnamed'] = indicator

def safe_add(df_ta, func, *args, **kwargs):
    """Run an indicator function and attach its result without ever raising.

    Calls ``func(*args, **kwargs)`` and hands the result to
    ``add_indicator_to_df``.  Any exception is reported with a short message.

    Returns:
        True when the indicator was computed and added, False on any error.
    """
    try:
        outcome = func(*args, **kwargs)
        add_indicator_to_df(df_ta, outcome)
    except Exception as err:
        if hasattr(func, '__name__'):
            label = func.__name__
        else:
            label = str(func)
        print(f"    ⚠ {label.upper()} 생성 실패: {str(err)[:50]}")
        return False
    return True

def _warn_indicator_failure(name, err):
    """Report a skipped best-effort indicator in the same format as safe_add."""
    print(f"    ⚠ {name} 생성 실패: {str(err)[:50]}")


def _find_column(frame, stem):
    """Return the first column named ``stem`` or starting with ``stem + '_'``, else None."""
    for name in frame.columns:
        text = str(name)
        if text == stem or text.startswith(stem + '_'):
            return name
    return None


def _supertrend_signal(close, upper_band, lower_band):
    """Return the +1 / -1 / carry-forward band-breakout signal as an int64 array."""
    prices = close.to_numpy(dtype=float)
    upper = upper_band.to_numpy(dtype=float)
    lower = lower_band.to_numpy(dtype=float)
    signal = np.zeros(len(prices), dtype=np.int64)
    for i in range(1, len(prices)):
        # Comparisons against NaN bands are False, so the previous state is kept.
        if prices[i] > upper[i - 1]:
            signal[i] = 1
        elif prices[i] < lower[i - 1]:
            signal[i] = -1
        else:
            signal[i] = signal[i - 1]
    return signal


def _directional_change_events(close, threshold=0.05):
    """Return (event, type) int64 arrays flagging moves of at least ``threshold``.

    A move is measured from the price at the last confirmed event; type is 1
    for an upward event and -1 for a downward event.
    """
    prices = close.to_numpy(dtype=float)
    events = np.zeros(len(prices), dtype=np.int64)
    kinds = np.zeros(len(prices), dtype=np.int64)
    if len(prices) == 0:
        return events, kinds

    last_extreme = prices[0]
    last_type = 0
    for i in range(1, len(prices)):
        price = prices[i]
        change = (price - last_extreme) / last_extreme
        if last_type <= 0 and change >= threshold:
            events[i] = 1
            kinds[i] = 1
            last_extreme = price
            last_type = 1
        elif last_type >= 0 and change <= -threshold:
            events[i] = 1
            kinds[i] = -1
            last_extreme = price
            last_type = -1
    return events, kinds


def calculate_technical_indicators(df):
    """Append technical-analysis features for ETH to a copy of ``df``.

    Sources:
    - "CryptoPulse: Short-Term Cryptocurrency Forecasting" (2024)
    - "Enhancing Price Prediction in Cryptocurrency Using Transformer" (2024)
    - "Bitcoin Trend Prediction with Attention-Based Deep Learning" (2024)

    Args:
        df: Frame with a ``date`` column and ``ETH_Close``.  ``ETH_High``,
            ``ETH_Low`` and ``ETH_Open`` fall back to the close when absent;
            ``ETH_Volume`` falls back to a constant 1.

    Returns:
        A new DataFrame sorted by ``date`` (index reset) holding the original
        columns plus the indicator columns.  Indicator generation is
        best-effort: an unexpected error is printed with its traceback and
        the columns added up to that point are returned.

    Notes:
        Forward-looking outputs are excluded on purpose: the Ichimoku chikou
        span (``ICS_*``) and the centred DPO both shift prices backwards in
        time and would leak future information into the features.
    """
    df = df.sort_values('date').reset_index(drop=True)
    df_ta = df.copy()

    close = df['ETH_Close']
    high = df.get('ETH_High', close)
    low = df.get('ETH_Low', close)
    volume = df.get('ETH_Volume', pd.Series(index=df.index, data=1))
    open_ = df.get('ETH_Open', close)

    try:
        # ===== [core] MOMENTUM INDICATORS =====

        # RSI
        df_ta['RSI_14'] = ta.rsi(close, length=14)
        df_ta['RSI_30'] = ta.rsi(close, length=30)
        df_ta['RSI_200'] = ta.rsi(close, length=200)  # long-horizon RSI

        # MACD
        safe_add(df_ta, ta.macd, close, fast=12, slow=26, signal=9)

        # Stochastic Oscillator (%K, %D)
        safe_add(df_ta, ta.stoch, high, low, close, k=14, d=3)
        safe_add(df_ta, ta.stoch, high, low, close, k=30, d=3)
        safe_add(df_ta, ta.stoch, high, low, close, k=200, d=3)

        # Williams %R
        df_ta['WILLR_14'] = ta.willr(high, low, close, length=14)

        # ROC (Rate of Change)
        df_ta['ROC_10'] = ta.roc(close, length=10)
        df_ta['ROC_20'] = ta.roc(close, length=20)

        # MOM (Momentum)
        df_ta['MOM_10'] = ta.mom(close, length=10)
        df_ta['MOM_30'] = ta.mom(close, length=30)

        # CCI (Commodity Channel Index)
        df_ta['CCI_14'] = ta.cci(high, low, close, length=14)
        df_ta['CCI_20'] = ta.cci(high, low, close, length=20)
        df_ta['CCI_50'] = ta.cci(high, low, close, length=50)
        df_ta['CCI_SIGNAL'] = (df_ta['CCI_20'] > 100).astype(int)

        # TSI (True Strength Index)
        safe_add(df_ta, ta.tsi, close, fast=13, slow=25, signal=13)

        # ===== Ichimoku Cloud =====
        try:
            # lookahead=False asks pandas_ta to omit the chikou span, which is
            # the close shifted into the future.
            ichimoku = ta.ichimoku(high, low, close, lookahead=False)
            if ichimoku is not None and isinstance(ichimoku, tuple):
                ichimoku_df = ichimoku[0]
                if ichimoku_df is not None:
                    for col in ichimoku_df.columns:
                        # Defensive: skip the chikou span even if the
                        # installed version ignores the flag above.
                        if str(col).startswith('ICS_'):
                            continue
                        df_ta[col] = ichimoku_df[col]
        except Exception as e:
            print(f"    ⚠ ICHIMOKU 생성 실패")

        # ===== [core] OVERLAP INDICATORS =====

        # SMA (golden / death cross inputs)
        df_ta['SMA_10'] = ta.sma(close, length=10)
        df_ta['SMA_20'] = ta.sma(close, length=20)
        df_ta['SMA_50'] = ta.sma(close, length=50)
        df_ta['SMA_200'] = ta.sma(close, length=200)

        # EMA
        df_ta['EMA_12'] = ta.ema(close, length=12)
        df_ta['EMA_26'] = ta.ema(close, length=26)
        df_ta['EMA_50'] = ta.ema(close, length=50)
        df_ta['EMA_200'] = ta.ema(close, length=200)

        # TEMA (Triple EMA)
        df_ta['TEMA_10'] = ta.tema(close, length=10)
        df_ta['TEMA_30'] = ta.tema(close, length=30)

        # WMA (Weighted Moving Average)
        df_ta['WMA_10'] = ta.wma(close, length=10)
        df_ta['WMA_20'] = ta.wma(close, length=20)

        # HMA (Hull Moving Average)
        df_ta['HMA_9'] = ta.hma(close, length=9)

        # DEMA (Double EMA)
        df_ta['DEMA_10'] = ta.dema(close, length=10)

        # VWMA (Volume Weighted)
        df_ta['VWMA_20'] = ta.vwma(close, volume, length=20)

        # Price combinations
        df_ta['HL2'] = ta.hl2(high, low)
        df_ta['HLC3'] = ta.hlc3(high, low, close)
        df_ta['OHLC4'] = ta.ohlc4(open_, high, low, close)

        # ===== [core] VOLATILITY INDICATORS =====

        # Bollinger Bands
        safe_add(df_ta, ta.bbands, close, length=20, std=2)
        safe_add(df_ta, ta.bbands, close, length=50, std=2)

        # ATR
        df_ta['ATR_7'] = ta.atr(high, low, close, length=7)
        df_ta['ATR_14'] = ta.atr(high, low, close, length=14)
        df_ta['ATR_21'] = ta.atr(high, low, close, length=21)

        # NATR (Normalized ATR)
        df_ta['NATR_14'] = ta.natr(high, low, close, length=14)

        # True Range
        try:
            tr = ta.true_range(high, low, close)
            if isinstance(tr, pd.Series) and not tr.empty:
                df_ta['TRUERANGE'] = tr
            elif isinstance(tr, pd.DataFrame) and not tr.empty:
                df_ta['TRUERANGE'] = tr.iloc[:, 0]
        except Exception as e:
            _warn_indicator_failure('TRUERANGE', e)

        # Keltner Channel
        safe_add(df_ta, ta.kc, high, low, close, length=20)

        # Donchian Channel
        try:
            dc = ta.donchian(high, low, lower_length=20, upper_length=20)
            if dc is not None and isinstance(dc, pd.DataFrame) and not dc.empty:
                for col in dc.columns:
                    df_ta[col] = dc[col]
        except Exception as e:
            _warn_indicator_failure('DONCHIAN', e)

        # Band-breakout trend state: +1 above the previous upper band,
        # -1 below the previous lower band, otherwise the previous state.
        atr_10 = ta.atr(high, low, close, length=10)
        hl2_calc = (high + low) / 2
        upper_band = hl2_calc + (3 * atr_10)
        lower_band = hl2_calc - (3 * atr_10)
        df_ta['SUPERTREND'] = _supertrend_signal(close, upper_band, lower_band)

        # ===== [core] VOLUME INDICATORS =====

        # OBV
        df_ta['OBV'] = ta.obv(close, volume)

        # AD (Accumulation/Distribution)
        df_ta['AD'] = ta.ad(high, low, close, volume)

        # ADOSC
        df_ta['ADOSC_3_10'] = ta.adosc(high, low, close, volume, fast=3, slow=10)

        # MFI (Money Flow Index)
        df_ta['MFI_14'] = ta.mfi(high, low, close, volume, length=14)

        # CMF (Chaikin Money Flow)
        df_ta['CMF_20'] = ta.cmf(high, low, close, volume, length=20)

        # EFI (Elder Force Index)
        df_ta['EFI_13'] = ta.efi(close, volume, length=13)

        # EOM (Ease of Movement)
        safe_add(df_ta, ta.eom, high, low, close, volume, length=14)

        # VWAP (Volume Weighted Average Price)
        try:
            df_ta['VWAP'] = ta.vwap(high, low, close, volume)
        except Exception as e:
            _warn_indicator_failure('VWAP', e)

        # ===== TREND INDICATORS =====

        # ADX
        safe_add(df_ta, ta.adx, high, low, close, length=14)

        # Aroon
        try:
            aroon = ta.aroon(high, low, length=25)
            if aroon is not None and isinstance(aroon, pd.DataFrame):
                for col in aroon.columns:
                    df_ta[col] = aroon[col]
        except Exception as e:
            _warn_indicator_failure('AROON', e)

        # PSAR
        try:
            psar = ta.psar(high, low, close)
            if psar is not None:
                if isinstance(psar, pd.DataFrame) and not psar.empty:
                    for col in psar.columns:
                        df_ta[col] = psar[col]
                elif isinstance(psar, pd.Series) and not psar.empty:
                    df_ta[psar.name] = psar
        except Exception as e:
            _warn_indicator_failure('PSAR', e)

        # Vortex
        safe_add(df_ta, ta.vortex, high, low, close, length=14)

        # DPO (Detrended Price Oscillator); centered=False avoids the default
        # backward shift that would expose future prices.
        try:
            df_ta['DPO_20'] = ta.dpo(close, length=20, centered=False)
        except Exception as e:
            _warn_indicator_failure('DPO', e)

        # ===== DERIVED FEATURES =====

        # Price change rates
        df_ta['PRICE_CHANGE'] = close.pct_change()
        df_ta['PRICE_CHANGE_2'] = close.pct_change(periods=2)
        df_ta['PRICE_CHANGE_5'] = close.pct_change(periods=5)
        df_ta['PRICE_CHANGE_10'] = close.pct_change(periods=10)

        # Volatility (rolling std of returns)
        df_ta['VOLATILITY_5'] = close.pct_change().rolling(window=5).std()
        df_ta['VOLATILITY_10'] = close.pct_change().rolling(window=10).std()
        df_ta['VOLATILITY_20'] = close.pct_change().rolling(window=20).std()
        df_ta['VOLATILITY_30'] = close.pct_change().rolling(window=30).std()

        # Momentum (price ratio)
        df_ta['MOMENTUM_5'] = close / close.shift(5) - 1
        df_ta['MOMENTUM_10'] = close / close.shift(10) - 1
        df_ta['MOMENTUM_20'] = close / close.shift(20) - 1
        df_ta['MOMENTUM_30'] = close / close.shift(30) - 1

        # Position relative to moving averages
        df_ta['PRICE_VS_SMA10'] = close / df_ta['SMA_10'] - 1
        df_ta['PRICE_VS_SMA20'] = close / df_ta['SMA_20'] - 1
        df_ta['PRICE_VS_SMA50'] = close / df_ta['SMA_50'] - 1
        df_ta['PRICE_VS_SMA200'] = close / df_ta['SMA_200'] - 1
        df_ta['PRICE_VS_EMA12'] = close / df_ta['EMA_12'] - 1
        df_ta['PRICE_VS_EMA26'] = close / df_ta['EMA_26'] - 1

        # Cross signals
        df_ta['SMA_CROSS_SIGNAL'] = (df_ta['SMA_10'] > df_ta['SMA_20']).astype(int)
        df_ta['SMA_GOLDEN_CROSS'] = (df_ta['SMA_50'] > df_ta['SMA_200']).astype(int)
        df_ta['EMA_CROSS_SIGNAL'] = (df_ta['EMA_12'] > df_ta['EMA_26']).astype(int)

        # Volume features
        df_ta['VOLUME_SMA_20'] = ta.sma(volume, length=20)
        df_ta['VOLUME_RATIO'] = volume / (df_ta['VOLUME_SMA_20'] + 1e-10)
        df_ta['VOLUME_CHANGE'] = volume.pct_change()
        df_ta['VOLUME_CHANGE_5'] = volume.pct_change(periods=5)

        # Range features
        df_ta['HIGH_LOW_RANGE'] = (high - low) / (close + 1e-10)
        df_ta['HIGH_CLOSE_RANGE'] = np.abs(high - close.shift()) / (close + 1e-10)
        df_ta['CLOSE_LOW_RANGE'] = (close - low) / (close + 1e-10)

        # Position of the close inside the day's range
        df_ta['INTRADAY_POSITION'] = (close - low) / ((high - low) + 1e-10)

        # Linear regression slope (manual polyfit fallback if pandas_ta fails)
        try:
            df_ta['SLOPE_5'] = ta.linreg(close, length=5, slope=True)
            df_ta['SLOPE_10'] = ta.linreg(close, length=10, slope=True)
            df_ta['LINREG_14'] = ta.linreg(close, length=14)
        except Exception:
            df_ta['SLOPE_5'] = close.rolling(window=5).apply(
                lambda x: np.polyfit(np.arange(len(x)), x, 1)[0] if len(x) == 5 else np.nan, raw=True
            )
            df_ta['SLOPE_10'] = close.rolling(window=10).apply(
                lambda x: np.polyfit(np.arange(len(x)), x, 1)[0] if len(x) == 10 else np.nan, raw=True
            )

        # Increasing / decreasing flags
        df_ta['INC_1'] = (close > close.shift(1)).astype(int)
        df_ta['DEC_1'] = (close < close.shift(1)).astype(int)
        df_ta['INC_3'] = (close > close.shift(3)).astype(int)
        df_ta['INC_5'] = (close > close.shift(5)).astype(int)

        # BOP (Balance of Power)
        df_ta['BOP'] = (close - open_) / ((high - low) + 1e-10)
        df_ta['BOP'] = df_ta['BOP'].fillna(0)

        # ===== ADVANCED DERIVED FEATURES =====

        # Bollinger-derived width / position.  pandas_ta appends the std
        # multiplier to its column names (e.g. 'BBL_20_2.0'), so the bands
        # are located by name stem rather than by an exact 'BBL_20' match.
        bb_lower = _find_column(df_ta, 'BBL_20')
        bb_mid = _find_column(df_ta, 'BBM_20')
        bb_upper = _find_column(df_ta, 'BBU_20')
        if bb_lower is not None and bb_mid is not None and bb_upper is not None:
            df_ta['BB_WIDTH'] = (df_ta[bb_upper] - df_ta[bb_lower]) / (df_ta[bb_mid] + 1e-8)
            df_ta['BB_POSITION'] = (close - df_ta[bb_lower]) / (df_ta[bb_upper] - df_ta[bb_lower] + 1e-8)
        else:
            print(f"    ⚠ Bollinger Bands 컬럼 미발견")

        # RSI overbought / oversold flags
        df_ta['RSI_OVERBOUGHT'] = (df_ta['RSI_14'] > 70).astype(int)
        df_ta['RSI_OVERSOLD'] = (df_ta['RSI_14'] < 30).astype(int)

        # MACD histogram change
        if 'MACDh_12_26_9' in df_ta.columns:
            df_ta['MACD_HIST_CHANGE'] = df_ta['MACDh_12_26_9'].diff()

        # Relative volume strength
        df_ta['VOLUME_STRENGTH'] = volume / volume.rolling(window=50).mean()

        # Price acceleration (difference of returns)
        df_ta['PRICE_ACCELERATION'] = close.pct_change().diff()

        # Gap (open versus previous close)
        df_ta['GAP'] = (open_ - close.shift(1)) / (close.shift(1) + 1e-10)

        df_ta['ROLLING_MAX_20'] = close.rolling(window=20).max()
        df_ta['ROLLING_MIN_20'] = close.rolling(window=20).min()
        df_ta['DISTANCE_FROM_HIGH'] = (df_ta['ROLLING_MAX_20'] - close) / (df_ta['ROLLING_MAX_20'] + 1e-10)
        df_ta['DISTANCE_FROM_LOW'] = (close - df_ta['ROLLING_MIN_20']) / (close + 1e-10)

        # Realized volatility
        ret_squared = close.pct_change() ** 2
        df_ta['RV_5'] = ret_squared.rolling(5).sum()
        df_ta['RV_20'] = ret_squared.rolling(20).sum()
        df_ta['RV_RATIO'] = df_ta['RV_5'] / (df_ta['RV_20'] + 1e-10)

        # Fibonacci levels over the trailing 20-row high / low range
        high_20 = high.rolling(20).max()
        low_20 = low.rolling(20).min()
        diff = high_20 - low_20

        df_ta['FIB_0'] = high_20
        df_ta['FIB_236'] = high_20 - 0.236 * diff
        df_ta['FIB_382'] = high_20 - 0.382 * diff
        df_ta['FIB_500'] = high_20 - 0.500 * diff
        df_ta['FIB_618'] = high_20 - 0.618 * diff
        df_ta['FIB_1'] = low_20

        # Directional change events (5% threshold)
        dc_event, dc_type = _directional_change_events(close, threshold=0.05)
        df_ta['DC_EVENT'] = dc_event
        df_ta['DC_TYPE'] = dc_type

    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()

    return df_ta


def add_enhanced_cross_crypto_features(df):
    """Add ETH/BTC cross-asset return, correlation, beta and spread features.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``ETH_Close`` and ``BTC_Close``. If any of ``BNB_Close``,
        ``XRP_Close``, ``SOL_Close`` or ``ADA_Close`` is present, a
        ``btc_dominance`` column is produced as well.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` with the new feature columns appended; ``df`` itself
        is not modified.

    Notes
    -----
    The regime-conditional correlations (``eth_btc_corr_highvol`` /
    ``eth_btc_corr_lowvol``) are written positionally, so the result is
    correct even when the index contains duplicate labels.
    """
    df_enhanced = df.copy()

    df_enhanced['eth_return'] = df['ETH_Close'].pct_change()
    df_enhanced['btc_return'] = df['BTC_Close'].pct_change()

    # Lagged BTC returns.
    for lag in [1, 2, 3, 5, 10]:
        df_enhanced[f'btc_return_lag{lag}'] = df_enhanced['btc_return'].shift(lag)

    # Rolling return correlation over several horizons.
    for window in [3, 7, 14, 30, 60]:
        df_enhanced[f'eth_btc_corr_{window}d'] = (
            df_enhanced['eth_return'].rolling(window).corr(df_enhanced['btc_return'])
        )

    # Absolute returns are used as a simple volatility proxy.
    eth_vol = df_enhanced['eth_return'].abs()
    btc_vol = df_enhanced['btc_return'].abs()

    for window in [7, 14, 30]:
        df_enhanced[f'eth_btc_volcorr_{window}d'] = eth_vol.rolling(window).corr(btc_vol)
        df_enhanced[f'eth_btc_volcorr_sq_{window}d'] = (
            (df_enhanced['eth_return']**2).rolling(window).corr(df_enhanced['btc_return']**2)
        )

    # Signed BTC move relative to the size of the ETH move (epsilon avoids /0).
    df_enhanced['btc_eth_strength_ratio'] = (
        df_enhanced['btc_return'] / (df_enhanced['eth_return'].abs() + 1e-8)
    )
    df_enhanced['btc_eth_strength_ratio_7d'] = (
        df_enhanced['btc_eth_strength_ratio'].rolling(7).mean()
    )

    # BTC return relative to the equal-weighted mean return of available coins.
    alt_returns = [
        df[f'{coin}_Close'].pct_change()
        for coin in ['BNB', 'XRP', 'SOL', 'ADA']
        if f'{coin}_Close' in df.columns
    ]
    if alt_returns:
        market_return = pd.concat(
            alt_returns + [df_enhanced['eth_return'], df_enhanced['btc_return']], axis=1
        ).mean(axis=1)
        df_enhanced['btc_dominance'] = df_enhanced['btc_return'] / (market_return + 1e-8)

    # Rolling beta of ETH on BTC: cov(eth, btc) / var(btc).
    for window in [30, 60, 90]:
        covariance = df_enhanced['eth_return'].rolling(window).cov(df_enhanced['btc_return'])
        btc_variance = df_enhanced['btc_return'].rolling(window).var()
        df_enhanced[f'eth_btc_beta_{window}d'] = covariance / (btc_variance + 1e-8)

    df_enhanced['eth_btc_spread'] = df_enhanced['eth_return'] - df_enhanced['btc_return']
    df_enhanced['eth_btc_spread_ma7'] = df_enhanced['eth_btc_spread'].rolling(7).mean()
    df_enhanced['eth_btc_spread_std7'] = df_enhanced['eth_btc_spread'].rolling(7).std()

    # Regime-conditional correlation: a day is "high vol" when |btc_return|
    # exceeds its 30-day rolling mean (NaN comparisons count as low vol).
    btc_vol_ma = btc_vol.rolling(30).mean()
    high_vol_mask = (btc_vol > btc_vol_ma).to_numpy()

    eth_values = df_enhanced['eth_return'].to_numpy()
    btc_values = df_enhanced['btc_return'].to_numpy()
    n_rows = len(df_enhanced)
    corr_highvol = np.full(n_rows, np.nan)
    corr_lowvol = np.full(n_rows, np.nan)

    for i in range(30, n_rows):
        # The window covers the 30 rows strictly before row i.
        window_slice = slice(i - 30, i)
        is_high = high_vol_mask[window_slice]
        is_low = ~is_high
        eth_window = eth_values[window_slice]
        btc_window = btc_values[window_slice]

        # Require more than 5 observations in a regime before correlating.
        if is_high.sum() > 5:
            corr_highvol[i] = pd.Series(eth_window[is_high]).corr(
                pd.Series(btc_window[is_high])
            )
        if is_low.sum() > 5:
            corr_lowvol[i] = pd.Series(eth_window[is_low]).corr(
                pd.Series(btc_window[is_low])
            )

    # Positional assignment: a label-based per-row write would overwrite every
    # row sharing a label when the index is not unique.
    df_enhanced['eth_btc_corr_highvol'] = corr_highvol
    df_enhanced['eth_btc_corr_lowvol'] = corr_lowvol

    return df_enhanced


def remove_raw_prices_and_transform(df):
    """Derive return/range/volume features, then drop the raw OHLCV columns.

    Works on a copy of ``df``. Every derived column is only computed when a
    column with that name is not already present, so pre-existing features
    are left untouched.

    Steps, in order:
    1. ETH log return, intraday range, body ratio and close position
       (reads ``ETH_Close``, ``ETH_High``, ``ETH_Low``, ``ETH_Open``).
    2. If ``BTC_Close`` exists: BTC log/multi-period returns, rolling
       volatility, range and body ratio; plus volume features when
       ``BTC_Volume`` exists.
    3. For each listed altcoin with a ``<COIN>_Close`` column: log return,
       30-day volatility and, when ``<COIN>_Volume`` exists, volume features.
    4. If both ``ETH_Volume`` and ``BTC_Volume`` exist: cross-volume features.
    5. Drop columns whose name contains ``_Close``/``_Open``/``_High``/
       ``_Low``/``_Volume`` unless the lower-cased name also contains one of
       the derived-feature markers (``_lag``, ``_ratio``, ...).
    6. Fill NaN with 0 in every remaining column whose lower-cased name
       contains ``return`` and whose name does not contain ``next``.

    Returns the transformed copy.
    """
    df_transformed = df.copy()

    # --- ETH price-derived features ---
    if 'eth_log_return' not in df_transformed.columns:
        df_transformed['eth_log_return'] = np.log(df['ETH_Close'] / df['ETH_Close'].shift(1))
    if 'eth_intraday_range' not in df_transformed.columns:
        df_transformed['eth_intraday_range'] = (df['ETH_High'] - df['ETH_Low']) / (df['ETH_Close'] + 1e-8)
    if 'eth_body_ratio' not in df_transformed.columns:
        df_transformed['eth_body_ratio'] = (df['ETH_Close'] - df['ETH_Open']) / (df['ETH_Close'] + 1e-8)
    if 'eth_close_position' not in df_transformed.columns:
        # Position of the close inside the day's high-low range.
        df_transformed['eth_close_position'] = (
            (df['ETH_Close'] - df['ETH_Low']) / (df['ETH_High'] - df['ETH_Low'] + 1e-8)
        )

    # --- BTC features (only when BTC prices are available) ---
    if 'BTC_Close' in df_transformed.columns:
        if 'btc_log_return' not in df_transformed.columns:
            df_transformed['btc_log_return'] = np.log(df['BTC_Close'] / df['BTC_Close'].shift(1))
        # Multi-period log returns; warm-up NaNs become 0.
        for period in [5, 10, 20, 30]:
            col_name = f'btc_return_{period}d'
            if col_name not in df_transformed.columns:
                df_transformed[col_name] = np.log(df['BTC_Close'] / df['BTC_Close'].shift(period)).fillna(0)
        # Rolling std of log returns; min_periods is a third of the window, at least 3.
        for period in [7, 14, 30]:
            col_name = f'btc_volatility_{period}d'
            if col_name not in df_transformed.columns:
                df_transformed[col_name] = (
                    df_transformed['btc_log_return'].rolling(period, min_periods=max(3, period//3)).std()
                ).fillna(0)
        if 'btc_intraday_range' not in df_transformed.columns:
            df_transformed['btc_intraday_range'] = (df['BTC_High'] - df['BTC_Low']) / (df['BTC_Close'] + 1e-8)
        if 'btc_body_ratio' not in df_transformed.columns:
            df_transformed['btc_body_ratio'] = (df['BTC_Close'] - df['BTC_Open']) / (df['BTC_Close'] + 1e-8)

        # BTC volume features.
        if 'BTC_Volume' in df.columns:
            btc_volume = df['BTC_Volume']
            if 'btc_volume_change' not in df_transformed.columns:
                df_transformed['btc_volume_change'] = btc_volume.pct_change().fillna(0)
            if 'btc_volume_ratio_20d' not in df_transformed.columns:
                # Volume relative to its 20-day mean; missing values default to 1.
                volume_ma20 = btc_volume.rolling(20, min_periods=5).mean()
                df_transformed['btc_volume_ratio_20d'] = (btc_volume / (volume_ma20 + 1e-8)).fillna(1)
            if 'btc_volume_volatility_30d' not in df_transformed.columns:
                df_transformed['btc_volume_volatility_30d'] = (
                    btc_volume.pct_change().rolling(30, min_periods=10).std()
                ).fillna(0)
            if 'btc_obv' not in df_transformed.columns:
                # Signed volume: +volume on an up close, -volume on a down close,
                # 0 otherwise; accumulated with cumsum.
                btc_close = df['BTC_Close']
                obv = np.where(btc_close > btc_close.shift(1), btc_volume,
                               np.where(btc_close < btc_close.shift(1), -btc_volume, 0))
                df_transformed['btc_obv'] = pd.Series(obv, index=df.index).cumsum().fillna(0)
            if 'btc_volume_price_corr_30d' not in df_transformed.columns:
                # 30-day rolling correlation of volume change with log return.
                df_transformed['btc_volume_price_corr_30d'] = (
                    btc_volume.pct_change().rolling(30, min_periods=10).corr(
                        df_transformed['btc_log_return']
                    )
                ).fillna(0)

    # --- Altcoin features (each coin is optional) ---
    altcoins = ['BNB', 'XRP', 'SOL', 'ADA', 'DOGE', 'AVAX', 'DOT']
    for coin in altcoins:
        if f'{coin}_Close' in df_transformed.columns:
            col_name = f'{coin.lower()}_return'
            if col_name not in df_transformed.columns:
                df_transformed[col_name] = np.log(df[f'{coin}_Close'] / df[f'{coin}_Close'].shift(1)).fillna(0)
            vol_col = f'{coin.lower()}_volatility_30d'
            if vol_col not in df_transformed.columns:
                df_transformed[vol_col] = (
                    df_transformed[col_name].rolling(30, min_periods=10).std()
                ).fillna(0)
            if f'{coin}_Volume' in df.columns:
                coin_volume = df[f'{coin}_Volume']
                volume_change_col = f'{coin.lower()}_volume_change'
                if volume_change_col not in df_transformed.columns:
                    df_transformed[volume_change_col] = coin_volume.pct_change().fillna(0)
                volume_ratio_col = f'{coin.lower()}_volume_ratio_20d'
                if volume_ratio_col not in df_transformed.columns:
                    volume_ma20 = coin_volume.rolling(20, min_periods=5).mean()
                    df_transformed[volume_ratio_col] = (coin_volume / (volume_ma20 + 1e-8)).fillna(1)

    # --- ETH vs BTC volume relationship ---
    if 'ETH_Volume' in df.columns and 'BTC_Volume' in df.columns:
        eth_volume = df['ETH_Volume']
        btc_volume = df['BTC_Volume']
        if 'eth_btc_volume_corr_30d' not in df_transformed.columns:
            df_transformed['eth_btc_volume_corr_30d'] = (
                eth_volume.pct_change().rolling(30, min_periods=10).corr(
                    btc_volume.pct_change()
                )
            ).fillna(0)
        if 'eth_btc_volume_ratio' not in df_transformed.columns:
            df_transformed['eth_btc_volume_ratio'] = (
                eth_volume / (btc_volume + 1e-8)
            ).fillna(0)
        if 'eth_btc_volume_ratio_ma30' not in df_transformed.columns:
            df_transformed['eth_btc_volume_ratio_ma30'] = (
                df_transformed['eth_btc_volume_ratio'].rolling(30, min_periods=10).mean()
            ).fillna(0)

    # --- Drop raw OHLCV columns ---
    # The raw patterns are matched case-sensitively; the keep-markers are
    # matched against the lower-cased name, so a column such as
    # 'X_Close_lag1' survives while 'X_Close' is removed.
    remove_patterns = ['_Close', '_Open', '_High', '_Low', '_Volume']
    cols_to_remove = [
        col for col in df_transformed.columns
        if any(p in col for p in remove_patterns)
        and not any(d in col.lower() for d in ['_lag', '_position', '_ratio', '_range', '_change', '_corr', '_volatility', '_obv'])
    ]
    df_transformed.drop(cols_to_remove, axis=1, inplace=True)

    # Fill remaining NaNs in return-like columns with 0; columns with 'next'
    # in the name are skipped (presumably the forward-looking targets —
    # TODO confirm against the target-creation step).
    return_cols = [
        col for col in df_transformed.columns
        if 'return' in col.lower() and 'next' not in col
    ]
    if return_cols:
        df_transformed[return_cols] = df_transformed[return_cols].fillna(0)

    return df_transformed

In [3]:
# ============================================================================
# 2. Lag 적용
# ============================================================================
def apply_lag_features(df, news_lag=2, onchain_lag=1):
    """Append lagged copies of sentiment, on-chain and external columns.

    Principles:
    1. Original (lag-0) columns are kept unchanged.
    2. Raw sentiment columns get ``_lag1`` .. ``_lag{news_lag}`` copies
       (with the default ``news_lag=2`` that is lag1 and lag2).
    3. Moving-average / difference style columns are not lagged, since they
       already look at the past.
    4. Event/period columns are not lagged.

    Parameters
    ----------
    df : pd.DataFrame
        Input frame; it is not modified.
    news_lag : int, default 2
        Largest lag (inclusive) generated for each raw sentiment column.
    onchain_lag : int, default 1
        Shift applied to on-chain columns; the new column is named
        ``<col>_lag{onchain_lag}``.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the lag columns appended.
    """
    df_lagged = df.copy()

    # Raw sentiment indicators: the only sentiment columns that get lagged.
    raw_sentiment_cols = [
        'sentiment_mean', 'sentiment_std', 'sentiment_sum',
        'news_count', 'positive_ratio', 'negative_ratio',
        'sentiment_polarity', 'sentiment_intensity',
        'sentiment_disagreement', 'bull_bear_ratio',
        'weighted_sentiment', 'extremity_index',
        'extreme_positive_count', 'extreme_negative_count'
    ]

    # Name fragments marking derived columns that must not be lagged.
    no_lag_patterns = [
        '_ma', '_volatility_', '_trend', '_acceleration',
        '_volume_change', '_volume_ma'
    ]

    onchain_keywords = (
        'eth_tx', 'eth_active', 'eth_new',
        'eth_large', 'eth_token', 'eth_contract',
        'eth_avg_gas', 'eth_total_gas',
        'eth_avg_block'
    )
    other_keywords = (
        'tvl', 'funding', 'lido_', 'aave_', 'makerdao_',
        'chain_', 'usdt_', 'sp500_', 'vix_', 'gold_', 'dxy_', 'fg_'
    )

    # Never lag raw ETH prices, the date, event/period flags, or columns
    # that are already lags.
    excluded = {'ETH_Close', 'ETH_High', 'ETH_Low', 'ETH_Open', 'date'}
    excluded.update(
        col for col in df.columns
        if 'event_' in col or 'period_' in col or '_lag' in col
    )

    def columns_matching(keywords):
        """Return non-excluded columns whose lower-cased name has a keyword."""
        return [
            col for col in df.columns
            if col not in excluded
            and any(keyword in col.lower() for keyword in keywords)
        ]

    # 1. Sentiment lags: 1 .. news_lag inclusive (previously range(1, news_lag)
    #    stopped one short, so the default produced only lag1).
    for col in raw_sentiment_cols:
        if col not in df.columns:
            continue
        if any(pattern in col for pattern in no_lag_patterns):
            continue
        for lag in range(1, news_lag + 1):
            df_lagged[f"{col}_lag{lag}"] = df[col].shift(lag)

    # 2. On-chain lag: the column name now reflects the actual shift.
    for col in columns_matching(onchain_keywords):
        df_lagged[f"{col}_lag{onchain_lag}"] = df[col].shift(onchain_lag)

    # 3. Other external variables: fixed one-step lag.
    for col in columns_matching(other_keywords):
        df_lagged[f"{col}_lag1"] = df[col].shift(1)

    return df_lagged


def add_price_lag_features_first(df):
    """Append lagged ETH price, volume, return and price-ratio columns.

    Reads ``ETH_Close``, ``ETH_High``, ``ETH_Low`` and ``ETH_Volume`` from
    ``df`` and returns a copy with the new columns added; ``df`` itself is
    left untouched.
    """
    result = df.copy()
    eth_close = df['ETH_Close']
    eth_high = df['ETH_High']
    eth_low = df['ETH_Low']
    eth_volume = df['ETH_Volume']

    short_lags = (1, 2, 3, 5, 7)
    new_columns = {}

    # Past closes (short lags plus two/three/four-week style horizons).
    for k in short_lags + (14, 21, 30):
        new_columns[f'close_lag{k}'] = eth_close.shift(k)

    # Past highs and lows, interleaved per lag.
    for k in short_lags:
        new_columns[f'high_lag{k}'] = eth_high.shift(k)
        new_columns[f'low_lag{k}'] = eth_low.shift(k)

    # Past volumes.
    for k in short_lags:
        new_columns[f'volume_lag{k}'] = eth_volume.shift(k)

    # k-period returns as known one step earlier.
    for k in short_lags:
        new_columns[f'return_lag{k}'] = eth_close.pct_change(periods=k).shift(1)

    # Current close relative to the close k steps back.
    for k in (1, 7, 30):
        new_columns[f'close_ratio_lag{k}'] = eth_close / eth_close.shift(k)

    # Dict insertion order fixes the output column order.
    for column_name, values in new_columns.items():
        result[column_name] = values

    return result


# ============================================================================
# 3. 타겟 변수 생성
# ============================================================================

def create_targets(df):
    """Create next-day prediction targets from ``ETH_Close``.

    Adds three columns to a copy of ``df``:
    - ``next_log_return``: log(close[t+1] / close[t])
    - ``next_direction``: 1 when close[t+1] > close[t], else 0
    - ``next_close``: close[t+1]

    NOTE(review): the last row has no following close, so ``next_close`` and
    ``next_log_return`` are NaN there while ``next_direction`` evaluates to 0.
    """
    today = df['ETH_Close']
    tomorrow = today.shift(-1)

    out = df.copy()
    out['next_log_return'] = np.log(tomorrow.div(today))
    out['next_direction'] = tomorrow.gt(today).astype(int)
    out['next_close'] = tomorrow
    return out



In [4]:
def add_temporal_cyclic_features(df):
    """Append calendar and cyclically encoded time features.

    Reads the datetime column ``date`` and returns a copy of ``df`` with:
    plain calendar fields, month/quarter boundary flags, a weekend flag, and
    sine/cosine encodings of day-of-week, month and day-of-month.

    Reference:
    - "The Importance of Time-Based Cyclic Features" (2025)
    - "Feature engineering for time-series data" (Statsig, 2025)
    """
    stamps = df['date'].dt
    out = df.copy()

    # Plain calendar fields.
    out['day_of_week'] = stamps.dayofweek
    out['day_of_month'] = stamps.day
    out['month'] = stamps.month
    out['quarter'] = stamps.quarter
    out['week_of_year'] = stamps.isocalendar().week

    # Month / quarter boundary flags as 0/1 integers.
    for flag in ('is_month_start', 'is_month_end',
                 'is_quarter_start', 'is_quarter_end'):
        out[flag] = getattr(stamps, flag).astype(int)

    # Weekend flag (dayofweek 5 and 6).
    out['is_weekend'] = (stamps.dayofweek >= 5).astype(int)

    # Sine/cosine encoding so the end of a cycle sits next to its start.
    for field, period in (('day_of_week', 7), ('month', 12), ('day_of_month', 31)):
        angle = 2 * np.pi * out[field] / period
        out[f'{field}_sin'] = np.sin(angle)
        out[f'{field}_cos'] = np.cos(angle)

    return out


def add_interaction_features(df):
    """Append pairwise interaction features for whichever inputs are present.

    Each interaction is created only when both of its source columns exist
    in ``df``; missing inputs simply skip that feature. Returns a copy of
    ``df`` with the new columns appended in the order listed below.

    Reference:
    - "Optimizing Forecast Accuracy" (2025): momentum x volatility interaction
    - "Causal Feature Engineering" (2023): feature combinations
    """
    def product(left, right):
        return left * right

    # (output column, (input columns), combiner) in output order.
    recipes = (
        # 1. RSI x volume
        ('RSI_Volume_Strength', ('RSI_14', 'VOLUME_RATIO'), product),
        # 2. Bollinger band position x sentiment
        ('BB_Sentiment_Consensus', ('BB_POSITION', 'sentiment_polarity'), product),
        # 3. VIX x ETH volatility
        ('VIX_ETH_Vol_Cross', ('vix_VIX', 'VOLATILITY_20'), product),
        # 4. MACD x volume
        ('MACD_Volume_Momentum', ('MACD_12_26_9', 'VOLUME_RATIO'), product),
        # 5. BTC return x ETH-BTC correlation
        ('BTC_Weighted_Impact', ('btc_return', 'eth_btc_corr_30d'), product),
        # 6. Sentiment x log-scaled news volume
        ('Sentiment_Volume_Intensity', ('sentiment_polarity', 'news_count'),
         lambda polarity, count: polarity * np.log1p(count)),
        # 7. ATR x inverse volume ratio
        ('Liquidity_Risk', ('ATR_14', 'VOLUME_RATIO'),
         lambda atr, vol_ratio: atr * (1 / (vol_ratio + 1e-8))),
        # 8. RSI overbought flag x high-volume flag
        ('Overbought_High_Volume', ('RSI_OVERBOUGHT', 'VOLUME_RATIO'),
         lambda overbought, vol_ratio: overbought * (vol_ratio > 1.5).astype(int)),
        # 9. Golden cross flag x positive-sentiment flag
        ('Golden_Sentiment_Align', ('SMA_GOLDEN_CROSS', 'sentiment_polarity'),
         lambda cross, polarity: cross * (polarity > 0).astype(int)),
        # 10. Price acceleration x momentum
        ('Acceleration_Momentum', ('PRICE_ACCELERATION', 'MOMENTUM_10'), product),
    )

    out = df.copy()
    available = df.columns
    for target, (first, second), combine in recipes:
        if first in available and second in available:
            out[target] = combine(df[first], df[second])

    return out


def add_volatility_regime_features(df):
    """Append volatility-regime features derived from ``VOLATILITY_20``.

    When ``VOLATILITY_20`` is missing, an unchanged copy of ``df`` is
    returned. Otherwise the copy gains:
    - ``vol_regime_high``: 1 when volatility is above its 60-row rolling median
    - ``vol_spike``: 1 when volatility exceeds rolling mean + 2 * rolling std
      (30-row window)
    - ``vol_percentile_90d``: share of the 90-row window strictly below the
      latest value
    - ``vol_trend``: 5-row percentage change
    - ``vol_regime_duration``: running length of the current regime

    Reference:
    - "Intraday trading of cryptocurrencies" (2023)
    """
    out = df.copy()
    if 'VOLATILITY_20' not in df.columns:
        return out

    def share_below_latest(window):
        # Fraction of window entries strictly smaller than the newest one.
        return (window.iloc[-1] > window).sum() / len(window) if len(window) > 0 else 0.5

    vol = df['VOLATILITY_20']

    # 1. High vs low volatility regime.
    rolling_median = vol.rolling(60, min_periods=20).median()
    out['vol_regime_high'] = (vol > rolling_median).astype(int)

    # 2. Volatility spike events.
    recent = vol.rolling(30, min_periods=10)
    spike_threshold = recent.mean() + 2 * recent.std()
    out['vol_spike'] = (vol > spike_threshold).astype(int)

    # 3. Rolling percentile of the latest value.
    out['vol_percentile_90d'] = vol.rolling(90, min_periods=30).apply(share_below_latest)

    # 4. Volatility trend.
    out['vol_trend'] = vol.pct_change(5)

    # 5. Regime duration: a new run starts wherever the regime flag changes.
    regime = out['vol_regime_high']
    run_id = (regime != regime.shift()).cumsum()
    out['vol_regime_duration'] = out.groupby(run_id).cumcount() + 1

    return out


def add_normalized_price_lags(df):
    """Append price-lag features normalised by the current close.

    Intended for classification models, where ratios are used instead of
    absolute prices.

    Only columns named exactly ``close_lag<N>``, ``high_lag<N>`` or
    ``low_lag<N>`` (``<N>`` = ASCII digits) are treated as lag columns, so
    unrelated columns that merely contain such a substring (for example
    ``inflow_lag1`` or an existing ``high_lag1_ratio``) are ignored.

    Parameters
    ----------
    df : pd.DataFrame
        Frame that may contain ``ETH_Close`` and the lag columns above.

    Returns
    -------
    pd.DataFrame
        Copy of ``df``. If ``ETH_Close`` is missing the copy is returned
        unchanged. Otherwise it gains ``<lagcol>_ratio`` columns and, for
        ``close_lag<N>`` when ``close_lag<N+1>`` also exists,
        ``close_lag<N>_logret``.

    Reference:
    - "Financial Forecasting with ML: Price vs Return" (2021)
    """
    df_norm = df.copy()

    if 'ETH_Close' not in df.columns:
        return df_norm

    # Small epsilon guards against division by zero.
    close_denominator = df['ETH_Close'] + 1e-8

    def lag_suffix(col, prefix):
        """Return the digit suffix if ``col`` is ``prefix`` + digits, else None."""
        if isinstance(col, str) and col.startswith(prefix):
            suffix = col[len(prefix):]
            if suffix.isascii() and suffix.isdigit():
                return suffix
        return None

    # 1. Close lags: ratio to the current close, plus the log return between
    #    consecutive lags when the next lag is available.
    for col in df.columns:
        lag_num = lag_suffix(col, 'close_lag')
        if lag_num is None:
            continue
        df_norm[f'close_lag{lag_num}_ratio'] = df[col] / close_denominator

        next_lag_col = f'close_lag{int(lag_num) + 1}'
        if next_lag_col in df.columns:
            df_norm[f'close_lag{lag_num}_logret'] = np.log(df[col] / (df[next_lag_col] + 1e-8))

    # 2. High / low lags: ratio to the current close. Same strict name check
    #    as for close lags.
    for col in df.columns:
        for prefix in ('high_lag', 'low_lag'):
            lag_num = lag_suffix(col, prefix)
            if lag_num is not None:
                df_norm[f'{prefix}{lag_num}_ratio'] = df[col] / close_denominator

    return df_norm


def add_cumulative_streak_features(df):
    """Append streak and cumulative-return features from ``eth_log_return``.

    When ``eth_log_return`` is missing, an unchanged copy of ``df`` is
    returned. Otherwise the copy gains running up/down streak lengths, their
    20-row rolling maxima, the 20-row summed return and the 20-row
    up-to-down day ratio.

    Reference:
    - "Feature engineering for time-series" (2025)
    """
    out = df.copy()
    if 'eth_log_return' not in df.columns:
        return out

    log_ret = df['eth_log_return']
    is_up = log_ret > 0
    is_down = log_ret < 0

    # 1. Consecutive up days: count within runs delimited by non-positive days.
    up_run_id = (log_ret <= 0).cumsum()
    out['consecutive_up_days'] = is_up.astype(int).groupby(up_run_id).cumsum()

    # 2. Consecutive down days: count within runs delimited by non-negative days.
    down_run_id = (log_ret >= 0).cumsum()
    out['consecutive_down_days'] = is_down.astype(int).groupby(down_run_id).cumsum()

    # 3./4. Longest streaks seen in the last 20 rows.
    for direction in ('up', 'down'):
        streak = out[f'consecutive_{direction}_days']
        out[f'max_consecutive_{direction}_20d'] = streak.rolling(20, min_periods=5).max()

    # 5. Cumulative (summed log) return over 20 rows.
    out['cumulative_return_20d'] = log_ret.rolling(20, min_periods=5).sum()

    # 6. Up/down day ratio over 20 rows (epsilon avoids division by zero).
    up_count = is_up.rolling(20, min_periods=5).sum()
    down_count = is_down.rolling(20, min_periods=5).sum()
    out['up_down_ratio_20d'] = up_count / (down_count + 1e-8)

    return out


def add_percentile_features(df):
    """Append rolling-percentile features for price, volume and RSI.

    For each source column that exists in ``df``, the new value is the share
    of entries in the rolling window that are strictly below the window's
    latest entry. Returns a copy of ``df``.

    Reference:
    - "Optimizing Forecast Accuracy" (2025)
    """
    def share_below_latest(window):
        # Fraction of window entries strictly smaller than the newest one.
        return (window.iloc[-1] > window).sum() / len(window) if len(window) > 0 else 0.5

    # (source column, output column, window length, minimum observations)
    specs = (
        ('ETH_Close', 'price_percentile_250d', 250, 60),
        ('ETH_Volume', 'volume_percentile_90d', 90, 30),
        ('RSI_14', 'RSI_percentile_60d', 60, 20),
    )

    out = df.copy()
    for source, target, span, min_obs in specs:
        if source not in df.columns:
            continue
        out[target] = df[source].rolling(span, min_periods=min_obs).apply(share_below_latest)

    return out


def handle_missing_values_paper_based(df_clean, train_start_date, is_train=True, train_stats=None):
    """Trim the lookback period and impute missing / infinite feature values.

    Processing order:
    1. Keep only rows with ``date >= train_start_date`` and reset the index.
    2. Treat every column except ``date`` and the targets
       (``next_log_return``, ``next_direction``, ``next_close``) as a feature.
    3. Forward-fill feature NaNs, then fill what is left with 0.
    4. In numeric feature columns, replace +/-inf with NaN, forward-fill and
       fill with 0 again.

    Only past values are used for filling, so no future information leaks
    into earlier rows. Target columns are never modified.

    Parameters
    ----------
    df_clean : pd.DataFrame
        Must contain a ``date`` column; the caller's frame is not modified.
    train_start_date : str or datetime-like
        First date to keep; strings are parsed with ``pd.to_datetime``.
    is_train : bool, default True
        Controls the return shape (see Returns).
    train_stats : any, optional
        Accepted for interface compatibility; not used.

    Returns
    -------
    (pd.DataFrame, dict) when ``is_train`` is True (the dict is empty),
    otherwise just the pd.DataFrame.

    References (as cited by the original author):
    1. "Quantifying Cryptocurrency Unpredictability" (2025)
    2. "Time Series Data Forecasting"
    3. "Dealing with Leaky Missing Data in Production" (2021)
    """
    # 1. Drop the lookback rows.
    if isinstance(train_start_date, str):
        train_start_date = pd.to_datetime(train_start_date)

    df_clean = df_clean[df_clean['date'] >= train_start_date].reset_index(drop=True)

    # 2. Select feature columns.
    non_feature_cols = {'next_log_return', 'next_direction', 'next_close', 'date'}
    feature_cols = [col for col in df_clean.columns if col not in non_feature_cols]

    if feature_cols:
        # 3. Forward fill, then zero. `.ffill()` replaces the deprecated
        #    `fillna(method='ffill')`.
        df_clean[feature_cols] = df_clean[feature_cols].ffill().fillna(0)

        # 4. Infinite values: only meaningful for numeric columns; np.isinf
        #    raises on non-numeric data, so those columns are skipped.
        for col in feature_cols:
            column = df_clean[col]
            if not pd.api.types.is_numeric_dtype(column):
                continue
            if np.isinf(column).any():
                df_clean[col] = (
                    column.replace([np.inf, -np.inf], np.nan).ffill().fillna(0)
                )

    if is_train:
        return df_clean, {}
    return df_clean

In [5]:
def select_features_multi_target(X_train, y_train, target_type='direction', top_n=40):
    """Select features for a single target or a 50/50 hybrid of two targets.

    Parameters
    ----------
    X_train : pd.DataFrame
        Training features.
    y_train : mapping / pd.DataFrame
        Provides the target columns ``next_direction``, ``next_log_return``
        and ``next_close`` as needed by ``target_type``.
    target_type : str
        One of ``'direction'``, ``'return'``, ``'price'``,
        ``'direction_return'`` or ``'direction_price'``.
    top_n : int
        Number of features to return (hybrids request ``top_n // 2`` per
        target and top up from the merged MI scores if needed).

    Returns
    -------
    (list, dict)
        Selected feature names and the statistics from
        ``select_features_verified`` (for hybrids: a dict holding both
        per-target stats plus the ``overlap`` count).

    Raises
    ------
    ValueError
        If ``target_type`` is not one of the supported values.
    """
    # target_type -> (target column, task)
    single_targets = {
        'direction': ('next_direction', 'class'),
        'return': ('next_log_return', 'reg'),
        'price': ('next_close', 'reg'),
    }
    # target_type -> (regression target column, stats key, banner)
    hybrid_targets = {
        'direction_return': ('next_log_return', 'ret_stats',
                             "\n[Hybrid] Direction (50%) + Return (50%)"),
        'direction_price': ('next_close', 'price_stats',
                            "\n[Hybrid] Direction (50%) + Price (50%)"),
    }

    if target_type in single_targets:
        target_col, task = single_targets[target_type]
        selected, stats = select_features_verified(
            X_train,
            y_train[target_col],
            task=task,
            top_n=top_n
        )

    elif target_type in hybrid_targets:
        reg_col, reg_stats_key, banner = hybrid_targets[target_type]
        print(banner)

        half = top_n // 2
        dir_features, dir_stats = select_features_verified(
            X_train,
            y_train['next_direction'],
            task='class',
            top_n=half,
            verbose=False
        )
        reg_features, reg_stats = select_features_verified(
            X_train,
            y_train[reg_col],
            task='reg',
            top_n=half,
            verbose=False
        )

        # Order-preserving union: direction picks first, then regression picks.
        selected = list(dict.fromkeys(dir_features + reg_features))

        if len(selected) < top_n:
            # Top up from the merged MI scores; for features present in both
            # dicts the regression score overrides the direction score.
            merged_scores = {**dir_stats['mi_scores'], **reg_stats['mi_scores']}
            ranked = sorted(merged_scores.items(), key=lambda x: x[1], reverse=True)
            extras = [feat for feat, _ in ranked if feat not in selected]
            selected.extend(extras[:top_n - len(selected)])

        selected = selected[:top_n]

        stats = {
            'dir_stats': dir_stats,
            reg_stats_key: reg_stats,
            'overlap': len(set(dir_features) & set(reg_features))
        }

    else:
        raise ValueError(f"Unknown target_type: {target_type}")

    print("Selected Features")
    print(", ".join(selected))
    return selected, stats


def select_features_verified(X_train, y_train, task='class', top_n=40, verbose=True):
    """Select features by majority vote across three rankers.

    The three rankers are mutual information, recursive feature elimination
    around a LightGBM estimator, and random-forest importances. Each one
    nominates up to ``top_n`` features; the ``top_n`` most frequently
    nominated features are returned.

    Parameters
    ----------
    X_train : pd.DataFrame
        Training features.
    y_train : array-like
        Target values.
    task : str
        ``'class'`` selects the classification variants; any other value
        selects the regression variants.
    top_n : int
        Number of features each ranker nominates and the number returned.
    verbose : bool
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    (list, dict)
        Selected feature names, and a dict with each ranker's nominations,
        the vote counter, and the per-feature MI scores / RF importances.
    """
    is_classification = task == 'class'
    feature_names = X_train.columns

    def top_ranked(scores):
        # Names of the top_n highest-scoring features, best first.
        order = np.argsort(scores)[::-1][:top_n]
        return feature_names[order].tolist()

    # --- Ranker 1: mutual information ---
    mi_function = mutual_info_classif if is_classification else mutual_info_regression
    mi_scores = mi_function(X_train, y_train, random_state=42, n_neighbors=3)
    mi_features = top_ranked(mi_scores)

    # --- Ranker 2: RFE around a LightGBM model ---
    lgbm_class = LGBMClassifier if is_classification else LGBMRegressor
    rfe = RFE(
        estimator=lgbm_class(
            n_estimators=100,
            learning_rate=0.05,
            max_depth=5,
            random_state=42,
            verbose=-1
        ),
        n_features_to_select=top_n,
        step=0.1,
        verbose=0
    )
    rfe.fit(X_train, y_train)
    rfe_features = feature_names[rfe.support_].tolist()

    # --- Ranker 3: random-forest importances ---
    forest_class = RandomForestClassifier if is_classification else RandomForestRegressor
    rf_model = forest_class(
        n_estimators=100,
        max_depth=10,
        random_state=42,
        n_jobs=-1
    )
    rf_model.fit(X_train, y_train)
    rf_importances = rf_model.feature_importances_
    rf_features = top_ranked(rf_importances)

    # --- Vote: features nominated by more rankers come first ---
    feature_votes = Counter(mi_features + rfe_features + rf_features)
    selected_features = [name for name, _ in feature_votes.most_common(top_n)]

    # Top up from the MI ranking if the vote produced fewer than top_n names.
    shortfall = top_n - len(selected_features)
    if shortfall > 0:
        for name in mi_features:
            if name in selected_features:
                continue
            selected_features.append(name)
            shortfall -= 1
            if shortfall == 0:
                break

    summary = {
        'mi_features': mi_features,
        'rfe_features': rfe_features,
        'rf_features': rf_features,
        'feature_votes': feature_votes,
        'mi_scores': dict(zip(X_train.columns, mi_scores)),
        'rf_importances': dict(zip(X_train.columns, rf_importances))
    }
    return selected_features, summary


def split_tvt_method(df, train_start_date, test_start_date='2025-01-01', 
                     train_ratio=0.7, val_ratio=0.15):
    """Split a dated frame into train / validation / test with a fixed test start.

    Rows on or after ``test_start_date`` form the test set. The rows between
    ``train_start_date`` and ``test_start_date`` are split chronologically
    into train and validation in the proportion
    ``train_ratio : val_ratio``.

    The frame is first ordered by ``date`` (stable sort, original index
    labels kept) so the positional train/val cut is always chronological,
    even when the caller passes unsorted data. Already-sorted input is
    returned exactly as before.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a datetime ``date`` column.
    train_start_date : str or datetime-like
        Earliest date to include.
    test_start_date : str or datetime-like, default '2025-01-01'
        First date of the test set.
    train_ratio, val_ratio : float
        Relative sizes of train and validation within the pre-test period.

    Returns
    -------
    dict
        ``{'train': DataFrame, 'val': DataFrame, 'test': DataFrame}``.
    """
    if isinstance(train_start_date, str):
        train_start_date = pd.to_datetime(train_start_date)
    if isinstance(test_start_date, str):
        test_start_date = pd.to_datetime(test_start_date)

    # mergesort is stable, so rows sharing a date keep their relative order.
    df_period = df[df['date'] >= train_start_date].sort_values('date', kind='mergesort')

    # Everything before the test start is train/val; the rest is test.
    pre_test_df = df_period[df_period['date'] < test_start_date].copy()
    test_df = df_period[df_period['date'] >= test_start_date].copy()

    # Train/val cut inside the pre-test period only.
    n_pre_test = len(pre_test_df)
    train_end = int(n_pre_test * train_ratio / (train_ratio + val_ratio))

    train_df = pre_test_df.iloc[:train_end].copy()
    val_df = pre_test_df.iloc[train_end:].copy()

    print(f"\n{'='*80}")
    print(f"TVT Split (Fixed Test Start: {test_start_date.date()})")
    print(f"{'='*80}")
    print(f"  Train: {len(train_df):4d} ({train_df['date'].min().date()} ~ {train_df['date'].max().date()})")
    print(f"  Val:   {len(val_df):4d} ({val_df['date'].min().date()} ~ {val_df['date'].max().date()})")
    print(f"  Test:  {len(test_df):4d} ({test_df['date'].min().date()} ~ {test_df['date'].max().date()})")
    print(f"{'='*80}\n")

    return {'train': train_df, 'val': val_df, 'test': test_df}


def split_walk_forward_method(df, train_start_date, 
                              final_test_start='2025-01-01',
                              n_splits=4,
                              initial_train_size=600,
                              val_size=60,
                              test_size=60,
                              step=240,
                              lookback=30):
    """
    Build expanding-window walk-forward folds plus one final fixed holdout fold.

    Rows before ``final_test_start`` are used for the walk-forward folds. In
    fold ``k`` (0-based) the test window starts at row
    ``initial_train_size + val_size + k * step``; the validation window is the
    ``val_size`` rows immediately before it and the training window is every
    row before the validation window. Fold generation stops as soon as a test
    window would run past the pre-holdout data. If any rows exist on or after
    ``final_test_start`` a last fold is appended whose test set is all of those
    rows, whose validation set is the last ``val_size`` pre-holdout rows and
    whose training set is everything before that.

    Default rationale (from the original author's notes): ``n_splits=4`` with
    ``step=240`` is intended to evaluate several market regimes with well
    separated folds, giving 5 trainings in total (4 walk-forward + 1 holdout).

    Parameters
    ----------
    df : pd.DataFrame
        Time series containing a ``'date'`` column.
    train_start_date : str
        Rows dated before this are discarded (e.g. ``'2020-12-19'``).
    final_test_start : str or datetime-like
        First date of the fixed holdout test period (default ``'2025-01-01'``).
    n_splits : int or None
        Number of walk-forward folds to attempt (default 4). ``None`` derives
        the count from the amount of pre-holdout data.
    initial_train_size : int
        Number of training rows in the first fold (default 600).
    val_size : int
        Number of validation rows per fold (default 60).
    test_size : int
        Number of test rows per walk-forward fold (default 60).
    step : int
        Row offset between consecutive folds (default 240).
    lookback : int
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    list of dict
        One dict per fold with keys ``'train'``, ``'val'``, ``'test'``
        (DataFrames), ``'fold_idx'`` (1-based) and ``'fold_type'``
        (``'walk_forward'`` or ``'final_holdout'``).
    """
    df_period = df[df['date'] >= train_start_date].copy()
    df_period = df_period.sort_values('date').reset_index(drop=True)

    final_test_start = pd.to_datetime(final_test_start)

    pre_final_df = df_period[df_period['date'] < final_test_start].copy()
    final_test_df = df_period[df_period['date'] >= final_test_start].copy()

    # Resolve an automatic fold count BEFORE it is used in the summary below;
    # formatting `n_splits + 1` with n_splits=None would raise TypeError.
    if n_splits is None:
        available_data = len(pre_final_df) - initial_train_size - val_size - test_size
        n_splits = max(1, (available_data // step) + 1)

    print(f"\n{'='*80}")
    print("Walk-Forward Configuration")
    print(f"{'='*80}")
    print(f"Total: {len(df_period)} days")
    print(f"Pre-final: {len(pre_final_df)} days | Final holdout: {len(final_test_df)} days")
    print(f"Target: {n_splits} walk-forward + 1 final holdout = {n_splits + 1} folds")
    print(f"{'='*80}\n")

    folds = []

    for fold_idx in range(n_splits):
        test_start_idx = initial_train_size + val_size + (fold_idx * step)
        test_end_idx = test_start_idx + test_size

        # Later folds only start further right, so nothing else can fit either.
        if test_end_idx > len(pre_final_df):
            break

        # Validation sits directly before test; training is everything earlier.
        val_end_idx = test_start_idx
        val_start_idx = val_end_idx - val_size
        train_end_idx = val_start_idx

        if train_end_idx < initial_train_size:
            continue

        train_fold = pre_final_df.iloc[:train_end_idx].copy()
        val_fold = pre_final_df.iloc[val_start_idx:val_end_idx].copy()
        test_fold = pre_final_df.iloc[test_start_idx:test_end_idx].copy()

        # Sanity checks against look-ahead leakage between the windows.
        assert train_fold['date'].max() < val_fold['date'].min(), "Train/Val overlap!"
        assert val_fold['date'].max() <= test_fold['date'].min(), "Val/Test overlap!"

        print(f"Fold {fold_idx + 1} (walk_forward)")
        print(f"  Train: {len(train_fold):4d}d  {train_fold['date'].min().date()} ~ {train_fold['date'].max().date()}")
        print(f"  Val:   {len(val_fold):4d}d  {val_fold['date'].min().date()} ~ {val_fold['date'].max().date()}")
        print(f"  Test:  {len(test_fold):4d}d  {test_fold['date'].min().date()} ~ {test_fold['date'].max().date()}\n")

        folds.append({
            'train': train_fold,
            'val': val_fold,
            'test': test_fold,
            'fold_idx': fold_idx + 1,
            'fold_type': 'walk_forward'
        })

    if len(final_test_df) > 0:
        # Final fold: hold out the tail of the pre-holdout data for validation
        # and test on everything from `final_test_start` onwards.
        final_train_end = len(pre_final_df)
        final_val_start = final_train_end - val_size
        final_train_data = pre_final_df.iloc[:final_val_start].copy()
        final_val_data = pre_final_df.iloc[final_val_start:final_train_end].copy()

        print(f"Fold {len(folds) + 1} (final_holdout)")
        print(f"  Train: {len(final_train_data):4d}d  {final_train_data['date'].min().date()} ~ {final_train_data['date'].max().date()}")
        print(f"  Val:   {len(final_val_data):4d}d  {final_val_data['date'].min().date()} ~ {final_val_data['date'].max().date()}")
        print(f"  Test:  {len(final_test_df):4d}d  {final_test_df['date'].min().date()} ~ {final_test_df['date'].max().date()}\n")

        folds.append({
            'train': final_train_data,
            'val': final_val_data,
            'test': final_test_df,
            'fold_idx': len(folds) + 1,
            'fold_type': 'final_holdout'
        })

    print(f"{'='*80}")
    print(f"Created {len(folds)} folds total")
    print(f"{'='*80}\n")

    return folds


def process_single_split(split_data, target_type='direction', top_n=40, fold_idx=None):
    """
    Preprocess one train/val/test split independently of every other fold.

    Steps, in order:
      1. Missing-value handling via ``handle_missing_values_paper_based``; the
         statistics returned for the training frame are passed to the val/test
         calls as ``train_stats``.
      2. Rows with a missing target are dropped from each frame.
      3. Feature selection via ``select_features_multi_target`` on the
         training frame only.
      4. ``RobustScaler`` and ``StandardScaler`` are fitted on the selected
         training features and applied to val/test.

    Parameters
    ----------
    split_data : dict
        Must contain ``'train'``, ``'val'`` and ``'test'`` DataFrames, each
        with a ``'date'`` column; ``'fold_type'`` is optional.
    target_type : str
        Forwarded to ``select_features_multi_target``.
    top_n : int
        Number of features requested from the selector.
    fold_idx : int or None
        Only used for log output and echoed back in ``stats``.

    Returns
    -------
    dict
        ``'train'`` / ``'val'`` / ``'test'`` each map to a dict with
        ``X_robust``, ``X_standard`` (scaled arrays), ``X_raw`` (selected
        feature frame), ``y`` (target frame) and ``dates``; ``'stats'`` holds
        the fitted scalers, the selected features and selection metadata.
    """

    def _aligned_dates(processed_df, raw_df):
        # X/y are built from the processed frame, so the dates must come from
        # it as well: any row removed during missing-value handling or target
        # dropna would otherwise leave `dates` longer than (and shifted
        # against) X/y. Fall back to the raw frame only if the processing step
        # did not keep a 'date' column.
        source = processed_df if 'date' in processed_df.columns else raw_df
        return source['date'].reset_index(drop=True)

    train_df = split_data['train']
    val_df = split_data['val']
    test_df = split_data['test']
    fold_type = split_data.get('fold_type', 'unknown')

    # Fold banner
    if fold_idx is not None:
        print(f"\n{'='*60}")
        print(f"Processing Fold {fold_idx} ({fold_type})")
        print(f"{'='*60}")

    # Training statistics are computed once and reused for val/test.
    train_processed, missing_stats = handle_missing_values_paper_based(
        train_df.copy(),
        train_start_date=train_df['date'].min(),
        is_train=True
    )

    val_processed = handle_missing_values_paper_based(
        val_df.copy(),
        train_start_date=val_df['date'].min(),
        is_train=False,
        train_stats=missing_stats
    )

    test_processed = handle_missing_values_paper_based(
        test_df.copy(),
        train_start_date=test_df['date'].min(),
        is_train=False,
        train_stats=missing_stats
    )

    target_cols = ['next_log_return', 'next_direction', 'next_close']

    train_processed = train_processed.dropna(subset=target_cols).reset_index(drop=True)
    val_processed = val_processed.dropna(subset=target_cols).reset_index(drop=True)
    test_processed = test_processed.dropna(subset=target_cols).reset_index(drop=True)

    # Everything that is neither a target nor the date is a candidate feature.
    feature_cols = [col for col in train_processed.columns 
                   if col not in target_cols + ['date']]

    X_train = train_processed[feature_cols]
    y_train = train_processed[target_cols]

    X_val = val_processed[feature_cols]
    y_val = val_processed[target_cols]

    X_test = test_processed[feature_cols]
    y_test = test_processed[target_cols]

    print(f"\n[Feature Selection for Fold {fold_idx}]")
    print(f"Training data shape: {X_train.shape}")

    # Selection sees training data only, so val/test cannot influence it.
    selected_features, selection_stats = select_features_multi_target(
        X_train, 
        y_train, 
        target_type=target_type, 
        top_n=top_n
    )

    print(f"Selected {len(selected_features)} features for this fold")

    X_train_sel = X_train[selected_features]
    X_val_sel = X_val[selected_features]
    X_test_sel = X_test[selected_features]

    # Both scalers are fitted on the training features and only applied to
    # val/test.
    robust_scaler = RobustScaler()
    standard_scaler = StandardScaler()

    X_train_robust = robust_scaler.fit_transform(X_train_sel)
    X_val_robust = robust_scaler.transform(X_val_sel)
    X_test_robust = robust_scaler.transform(X_test_sel)

    X_train_standard = standard_scaler.fit_transform(X_train_sel)
    X_val_standard = standard_scaler.transform(X_val_sel)
    X_test_standard = standard_scaler.transform(X_test_sel)

    print(f"Scaling completed for Fold {fold_idx}")
    print(f"{'='*60}\n")

    result = {
        'train': {
            'X_robust': X_train_robust,
            'X_standard': X_train_standard,
            'X_raw': X_train_sel,
            'y': y_train.reset_index(drop=True), 
            'dates': _aligned_dates(train_processed, train_df)
        },
        'val': {
            'X_robust': X_val_robust,
            'X_standard': X_val_standard,
            'X_raw': X_val_sel,
            'y': y_val.reset_index(drop=True), 
            'dates': _aligned_dates(val_processed, val_df)
        },
        'test': {
            'X_robust': X_test_robust,
            'X_standard': X_test_standard,
            'X_raw': X_test_sel,
            'y': y_test.reset_index(drop=True),  
            'dates': _aligned_dates(test_processed, test_df)
        },
        'stats': {
            'robust_scaler': robust_scaler,
            'standard_scaler': standard_scaler,
            'selected_features': selected_features,
            'selection_stats': selection_stats,
            'target_type': target_type,
            'target_cols': target_cols,
            'fold_type': fold_type,
            'fold_idx': fold_idx
        }
    }

    return result


def build_complete_pipeline_corrected(df_raw, train_start_date, 
                                     final_test_start='2025-01-01',
                                     method='tvt', target_type='direction', **kwargs):
    """
    Run the full pipeline: feature engineering, splitting, per-split processing.

    Parameters
    ----------
    df_raw : DataFrame
        Raw input data; it is copied, not modified.
    train_start_date : str
        First date of the training data.
    final_test_start : str, default='2025-01-01'
        Start of the fixed final test period.
        - TVT: everything from this date to the end is the test set.
        - Walk-forward: earlier rows form the walk-forward folds, later rows
          form the final holdout.
    method : str, default='tvt'
        ``'tvt'`` or ``'walk_forward'``.
    target_type : str, default='direction'
        'direction', 'return', 'price', 'direction_return', 'direction_price'
    **kwargs : dict
        Optional overrides. Forwarded to the splitter when present:
        ``train_ratio``, ``val_ratio`` (tvt) or ``n_splits``,
        ``initial_train_size``, ``test_size``, ``val_size``, ``step``,
        ``lookback`` (walk_forward). ``top_n`` (default 40) sets the number of
        features selected per split. Other keys are ignored.

    Returns
    -------
    dict or list of dict
        A single ``process_single_split`` result for ``'tvt'``; a list with
        one result per fold for ``'walk_forward'``.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'tvt'`` nor ``'walk_forward'``.
    """
    # Keyword arguments each splitter understands; only these are forwarded.
    forwardable = {
        'tvt': ('train_ratio', 'val_ratio'),
        'walk_forward': ('n_splits', 'initial_train_size', 'test_size',
                         'val_size', 'step', 'lookback'),
    }

    # Fail fast: reject a bad method before the costly feature engineering.
    if method not in forwardable:
        raise ValueError(f"Unknown method: {method}")

    top_n = kwargs.get('top_n', 40)

    df = df_raw.copy()

    # Feature engineering on the full frame, in the original order.
    df = create_targets(df)
    df = add_price_lag_features_first(df)
    df = calculate_technical_indicators(df)
    df = add_temporal_cyclic_features(df)
    df = add_enhanced_cross_crypto_features(df)
    df = add_volatility_regime_features(df)
    df = add_interaction_features(df)
    df = add_cumulative_streak_features(df)
    df = add_percentile_features(df)
    df = add_normalized_price_lags(df)
    df = remove_raw_prices_and_transform(df)
    df = apply_lag_features(df, news_lag=2, onchain_lag=1)

    # Kept from the original: widens pandas' display for the notebook session.
    pd.set_option('display.max_columns', None)
    # Drop the final row. NOTE(review): presumably because its next-step
    # targets cannot be computed - confirm against create_targets.
    df = df.iloc[:-1]  

    split_kwargs = {key: kwargs[key] for key in forwardable[method] if key in kwargs}

    if method == 'tvt':
        splits = split_tvt_method(
            df, train_start_date, test_start_date=final_test_start, **split_kwargs
        )
        return process_single_split(
            splits, 
            target_type=target_type,  
            top_n=top_n,
            fold_idx=1
        )

    splits = split_walk_forward_method(
        df, train_start_date, final_test_start=final_test_start, **split_kwargs
    )
    return [
        process_single_split(
            fold, 
            target_type=target_type,  
            top_n=top_n,
            fold_idx=fold['fold_idx']
        ) 
        for fold in splits
    ]


In [6]:
# def select_features_multi_target(X_train, y_train, target_type='direction', top_n=40):
#     """
#     Multi-Target Feature Selection
    
#     5가지 케이스별 최적화된 feature selection:
#     1. direction (분류)
#     2. return (회귀)  
#     3. price (회귀)
#     4. direction_return (혼합)
#     5. direction_price (혼합)
    
#     Reference:
#     - "Multi-target HSIC-Lasso" (2024)
#     - "Feature selection for multi-target regression" (2021)
#     """

    
#     if target_type == 'direction':
#         # 순수 분류
#         selected, stats = select_features_verified(
#             X_train, 
#             y_train['next_direction'], 
#             task='class', 
#             top_n=top_n
#         )
        
#     elif target_type == 'return':
#         # 순수 회귀 (수익률)
#         selected, stats = select_features_verified(
#             X_train, 
#             y_train['next_log_return'], 
#             task='reg', 
#             top_n=top_n
#         )
        
#     elif target_type == 'price':
#         # 순수 회귀 (가격)
#         selected, stats = select_features_verified(
#             X_train, 
#             y_train['next_close'], 
#             task='reg', 
#             top_n=top_n
#         )
        
#     elif target_type == 'direction_return':
#         # 혼합: 분류 + 회귀 (방향 + 수익률)
#         print("\n[Hybrid] Direction (50%) + Return (50%)")
        
#         # 각각 절반씩 선택
#         dir_features, dir_stats = select_features_verified(
#             X_train, 
#             y_train['next_direction'], 
#             task='class', 
#             top_n=top_n // 2,
#             verbose=False
#         )
        
#         ret_features, ret_stats = select_features_verified(
#             X_train, 
#             y_train['next_log_return'], 
#             task='reg', 
#             top_n=top_n // 2,
#             verbose=False
#         )
        
#         # 합집합으로 결합 (중복 제거)
#         selected = list(dict.fromkeys(dir_features + ret_features))
        
#         # 부족하면 MI 스코어 높은 순으로 추가
#         if len(selected) < top_n:
#             all_mi_scores = {**dir_stats['mi_scores'], **ret_stats['mi_scores']}
#             sorted_features = sorted(all_mi_scores.items(), key=lambda x: x[1], reverse=True)
            
#             for feat, _ in sorted_features:
#                 if feat not in selected:
#                     selected.append(feat)
#                     if len(selected) >= top_n:
#                         break
        
#         # 너무 많으면 자르기
#         selected = selected[:top_n]
        
#         stats = {
#             'dir_stats': dir_stats,
#             'ret_stats': ret_stats,
#             'overlap': len(set(dir_features) & set(ret_features))
#         }
        
        
#     elif target_type == 'direction_price':
#         # 혼합: 분류 + 회귀 (방향 + 가격)
#         print("\n[Hybrid] Direction (50%) + Price (50%)")
        
#         dir_features, dir_stats = select_features_verified(
#             X_train, 
#             y_train['next_direction'], 
#             task='class', 
#             top_n=top_n // 2,
#             verbose=False
#         )
        
#         price_features, price_stats = select_features_verified(
#             X_train, 
#             y_train['next_close'], 
#             task='reg', 
#             top_n=top_n // 2,
#             verbose=False
#         )
        
#         selected = list(dict.fromkeys(dir_features + price_features))
        
#         if len(selected) < top_n:
#             all_mi_scores = {**dir_stats['mi_scores'], **price_stats['mi_scores']}
#             sorted_features = sorted(all_mi_scores.items(), key=lambda x: x[1], reverse=True)
            
#             for feat, _ in sorted_features:
#                 if feat not in selected:
#                     selected.append(feat)
#                     if len(selected) >= top_n:
#                         break
        
#         selected = selected[:top_n]
        
#         stats = {
#             'dir_stats': dir_stats,
#             'price_stats': price_stats,
#             'overlap': len(set(dir_features) & set(price_features))
#         }
        
#     else:
#         raise ValueError(f"Unknown target_type: {target_type}")
    
#     print("선택된 지표들")
#     print(", ".join(selected))
#     return selected, stats


# def select_features_verified(X_train, y_train, task='class', top_n=40, verbose=True):
#     """
#     검증된 Feature Selection 방법 (2025 연구 기반)
    
#     핵심 원칙:
#     1. 하이퍼파라미터 튜닝 없이 기본 파라미터 사용
#     2. MI + RFE + RF Importance 앙상블
#     3. 빠른 실행 속도
    
#     Reference:
#     - "Optimizing Forecast Accuracy in Cryptocurrency Markets" (2025)
#     - "Feature Selection After Split" (Reddit, 2022)
    
#     """
    
#     if task == 'class':
#         mi_scores = mutual_info_classif(X_train, y_train, random_state=42)
#     else:
#         mi_scores = mutual_info_regression(X_train, y_train, random_state=42)
    
#     mi_idx = np.argsort(mi_scores)[::-1][:top_n]
#     mi_features = X_train.columns[mi_idx].tolist()
    
    
#     # 기본 파라미터만 사용 
#     if task == 'class':
#         estimator = LGBMClassifier(
#             n_estimators=100,
#             learning_rate=0.05,
#             max_depth=5,
#             random_state=42,
#             verbose=-1
#         )
#     else:
#         estimator = LGBMRegressor(
#             n_estimators=100,
#             learning_rate=0.05,
#             max_depth=5,
#             random_state=42,
#             verbose=-1
#         )
    
#     rfe = RFE(
#         estimator=estimator,
#         n_features_to_select=top_n,
#         step=0.1,  # 10%씩 제거
#         verbose=0
#     )
    
#     rfe.fit(X_train, y_train)
#     rfe_features = X_train.columns[rfe.support_].tolist()

    
#     if task == 'class':
#         rf_model = RandomForestClassifier(
#             n_estimators=100,
#             max_depth=10,
#             random_state=42,
#             n_jobs=-1
#         )
#     else:
#         rf_model = RandomForestRegressor(
#             n_estimators=100,
#             max_depth=10,
#             random_state=42,
#             n_jobs=-1
#         )
    
#     rf_model.fit(X_train, y_train)
#     rf_importances = rf_model.feature_importances_
#     rf_idx = np.argsort(rf_importances)[::-1][:top_n]
#     rf_features = X_train.columns[rf_idx].tolist()
#     all_features = mi_features + rfe_features + rf_features
#     feature_votes = Counter(all_features)
#     selected_features = [feat for feat, _ in feature_votes.most_common(top_n)]

#     if len(selected_features) < top_n:
#         remaining = top_n - len(selected_features)
#         for feat in mi_features:
#             if feat not in selected_features:
#                 selected_features.append(feat)
#                 remaining -= 1
#                 if remaining == 0:
#                     break
    
#     return selected_features, {
#         'mi_features': mi_features,
#         'rfe_features': rfe_features,
#         'rf_features': rf_features,
#         'feature_votes': feature_votes,
#         'mi_scores': dict(zip(X_train.columns, mi_scores)),
#         'rf_importances': dict(zip(X_train.columns, rf_importances))
#     }

# # ============================================================================
# # 전체 파이프라인 
# # ============================================================================

# def build_complete_pipeline_corrected(df_raw, train_start_date, 
#                                      method='tvt', target_type='direction', **kwargs):
#     """

#     1. Feature Engineering (전체 데이터)
#     2. Target 생성 (전체 데이터)  
#     3. Train/Val/Test Split
#     4. Missing Value Handling 
#     5. Feature Selection 
#     6. Scaling (Train에서만 Fit)
    
#     Reference:
#     - "Feature Selection After Split" (Stack Overflow, 2019)
#     - "Scaling After Feature Selection" (Reddit, 2023)
#     """
    
#     df = df_raw.copy()
    
#     # Target 생성 
#     df = create_targets(df)
    
#     # 과거 가격 Lag
#     df = add_price_lag_features_first(df)
    
#     # 기술적 지표
#     df = calculate_technical_indicators(df)
    
#     # 시간 주기성
#     df = add_temporal_cyclic_features(df)
    
#     # BTC-ETH 교차 특징
#     df = add_enhanced_cross_crypto_features(df)
    
#     # 변동성 체제
#     df = add_volatility_regime_features(df)
    
#     # 상호작용 특징
#     df = add_interaction_features(df)
    
#     # 누적/연속 특징
#     df = add_cumulative_streak_features(df)
    
#     # 백분위수 특징
#     df = add_percentile_features(df)
    
#     # 정규화 가격 Lag
#     df = add_normalized_price_lags(df)
    
#     # Raw 가격 제거
#     df = remove_raw_prices_and_transform(df)
    
#     # Lag 적용 (감성, 온체인)
#     df = apply_lag_features(df, news_lag=2, onchain_lag=1)


#     # 1. 원본 VIX 확인
#     if 'vix_VIX' in df.columns:
#         vix_missing = df['vix_VIX'].isnull().sum()

#     # 2. VOLATILITY_20 확인
#     if 'VOLATILITY_20' in df.columns:
#         vol_missing = df['VOLATILITY_20'].isnull().sum()

#     # 3. 상호작용 특징 확인
#     if 'VIX_ETH_Vol_Cross' in df.columns:
#         cross_missing = df['VIX_ETH_Vol_Cross'].isnull().sum()


#     # 4. Lag 적용 후 확인
#     if 'VIX_ETH_Vol_Cross_lag1' in df.columns:
#         cross_lag_missing = df['VIX_ETH_Vol_Cross_lag1'].isnull().sum()


#     pd.set_option('display.max_columns', None)
#     column_list = df.columns.tolist()
#     df = df.iloc[:-1]  
#     split_kwargs = {}
#     if method == 'tvt':
#         if 'train_ratio' in kwargs:
#             split_kwargs['train_ratio'] = kwargs['train_ratio']
#         if 'val_ratio' in kwargs:
#             split_kwargs['val_ratio'] = kwargs['val_ratio']
#         splits = split_tvt_method(df, train_start_date, **split_kwargs)
#     elif method == 'walk_forward':
#         if 'n_splits' in kwargs:
#             split_kwargs['n_splits'] = kwargs['n_splits']
#         if 'initial_train_size' in kwargs:
#             split_kwargs['initial_train_size'] = kwargs['initial_train_size']
#         if 'test_size' in kwargs:
#             split_kwargs['test_size'] = kwargs['test_size']
#         splits = split_walk_forward_method(df, train_start_date, **split_kwargs)
#     else:
#         raise ValueError(f"Unknown method: {method}")
    
#     # ===================================================================
#     # PHASE 3: 각 Split에 대해 Missing/Selection/Scaling 수행
#     # ===================================================================

    
#     if method == 'tvt':
#             result = process_single_split(
#         splits, 
#         target_type=target_type,  
#         top_n=40                 
#         )
#     else:
#             result = [
#         process_single_split(
#             fold, 
#             target_type=target_type,  
#             top_n=40,
#             fold_idx=i+1
#         ) 
#         for i, fold in enumerate(splits)
#         ]
#     return result


# # ============================================================================
# # Split 함수들 
# # ============================================================================

# def split_tvt_method(df, train_start_date, train_ratio=0.7, val_ratio=0.15):
#     """TVT 분할 (결측치 처리 X, 단순 분할만)"""
    
#     df_period = df[df['date'] >= train_start_date].copy()
    
#     n = len(df_period)
#     train_end = int(n * train_ratio)
#     val_end = int(n * (train_ratio + val_ratio))
    
#     train_df = df_period.iloc[:train_end].copy()
#     val_df = df_period.iloc[train_end:val_end].copy()
#     test_df = df_period.iloc[val_end:].copy()
    
#     print(f"  Train: {len(train_df)} ({train_df['date'].min().date()} ~ {train_df['date'].max().date()})")
#     print(f"  Val:   {len(val_df)} ({val_df['date'].min().date()} ~ {val_df['date'].max().date()})")
#     print(f"  Test:  {len(test_df)} ({test_df['date'].min().date()} ~ {test_df['date'].max().date()})")
    
#     return {'train': train_df, 'val': val_df, 'test': test_df}


# def split_walk_forward_method(df, train_start_date, 
#                               n_splits=None,
#                               initial_train_size=600, 
#                               val_size=60,      
#                               test_size=60,
#                               lookback=30):     
#     """
#     Walk-Forward 분할 (Anchored/Expanding Window)
    
#     설정:
#     - Initial Train: 600일
#     - Val: 60일
#     - Test: 60일
#     - Step: 60일
#     - n_splits: None이면 데이터 최대 활용하여 자동 계산
    
#     Reference:
#     - "Optimizing Forecast Accuracy in Cryptocurrency" (2025)
#     - Anchored Window: Train이 매 Fold마다 확장
#     """
    
#     df_period = df[df['date'] >= train_start_date].copy()
#     df_period = df_period.sort_values('date').reset_index(drop=True)
    
#     step = 60
    
#     if n_splits is None:
#         total_data = len(df_period)
#         min_required = initial_train_size + val_size + test_size
#         remaining = total_data - min_required
#         n_splits = (remaining // step) + 1
#         print(f"Auto-calculated n_splits: {n_splits} (from {total_data} days)")
    
#     folds = []
    
#     print(f"\n{'='*80}")
#     print(f"Walk-Forward Configuration")
#     print(f"{'='*80}")
#     print(f"Total data: {len(df_period)} days")
#     print(f"Train={initial_train_size}d, Val={val_size}d, Test={test_size}d, Step={step}d")
#     print(f"Lookback={lookback}d, Val sequences: {val_size - lookback}")
#     print(f"Target folds: {n_splits}")
#     print(f"{'='*80}\n")
    
#     for fold_idx in range(n_splits):
#         train_end_idx = initial_train_size + (fold_idx * step)
#         val_start_idx = train_end_idx
#         val_end_idx = val_start_idx + val_size
#         test_start_idx = val_end_idx
#         test_end_idx = test_start_idx + test_size
        
#         if test_end_idx > len(df_period):
#             print(f"Insufficient data: Fold {fold_idx+1} stopped (need {test_end_idx}, have {len(df_period)})")
#             break
        
#         train_fold = df_period.iloc[:train_end_idx].copy()
#         val_fold = df_period.iloc[val_start_idx:val_end_idx].copy()
#         test_fold = df_period.iloc[test_start_idx:test_end_idx].copy()
        
#         assert train_fold['date'].max() < val_fold['date'].min(), "Train/Val overlap detected!"
#         assert val_fold['date'].max() < test_fold['date'].min(), "Val/Test overlap detected!"
        
#         print(f"Fold {fold_idx + 1:2d}:")
#         print(f"  Train: {len(train_fold):4d}d  ({train_fold['date'].min().date()} ~ {train_fold['date'].max().date()})")
#         print(f"  Val:   {len(val_fold):4d}d  ({val_fold['date'].min().date()} ~ {val_fold['date'].max().date()})")
#         print(f"  Test:  {len(test_fold):4d}d  ({test_fold['date'].min().date()} ~ {test_fold['date'].max().date()})")
        
#         folds.append({
#             'train': train_fold,
#             'val': val_fold,
#             'test': test_fold,
#             'fold_idx': fold_idx + 1
#         })
    
#     print(f"\n{'='*80}")
#     print(f"Summary: {len(folds)} folds generated")
#     print(f"Total test days: {len(folds) * test_size}")
#     print(f"Test coverage: {folds[0]['test']['date'].min().date()} ~ {folds[-1]['test']['date'].max().date()}")
#     print(f"Data utilization: {(test_end_idx/len(df_period)*100):.1f}%")
#     print(f"{'='*80}\n")
    
#     return folds



# # ============================================================================
# # 핵심: 각 Split 처리 
# # ============================================================================

# def process_single_split(split_data, target_type='direction', top_n=40, fold_idx=None):
#     """
#     개선된 전처리 파이프라인
    
#     변경사항:
#     1. GridSearchCV 제거 (feature selection 단계에서)
#     2. 검증된 MI+RFE+RF 앙상블 사용
#     3. Multi-target 지원
#     """
    
#     train_df = split_data['train']
#     val_df = split_data['val']
#     test_df = split_data['test']
    
#     # ===== 1. 결측치 처리 =====
    
#     train_processed, missing_stats = handle_missing_values_paper_based(
#         train_df.copy(),
#         train_start_date=train_df['date'].min(),
#         is_train=True
#     )
    
#     val_processed = handle_missing_values_paper_based(
#         val_df.copy(),
#         train_start_date=val_df['date'].min(),
#         is_train=False,
#         train_stats=missing_stats
#     )
    
#     test_processed = handle_missing_values_paper_based(
#         test_df.copy(),
#         train_start_date=test_df['date'].min(),
#         is_train=False,
#         train_stats=missing_stats
#     )
    
#     target_cols = ['next_log_return', 'next_direction', 'next_close']
    
#     train_processed = train_processed.dropna(subset=target_cols).reset_index(drop=True)
#     val_processed = val_processed.dropna(subset=target_cols).reset_index(drop=True)
#     test_processed = test_processed.dropna(subset=target_cols).reset_index(drop=True)

    
#     feature_cols = [col for col in train_processed.columns 
#                    if col not in target_cols + ['date']]
    
#     X_train = train_processed[feature_cols]
#     y_train = train_processed[target_cols]
    
#     X_val = val_processed[feature_cols]
#     y_val = val_processed[target_cols]
    
#     X_test = test_processed[feature_cols]
#     y_test = test_processed[target_cols]

    
#     selected_features, selection_stats = select_features_multi_target(
#         X_train, 
#         y_train, 
#         target_type=target_type, 
#         top_n=top_n
#     )
    
#     X_train_sel = X_train[selected_features]
#     X_val_sel = X_val[selected_features]
#     X_test_sel = X_test[selected_features]
    
#     robust_scaler = RobustScaler()
#     standard_scaler = StandardScaler()
    
#     X_train_robust = robust_scaler.fit_transform(X_train_sel)
#     X_val_robust = robust_scaler.transform(X_val_sel)
#     X_test_robust = robust_scaler.transform(X_test_sel)
    
#     X_train_standard = standard_scaler.fit_transform(X_train_sel)
#     X_val_standard = standard_scaler.transform(X_val_sel)
#     X_test_standard = standard_scaler.transform(X_test_sel)
    
#     # ===== 6. 결과 패키징 =====
#     result = {
#         'train': {
#             'X_robust': X_train_robust,
#             'X_standard': X_train_standard,
#             'X_raw': X_train_sel,
#             'y': y_train.reset_index(drop=True), 
#             'dates': train_df['date'].reset_index(drop=True) 
#         },
#         'val': {
#             'X_robust': X_val_robust,
#             'X_standard': X_val_standard,
#             'X_raw': X_val_sel,
#             'y': y_val.reset_index(drop=True), 
#             'dates': val_df['date'].reset_index(drop=True)  
#         },
#         'test': {
#             'X_robust': X_test_robust,
#             'X_standard': X_test_standard,
#             'X_raw': X_test_sel,
#             'y': y_test.reset_index(drop=True),  
#             'dates': test_df['date'].reset_index(drop=True)  
#         },
#         'stats': {
#             'robust_scaler': robust_scaler,
#             'standard_scaler': standard_scaler,
#             'selected_features': selected_features,
#             'selection_stats': selection_stats,
#             'target_type': target_type,
#             'target_cols': target_cols
#         }
#     }
    
#     return result





In [7]:
class DirectionModels:
    
    @staticmethod
    def random_forest(X_train, y_train, X_val, y_val):
        """Grid-search a RandomForestClassifier and return the best estimator.

        Candidates are ranked by accuracy under 3-fold cross-validation on the
        training data. ``X_val`` and ``y_val`` are accepted for signature
        parity with the other model factories but are not used here.

        Returns:
            ``best_estimator_`` of the fitted ``GridSearchCV``.
        """
        from sklearn.model_selection import GridSearchCV

        search_space = dict(
            n_estimators=[100, 200, 300],
            max_depth=[10, 15, 20],
            min_samples_split=[10, 15, 20],
            min_samples_leaf=[4, 6, 8],
            max_features=['sqrt', 'log2'],
        )
        searcher = GridSearchCV(
            RandomForestClassifier(random_state=42, n_jobs=-1),
            search_space,
            cv=3,
            scoring='accuracy',
            n_jobs=-1,
            verbose=0,
        )
        searcher.fit(X_train, y_train)
        return searcher.best_estimator_
    
    @staticmethod
    def lightgbm(X_train, y_train, X_val, y_val):
        """Tune an LGBMClassifier with Optuna and return the refit best model.

        Thirty TPE trials (seed 42) are scored by accuracy on
        ``(X_val, y_val)``; the best hyperparameters are then used to fit the
        returned model. Every fit uses the validation set for early stopping
        with a patience of 50 rounds.

        Args:
            X_train, y_train: Training features and labels.
            X_val, y_val: Validation data used for early stopping and for
                scoring each trial.

        Returns:
            The fitted ``LGBMClassifier`` built from the best trial.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        # Settings shared by every trial and by the final model.
        # subsample_freq=1 is required for the tuned `subsample` to matter:
        # LightGBM performs no bagging while subsample_freq is 0 (its default).
        fixed_params = {'subsample_freq': 1, 'random_state': 42, 'verbose': -1}

        def fit_model(tuned_params):
            # One code path for trial and final fits keeps them consistent.
            model = LGBMClassifier(**tuned_params, **fixed_params)
            model.fit(X_train, y_train, eval_set=[(X_val, y_val)],
                      callbacks=[early_stopping(50, verbose=False)])
            return model

        def objective(trial):
            tuned_params = {
                'n_estimators': trial.suggest_int('n_estimators', 100, 300),
                'max_depth': trial.suggest_int('max_depth', 3, 10),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
                'num_leaves': trial.suggest_int('num_leaves', 20, 100),
                'subsample': trial.suggest_float('subsample', 0.6, 0.9),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.9),
                'reg_alpha': trial.suggest_float('reg_alpha', 0.1, 1.0),
                'reg_lambda': trial.suggest_float('reg_lambda', 0.1, 1.0),
                'min_child_samples': trial.suggest_int('min_child_samples', 20, 50),
            }
            preds = fit_model(tuned_params).predict(X_val)
            return (preds == y_val).sum() / len(y_val)

        study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
        study.optimize(objective, n_trials=30, show_progress_bar=False)
        return fit_model(study.best_params)
    
    @staticmethod
    def xgboost(X_train, y_train, X_val, y_val):
        """Tune an XGBClassifier with Optuna and return the refit best model.

        Thirty TPE trials (seed 42) are scored by accuracy on
        ``(X_val, y_val)``. The returned classifier is fitted on the training
        data with the best trial's hyperparameters, using the validation set
        as ``eval_set``.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def train(hyperparams):
            # random_state and eval_metric are fixed for every fit.
            clf = XGBClassifier(**hyperparams, random_state=42, eval_metric='logloss')
            clf.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
            return clf

        def objective(trial):
            # Keyword arguments are evaluated left to right, so the suggest
            # calls happen in the order written here.
            hyperparams = dict(
                n_estimators=trial.suggest_int('n_estimators', 100, 300),
                max_depth=trial.suggest_int('max_depth', 3, 10),
                learning_rate=trial.suggest_float('learning_rate', 0.01, 0.1),
                subsample=trial.suggest_float('subsample', 0.6, 0.9),
                colsample_bytree=trial.suggest_float('colsample_bytree', 0.6, 0.9),
                reg_alpha=trial.suggest_float('reg_alpha', 0.1, 1.0),
                reg_lambda=trial.suggest_float('reg_lambda', 0.5, 2.0),
                min_child_weight=trial.suggest_int('min_child_weight', 3, 10),
                gamma=trial.suggest_float('gamma', 0.1, 0.5),
            )
            val_pred = train(hyperparams).predict(X_val)
            hits = (val_pred == y_val).sum()
            return hits / len(y_val)

        sampler = optuna.samplers.TPESampler(seed=42)
        study = optuna.create_study(direction='maximize', sampler=sampler)
        study.optimize(objective, n_trials=30, show_progress_bar=False)
        return train(study.best_params)
    
    @staticmethod
    def svm(X_train, y_train, X_val, y_val):
        """Grid-search an SVC (with probability estimates) and return the best one.

        Candidates are ranked by accuracy under 3-fold cross-validation on the
        training data. ``X_val`` and ``y_val`` are accepted for signature
        parity with the other model factories but are not used here.

        Returns:
            ``best_estimator_`` of the fitted ``GridSearchCV``.
        """
        from sklearn.model_selection import GridSearchCV

        c_values = [0.1, 1.0, 10.0]
        # gamma is ignored by the linear kernel, so it is only crossed with
        # rbf. A single flat grid would refit the same linear model once per
        # gamma value (9 redundant candidates, each with probability=True).
        param_grid = [
            {'kernel': ['rbf'], 'C': c_values, 'gamma': ['scale', 'auto', 0.01, 0.1]},
            {'kernel': ['linear'], 'C': c_values},
        ]
        model = SVC(random_state=42, probability=True)
        grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy', n_jobs=-1, verbose=0)
        grid_search.fit(X_train, y_train)
        return grid_search.best_estimator_
    
    @staticmethod
    def lstm(X_train, y_train, X_val, y_val, input_shape):
        """Tune and train a two-layer LSTM binary classifier.

        Optuna (TPE sampler, seed 42) runs 20 trials, each trained for up to
        50 epochs and scored by accuracy on ``(X_val, y_val)``. The best
        hyperparameters are then used to train the returned model for up to
        100 epochs with early stopping and learning-rate reduction.

        Args:
            X_train, y_train: Training inputs and binary targets.
            X_val, y_val: Validation data used for early stopping,
                learning-rate scheduling and trial scoring.
            input_shape: Forwarded to the first layer as ``input_shape``
                (presumably ``(timesteps, features)`` -- confirm with caller).

        Returns:
            The fitted Keras model built from the best trial's hyperparameters.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def build_model(hp):
            # Single definition of the architecture so the tuned network and
            # the final network cannot drift apart.
            reg = hp['l2_reg']
            model = Sequential([
                LSTM(hp['units1'], activation='tanh', return_sequences=True,
                     input_shape=input_shape, kernel_regularizer=l2(reg)),
                BatchNormalization(),
                Dropout(hp['dropout']),
                LSTM(hp['units2'], activation='tanh', kernel_regularizer=l2(reg)),
                BatchNormalization(),
                Dropout(hp['dropout']),
                Dense(32, activation='relu', kernel_regularizer=l2(reg)),
                Dropout(hp['dropout'] * 0.7),
                Dense(1, activation='sigmoid')
            ])
            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp['learning_rate']),
                          loss='binary_crossentropy', metrics=['accuracy'])
            return model

        def objective(trial):
            # Release Keras global state left by earlier trials; otherwise
            # memory grows with every model built during the study.
            tf.keras.backend.clear_session()
            hp = {
                'units1': trial.suggest_int('units1', 64, 256, step=32),
                'units2': trial.suggest_int('units2', 32, 128, step=32),
                'dropout': trial.suggest_float('dropout', 0.2, 0.5),
                'l2_reg': trial.suggest_float('l2_reg', 0.001, 0.1, log=True),
                'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
            }
            model = build_model(hp)
            early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
            model.fit(X_train, y_train, validation_data=(X_val, y_val),
                      epochs=50, batch_size=32, callbacks=[early_stop], verbose=0)
            _, accuracy = model.evaluate(X_val, y_val, verbose=0)
            return accuracy

        study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
        study.optimize(objective, n_trials=20, show_progress_bar=False)

        model = build_model(study.best_params)
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                  epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0)
        return model
    
    @staticmethod
    def bilstm(X_train, y_train, X_val, y_val, input_shape):
        """Tune and train a two-layer bidirectional LSTM binary classifier.

        Optuna (TPE sampler, seed 42) runs 20 trials, each trained for up to
        50 epochs and scored by accuracy on ``(X_val, y_val)``. The best
        hyperparameters are then used to train the returned model for up to
        100 epochs with early stopping and learning-rate reduction.

        Args:
            X_train, y_train: Training inputs and binary targets.
            X_val, y_val: Validation data used for early stopping,
                learning-rate scheduling and trial scoring.
            input_shape: Forwarded to the first Bidirectional wrapper as
                ``input_shape`` (presumably ``(timesteps, features)`` --
                confirm with caller).

        Returns:
            The fitted Keras model built from the best trial's hyperparameters.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def build_model(hp):
            # Single definition of the architecture so the tuned network and
            # the final network cannot drift apart.
            reg = hp['l2_reg']
            model = Sequential([
                Bidirectional(LSTM(hp['units1'], return_sequences=True, kernel_regularizer=l2(reg)),
                              input_shape=input_shape),
                BatchNormalization(),
                Dropout(hp['dropout']),
                Bidirectional(LSTM(hp['units2'], kernel_regularizer=l2(reg))),
                BatchNormalization(),
                Dropout(hp['dropout']),
                Dense(32, activation='relu', kernel_regularizer=l2(reg)),
                Dropout(hp['dropout'] * 0.7),
                Dense(1, activation='sigmoid')
            ])
            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp['learning_rate']),
                          loss='binary_crossentropy', metrics=['accuracy'])
            return model

        def objective(trial):
            # Release Keras global state left by earlier trials; otherwise
            # memory grows with every model built during the study.
            tf.keras.backend.clear_session()
            hp = {
                'units1': trial.suggest_int('units1', 64, 256, step=32),
                'units2': trial.suggest_int('units2', 32, 128, step=32),
                'dropout': trial.suggest_float('dropout', 0.2, 0.5),
                'l2_reg': trial.suggest_float('l2_reg', 0.001, 0.1, log=True),
                'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
            }
            model = build_model(hp)
            early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
            model.fit(X_train, y_train, validation_data=(X_val, y_val),
                      epochs=50, batch_size=32, callbacks=[early_stop], verbose=0)
            _, accuracy = model.evaluate(X_val, y_val, verbose=0)
            return accuracy

        study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
        study.optimize(objective, n_trials=20, show_progress_bar=False)

        model = build_model(study.best_params)
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                  epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0)
        return model
    
    @staticmethod
    def gru(X_train, y_train, X_val, y_val, input_shape):
        """Tune and train a two-layer GRU binary classifier.

        Optuna (TPE sampler, seed 42) runs 20 trials, each trained for up to
        50 epochs and scored by accuracy on ``(X_val, y_val)``. The best
        hyperparameters are then used to train the returned model for up to
        100 epochs with early stopping and learning-rate reduction.

        Args:
            X_train, y_train: Training inputs and binary targets.
            X_val, y_val: Validation data used for early stopping,
                learning-rate scheduling and trial scoring.
            input_shape: Forwarded to the first layer as ``input_shape``
                (presumably ``(timesteps, features)`` -- confirm with caller).

        Returns:
            The fitted Keras model built from the best trial's hyperparameters.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def build_model(hp):
            # Single definition of the architecture so the tuned network and
            # the final network cannot drift apart.
            reg = hp['l2_reg']
            model = Sequential([
                GRU(hp['units1'], activation='tanh', return_sequences=True,
                    input_shape=input_shape, kernel_regularizer=l2(reg)),
                BatchNormalization(),
                Dropout(hp['dropout']),
                GRU(hp['units2'], activation='tanh', kernel_regularizer=l2(reg)),
                BatchNormalization(),
                Dropout(hp['dropout']),
                Dense(32, activation='relu', kernel_regularizer=l2(reg)),
                Dropout(hp['dropout'] * 0.7),
                Dense(1, activation='sigmoid')
            ])
            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp['learning_rate']),
                          loss='binary_crossentropy', metrics=['accuracy'])
            return model

        def objective(trial):
            # Release Keras global state left by earlier trials; otherwise
            # memory grows with every model built during the study.
            tf.keras.backend.clear_session()
            hp = {
                'units1': trial.suggest_int('units1', 64, 256, step=32),
                'units2': trial.suggest_int('units2', 32, 128, step=32),
                'dropout': trial.suggest_float('dropout', 0.2, 0.5),
                'l2_reg': trial.suggest_float('l2_reg', 0.001, 0.1, log=True),
                'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
            }
            model = build_model(hp)
            early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
            model.fit(X_train, y_train, validation_data=(X_val, y_val),
                      epochs=50, batch_size=32, callbacks=[early_stop], verbose=0)
            _, accuracy = model.evaluate(X_val, y_val, verbose=0)
            return accuracy

        study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
        study.optimize(objective, n_trials=20, show_progress_bar=False)

        model = build_model(study.best_params)
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                  epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0)
        return model
    
    @staticmethod
    def stacked_lstm(X_train, y_train, X_val, y_val, input_shape):
        """Tune and train a three-layer stacked LSTM binary classifier.

        Optuna (TPE sampler, seed 42) runs 20 trials, each trained for up to
        50 epochs and scored by accuracy on ``(X_val, y_val)``. The best
        hyperparameters are then used to train the returned model for up to
        100 epochs with early stopping and learning-rate reduction.

        Args:
            X_train, y_train: Training inputs and binary targets.
            X_val, y_val: Validation data used for early stopping,
                learning-rate scheduling and trial scoring.
            input_shape: Forwarded to the first layer as ``input_shape``
                (presumably ``(timesteps, features)`` -- confirm with caller).

        Returns:
            The fitted Keras model built from the best trial's hyperparameters.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def build_model(hp):
            # Single definition of the architecture so the tuned network and
            # the final network cannot drift apart.
            reg = hp['l2_reg']
            model = Sequential([
                LSTM(hp['units1'], return_sequences=True, input_shape=input_shape,
                     kernel_regularizer=l2(reg)),
                BatchNormalization(),
                Dropout(hp['dropout']),
                LSTM(hp['units2'], return_sequences=True, kernel_regularizer=l2(reg)),
                BatchNormalization(),
                Dropout(hp['dropout']),
                LSTM(hp['units3'], kernel_regularizer=l2(reg)),
                BatchNormalization(),
                Dropout(hp['dropout']),
                Dense(32, activation='relu', kernel_regularizer=l2(reg)),
                Dropout(hp['dropout'] * 0.7),
                Dense(1, activation='sigmoid')
            ])
            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp['learning_rate']),
                          loss='binary_crossentropy', metrics=['accuracy'])
            return model

        def objective(trial):
            # Release Keras global state left by earlier trials; otherwise
            # memory grows with every model built during the study.
            tf.keras.backend.clear_session()
            hp = {
                'units1': trial.suggest_int('units1', 64, 256, step=32),
                'units2': trial.suggest_int('units2', 48, 128, step=16),
                'units3': trial.suggest_int('units3', 32, 96, step=16),
                'dropout': trial.suggest_float('dropout', 0.2, 0.5),
                'l2_reg': trial.suggest_float('l2_reg', 0.001, 0.1, log=True),
                'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
            }
            model = build_model(hp)
            early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
            model.fit(X_train, y_train, validation_data=(X_val, y_val),
                      epochs=50, batch_size=32, callbacks=[early_stop], verbose=0)
            _, accuracy = model.evaluate(X_val, y_val, verbose=0)
            return accuracy

        study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
        study.optimize(objective, n_trials=20, show_progress_bar=False)

        model = build_model(study.best_params)
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                  epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0)
        return model
    
    @staticmethod
    def cnn_lstm(X_train, y_train, X_val, y_val, input_shape):
        """Tune and train a Conv1D + LSTM binary classifier.

        The network is two Conv1D blocks (the first followed by max pooling of
        size 2), two LSTM layers and a dense head. Optuna (TPE sampler,
        seed 42) runs 15 trials, each trained for up to 50 epochs and scored
        by accuracy on ``(X_val, y_val)``. The best hyperparameters are then
        used to train the returned model for up to 100 epochs with early
        stopping and learning-rate reduction.

        Args:
            X_train, y_train: Training inputs and binary targets.
            X_val, y_val: Validation data used for early stopping,
                learning-rate scheduling and trial scoring.
            input_shape: Forwarded to the first Conv1D layer as ``input_shape``
                (presumably ``(timesteps, features)`` -- confirm with caller).

        Returns:
            The fitted Keras model built from the best trial's hyperparameters.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def build_model(hp):
            # Single definition of the architecture so the tuned network and
            # the final network cannot drift apart.
            reg = hp['l2_reg']
            model = Sequential([
                Conv1D(hp['conv_filters1'], 3, activation='relu', padding='same',
                       input_shape=input_shape, kernel_regularizer=l2(reg)),
                BatchNormalization(),
                MaxPooling1D(2),
                Dropout(hp['dropout_conv']),
                Conv1D(hp['conv_filters2'], 3, activation='relu', padding='same',
                       kernel_regularizer=l2(reg)),
                BatchNormalization(),
                Dropout(hp['dropout_conv']),
                LSTM(hp['lstm_units1'], return_sequences=True, kernel_regularizer=l2(reg)),
                BatchNormalization(),
                Dropout(hp['dropout_lstm']),
                LSTM(hp['lstm_units2'], kernel_regularizer=l2(reg)),
                BatchNormalization(),
                Dropout(hp['dropout_lstm']),
                Dense(32, activation='relu', kernel_regularizer=l2(reg)),
                Dropout(hp['dropout_lstm'] * 0.7),
                Dense(1, activation='sigmoid')
            ])
            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp['learning_rate']),
                          loss='binary_crossentropy', metrics=['accuracy'])
            return model

        def objective(trial):
            # Release Keras global state left by earlier trials; otherwise
            # memory grows with every model built during the study.
            tf.keras.backend.clear_session()
            hp = {
                'conv_filters1': trial.suggest_int('conv_filters1', 32, 128, step=32),
                'conv_filters2': trial.suggest_int('conv_filters2', 16, 64, step=16),
                'lstm_units1': trial.suggest_int('lstm_units1', 64, 192, step=32),
                'lstm_units2': trial.suggest_int('lstm_units2', 32, 96, step=32),
                'dropout_conv': trial.suggest_float('dropout_conv', 0.1, 0.3),
                'dropout_lstm': trial.suggest_float('dropout_lstm', 0.2, 0.5),
                'l2_reg': trial.suggest_float('l2_reg', 0.001, 0.1, log=True),
                'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
            }
            model = build_model(hp)
            early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
            model.fit(X_train, y_train, validation_data=(X_val, y_val),
                      epochs=50, batch_size=32, callbacks=[early_stop], verbose=0)
            _, accuracy = model.evaluate(X_val, y_val, verbose=0)
            return accuracy

        study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
        study.optimize(objective, n_trials=15, show_progress_bar=False)

        model = build_model(study.best_params)
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                  epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0)
        return model
    
    @staticmethod
    def cnn_gru(X_train, y_train, X_val, y_val, input_shape):
        """Train a fixed Conv1D + GRU binary classifier and return it.

        Two Conv1D blocks (64 and 32 filters, the first followed by max
        pooling of size 2) feed two GRU layers (128 and 64 units) and a dense
        head with a sigmoid output. No hyperparameter search is performed.
        Training runs for up to 100 epochs with early stopping and
        learning-rate reduction, both monitoring ``val_loss``.
        """
        net = Sequential()
        # Convolutional front end.
        net.add(Conv1D(64, 3, activation='relu', padding='same',
                       input_shape=input_shape, kernel_regularizer=l2(0.01)))
        net.add(BatchNormalization())
        net.add(MaxPooling1D(2))
        net.add(Dropout(0.2))
        net.add(Conv1D(32, 3, activation='relu', padding='same', kernel_regularizer=l2(0.01)))
        net.add(BatchNormalization())
        net.add(Dropout(0.2))
        # Recurrent stack.
        net.add(GRU(128, return_sequences=True, kernel_regularizer=l2(0.01)))
        net.add(BatchNormalization())
        net.add(Dropout(0.3))
        net.add(GRU(64, kernel_regularizer=l2(0.01)))
        net.add(BatchNormalization())
        net.add(Dropout(0.3))
        # Classification head.
        net.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
        net.add(Dropout(0.2))
        net.add(Dense(1, activation='sigmoid'))

        net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        net.fit(X_train, y_train, validation_data=(X_val, y_val),
                epochs=100, batch_size=32, callbacks=training_callbacks, verbose=0)
        return net
    
    @staticmethod
    def cnn_bilstm(X_train, y_train, X_val, y_val, input_shape):
        """Train a fixed Conv1D + bidirectional LSTM binary classifier.

        One Conv1D block (64 filters, max pooling of size 2) feeds two
        bidirectional LSTM layers (128 and 64 units) and a dense head with a
        sigmoid output. No hyperparameter search is performed. Training runs
        for up to 100 epochs with early stopping and learning-rate reduction,
        both monitoring ``val_loss``.
        """
        net = Sequential()
        # Convolutional front end.
        net.add(Conv1D(64, 3, activation='relu', padding='same',
                       input_shape=input_shape, kernel_regularizer=l2(0.01)))
        net.add(BatchNormalization())
        net.add(MaxPooling1D(2))
        net.add(Dropout(0.2))
        # Bidirectional recurrent stack.
        net.add(Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))))
        net.add(BatchNormalization())
        net.add(Dropout(0.3))
        net.add(Bidirectional(LSTM(64, kernel_regularizer=l2(0.01))))
        net.add(BatchNormalization())
        net.add(Dropout(0.3))
        # Classification head.
        net.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
        net.add(Dropout(0.2))
        net.add(Dense(1, activation='sigmoid'))

        net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        net.fit(X_train, y_train, validation_data=(X_val, y_val),
                epochs=100, batch_size=32, callbacks=training_callbacks, verbose=0)
        return net
    
    @staticmethod
    def lstm_attention(X_train, y_train, X_val, y_val, input_shape):
        """Train a fixed LSTM network with a self-attention residual branch.

        Two sequence-returning LSTM layers (128 and 64 units) produce a
        sequence that is passed to an ``Attention`` layer as both query and
        value. The attention output is added back to the LSTM sequence,
        average-pooled over time and fed to a dense head with a sigmoid
        output. Training runs for up to 100 epochs with early stopping and
        learning-rate reduction, both monitoring ``val_loss``.
        """
        source = Input(shape=input_shape)

        # Recurrent encoder; both layers keep the time axis for attention.
        seq = source
        for width in (128, 64):
            seq = LSTM(width, return_sequences=True, kernel_regularizer=l2(0.01))(seq)
            seq = BatchNormalization()(seq)
            seq = Dropout(0.3)(seq)

        # Self-attention with a residual connection.
        attended = Attention()([seq, seq])
        merged = Add()([seq, attended])
        summary = GlobalAveragePooling1D()(merged)

        # Classification head.
        head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(summary)
        head = BatchNormalization()(head)
        head = Dropout(0.3)(head)
        prediction = Dense(1, activation='sigmoid')(head)

        net = Model(inputs=source, outputs=prediction)
        net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        net.fit(X_train, y_train, validation_data=(X_val, y_val),
                epochs=100, batch_size=32, callbacks=training_callbacks, verbose=0)
        return net
    
    @staticmethod
    def transformer(X_train, y_train, X_val, y_val, input_shape):
        """Train a single-block Transformer-encoder binary classifier.

        The block is multi-head self-attention (8 heads, key_dim 64) and a
        two-layer feed-forward network, each wrapped in a residual connection
        followed by layer normalization. ``input_shape[1]`` is used as the
        feed-forward output width so that it can be added back to the block
        input. The sequence is then average-pooled and classified by a dense
        head with a sigmoid output. Training runs for up to 50 epochs with
        early stopping (patience 15) and learning-rate reduction.
        """
        source = Input(shape=input_shape)

        # Self-attention sub-layer with residual + layer norm.
        attended = MultiHeadAttention(num_heads=8, key_dim=64, dropout=0.1)(source, source)
        attended = Dropout(0.1)(attended)
        encoded = LayerNormalization(epsilon=1e-6)(source + attended)

        # Position-wise feed-forward sub-layer with residual + layer norm.
        hidden = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(encoded)
        hidden = Dropout(0.1)(hidden)
        hidden = Dense(input_shape[1], kernel_regularizer=l2(0.01))(hidden)
        encoded = LayerNormalization(epsilon=1e-6)(encoded + hidden)

        # Classification head.
        head = GlobalAveragePooling1D()(encoded)
        head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(head)
        head = BatchNormalization()(head)
        head = Dropout(0.3)(head)
        prediction = Dense(1, activation='sigmoid')(head)

        net = Model(inputs=source, outputs=prediction)
        net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        net.fit(X_train, y_train, validation_data=(X_val, y_val),
                epochs=50, batch_size=32, callbacks=training_callbacks, verbose=0)
        return net
    
    @staticmethod
    def tcn(X_train, y_train, X_val, y_val, input_shape):
        """Train a dilated causal-convolution (TCN-style) binary classifier.

        Four Conv1D blocks with 64 filters, kernel size 3 and dilation rates
        1, 2, 4 and 8 are stacked. A block's output is added to its input only
        when the input already has 64 channels; otherwise the block output
        simply replaces the input. The result is average-pooled over time and
        classified by a dense head with a sigmoid output. Training runs for up
        to 100 epochs with early stopping and learning-rate reduction.
        """
        source = Input(shape=input_shape)

        features = source
        for rate in (1, 2, 4, 8):
            branch = Conv1D(64, 3, padding='causal', dilation_rate=rate,
                            activation='relu', kernel_regularizer=l2(0.01))(features)
            branch = BatchNormalization()(branch)
            branch = Dropout(0.2)(branch)
            # A residual sum needs matching channel counts.
            if features.shape[-1] == 64:
                features = Add()([features, branch])
            else:
                features = branch

        head = GlobalAveragePooling1D()(features)
        head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(head)
        head = BatchNormalization()(head)
        head = Dropout(0.3)(head)
        prediction = Dense(1, activation='sigmoid')(head)

        net = Model(inputs=source, outputs=prediction)
        net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        net.fit(X_train, y_train, validation_data=(X_val, y_val),
                epochs=100, batch_size=32, callbacks=training_callbacks, verbose=0)
        return net
    
    @staticmethod
    def dtw_lstm(X_train, y_train, X_val, y_val, input_shape):
        """Train a fixed three-layer LSTM binary classifier and return it.

        The stack is LSTM(128) -> LSTM(96) -> LSTM(64), each followed by batch
        normalization and dropout, then a dense head with a sigmoid output.
        Despite the method name, no dynamic-time-warping step is performed in
        this method. Training runs for up to 100 epochs with early stopping
        and learning-rate reduction, both monitoring ``val_loss``.
        """
        net = Sequential()
        # Recurrent stack; only the last LSTM collapses the time axis.
        net.add(LSTM(128, return_sequences=True, input_shape=input_shape,
                     kernel_regularizer=l2(0.01)))
        net.add(BatchNormalization())
        net.add(Dropout(0.3))
        net.add(LSTM(96, return_sequences=True, kernel_regularizer=l2(0.01)))
        net.add(BatchNormalization())
        net.add(Dropout(0.3))
        net.add(LSTM(64, kernel_regularizer=l2(0.01)))
        net.add(BatchNormalization())
        net.add(Dropout(0.3))
        # Classification head.
        net.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
        net.add(Dropout(0.2))
        net.add(Dense(1, activation='sigmoid'))

        net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        net.fit(X_train, y_train, validation_data=(X_val, y_val),
                epochs=100, batch_size=32, callbacks=training_callbacks, verbose=0)
        return net


    @staticmethod
    def tabnet(X_train, y_train, X_val, y_val):
        """Tune a TabNetClassifier with Optuna and return the refit best model.

        Twenty TPE trials (seed 42) are scored by accuracy on
        ``(X_val, y_val)``. The returned classifier is trained with the best
        trial's hyperparameters, using the validation set as ``eval_set`` with
        a patience of 20 epochs.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def train(hp):
            # Only the keys in `hp` are tuned; every other setting is fixed.
            clf = TabNetClassifier(
                n_d=hp['n_d'],
                n_a=hp['n_a'],
                n_steps=hp['n_steps'],
                gamma=hp['gamma'],
                lambda_sparse=hp['lambda_sparse'],
                momentum=hp['momentum'],
                optimizer_params=dict(lr=hp['lr']),
                mask_type='entmax',
                n_independent=2,
                n_shared=2,
                scheduler_params={"step_size": 50, "gamma": 0.9},
                scheduler_fn=torch.optim.lr_scheduler.StepLR,
                optimizer_fn=torch.optim.Adam,
                verbose=0,
                seed=42,
            )
            clf.fit(X_train, y_train, eval_set=[(X_val, y_val)],
                    max_epochs=100, patience=20, batch_size=256, virtual_batch_size=128)
            return clf

        def objective(trial):
            # Keyword arguments are evaluated left to right, so the suggest
            # calls happen in the order written here.
            hp = dict(
                n_d=trial.suggest_int('n_d', 32, 128, step=32),
                n_a=trial.suggest_int('n_a', 32, 128, step=32),
                n_steps=trial.suggest_int('n_steps', 3, 7),
                gamma=trial.suggest_float('gamma', 1.0, 2.0),
                lambda_sparse=trial.suggest_float('lambda_sparse', 1e-5, 1e-3, log=True),
                momentum=trial.suggest_float('momentum', 0.1, 0.5),
                lr=trial.suggest_float('lr', 1e-3, 5e-2, log=True),
            )
            val_pred = train(hp).predict(X_val)
            hits = (val_pred == y_val).sum()
            return hits / len(y_val)

        sampler = optuna.samplers.TPESampler(seed=42)
        study = optuna.create_study(direction='maximize', sampler=sampler)
        study.optimize(objective, n_trials=20, show_progress_bar=False)

        return train(study.best_params)

    @staticmethod
    def informer(X_train, y_train, X_val, y_val, input_shape):
        """Tune with Optuna, then train, an attention + Conv1D binary classifier.

        The network has two encoder stages. Each stage applies self-attention
        with a residual connection and layer normalisation, a kernel-size-1
        Conv1D that keeps ``input_shape[1]`` channels, and a size-2 max-pool
        along the sequence axis. The globally averaged features feed a single
        sigmoid unit.

        Args:
            X_train, y_train: Training inputs and targets passed to ``fit``.
            X_val, y_val: Validation data. It scores every Optuna trial and
                also drives early stopping / LR reduction in the final fit.
            input_shape: Per-sample shape handed to ``Input``;
                ``input_shape[1]`` is reused as the Conv1D filter count.

        Returns:
            The compiled Keras ``Model`` refitted with the best trial's
            hyperparameters.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def build_network(hp):
            # One builder for both the search and the final fit, so the tuned
            # architecture and the returned architecture cannot drift apart.
            net_input = Input(shape=input_shape)
            hidden = net_input
            for _ in range(2):
                attended = MultiHeadAttention(num_heads=hp['num_heads'], key_dim=hp['key_dim'],
                                              dropout=hp['dropout'])(hidden, hidden)
                attended = Dropout(hp['dropout'])(attended)
                hidden = LayerNormalization(epsilon=1e-6)(hidden + attended)
                hidden = Conv1D(input_shape[1], 1, activation='relu',
                                kernel_regularizer=l2(hp['l2_reg']))(hidden)
                hidden = MaxPooling1D(2, padding='same')(hidden)

            hidden = GlobalAveragePooling1D()(hidden)
            hidden = Dense(64, activation='relu', kernel_regularizer=l2(hp['l2_reg']))(hidden)
            hidden = BatchNormalization()(hidden)
            # The head uses a heavier dropout than the encoder stages.
            hidden = Dropout(hp['dropout'] + 0.2)(hidden)
            probability = Dense(1, activation='sigmoid')(hidden)

            network = Model(inputs=net_input, outputs=probability)
            network.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp['learning_rate']),
                            loss='binary_crossentropy', metrics=['accuracy'])
            return network

        def objective(trial):
            # Suggest order is kept fixed: the seeded TPE sampler depends on it.
            hp = {
                'num_heads': trial.suggest_int('num_heads', 2, 8, step=2),
                'key_dim': trial.suggest_int('key_dim', 16, 64, step=16),
                'dropout': trial.suggest_float('dropout', 0.1, 0.3),
                'l2_reg': trial.suggest_float('l2_reg', 0.001, 0.1, log=True),
                'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
            }
            candidate = build_network(hp)
            stopper = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
            candidate.fit(X_train, y_train, validation_data=(X_val, y_val),
                          epochs=50, batch_size=32, callbacks=[stopper], verbose=0)
            _, val_accuracy = candidate.evaluate(X_val, y_val, verbose=0)
            return val_accuracy

        study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
        study.optimize(objective, n_trials=15, show_progress_bar=False)

        # Final fit: longer schedule and more patience than the search trials.
        model = build_network(study.best_params)
        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def nbeats(X_train, y_train, X_val, y_val, input_shape):
        """Train a fully connected binary classifier on the flattened input.

        The stack is Flatten -> three Dense/BatchNorm/Dropout(0.3) groups of
        width 256, 256 and 128 -> Dense(64) with Dropout(0.2) -> sigmoid unit.
        Every hidden Dense layer carries an L2(0.01) kernel penalty.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data monitored via ``val_loss``.
            input_shape: Per-sample shape given to the ``Flatten`` input layer.

        Returns:
            The fitted Keras ``Sequential`` model. Early stopping restores the
            weights of the best ``val_loss`` epoch.
        """
        # Layers are instantiated first and wrapped afterwards.
        stack = [Flatten(input_shape=input_shape)]
        for width in (256, 256, 128):
            stack.extend([
                Dense(width, activation='relu', kernel_regularizer=l2(0.01)),
                BatchNormalization(),
                Dropout(0.3),
            ])
        stack.extend([
            Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid'),
        ])

        model = Sequential(stack)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def temporal_fusion_transformer(X_train, y_train, X_val, y_val, input_shape):
        """Train a gated-input LSTM + self-attention binary classifier.

        A softmax over every flattened ``input_shape[0] * input_shape[1]``
        cell yields weights that are reshaped back to ``input_shape`` and
        multiplied element-wise with the raw input. The weighted input passes
        through an LSTM, a residual self-attention block, global average
        pooling and a small dense head ending in a sigmoid unit.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data monitored via ``val_loss``.
            input_shape: Two-element per-sample shape given to ``Input``.

        Returns:
            The fitted Keras ``Model``.
        """
        weight_decay = 0.01
        n_cells = input_shape[0] * input_shape[1]

        net_input = Input(shape=input_shape)

        # Input weighting: one softmax distribution across all cells at once.
        flat = Flatten()(net_input)
        cell_weights = Dense(n_cells, activation='softmax',
                             kernel_regularizer=l2(weight_decay))(flat)
        cell_weights = Reshape(input_shape)(cell_weights)
        weighted_input = Multiply()([net_input, cell_weights])

        # Recurrent encoder.
        seq = LSTM(128, return_sequences=True, kernel_regularizer=l2(weight_decay))(weighted_input)
        seq = BatchNormalization()(seq)
        seq = Dropout(0.3)(seq)

        # Residual self-attention over the LSTM outputs.
        attended = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(seq, seq)
        seq = Add()([seq, attended])
        seq = LayerNormalization(epsilon=1e-6)(seq)

        # Classification head.
        pooled = GlobalAveragePooling1D()(seq)
        pooled = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(pooled)
        pooled = Dropout(0.3)(pooled)
        probability = Dense(1, activation='sigmoid')(pooled)

        model = Model(inputs=net_input, outputs=probability)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def performer(X_train, y_train, X_val, y_val, input_shape):
        """Train a two-block Transformer-encoder binary classifier.

        The input is projected to 128 channels, then passed through two
        post-norm encoder blocks. Each block is Keras ``MultiHeadAttention``
        followed by a 256 -> 128 GELU feed-forward, both with residual
        connections. Global average pooling and a dense head produce a
        sigmoid output.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data monitored via ``val_loss``.
            input_shape: Per-sample shape given to ``Input``.

        Returns:
            The fitted Keras ``Model``.
        """
        model_width = 128       # channel count kept on the residual stream
        hidden_width = 256      # feed-forward expansion width
        weight_decay = 0.01

        net_input = Input(shape=input_shape)
        stream = Dense(model_width, kernel_regularizer=l2(weight_decay))(net_input)
        stream = LayerNormalization(epsilon=1e-6)(stream)

        for _ in range(2):
            # Attention sub-layer with residual connection.
            attended = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(stream, stream)
            attended = Dropout(0.1)(attended)
            stream = LayerNormalization(epsilon=1e-6)(stream + attended)

            # Feed-forward sub-layer; projects back to model_width for the residual add.
            projected = Dense(hidden_width, activation='gelu',
                              kernel_regularizer=l2(weight_decay))(stream)
            projected = Dropout(0.1)(projected)
            projected = Dense(model_width, kernel_regularizer=l2(weight_decay))(projected)
            stream = LayerNormalization(epsilon=1e-6)(stream + projected)

        pooled = GlobalAveragePooling1D()(stream)
        pooled = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(pooled)
        pooled = BatchNormalization()(pooled)
        pooled = Dropout(0.3)(pooled)
        probability = Dense(1, activation='sigmoid')(pooled)

        model = Model(inputs=net_input, outputs=probability)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def patchtst(X_train, y_train, X_val, y_val, input_shape, patch_len=16, stride=8):
        """Train a patch-based Transformer-encoder binary classifier.

        The sequence axis is cut into windows of ``patch_len`` steps taken
        every ``stride`` steps. Each window is flattened and embedded to 128
        channels; the embeddings form the token sequence for two post-norm
        encoder blocks (self-attention + GELU feed-forward). Global average
        pooling and a dense head produce a sigmoid output.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data monitored via ``val_loss``.
            input_shape: Per-sample shape given to ``Input``;
                ``input_shape[0]`` is the axis that is patched.
            patch_len: Window length. Clamped to ``input_shape[0]`` so that a
                short sequence yields one full-length patch instead of none.
            stride: Step between consecutive window starts.

        Returns:
            The fitted Keras ``Model``.
        """
        seq_len = input_shape[0]
        # With patch_len > seq_len the range below is empty and patches[0]
        # would raise IndexError; fall back to a single whole-window patch.
        patch_len = min(patch_len, seq_len)

        def _slice_window(z, start, stop):
            # Bounds arrive as explicit arguments. A lambda closing over the
            # loop variable is re-evaluated when Keras re-runs the layer, at
            # which point the variable holds its final value and every patch
            # silently becomes the last window.
            return z[:, start:stop, :]

        inputs = Input(shape=input_shape)
        x = inputs
        patches = []
        for i in range(0, seq_len - patch_len + 1, stride):
            patch = Lambda(_slice_window,
                           arguments={'start': i, 'stop': i + patch_len})(x)
            patch = Flatten()(patch)
            patch = Dense(128, kernel_regularizer=l2(0.01))(patch)
            patches.append(patch)

        # Assemble the token axis: (batch, n_patches, 128).
        if len(patches) > 1:
            x = tf.stack(patches, axis=1)
        else:
            x = tf.expand_dims(patches[0], axis=1)

        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)

            # Feed-forward projects back to 128 so the residual add is valid.
            ff = Dense(256, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(128, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)

        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0)
        return model

    @staticmethod
    def autoformer(X_train, y_train, X_val, y_val, input_shape):
        """Train a trend/seasonal-split attention binary classifier.

        A 25-step moving average (stride 1, 'same' padding) of the input is
        taken as the trend; the input minus that trend is the seasonal part.
        Only the seasonal part goes through two post-norm encoder blocks. Both
        parts are then globally average-pooled, concatenated and fed to a
        dense head with a sigmoid output.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data monitored via ``val_loss``.
            input_shape: Per-sample shape given to ``Input``;
                ``input_shape[1]`` is the width the feed-forward projects back to.

        Returns:
            The fitted Keras ``Model``.
        """
        weight_decay = 0.01
        n_channels = input_shape[1]

        net_input = Input(shape=input_shape)
        series = net_input

        # Series decomposition.
        trend_part = tf.keras.layers.AveragePooling1D(pool_size=25, strides=1, padding='same')(series)
        seasonal_part = tf.subtract(series, trend_part)

        stream = seasonal_part
        for _ in range(2):
            attended = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(stream, stream)
            attended = Dropout(0.1)(attended)
            stream = LayerNormalization(epsilon=1e-6)(stream + attended)

            # Project back to the input channel count for the residual add.
            projected = Dense(128, activation='gelu', kernel_regularizer=l2(weight_decay))(stream)
            projected = Dropout(0.1)(projected)
            projected = Dense(n_channels, kernel_regularizer=l2(weight_decay))(projected)
            stream = LayerNormalization(epsilon=1e-6)(stream + projected)

        seasonal_summary = GlobalAveragePooling1D()(stream)
        trend_summary = GlobalAveragePooling1D()(trend_part)
        merged = Concatenate()([seasonal_summary, trend_summary])

        merged = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(merged)
        merged = BatchNormalization()(merged)
        merged = Dropout(0.3)(merged)
        probability = Dense(1, activation='sigmoid')(merged)

        model = Model(inputs=net_input, outputs=probability)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def itransformer(X_train, y_train, X_val, y_val, input_shape):
        """Train an axis-swapped Transformer-encoder binary classifier.

        The last two input axes are transposed before encoding, so attention
        runs across what was the final input axis and the Dense embedding
        consumes what was the sequence axis. Two post-norm encoder blocks of
        width 64 follow, then global average pooling and a dense head with a
        sigmoid output.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data monitored via ``val_loss``.
            input_shape: Per-sample shape given to ``Input`` (two axes, since
                the transpose uses ``perm=[0, 2, 1]``).

        Returns:
            The fitted Keras ``Model``.
        """
        token_width = 64
        weight_decay = 0.01

        net_input = Input(shape=input_shape)
        # Swap the two per-sample axes: (batch, a, b) -> (batch, b, a).
        tokens = tf.transpose(net_input, perm=[0, 2, 1])
        tokens = Dense(token_width, kernel_regularizer=l2(weight_decay))(tokens)
        tokens = LayerNormalization(epsilon=1e-6)(tokens)

        for _ in range(2):
            attended = MultiHeadAttention(num_heads=4, key_dim=16, dropout=0.1)(tokens, tokens)
            attended = Dropout(0.1)(attended)
            tokens = LayerNormalization(epsilon=1e-6)(tokens + attended)

            projected = Dense(128, activation='gelu', kernel_regularizer=l2(weight_decay))(tokens)
            projected = Dropout(0.1)(projected)
            projected = Dense(token_width, kernel_regularizer=l2(weight_decay))(projected)
            tokens = LayerNormalization(epsilon=1e-6)(tokens + projected)

        pooled = GlobalAveragePooling1D()(tokens)
        pooled = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(pooled)
        pooled = BatchNormalization()(pooled)
        pooled = Dropout(0.3)(pooled)
        probability = Dense(1, activation='sigmoid')(pooled)

        model = Model(inputs=net_input, outputs=probability)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def ethervoyant(X_train, y_train, X_val, y_val, input_shape):
        """Train a CNN + BiLSTM + attention binary classifier.

        Two parallel Conv1D branches (kernel sizes 3 and 5, 64 filters each)
        read the input; their outputs are concatenated and max-pooled. A
        bidirectional LSTM, a residual self-attention block and a second
        bidirectional LSTM follow, ending in a dense head with a sigmoid unit.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data monitored via ``val_loss``.
            input_shape: Per-sample shape given to ``Input``.

        Returns:
            The fitted Keras ``Model``.
        """
        weight_decay = 0.01
        net_input = Input(shape=input_shape)

        # Multi-scale convolutional front end; branch order is 3 then 5.
        branches = []
        for kernel_size in (3, 5):
            branch = Conv1D(64, kernel_size, activation='relu', padding='same',
                            kernel_regularizer=l2(weight_decay))(net_input)
            branch = BatchNormalization()(branch)
            branch = Dropout(0.2)(branch)
            branches.append(branch)

        seq = Concatenate()(branches)
        seq = MaxPooling1D(2)(seq)

        seq = Bidirectional(LSTM(128, return_sequences=True,
                                 kernel_regularizer=l2(weight_decay)))(seq)
        seq = BatchNormalization()(seq)
        seq = Dropout(0.3)(seq)

        # Residual self-attention over the BiLSTM outputs.
        attended = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(seq, seq)
        seq = Add()([seq, attended])
        seq = LayerNormalization(epsilon=1e-6)(seq)

        # Second BiLSTM returns only its final state, collapsing the sequence axis.
        summary = Bidirectional(LSTM(64, kernel_regularizer=l2(weight_decay)))(seq)
        summary = BatchNormalization()(summary)
        summary = Dropout(0.3)(summary)

        summary = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(summary)
        summary = Dropout(0.3)(summary)
        probability = Dense(1, activation='sigmoid')(summary)

        model = Model(inputs=net_input, outputs=probability)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def vmd_hybrid(X_train, y_train, X_val, y_val, input_shape):
        """Train a three-band decomposition + attention binary classifier.

        The input is embedded to 32 channels and split into three learned
        bands: a smoothed band (5-step moving average then Conv1D), a residual
        band (embedding minus the smoothed band, then Conv1D) and a remainder
        band (embedding minus both, then Conv1D). The concatenated 96-channel
        bands pass through two post-norm encoder blocks, global average
        pooling and a dense head with a sigmoid output.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data monitored via ``val_loss``.
            input_shape: Per-sample shape given to ``Input``.

        Returns:
            The fitted Keras ``Model``.
        """
        band_width = 32
        weight_decay = 0.01

        net_input = Input(shape=input_shape)
        embedded = Conv1D(band_width, 1, padding='same',
                          kernel_regularizer=l2(weight_decay))(net_input)
        embedded = BatchNormalization()(embedded)

        band_low = AveragePooling1D(pool_size=5, strides=1, padding='same')(embedded)
        band_low = Conv1D(band_width, 3, activation='relu', padding='same',
                          kernel_regularizer=l2(weight_decay))(band_low)

        band_mid = embedded - band_low
        band_mid = Conv1D(band_width, 3, activation='relu', padding='same',
                          kernel_regularizer=l2(weight_decay))(band_mid)

        band_high = embedded - band_low - band_mid
        band_high = Conv1D(band_width, 3, activation='relu', padding='same',
                           kernel_regularizer=l2(weight_decay))(band_high)

        stream = Concatenate()([band_low, band_mid, band_high])
        stream = BatchNormalization()(stream)
        stream = Dropout(0.2)(stream)

        for _ in range(2):
            attended = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(stream, stream)
            attended = Dropout(0.1)(attended)
            stream = LayerNormalization(epsilon=1e-6)(stream + attended)

            projected = Dense(128, activation='gelu', kernel_regularizer=l2(weight_decay))(stream)
            projected = Dropout(0.1)(projected)
            # 96 = 3 bands x 32 channels, matching the residual stream width.
            projected = Dense(96, kernel_regularizer=l2(weight_decay))(projected)
            stream = LayerNormalization(epsilon=1e-6)(stream + projected)

        pooled = GlobalAveragePooling1D()(stream)
        pooled = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(pooled)
        pooled = BatchNormalization()(pooled)
        pooled = Dropout(0.3)(pooled)
        probability = Dense(1, activation='sigmoid')(pooled)

        model = Model(inputs=net_input, outputs=probability)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def logistic_regression(X_train, y_train, X_val, y_val):
        """Grid-search an L2-penalised logistic regression; return the best fit.

        Searches ``C`` and the solver with 3-fold cross-validation on the
        training data, scored by accuracy.

        Args:
            X_train, y_train: Data used for the cross-validated search and the
                final refit.
            X_val, y_val: Accepted for signature parity with the other model
                builders; not used here.

        Returns:
            The ``LogisticRegression`` refitted on all of ``X_train`` with the
            best parameter combination.
        """
        from sklearn.model_selection import GridSearchCV
        param_grid = {
            'C': [0.01, 0.1, 1.0, 10.0],
            'penalty': ['l2'],
            'solver': ['lbfgs', 'liblinear']
        }
        # Parallelism is left to GridSearchCV. An estimator-level n_jobs is
        # ignored by liblinear (it only emits a warning on every fit) and
        # would just nest a second worker pool inside the search.
        model = LogisticRegression(max_iter=1000, random_state=42)
        grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy', n_jobs=-1, verbose=0)
        grid_search.fit(X_train, y_train)
        return grid_search.best_estimator_

    @staticmethod
    def naive_bayes(X_train, y_train, X_val, y_val):
        """Fit a Gaussian naive Bayes classifier with default settings.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Accepted for signature parity with the other model
                builders; not used here.

        Returns:
            The fitted ``GaussianNB`` instance.
        """
        classifier = GaussianNB()
        classifier.fit(X_train, y_train)
        return classifier

    @staticmethod
    def knn(X_train, y_train, X_val, y_val):
        """Grid-search a k-nearest-neighbours classifier; return the best fit.

        Searches the neighbour count, the vote weighting and the distance
        metric with 3-fold cross-validation on the training data, scored by
        accuracy.

        Args:
            X_train, y_train: Data used for the cross-validated search and the
                final refit.
            X_val, y_val: Accepted for signature parity with the other model
                builders; not used here.

        Returns:
            The ``KNeighborsClassifier`` refitted on all of ``X_train`` with
            the best parameter combination.
        """
        from sklearn.model_selection import GridSearchCV
        param_grid = {
            'n_neighbors': [3, 5, 7, 9, 11],
            'weights': ['uniform', 'distance'],
            # 'minkowski' is deliberately absent: with the default p=2 it is
            # the Euclidean metric again, so it only re-ran a third of the
            # grid and always tied with (and lost to) the earlier
            # 'euclidean' candidate.
            'metric': ['euclidean', 'manhattan']
        }
        model = KNeighborsClassifier(n_jobs=-1)
        grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy', n_jobs=-1, verbose=0)
        grid_search.fit(X_train, y_train)
        return grid_search.best_estimator_

    @staticmethod
    def adaboost(X_train, y_train, X_val, y_val):
        """Grid-search an AdaBoost (SAMME) classifier; return the best fit.

        Searches the number of estimators and the learning rate with 3-fold
        cross-validation on the training data, scored by accuracy.

        Args:
            X_train, y_train: Data used for the cross-validated search and the
                final refit.
            X_val, y_val: Accepted for signature parity with the other model
                builders; not used here.

        Returns:
            The ``AdaBoostClassifier`` refitted with the best parameters.
        """
        from sklearn.model_selection import GridSearchCV

        search_space = {
            'n_estimators': [50, 100, 200, 300],
            'learning_rate': [0.1, 0.5, 1.0, 1.5],
        }
        booster = AdaBoostClassifier(algorithm='SAMME', random_state=42)
        search = GridSearchCV(booster, search_space, cv=3, scoring='accuracy',
                              n_jobs=-1, verbose=0)
        search.fit(X_train, y_train)
        return search.best_estimator_

    @staticmethod
    def catboost(X_train, y_train, X_val, y_val):
        """Tune a CatBoost classifier with Optuna, then refit the best setup.

        Runs 30 trials with a seeded TPE sampler. Each trial is scored by its
        accuracy on the validation set, and the winning parameters are used
        for a final fit.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data. It is the early-stopping
                ``eval_set`` and the scoring set for every trial, and the
                ``eval_set`` of the final fit.

        Returns:
            The ``CatBoostClassifier`` fitted with the best trial's parameters.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def objective(trial):
            # Suggest order is kept fixed: the seeded TPE sampler depends on it.
            candidate_params = dict(
                iterations=trial.suggest_int('iterations', 100, 300),
                learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
                depth=trial.suggest_int('depth', 4, 10),
                l2_leaf_reg=trial.suggest_float('l2_leaf_reg', 1, 10),
                border_count=trial.suggest_int('border_count', 32, 255),
                random_seed=42,
                verbose=False,
            )
            candidate = CatBoostClassifier(**candidate_params)
            candidate.fit(X_train, y_train, eval_set=(X_val, y_val),
                          early_stopping_rounds=50, verbose=False)
            predicted = candidate.predict(X_val)
            n_correct = (predicted == y_val).sum()
            return n_correct / len(y_val)

        sampler = optuna.samplers.TPESampler(seed=42)
        study = optuna.create_study(direction='maximize', sampler=sampler)
        study.optimize(objective, n_trials=30, show_progress_bar=False)

        # best_params holds only the suggested values; re-add the fixed settings.
        model = CatBoostClassifier(**study.best_params, random_seed=42, verbose=False)
        model.fit(X_train, y_train, eval_set=(X_val, y_val),
                  early_stopping_rounds=50, verbose=False)
        return model


    @staticmethod
    def decision_tree(X_train, y_train, X_val, y_val):
        """Grid-search a decision-tree classifier; return the best fit.

        Searches depth, split/leaf size limits and the split criterion with
        3-fold cross-validation on the training data, scored by accuracy.

        Args:
            X_train, y_train: Data used for the cross-validated search and the
                final refit.
            X_val, y_val: Accepted for signature parity with the other model
                builders; not used here.

        Returns:
            The ``DecisionTreeClassifier`` refitted with the best parameters.
        """
        from sklearn.model_selection import GridSearchCV

        search_space = {
            'max_depth': [10, 15, 20, None],
            'min_samples_split': [10, 15, 20],
            'min_samples_leaf': [4, 6, 8],
            'criterion': ['gini', 'entropy'],
        }
        tree = DecisionTreeClassifier(random_state=42)
        search = GridSearchCV(tree, search_space, cv=3, scoring='accuracy',
                              n_jobs=-1, verbose=0)
        search.fit(X_train, y_train)
        return search.best_estimator_

    @staticmethod
    def extra_trees(X_train, y_train, X_val, y_val):
        """Grid-search an extra-trees classifier; return the best fit.

        Searches ensemble size, depth, split/leaf size limits and the feature
        subsampling rule with 3-fold cross-validation on the training data,
        scored by accuracy.

        Args:
            X_train, y_train: Data used for the cross-validated search and the
                final refit.
            X_val, y_val: Accepted for signature parity with the other model
                builders; not used here.

        Returns:
            The ``ExtraTreesClassifier`` refitted with the best parameters.
        """
        from sklearn.model_selection import GridSearchCV

        search_space = {
            'n_estimators': [100, 200, 300],
            'max_depth': [10, 15, 20],
            'min_samples_split': [10, 15, 20],
            'min_samples_leaf': [4, 6, 8],
            'max_features': ['sqrt', 'log2'],
        }
        forest = ExtraTreesClassifier(random_state=42, n_jobs=-1)
        search = GridSearchCV(forest, search_space, cv=3, scoring='accuracy',
                              n_jobs=-1, verbose=0)
        search.fit(X_train, y_train)
        return search.best_estimator_

    @staticmethod
    def bagging(X_train, y_train, X_val, y_val):
        """Grid-search a bagged decision-tree classifier; return the best fit.

        The base learner is a depth-10 decision tree. The search covers the
        ensemble size and the sample/feature fractions drawn per estimator,
        using 3-fold cross-validation on the training data scored by accuracy.

        Args:
            X_train, y_train: Data used for the cross-validated search and the
                final refit.
            X_val, y_val: Accepted for signature parity with the other model
                builders; not used here.

        Returns:
            The ``BaggingClassifier`` refitted with the best parameters.
        """
        from sklearn.model_selection import GridSearchCV

        search_space = {
            'n_estimators': [50, 100, 150],
            'max_samples': [0.6, 0.8, 1.0],
            'max_features': [0.6, 0.8, 1.0],
        }
        weak_learner = DecisionTreeClassifier(max_depth=10, random_state=42)
        ensemble = BaggingClassifier(estimator=weak_learner, random_state=42, n_jobs=-1)
        search = GridSearchCV(ensemble, search_space, cv=3, scoring='accuracy',
                              n_jobs=-1, verbose=0)
        search.fit(X_train, y_train)
        return search.best_estimator_

    @staticmethod
    def gradient_boosting(X_train, y_train, X_val, y_val):
        """Grid-search a gradient-boosting classifier; return the best fit.

        Searches ensemble size, learning rate, tree depth, row subsampling and
        the minimum split size (3 values each, 243 combinations) with 3-fold
        cross-validation on the training data, scored by accuracy.

        Args:
            X_train, y_train: Data used for the cross-validated search and the
                final refit.
            X_val, y_val: Accepted for signature parity with the other model
                builders; not used here.

        Returns:
            The ``GradientBoostingClassifier`` refitted with the best parameters.
        """
        from sklearn.model_selection import GridSearchCV

        search_space = {
            'n_estimators': [100, 200, 300],
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [5, 7, 9],
            'subsample': [0.7, 0.8, 0.9],
            'min_samples_split': [10, 15, 20],
        }
        booster = GradientBoostingClassifier(random_state=42)
        search = GridSearchCV(booster, search_space, cv=3, scoring='accuracy',
                              n_jobs=-1, verbose=0)
        search.fit(X_train, y_train)
        return search.best_estimator_

    @staticmethod
    def simple_rnn(X_train, y_train, X_val, y_val, input_shape):
        """Tune with Optuna, then train, a two-layer SimpleRNN binary classifier.

        The network stacks two tanh ``SimpleRNN`` layers (each followed by
        batch normalisation and dropout), a 32-unit dense layer and a sigmoid
        output. Layer widths, dropout, L2 strength and learning rate are
        searched over 15 trials scored by validation accuracy.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data. It scores every trial and also
                drives early stopping / LR reduction in the final fit.
            input_shape: Per-sample shape given to the first recurrent layer.

        Returns:
            The compiled Keras ``Sequential`` model refitted with the best
            trial's hyperparameters.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def build_network(hp):
            # One builder for both the search and the final fit, so the tuned
            # architecture and the returned architecture cannot drift apart.
            network = Sequential([
                SimpleRNN(hp['units1'], activation='tanh', return_sequences=True,
                          input_shape=input_shape, kernel_regularizer=l2(hp['l2_reg'])),
                BatchNormalization(),
                Dropout(hp['dropout']),
                SimpleRNN(hp['units2'], activation='tanh', kernel_regularizer=l2(hp['l2_reg'])),
                BatchNormalization(),
                Dropout(hp['dropout']),
                Dense(32, activation='relu', kernel_regularizer=l2(hp['l2_reg'])),
                # Lighter dropout just before the output unit.
                Dropout(hp['dropout'] * 0.7),
                Dense(1, activation='sigmoid'),
            ])
            network.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp['learning_rate']),
                            loss='binary_crossentropy', metrics=['accuracy'])
            return network

        def objective(trial):
            # Suggest order is kept fixed: the seeded TPE sampler depends on it.
            hp = {
                'units1': trial.suggest_int('units1', 64, 192, step=32),
                'units2': trial.suggest_int('units2', 32, 96, step=32),
                'dropout': trial.suggest_float('dropout', 0.2, 0.5),
                'l2_reg': trial.suggest_float('l2_reg', 0.001, 0.1, log=True),
                'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
            }
            candidate = build_network(hp)
            stopper = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
            candidate.fit(X_train, y_train, validation_data=(X_val, y_val),
                          epochs=50, batch_size=32, callbacks=[stopper], verbose=0)
            _, val_accuracy = candidate.evaluate(X_val, y_val, verbose=0)
            return val_accuracy

        study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
        study.optimize(objective, n_trials=15, show_progress_bar=False)

        # Final fit: longer schedule and more patience than the search trials.
        model = build_network(study.best_params)
        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def mlp(X_train, y_train, X_val, y_val):
        """Tune with Optuna, then train, a four-hidden-layer MLP binary classifier.

        Three tunable-width dense layers (each with batch normalisation and
        dropout) are followed by a fixed 32-unit dense layer and a sigmoid
        output. Widths, dropout, L2 strength and learning rate are searched
        over 15 trials scored by validation accuracy.

        Args:
            X_train, y_train: Training inputs and targets;
                ``X_train.shape[1]`` is used as the input dimension.
            X_val, y_val: Validation data. It scores every trial and also
                drives early stopping / LR reduction in the final fit.

        Returns:
            The compiled Keras ``Sequential`` model refitted with the best
            trial's hyperparameters.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def build_network(hp):
            # One builder for both the search and the final fit, so the tuned
            # architecture and the returned architecture cannot drift apart.
            n_features = X_train.shape[1]
            network = Sequential([
                Dense(hp['units1'], activation='relu', input_dim=n_features,
                      kernel_regularizer=l2(hp['l2_reg'])),
                BatchNormalization(),
                Dropout(hp['dropout']),
                Dense(hp['units2'], activation='relu', kernel_regularizer=l2(hp['l2_reg'])),
                BatchNormalization(),
                Dropout(hp['dropout']),
                Dense(hp['units3'], activation='relu', kernel_regularizer=l2(hp['l2_reg'])),
                BatchNormalization(),
                Dropout(hp['dropout']),
                Dense(32, activation='relu', kernel_regularizer=l2(hp['l2_reg'])),
                # Lighter dropout just before the output unit.
                Dropout(hp['dropout'] * 0.7),
                Dense(1, activation='sigmoid'),
            ])
            network.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp['learning_rate']),
                            loss='binary_crossentropy', metrics=['accuracy'])
            return network

        def objective(trial):
            # Suggest order is kept fixed: the seeded TPE sampler depends on it.
            hp = {
                'units1': trial.suggest_int('units1', 128, 512, step=128),
                'units2': trial.suggest_int('units2', 64, 256, step=64),
                'units3': trial.suggest_int('units3', 32, 128, step=32),
                'dropout': trial.suggest_float('dropout', 0.2, 0.5),
                'l2_reg': trial.suggest_float('l2_reg', 0.001, 0.1, log=True),
                'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
            }
            candidate = build_network(hp)
            stopper = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
            candidate.fit(X_train, y_train, validation_data=(X_val, y_val),
                          epochs=50, batch_size=32, callbacks=[stopper], verbose=0)
            _, val_accuracy = candidate.evaluate(X_val, y_val, verbose=0)
            return val_accuracy

        study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
        study.optimize(objective, n_trials=15, show_progress_bar=False)

        # Final fit: longer schedule and more patience than the search trials.
        model = build_network(study.best_params)
        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def emd_lstm(X_train, y_train, X_val, y_val, input_shape):
        """Train a two-band LSTM binary classifier.

        The input is split with a 5-step moving average (stride 1, 'same'
        padding) into a smoothed band and the residual ``inputs - smoothed``.
        Each band has its own 64-unit LSTM; the two outputs are concatenated
        and passed through two more LSTM layers and a dense head with a
        sigmoid output.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data monitored via ``val_loss``.
            input_shape: Per-sample shape given to ``Input``.

        Returns:
            The fitted Keras ``Model``.
        """
        inputs = Input(shape=input_shape)

        # The moving average is weightless, so it is computed once and shared
        # by both bands instead of being rebuilt for the residual.
        smoothed = tf.keras.layers.AveragePooling1D(pool_size=5, strides=1, padding='same')(inputs)
        low_freq = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(smoothed)

        high_freq = inputs - smoothed
        high_freq = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(high_freq)

        x = Concatenate()([low_freq, high_freq])
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        # Last recurrent layer returns only its final state.
        x = LSTM(64, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0)
        return model

    @staticmethod
    def hybrid_lstm_gru(X_train, y_train, X_val, y_val, input_shape):
        """Train an alternating LSTM/GRU stack as a binary classifier.

        The recurrent layers run LSTM(128) -> GRU(96) -> LSTM(64) -> GRU(32),
        each followed by batch normalisation and Dropout(0.3). Only the last
        GRU collapses the sequence axis. A 32-unit dense layer and a sigmoid
        unit form the head.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data monitored via ``val_loss``.
            input_shape: Per-sample shape given to the first LSTM layer.

        Returns:
            The fitted Keras ``Sequential`` model.
        """
        # Layers are instantiated first and wrapped afterwards.
        stack = [
            LSTM(128, return_sequences=True, input_shape=input_shape,
                 kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
        ]
        stack += [
            GRU(96, return_sequences=True, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
        ]
        stack += [
            LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
        ]
        stack += [
            GRU(32, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
        ]
        stack += [
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid'),
        ]

        model = Sequential(stack)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def parallel_cnn(X_train, y_train, X_val, y_val, input_shape):
        """Train a multi-kernel parallel Conv1D binary classifier.

        Three Conv1D branches (kernel sizes 3, 5 and 7, 64 filters each) read
        the same input. Each branch is batch-normalised, max-pooled by 2 and
        regularised with Dropout(0.2). The concatenated branches go through
        one more Conv1D(128), global average pooling and a dense head with a
        sigmoid output.

        Args:
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation data monitored via ``val_loss``.
            input_shape: Per-sample shape given to ``Input``.

        Returns:
            The fitted Keras ``Model``.
        """
        weight_decay = 0.01
        net_input = Input(shape=input_shape)

        # Branch order (3, 5, 7) fixes the channel order after concatenation.
        branches = []
        for kernel_size in (3, 5, 7):
            branch = Conv1D(64, kernel_size, activation='relu', padding='same',
                            kernel_regularizer=l2(weight_decay))(net_input)
            branch = BatchNormalization()(branch)
            branch = MaxPooling1D(2)(branch)
            branch = Dropout(0.2)(branch)
            branches.append(branch)

        fused = Concatenate()(branches)
        fused = Conv1D(128, 3, activation='relu', padding='same',
                       kernel_regularizer=l2(weight_decay))(fused)
        fused = BatchNormalization()(fused)
        fused = Dropout(0.3)(fused)

        pooled = GlobalAveragePooling1D()(fused)
        pooled = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(pooled)
        pooled = BatchNormalization()(pooled)
        pooled = Dropout(0.3)(pooled)
        probability = Dense(1, activation='sigmoid')(pooled)

        model = Model(inputs=net_input, outputs=probability)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100,
                  batch_size=32, callbacks=training_callbacks, verbose=0)
        return model

    @staticmethod
    def stacking_ensemble(X_train, y_train, X_val, y_val):
        """Tune and fit a RandomForest + XGBoost + LightGBM stacking classifier.

        Optuna runs 20 trials with a seeded TPE sampler. Each trial fits a
        stack (3-fold internal CV) on the training data and is scored by its
        accuracy on the validation data. The best parameters are then used to
        refit a stack with 5-fold internal CV on the training data only.

        Args:
            X_train, y_train: Data used to fit every candidate and the final
                model.
            X_val, y_val: Data used only to score the Optuna trials.

        Returns:
            The fitted ``StackingClassifier``.
        """
        import optuna
        optuna.logging.set_verbosity(optuna.logging.WARNING)

        def make_stack(params, cv_folds):
            # Assemble the three base learners and the logistic meta-learner.
            forest = RandomForestClassifier(
                n_estimators=params['rf_estimators'],
                max_depth=params['rf_depth'],
                random_state=42, n_jobs=-1)
            xgb_model = XGBClassifier(
                n_estimators=params['xgb_estimators'],
                max_depth=params['xgb_depth'],
                learning_rate=0.1, random_state=42)
            lgbm_model = LGBMClassifier(
                n_estimators=params['lgbm_estimators'],
                max_depth=params['lgbm_depth'],
                learning_rate=0.1, random_state=42, verbose=-1)
            blender = LogisticRegression(max_iter=1000, random_state=42)
            return StackingClassifier(
                estimators=[('rf', forest), ('xgb', xgb_model), ('lgbm', lgbm_model)],
                final_estimator=blender,
                cv=cv_folds, n_jobs=-1)

        def objective(trial):
            # The suggestion order is fixed so the seeded sampler reproduces
            # the same sequence of draws.
            sampled = {
                'rf_estimators': trial.suggest_int('rf_estimators', 50, 200),
                'rf_depth': trial.suggest_int('rf_depth', 5, 15),
                'xgb_estimators': trial.suggest_int('xgb_estimators', 50, 200),
                'xgb_depth': trial.suggest_int('xgb_depth', 3, 10),
                'lgbm_estimators': trial.suggest_int('lgbm_estimators', 50, 200),
                'lgbm_depth': trial.suggest_int('lgbm_depth', 3, 10),
            }
            candidate = make_stack(sampled, cv_folds=3)
            candidate.fit(X_train, y_train)
            val_pred = candidate.predict(X_val)
            return (val_pred == y_val).sum() / len(y_val)

        sampler = optuna.samplers.TPESampler(seed=42)
        study = optuna.create_study(direction='maximize', sampler=sampler)
        study.optimize(objective, n_trials=20, show_progress_bar=False)

        # Final refit uses the winning parameters with a 5-fold internal CV.
        final_model = make_stack(study.best_params, cv_folds=5)
        final_model.fit(X_train, y_train)
        return final_model

    @staticmethod
    def voting_hard(X_train, y_train, X_val, y_val):
        """Grid-search a hard-voting ensemble of RF, XGBoost and LightGBM.

        Only the RandomForest and XGBoost ``n_estimators`` / ``max_depth``
        values are searched (3-fold CV, accuracy scoring); LightGBM keeps the
        settings it is constructed with. ``X_val`` and ``y_val`` are accepted
        for a uniform builder signature but are not used here.

        Returns:
            The best ``VotingClassifier`` found by the grid search.
        """
        from sklearn.model_selection import GridSearchCV

        members = [
            ('rf', RandomForestClassifier(random_state=42, n_jobs=-1)),
            ('xgb', XGBClassifier(random_state=42)),
            ('lgbm', LGBMClassifier(random_state=42, verbose=-1)),
        ]
        ensemble = VotingClassifier(estimators=members, voting='hard', n_jobs=-1)

        # Keys follow the ``<estimator name>__<parameter>`` convention.
        search_space = {
            'rf__n_estimators': [50, 100, 150],
            'rf__max_depth': [10, 15, 20],
            'xgb__n_estimators': [50, 100, 150],
            'xgb__max_depth': [5, 7, 9],
        }
        search = GridSearchCV(
            ensemble, search_space,
            cv=3, scoring='accuracy', n_jobs=-1, verbose=0,
        )
        search.fit(X_train, y_train)
        return search.best_estimator_

    @staticmethod
    def voting_soft(X_train, y_train, X_val, y_val):
        """Grid-search a soft-voting ensemble of RF, XGBoost, LightGBM and LR.

        Only the RandomForest and XGBoost ``n_estimators`` / ``max_depth``
        values are searched (3-fold CV, accuracy scoring); LightGBM and
        LogisticRegression keep the settings they are constructed with.
        ``X_val`` and ``y_val`` are accepted for a uniform builder signature
        but are not used here.

        Returns:
            The best ``VotingClassifier`` found by the grid search.
        """
        from sklearn.model_selection import GridSearchCV

        members = [
            ('rf', RandomForestClassifier(random_state=42, n_jobs=-1)),
            ('xgb', XGBClassifier(random_state=42)),
            ('lgbm', LGBMClassifier(random_state=42, verbose=-1)),
            ('lr', LogisticRegression(max_iter=1000, random_state=42)),
        ]
        ensemble = VotingClassifier(estimators=members, voting='soft', n_jobs=-1)

        # Keys follow the ``<estimator name>__<parameter>`` convention.
        search_space = {
            'rf__n_estimators': [50, 100, 150],
            'rf__max_depth': [10, 15, 20],
            'xgb__n_estimators': [50, 100, 150],
            'xgb__max_depth': [5, 7, 9],
        }
        search = GridSearchCV(
            ensemble, search_space,
            cv=3, scoring='accuracy', n_jobs=-1, verbose=0,
        )
        search.fit(X_train, y_train)
        return search.best_estimator_

    @staticmethod
    def lstm_xgboost_hybrid(X_train, y_train, X_val, y_val, input_shape):
        """Build and train a two-layer LSTM binary classifier.

        NOTE(review): despite the name, nothing in this method trains or calls
        an XGBoost model. The LSTM feature tensor feeds a dense sigmoid head
        and the fitted network is returned directly.

        Args:
            X_train, y_train: Training inputs and targets passed to ``fit``.
            X_val, y_val: Validation data; its loss drives early stopping and
                the learning-rate schedule.
            input_shape: Shape handed to the ``Input`` layer.

        Returns:
            The fitted model.
        """
        net_input = Input(shape=input_shape)

        # Two recurrent stages, each LSTM -> BatchNorm -> Dropout(0.3); only
        # the first one keeps the full sequence.
        hidden = net_input
        for units, keep_sequence in ((128, True), (64, False)):
            hidden = LSTM(units, return_sequences=keep_sequence,
                          kernel_regularizer=l2(0.01))(hidden)
            hidden = BatchNormalization()(hidden)
            hidden = Dropout(0.3)(hidden)
        lstm_features = hidden

        head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(lstm_features)
        head = Dropout(0.2)(head)
        probability = Dense(1, activation='sigmoid')(head)

        model = Model(inputs=net_input, outputs=probability)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            batch_size=32,
            callbacks=training_callbacks,
            verbose=0,
        )
        return model

    @staticmethod
    def residual_lstm(X_train, y_train, X_val, y_val, input_shape):
        """Build and train an LSTM binary classifier with one residual link.

        The output of the first LSTM stage is added to the output of a second
        128-unit LSTM before a final 64-unit LSTM and a dense sigmoid head.

        Args:
            X_train, y_train: Training inputs and targets passed to ``fit``.
            X_val, y_val: Validation data; its loss drives early stopping and
                the learning-rate schedule.
            input_shape: Shape handed to the ``Input`` layer.

        Returns:
            The fitted model.
        """
        net_input = Input(shape=input_shape)

        # Stage 1: sequence-to-sequence LSTM that becomes the shortcut branch.
        shortcut = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(net_input)
        shortcut = BatchNormalization()(shortcut)
        shortcut = Dropout(0.3)(shortcut)

        # Stage 2: second LSTM with the same width, summed with the shortcut.
        transformed = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(shortcut)
        transformed = BatchNormalization()(transformed)
        combined = Add()([shortcut, transformed])
        combined = Dropout(0.3)(combined)

        # Stage 3: collapse the sequence to a single vector.
        summary = LSTM(64, kernel_regularizer=l2(0.01))(combined)
        summary = BatchNormalization()(summary)
        summary = Dropout(0.3)(summary)

        head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(summary)
        head = Dropout(0.2)(head)
        probability = Dense(1, activation='sigmoid')(head)

        model = Model(inputs=net_input, outputs=probability)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            batch_size=32,
            callbacks=training_callbacks,
            verbose=0,
        )
        return model

    @staticmethod
    def wavenet(X_train, y_train, X_val, y_val, input_shape):
        """Build and train a dilated causal-convolution binary classifier.

        Six causal Conv1D stages with dilation rates 1, 2, 4, 8, 16 and 32 are
        chained through residual additions. Each stage also emits a 1x1
        "skip" projection; the skips are summed, rectified, globally
        average-pooled and classified by a dense sigmoid head.

        Args:
            X_train, y_train: Training inputs and targets passed to ``fit``.
            X_val, y_val: Validation data; its loss drives early stopping and
                the learning-rate schedule.
            input_shape: Shape handed to the ``Input`` layer.

        Returns:
            The fitted model.
        """
        net_input = Input(shape=input_shape)
        stream = net_input
        skips = []

        for rate in (1, 2, 4, 8, 16, 32):
            dilated = Conv1D(64, 2, padding='causal', dilation_rate=rate,
                             activation='relu', kernel_regularizer=l2(0.01))(stream)
            dilated = BatchNormalization()(dilated)
            dilated = Dropout(0.2)(dilated)

            # 1x1 projection collected for the final skip sum.
            skips.append(Conv1D(64, 1, kernel_regularizer=l2(0.01))(dilated))

            # 1x1 projection added back onto the running stream.
            residual = Conv1D(64, 1, kernel_regularizer=l2(0.01))(dilated)
            if stream.shape[-1] != 64:
                # Match the channel count before the residual addition.
                stream = Conv1D(64, 1, kernel_regularizer=l2(0.01))(stream)
            stream = Add()([stream, residual])

        # Only the summed skip projections feed the classification head.
        pooled = Add()(skips)
        pooled = Activation('relu')(pooled)
        pooled = GlobalAveragePooling1D()(pooled)

        head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(pooled)
        head = BatchNormalization()(head)
        head = Dropout(0.3)(head)
        probability = Dense(1, activation='sigmoid')(head)

        model = Model(inputs=net_input, outputs=probability)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
        ]
        model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            batch_size=32,
            callbacks=training_callbacks,
            verbose=0,
        )
        return model


In [8]:
class ModelEvaluator:
    """Evaluate direction classifiers and keep their test predictions.

    Every call to ``evaluate_classification_model`` appends one metrics dict
    to ``results`` and stores a per-sample prediction table in
    ``predictions`` (keyed by model name) for later backtesting.
    """

    def __init__(self):
        # One metrics dict per evaluated model, in evaluation order.
        self.results = []
        # model_name -> DataFrame of per-sample test predictions.
        self.predictions = {}

    @staticmethod
    def _drop_singleton_axes(pred):
        """Remove size-1 axes from a column-shaped prediction, keeping axis 0.

        A bare ``squeeze()`` would also drop the sample axis when there is a
        single sample and turn the result into a 0-d array, which breaks the
        metric functions downstream.
        """
        if pred.ndim < 2 or pred.shape[-1] != 1:
            return pred
        singleton_axes = tuple(
            axis for axis in range(1, pred.ndim) if pred.shape[axis] == 1
        )
        return np.squeeze(pred, axis=singleton_axes)

    def _predict_model(self, model, X):
        """Call ``model.predict(X)`` and flatten column-shaped outputs.

        Returns:
            A list (one entry per output head) when the model returns a list,
            otherwise a single array.
        """
        pred = model.predict(X)

        if isinstance(pred, list):
            # Multi-output model: clean each head separately; anything that
            # is not an ndarray is passed through untouched.
            return [
                self._drop_singleton_axes(head) if isinstance(head, np.ndarray) else head
                for head in pred
            ]
        return self._drop_singleton_axes(np.asarray(pred))

    @staticmethod
    def _split_probabilities(pred_proba, pred_direction):
        """Return ``(P(up), P(down))`` arrays for the given model output.

        Args:
            pred_proba: ``None``, an ``(n, 2)`` array ordered
                ``[P(down), P(up)]``, or a single-column/1-D array of
                ``P(up)`` values.
            pred_direction: Hard 0/1 predictions, used only when
                ``pred_proba`` is ``None``.
        """
        if pred_proba is None:
            # No probabilities available: approximate with 0.9 / 0.1.
            proba_up = np.where(pred_direction == 1, 0.9, 0.1)
            return proba_up, 1 - proba_up

        pred_proba = np.asarray(pred_proba)
        if pred_proba.ndim == 2 and pred_proba.shape[1] == 2:
            # Binary classification: columns are [P(down), P(up)].
            return pred_proba[:, 1], pred_proba[:, 0]

        # Single output (sigmoid).
        proba_up = pred_proba.ravel()
        return proba_up, 1 - proba_up

    @staticmethod
    def _safe_roc_auc(y_true, scores, hard_pred):
        """Compute ROC AUC from scores, tolerating degenerate inputs.

        Returns NaN when ``y_true`` contains fewer than two classes (the
        metric is undefined there) and falls back to the hard predictions when
        the scores contain non-finite values.
        """
        if len(np.unique(y_true)) < 2:
            return float('nan')
        scores = np.asarray(scores, dtype=float)
        if not np.isfinite(scores).all():
            scores = hard_pred
        return roc_auc_score(y_true, scores)

    def evaluate_classification_model(self, model, X_train, y_train, X_val, y_val,
                                     X_test, y_test, test_returns, test_dates, model_name,
                                     is_deep_learning=False):
        """Evaluate a classifier with the binary-signal strategy.

        Args:
            model: Fitted model exposing ``predict`` (and optionally
                ``predict_proba``).
            X_train, y_train, X_val, y_val, X_test, y_test: Split data.
            test_returns, test_dates: Per-sample returns and dates stored
                alongside the test predictions.
            model_name: Key under which results and predictions are stored.
            is_deep_learning: When true, ``predict`` output is treated as
                probabilities and thresholded at 0.5.

        Returns:
            The metrics dict that was appended to ``self.results``.
        """
        train_pred = self._predict_model(model, X_train)
        val_pred = self._predict_model(model, X_val)
        test_pred = self._predict_model(model, X_test)

        # ----- probability extraction -----
        test_pred_proba = None
        if is_deep_learning:
            # Multi-task models return a list; the first head is used as the
            # classification output.
            if isinstance(train_pred, list):
                train_pred = train_pred[0]
                val_pred = val_pred[0]
                test_pred = test_pred[0]
            test_pred_proba = test_pred.copy()
            train_pred = (train_pred > 0.5).astype(int).ravel()
            val_pred = (val_pred > 0.5).astype(int).ravel()
            test_pred = (test_pred > 0.5).astype(int).ravel()
        elif hasattr(model, 'predict_proba'):
            test_pred_proba = model.predict_proba(X_test)

        # ----- classification metrics -----
        train_acc = accuracy_score(y_train, train_pred)
        val_acc = accuracy_score(y_val, val_pred)
        test_acc = accuracy_score(y_test, test_pred)

        test_prec = precision_score(y_test, test_pred, zero_division=0)
        test_rec = recall_score(y_test, test_pred, zero_division=0)
        test_f1 = f1_score(y_test, test_pred, zero_division=0)

        # ROC AUC ranks samples by score, so it must be fed the positive-class
        # probability rather than the thresholded 0/1 labels.
        proba_up, _ = self._split_probabilities(test_pred_proba, test_pred)
        test_roc_auc = self._safe_roc_auc(y_test, proba_up, test_pred)

        # ----- store predictions -----
        self._save_predictions(
            model_name, test_pred, test_pred_proba,
            y_test, test_returns, test_dates
        )

        self.results.append({
            'Model': model_name,
            'Train_Accuracy': train_acc,
            'Val_Accuracy': val_acc,
            'Test_Accuracy': test_acc,
            'Test_Precision': test_prec,
            'Test_Recall': test_rec,
            'Test_F1': test_f1,
            'Test_AUC_ROC': test_roc_auc
        })

        return self.results[-1]

    def _save_predictions(self, model_name, pred_direction, pred_proba,
                         actual_direction, actual_returns, dates):
        """Store per-sample test predictions for backtesting.

        The table is saved under ``self.predictions[model_name]`` and replaces
        any table previously stored for the same name.
        """
        pred_proba_up, pred_proba_down = self._split_probabilities(
            pred_proba, pred_direction
        )

        # Confidence is the distance of P(up) from 0.5, rescaled to [0, 1].
        confidence = np.abs(pred_proba_up - 0.5) * 2
        max_proba = np.maximum(pred_proba_up, pred_proba_down)

        predictions_df = pd.DataFrame({
            'date': dates,
            'actual_direction': actual_direction,
            'actual_return': actual_returns,
            'pred_direction': pred_direction,
            'pred_proba_up': pred_proba_up,
            'pred_proba_down': pred_proba_down,
            'confidence': confidence,
            'max_proba': max_proba,
            'correct': (pred_direction == actual_direction).astype(int)
        })

        self.predictions[model_name] = predictions_df

    def get_summary_dataframe(self):
        """Return all collected metrics as a DataFrame (one row per result)."""
        return pd.DataFrame(self.results)

    def get_predictions_dict(self):
        """Return the dict of stored prediction tables, keyed by model name."""
        return self.predictions

In [9]:
# # ============================================================================
# # 모델 설정 정의 (Classification)
# # ============================================================================

# ML_MODELS_CLASSIFICATION = [
#     {'index': 1, 'name': 'RandomForest', 'func': DirectionModels.random_forest, 'needs_val': True},
#     {'index': 2, 'name': 'LightGBM', 'func': DirectionModels.lightgbm, 'needs_val': True},
#     {'index': 3, 'name': 'XGBoost', 'func': DirectionModels.xgboost, 'needs_val': True},
#     {'index': 4, 'name': 'SVM', 'func': DirectionModels.svm, 'needs_val': True},
#     {'index': 5, 'name': 'LogisticRegression', 'func': DirectionModels.logistic_regression, 'needs_val': True},
#     {'index': 6, 'name': 'NaiveBayes', 'func': DirectionModels.naive_bayes, 'needs_val': True},
#     {'index': 7, 'name': 'KNN', 'func': DirectionModels.knn, 'needs_val': True},
#     {'index': 8, 'name': 'AdaBoost', 'func': DirectionModels.adaboost, 'needs_val': True},
#     {'index': 9, 'name': 'CatBoost', 'func': DirectionModels.catboost, 'needs_val': True},
#     {'index': 10, 'name': 'DecisionTree', 'func': DirectionModels.decision_tree, 'needs_val': True},
#     {'index': 11, 'name': 'ExtraTrees', 'func': DirectionModels.extra_trees, 'needs_val': True},
#     {'index': 12, 'name': 'Bagging', 'func': DirectionModels.bagging, 'needs_val': True},
#     {'index': 13, 'name': 'GradientBoosting', 'func': DirectionModels.gradient_boosting, 'needs_val': True},
#     {'index': 14, 'name': 'TabNet', 'func': DirectionModels.tabnet, 'needs_val': True},
#     {'index': 15, 'name': 'StackingEnsemble', 'func': DirectionModels.stacking_ensemble, 'needs_val': True},
#     {'index': 16, 'name': 'VotingHard', 'func': DirectionModels.voting_hard, 'needs_val': True},
#     {'index': 17, 'name': 'VotingSoft', 'func': DirectionModels.voting_soft, 'needs_val': True},
#     {'index': 18, 'name': 'MLP', 'func': DirectionModels.mlp, 'needs_val': True},
# ]

# DL_MODELS_CLASSIFICATION = [
#     {'index': 19, 'name': 'LSTM', 'func': DirectionModels.lstm, 'needs_val': True},
#     {'index': 20, 'name': 'BiLSTM', 'func': DirectionModels.bilstm, 'needs_val': True},
#     {'index': 21, 'name': 'GRU', 'func': DirectionModels.gru, 'needs_val': True},
#     {'index': 22, 'name': 'Stacked_LSTM', 'func': DirectionModels.stacked_lstm, 'needs_val': True},
#     {'index': 23, 'name': 'CNN_LSTM', 'func': DirectionModels.cnn_lstm, 'needs_val': True},
#     {'index': 24, 'name': 'CNN_GRU', 'func': DirectionModels.cnn_gru, 'needs_val': True},
#     {'index': 25, 'name': 'CNN_BiLSTM', 'func': DirectionModels.cnn_bilstm, 'needs_val': True},
#     {'index': 26, 'name': 'LSTM_Attention', 'func': DirectionModels.lstm_attention, 'needs_val': True},
#     {'index': 27, 'name': 'Transformer', 'func': DirectionModels.transformer, 'needs_val': True},
#     {'index': 28, 'name': 'TCN', 'func': DirectionModels.tcn, 'needs_val': True},
#     {'index': 29, 'name': 'DTW_LSTM', 'func': DirectionModels.dtw_lstm, 'needs_val': True},
#     {'index': 30, 'name': 'Informer', 'func': DirectionModels.informer, 'needs_val': True},
#     {'index': 31, 'name': 'NBEATS', 'func': DirectionModels.nbeats, 'needs_val': True},
#     {'index': 32, 'name': 'TFT', 'func': DirectionModels.temporal_fusion_transformer, 'needs_val': True},
#     {'index': 33, 'name': 'Performer', 'func': DirectionModels.performer, 'needs_val': True},
#     {'index': 34, 'name': 'PatchTST', 'func': DirectionModels.patchtst, 'needs_val': True},
#     {'index': 35, 'name': 'Autoformer', 'func': DirectionModels.autoformer, 'needs_val': True},
#     {'index': 36, 'name': 'iTransformer', 'func': DirectionModels.itransformer, 'needs_val': True},
#     {'index': 37, 'name': 'EtherVoyant', 'func': DirectionModels.ethervoyant, 'needs_val': True},
#     {'index': 38, 'name': 'VMD_Hybrid', 'func': DirectionModels.vmd_hybrid, 'needs_val': True},
#     {'index': 39, 'name': 'SimpleRNN', 'func': DirectionModels.simple_rnn, 'needs_val': True},
#     {'index': 40, 'name': 'EMD_LSTM', 'func': DirectionModels.emd_lstm, 'needs_val': True},
#     {'index': 41, 'name': 'Hybrid_LSTM_GRU', 'func': DirectionModels.hybrid_lstm_gru, 'needs_val': True},
#     {'index': 42, 'name': 'Parallel_CNN', 'func': DirectionModels.parallel_cnn, 'needs_val': True},
#     {'index': 43, 'name': 'LSTM_XGBoost_Hybrid', 'func': DirectionModels.lstm_xgboost_hybrid, 'needs_val': True},
#     {'index': 44, 'name': 'Residual_LSTM', 'func': DirectionModels.residual_lstm, 'needs_val': True},
#     {'index': 45, 'name': 'WaveNet', 'func': DirectionModels.wavenet, 'needs_val': True},
# ]
# ============================================================================
# ML Models (18 models - 52.84% threshold)
# ============================================================================

# Registry of tabular classifiers. Each entry is a dict with a 1-based
# 'index', a display 'name', the builder 'func' and 'needs_val' (always True
# here, meaning the builder is called with the validation split as well).
ML_MODELS_CLASSIFICATION = [
    {'index': position, 'name': label, 'func': builder, 'needs_val': True}
    for position, (label, builder) in enumerate([
        ('RandomForest', DirectionModels.random_forest),
        ('LightGBM', DirectionModels.lightgbm),
        ('XGBoost', DirectionModels.xgboost),
        ('SVM', DirectionModels.svm),
        ('LogisticRegression', DirectionModels.logistic_regression),
        ('NaiveBayes', DirectionModels.naive_bayes),
        ('KNN', DirectionModels.knn),
        ('AdaBoost', DirectionModels.adaboost),
        ('CatBoost', DirectionModels.catboost),
        ('DecisionTree', DirectionModels.decision_tree),
        ('ExtraTrees', DirectionModels.extra_trees),
        ('Bagging', DirectionModels.bagging),
        ('GradientBoosting', DirectionModels.gradient_boosting),
        ('TabNet', DirectionModels.tabnet),
        ('StackingEnsemble', DirectionModels.stacking_ensemble),
        ('VotingHard', DirectionModels.voting_hard),
        ('VotingSoft', DirectionModels.voting_soft),
        ('MLP', DirectionModels.mlp),
    ], start=1)
]

# ============================================================================
# DL Models (8 models - 55% threshold)
# ============================================================================

# Registry of sequence (deep-learning) classifiers. Rows are
# (index, name, builder); commented-out rows are disabled models that keep
# their original index so they can be re-enabled without renumbering.
DL_MODELS_CLASSIFICATION = [
    {'index': position, 'name': label, 'func': builder, 'needs_val': True}
    for position, label, builder in [
        (19, 'LSTM', DirectionModels.lstm),
        (20, 'BiLSTM', DirectionModels.bilstm),
        (21, 'GRU', DirectionModels.gru),
        # (22, 'Stacked_LSTM', DirectionModels.stacked_lstm),
        # (23, 'CNN_LSTM', DirectionModels.cnn_lstm),
        # (24, 'CNN_GRU', DirectionModels.cnn_gru),
        # (25, 'CNN_BiLSTM', DirectionModels.cnn_bilstm),
        # (26, 'LSTM_Attention', DirectionModels.lstm_attention),
        # (27, 'Transformer', DirectionModels.transformer),
        # (28, 'TCN', DirectionModels.tcn),
        (29, 'DTW_LSTM', DirectionModels.dtw_lstm),
        # (30, 'Informer', DirectionModels.informer),
        # (31, 'NBEATS', DirectionModels.nbeats),
        # (32, 'TFT', DirectionModels.temporal_fusion_transformer),
        # (33, 'Performer', DirectionModels.performer),
        # (34, 'PatchTST', DirectionModels.patchtst),
        # (35, 'Autoformer', DirectionModels.autoformer),
        # (36, 'iTransformer', DirectionModels.itransformer),
        # (37, 'EtherVoyant', DirectionModels.ethervoyant),
        (38, 'VMD_Hybrid', DirectionModels.vmd_hybrid),
        # (39, 'SimpleRNN', DirectionModels.simple_rnn),
        (40, 'EMD_LSTM', DirectionModels.emd_lstm),
        (41, 'Hybrid_LSTM_GRU', DirectionModels.hybrid_lstm_gru),
        # (42, 'Parallel_CNN', DirectionModels.parallel_cnn),
        # (43, 'LSTM_XGBoost_Hybrid', DirectionModels.lstm_xgboost_hybrid),
        (44, 'Residual_LSTM', DirectionModels.residual_lstm),
        # (45, 'WaveNet', DirectionModels.wavenet),
    ]
]


In [10]:
def train_all_models(X_train, y_train, X_val, y_val, X_test, y_test,
                    test_returns, test_dates, evaluator, lookback=30,
                    ml_models=None, dl_models=None, task='classification'):
    """Train and evaluate every configured ML and DL model on one data split.

    ML models are trained on the flat inputs. DL models are trained on
    sliding windows of ``lookback`` rows built with
    ``ModelTrainer.create_sequences``; the test returns and dates are trimmed
    by ``lookback`` so they stay aligned with the windowed targets.

    Args:
        X_train, y_train, X_val, y_val, X_test, y_test: Split data.
        test_returns, test_dates: Per-sample test returns and dates forwarded
            to the evaluator.
        evaluator: Object that receives the evaluation calls.
        lookback: Window length for sequence models.
        ml_models: List of model-config dicts trained on flat inputs;
            ``None`` is treated as an empty list.
        dl_models: List of model-config dicts trained on sequences;
            ``None`` is treated as an empty list.
        task: ``'classification'`` or anything else for regression.

    Returns:
        Number of models that trained and evaluated successfully.
    """
    # The signature defaults are None; treat that as "no models of this kind"
    # instead of failing on len(None).
    ml_models = [] if ml_models is None else ml_models
    dl_models = [] if dl_models is None else dl_models
    total_models = len(ml_models) + len(dl_models)

    # Entries of dl_models with these names take flat inputs and are routed
    # through train_ml_model.
    tabular_names = ('TabNet', 'TabNet_Reg', 'Ensemble_Stacking', 'Ensemble_Voting')

    print("\n" + "="*80)
    print(f"{task.capitalize()} 모델 학습 시작 (총 {total_models}개 모델)")
    print("="*80)
    trainer = ModelTrainer(evaluator, lookback)

    # ----- ML models -----
    print(f"\n[Part 1/2] Machine Learning 모델 ({len(ml_models)}개)")
    print("-" * 80)
    ml_success_count = 0
    for model_config in ml_models:
        success = trainer.train_ml_model(
            model_config, X_train, y_train, X_val, y_val,
            X_test, y_test, test_returns, test_dates, task=task
        )
        if success:
            ml_success_count += 1
    print(f"\n✓ ML 모델 완료: {ml_success_count}/{len(ml_models)}개 성공")

    # ----- DL models -----
    print(f"\n[Part 2/2] Deep Learning/시계열 모델 ({len(dl_models)}개)")
    print("-" * 80)

    # Build the windows only when at least one model consumes them, so that
    # ML-only runs do not fail on splits shorter than the lookback.
    needs_sequences = any(cfg['name'] not in tabular_names for cfg in dl_models)
    X_train_seq = y_train_seq = X_val_seq = y_val_seq = None
    X_test_seq = y_test_seq = test_returns_seq = test_dates_seq = input_shape = None
    if needs_sequences:
        print(f"\n시퀀스 데이터 생성 중 (lookback={lookback})...")
        X_train_seq, y_train_seq = trainer.create_sequences(X_train, y_train, lookback)
        X_val_seq, y_val_seq = trainer.create_sequences(X_val, y_val, lookback)
        X_test_seq, y_test_seq = trainer.create_sequences(X_test, y_test, lookback)
        # The first `lookback` rows have no complete window, so drop them
        # from the aligned metadata as well.
        test_returns_seq = test_returns[lookback:]
        test_dates_seq = test_dates[lookback:]
        input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])
        print(f"  ✓ Train shape: {X_train_seq.shape}")
        print(f"  ✓ Val shape: {X_val_seq.shape}")
        print(f"  ✓ Test shape: {X_test_seq.shape}")
        print(f"  ✓ Input shape: {input_shape}\n")

    dl_success_count = 0
    for model_config in dl_models:
        if model_config['name'] in tabular_names:
            success = trainer.train_ml_model(
                model_config, X_train, y_train, X_val, y_val,
                X_test, y_test, test_returns, test_dates, task=task
            )
        elif 'outputs' in model_config and len(model_config['outputs']) > 1:
            # Multi-output model: split the 2-D target into one array per head.
            # NOTE(review): confirm ModelTrainer provides
            # train_dl_multitask_model before enabling multi-output configs.
            y_train_list = [y_train_seq[:, i] for i in range(y_train_seq.shape[1])]
            y_val_list = [y_val_seq[:, i] for i in range(y_val_seq.shape[1])]
            y_test_list = [y_test_seq[:, i] for i in range(y_test_seq.shape[1])]
            success = trainer.train_dl_multitask_model(
                model_config, X_train_seq, y_train_list, X_val_seq, y_val_list,
                X_test_seq, y_test_list, test_returns_seq, test_dates_seq, input_shape
            )
        else:
            success = trainer.train_dl_model(
                model_config, X_train_seq, y_train_seq, X_val_seq, y_val_seq,
                X_test_seq, y_test_seq, test_returns_seq, test_dates_seq, input_shape, task=task
            )
        if success:
            dl_success_count += 1
    print(f"\n✓ DL 모델 완료: {dl_success_count}/{len(dl_models)}개 성공")

    total_success = ml_success_count + dl_success_count
    print("\n" + "="*80)
    print(f"전체 학습 완료: {total_success}/{total_models}개 모델 성공")
    print("="*80)
    return total_success

def train_models_for_fold(fold_idx, X_train, y_train, X_val, y_val,
                          X_test, y_test, test_returns, test_dates,
                          evaluator, all_fold_results, lookback=30,
                          ml_models=None, dl_models=None, task='classification'):
    """Train all models for one fold and record the fold's summary table.

    Args:
        fold_idx: Zero-based fold index; reported and stored as ``fold_idx + 1``.
        X_train ... test_dates: Split data forwarded to ``train_all_models``.
        evaluator: Evaluator whose ``get_summary_dataframe()`` supplies the
            summary (presumably everything it has collected so far — confirm
            whether a fresh evaluator is used per fold).
        all_fold_results: List that the summary DataFrame is appended to.
        lookback, ml_models, dl_models, task: Forwarded to ``train_all_models``.

    Returns:
        The summary DataFrame with an added ``'Fold'`` column.
    """
    fold_number = fold_idx + 1
    rule = '=' * 80

    print(f"\n{rule}")
    print(f"Fold {fold_number} - {task.capitalize()} 모델 학습")
    print(rule)

    n_trained = train_all_models(
        X_train, y_train, X_val, y_val, X_test, y_test,
        test_returns, test_dates, evaluator,
        lookback=lookback, ml_models=ml_models, dl_models=dl_models, task=task,
    )

    summary = evaluator.get_summary_dataframe()
    summary['Fold'] = fold_number
    all_fold_results.append(summary)

    print(f"\n✓ Fold {fold_number} 완료 ({n_trained}개 모델)")
    return summary


class ModelTrainer:
    """Train one configured model and pass it to the evaluator.

    Shared by the classification and regression tasks. A failure in any
    single model is reported on stdout and turned into a ``False`` return
    value so the remaining models still run.
    """

    def __init__(self, evaluator, lookback=30):
        self.lookback = lookback
        self.evaluator = evaluator

    @staticmethod
    def create_sequences(X, y, lookback):
        """Build sliding windows of ``lookback`` rows and their targets.

        Window ``X[i - lookback:i]`` is paired with target ``y[i]`` for every
        ``i`` in ``range(lookback, len(X))``.

        Returns:
            ``(windows, targets)`` as NumPy arrays.
        """
        window_ends = range(lookback, len(X))
        # pandas objects need positional access via .iloc; arrays index directly.
        positional = hasattr(y, 'iloc')
        windows = [X[end - lookback:end] for end in window_ends]
        targets = [y.iloc[end] if positional else y[end] for end in window_ends]
        return np.array(windows), np.array(targets)

    @staticmethod
    def _report_skip(model_config, error):
        """Print the skip notice and traceback for a failed model.

        Must be called from inside an ``except`` block so the active
        traceback is available.
        """
        import traceback
        print(f"    ⚠ {model_config['name']} 스킵: {type(error).__name__}: {str(error)}")
        print(f"    상세: {traceback.format_exc()}")

    def train_ml_model(self, model_config, X_train, y_train, X_val, y_val,
                       X_test, y_test, test_returns, test_dates, task='classification'):
        """Fit a flat-input model from its config and evaluate it.

        The builder is called with the validation split only when the config
        has a truthy ``'needs_val'``. A model named ``'MLP'`` is evaluated
        with ``is_deep_learning=True``.

        Returns:
            ``True`` on success, ``False`` if anything raised.
        """
        try:
            print(f"  [{model_config['index']}] {model_config['name']}...")
            build = model_config['func']
            if model_config.get('needs_val', False):
                model = build(X_train, y_train, X_val, y_val)
            else:
                model = build(X_train, y_train)

            evaluate = (
                self.evaluator.evaluate_classification_model
                if task == 'classification'
                else self.evaluator.evaluate_regression_model
            )
            evaluate(
                model, X_train, y_train, X_val, y_val, X_test, y_test,
                test_returns, test_dates, model_config['name'],
                is_deep_learning=(model_config['name'] == 'MLP'),
            )
            return True
        except Exception as e:
            self._report_skip(model_config, e)
            return False

    def train_dl_model(self, model_config, X_train_seq, y_train_seq,
                       X_val_seq, y_val_seq, X_test_seq, y_test_seq,
                       test_returns_seq, test_dates_seq, input_shape, task='classification'):
        """Fit a sequence model from its config and evaluate it.

        The builder always receives the validation split and ``input_shape``.

        Returns:
            ``True`` on success, ``False`` if anything raised.
        """
        try:
            print(f"  [{model_config['index']}] {model_config['name']}...")
            model = model_config['func'](
                X_train_seq, y_train_seq, X_val_seq, y_val_seq, input_shape
            )

            evaluate = (
                self.evaluator.evaluate_classification_model
                if task == 'classification'
                else self.evaluator.evaluate_regression_model
            )
            evaluate(
                model, X_train_seq, y_train_seq, X_val_seq, y_val_seq,
                X_test_seq, y_test_seq, test_returns_seq, test_dates_seq,
                model_config['name'], is_deep_learning=True,
            )
            return True
        except Exception as e:
            self._report_skip(model_config, e)
            return False
        

In [11]:
# Prediction targets to run: a single case predicting the next direction.
target_cases = [
    dict(name='direction', target_type='direction', outputs=['next_direction']),
]

# Data-splitting schemes to run; each entry uses the same string for its
# display name and its method identifier.
split_methods = [
    {'name': scheme, 'method': scheme}
    for scheme in ('walk_forward', 'tvt')
]


# Output directory for result files; created on import if it is missing.
RESULT_DIR = "model_results"
os.makedirs(RESULT_DIR, exist_ok=True)

def save_walk_forward_results(all_fold_results, all_fold_predictions, target_name, task):
    """
    Save walk-forward results, keeping walk-forward folds and the final holdout apart.

    Files written under RESULT_DIR:
      * ``{target_name}_walk_forward___detailed.csv``: one row per (fold, model),
        with 'Fold' and 'fold_type' columns prepended.
      * ``{target_name}_walk_forward___avg.csv``: one row per model with
        ``WF_<metric>_Mean`` / ``WF_<metric>_Std`` computed over rows whose
        fold_type is 'walk_forward', and ``Final_<metric>`` taken from the
        first row whose fold_type is 'final_holdout'.
      * ``predictions/{target_name}_walk_forward/{model}_all_folds.csv``: the
        per-fold prediction frames of each model concatenated (only when
        all_fold_predictions is non-empty).

    Args:
        all_fold_results: list of ``(fold_df, fold_type)`` tuples. Each fold_df
            must contain a 'Model' column; folds are numbered from 1 in list order.
        all_fold_predictions: list of ``(fold_pred_dict, fold_type)`` tuples,
            where fold_pred_dict maps model name -> predictions DataFrame.
        target_name: prefix used in the output file names.
        task: accepted for interface compatibility; not used by this function.

    Returns:
        ``(detailed_df, avg_df)``. avg_df is sorted by WF_Test_Accuracy_Mean
        (descending) when present, otherwise by WF_Test_RMSE_Mean (ascending).

    Raises:
        ValueError: if all_fold_results is empty.
    """
    if not all_fold_results:
        # pd.concat would otherwise fail with an opaque "No objects to concatenate".
        raise ValueError("all_fold_results is empty: no fold results to save")

    def _as_percent(value):
        """Format a score as 'xx.xx%', or 'N/A' when it is missing or NaN."""
        if value is None or pd.isna(value):
            return 'N/A'
        return f"{value:.2f}%"

    # 1. Detailed results (every fold, tagged with its index and type).
    detailed_results = []
    for fold_idx, (fold_df, fold_type) in enumerate(all_fold_results, start=1):
        fold_df_copy = fold_df.copy()
        fold_df_copy.insert(0, 'Fold', fold_idx)
        fold_df_copy.insert(1, 'fold_type', fold_type)
        detailed_results.append(fold_df_copy)

    detailed_df = pd.concat(detailed_results, ignore_index=True)

    if 'Test_Accuracy' in detailed_df.columns:
        detailed_df = detailed_df.sort_values(
            by=['Fold', 'Test_Accuracy'],
            ascending=[True, False]
        ).reset_index(drop=True)

    detailed_path = os.path.join(RESULT_DIR, f"{target_name}_walk_forward___detailed.csv")
    detailed_df.to_csv(detailed_path, index=False, encoding='utf-8-sig')
    print(f"Saved: {detailed_path}")

    # 2. Split walk-forward folds from the final holdout.
    wf_data = detailed_df[detailed_df['fold_type'] == 'walk_forward']
    final_data = detailed_df[detailed_df['fold_type'] == 'final_holdout']

    # Every numeric column is treated as a metric, except the fold index itself.
    numeric_cols = [
        col for col in detailed_df.select_dtypes(include=[np.number]).columns
        if col != 'Fold'
    ]

    # 3. Per-model aggregation: walk-forward mean/std (used for model selection)
    #    plus the final-holdout value (used for reporting only).
    avg_results = []
    for model in detailed_df['Model'].unique():
        avg_row = {'Model': model}

        model_wf = wf_data[wf_data['Model'] == model]
        if len(model_wf) > 0:
            for col in numeric_cols:
                avg_row[f'WF_{col}_Mean'] = model_wf[col].mean()
                avg_row[f'WF_{col}_Std'] = model_wf[col].std()

        model_final = final_data[final_data['Model'] == model]
        if len(model_final) > 0:
            for col in numeric_cols:
                # Only the first final-holdout row of a model is reported.
                avg_row[f'Final_{col}'] = model_final[col].iloc[0]

        avg_results.append(avg_row)

    avg_df = pd.DataFrame(avg_results)

    # 4. Rank models by their walk-forward average, never by the holdout.
    if 'WF_Test_Accuracy_Mean' in avg_df.columns:
        avg_df = avg_df.sort_values(by='WF_Test_Accuracy_Mean', ascending=False).reset_index(drop=True)
    elif 'WF_Test_RMSE_Mean' in avg_df.columns:
        avg_df = avg_df.sort_values(by='WF_Test_RMSE_Mean', ascending=True).reset_index(drop=True)

    avg_path = os.path.join(RESULT_DIR, f"{target_name}_walk_forward___avg.csv")
    avg_df.to_csv(avg_path, index=False, encoding='utf-8-sig')
    print(f"Saved: {avg_path}")

    # 5. Console summary for the top-ranked model.
    if len(avg_df) > 0:
        best_row = avg_df.iloc[0]
        print(f"\n성능 요약 (Best Model: {best_row['Model']}):")
        if 'WF_Test_Accuracy_Mean' in avg_df.columns:
            # Both scores use the same formatting; a missing holdout score is
            # shown as plain "N/A" rather than "N/A%".
            wf_text = _as_percent(best_row['WF_Test_Accuracy_Mean'])
            final_text = _as_percent(best_row.get('Final_Test_Accuracy'))
            print(f"  WF 평균 (2022-2024): {wf_text}")
            print(f"  Final Holdout (2025): {final_text}")

    # 6. Predictions: one CSV per model with all folds stacked and tagged.
    if all_fold_predictions:
        pred_dir = os.path.join(RESULT_DIR, "predictions", f"{target_name}_walk_forward")
        os.makedirs(pred_dir, exist_ok=True)

        all_models = set()
        for fold_pred, _ in all_fold_predictions:
            all_models.update(fold_pred.keys())

        for model_name in all_models:
            combined_predictions = []

            for fold_idx, (fold_pred, fold_type) in enumerate(all_fold_predictions, start=1):
                if model_name in fold_pred:
                    fold_df = fold_pred[model_name].copy()
                    fold_df.insert(0, 'fold', fold_idx)
                    fold_df.insert(1, 'fold_type', fold_type)
                    combined_predictions.append(fold_df)

            if combined_predictions:
                combined_df = pd.concat(combined_predictions, ignore_index=True)
                pred_filename = f"{model_name}_all_folds.csv"
                pred_path = os.path.join(pred_dir, pred_filename)
                combined_df.to_csv(pred_path, index=False, encoding='utf-8-sig')

        print(f"Saved {len(all_models)} combined prediction files to {pred_dir}")

    return detailed_df, avg_df




def save_summary_csv(summary_df, predictions_dict, target_name, split_name, task):
    """
    Save the per-model evaluation metrics and, optionally, the per-model predictions.

    Files written under RESULT_DIR:
      * ``{target_name}_{split_name}__metrics.csv``: the task's metric columns
        (and backtest columns for 'regression' / 'multitask') that are present
        in summary_df, sorted by Test_Accuracy (descending) when present,
        otherwise by Test_RMSE (ascending).
      * ``predictions/{target_name}_{split_name}/{model_name}.csv``: one file
        per entry of predictions_dict (skipped when it is empty or None).

    Args:
        summary_df: DataFrame of per-model metrics.
        predictions_dict: ``{model_name: predictions_df}``.
        target_name: first part of the output file / directory names.
        split_name: second part of the output file / directory names.
        task: one of 'classification', 'regression', 'multitask'.

    Raises:
        ValueError: if task is not one of the supported values.
    """
    # 1. Choose the columns to export for this task.
    backtest_cols = []  # classification exports metric columns only
    if task == 'classification':
        metric_cols = ['Model', 'Train_Accuracy', 'Val_Accuracy', 'Test_Accuracy',
                       'Test_Precision', 'Test_Recall', 'Test_F1', 'Test_AUC_ROC']

    elif task == 'regression':
        metric_cols = ['Model', 'Train_RMSE', 'Val_RMSE', 'Test_RMSE',
                       'Train_MAE', 'Val_MAE', 'Test_MAE', 'Test_R2', 'Test_MAPE', 'Direction_Accuracy']
        backtest_cols = ['Model', 'Directional_Return(%)', 'Directional_Sharpe',
                         'Total_Return(%)', 'Sharpe', 'Sortino', 'Calmar',
                         'Max_Drawdown(%)', 'Win_Rate(%)', 'Total_Trades', 'Profit_Factor',
                         'VolScaled_Return(%)', 'VolScaled_Sharpe']

    elif task == 'multitask':
        metric_cols = ['Model', 'Train_Accuracy', 'Val_Accuracy', 'Test_Accuracy', 'Test_Precision',
                       'Test_Recall', 'Test_F1', 'Train_RMSE', 'Val_RMSE', 'Test_RMSE',
                       'Test_MAE', 'Test_R2', 'Direction_Accuracy']
        backtest_cols = ['Model', 'Total_Return(%)', 'Sharpe', 'Sortino', 'Calmar',
                         'Max_Drawdown(%)', 'Win_Rate(%)', 'Total_Trades', 'Profit_Factor']
        if 'Directional_Return(%)' in summary_df.columns:
            backtest_cols += ['Directional_Return(%)', 'Directional_Sharpe']
        if 'VolScaled_Return(%)' in summary_df.columns:
            backtest_cols += ['VolScaled_Return(%)', 'VolScaled_Sharpe']
        if 'Confident_Return(%)' in summary_df.columns:
            backtest_cols += ['Confident_Return(%)', 'Confident_Sharpe', 'Confident_Trades']

    else:
        raise ValueError(
            f"Unknown task {task!r}; expected 'classification', 'regression' or 'multitask'"
        )

    # 'Model' appears in both lists; dict.fromkeys drops the repeat while
    # keeping first-seen order, so the CSV does not get a duplicated column.
    wanted_cols = dict.fromkeys(metric_cols + backtest_cols)
    available_cols = [col for col in wanted_cols if col in summary_df.columns]

    save_df = summary_df[available_cols]

    if 'Test_Accuracy' in save_df.columns:
        save_df = save_df.sort_values(by='Test_Accuracy', ascending=False).reset_index(drop=True)
    elif 'Test_RMSE' in save_df.columns:
        save_df = save_df.sort_values(by='Test_RMSE', ascending=True).reset_index(drop=True)

    filename = f"{target_name}_{split_name}__metrics.csv"
    file_path = os.path.join(RESULT_DIR, filename)
    save_df.to_csv(file_path, index=False, encoding='utf-8-sig')
    print(f"Saved metrics: {file_path}")

    # 2. Predictions: one CSV per model.
    if predictions_dict:
        pred_dir = os.path.join(RESULT_DIR, "predictions", f"{target_name}_{split_name}")
        os.makedirs(pred_dir, exist_ok=True)

        for model_name, pred_df in predictions_dict.items():
            pred_filename = f"{model_name}.csv"
            pred_path = os.path.join(pred_dir, pred_filename)
            pred_df.to_csv(pred_path, index=False, encoding='utf-8-sig')

        print(f"Saved {len(predictions_dict)} prediction files to {pred_dir}")



In [12]:
# Run every (target case x split method) experiment and collect the summaries.
# all_results maps "<target name>_<split name>" to the TVT summary frame or to
# the walk-forward per-model average frame.
all_results = {}

for target_case in target_cases:
    # Only single-output targets are wired up here. Fail fast with a clear
    # message instead of hitting a NameError (or silently reusing the previous
    # iteration's targets and model lists) further down.
    if len(target_case['outputs']) != 1:
        raise NotImplementedError(
            f"target case {target_case['name']!r} has "
            f"{len(target_case['outputs'])} outputs; only single-output targets are supported"
        )
    target_col = target_case['outputs'][0]
    ml_models = ML_MODELS_CLASSIFICATION
    dl_models = DL_MODELS_CLASSIFICATION
    task = 'classification'

    for split_method in split_methods:
        print(f"\n{'='*80}")
        print(f"Experiment: {target_case['name']} x {split_method['name']}")
        print(f"{'='*80}")

        result = build_complete_pipeline_corrected(
            df_merged, train_start_date,
            method=split_method['method'],
            target_type=target_case['target_type'],
            test_start_date='2025-01-01'  # test period fixed to start in 2025
        )

        if split_method['method'] == 'tvt':
            # Single train / validation / test split: result is indexed by split name.
            X_train = result['train']['X_robust']
            X_val = result['val']['X_robust']
            X_test = result['test']['X_robust']
            test_returns = result['test']['y']['next_log_return'].values
            test_dates = result['test']['dates'].values

            y_train = result['train']['y'][target_col].values
            y_val = result['val']['y'][target_col].values
            y_test = result['test']['y'][target_col].values

            evaluator = ModelEvaluator()
            train_all_models(
                X_train, y_train, X_val, y_val, X_test, y_test,
                test_returns, test_dates, evaluator,
                ml_models=ml_models, dl_models=dl_models, task=task
            )

            summary_df = evaluator.get_summary_dataframe()
            predictions_dict = evaluator.get_predictions_dict()

            all_results[f"{target_case['name']}_{split_method['name']}"] = summary_df

            save_summary_csv(
                summary_df, predictions_dict,
                target_case['name'], split_method['name'], task
            )

        else:
            # Walk-forward: result is iterated as a sequence of folds, each
            # indexed like the TVT result.
            fold_results = []
            fold_predictions = []

            for fold_idx, fold in enumerate(result, start=1):
                # Folds without an explicit type count as regular walk-forward folds.
                fold_type = fold.get('fold_type', 'walk_forward')

                print(f"\n  Processing Fold {fold_idx}/{len(result)} ({fold_type})")

                X_train = fold['train']['X_robust']
                X_val = fold['val']['X_robust']
                X_test = fold['test']['X_robust']
                test_returns = fold['test']['y']['next_log_return'].values
                test_dates = fold['test']['dates'].values

                y_train = fold['train']['y'][target_col].values
                y_val = fold['val']['y'][target_col].values
                y_test = fold['test']['y'][target_col].values

                # A fresh evaluator per fold keeps the fold summaries independent.
                evaluator = ModelEvaluator()
                train_all_models(
                    X_train, y_train, X_val, y_val, X_test, y_test,
                    test_returns, test_dates, evaluator,
                    ml_models=ml_models, dl_models=dl_models, task=task
                )

                fold_summary = evaluator.get_summary_dataframe()
                fold_pred_dict = evaluator.get_predictions_dict()

                # Carry fold_type along so the aggregation step can tell
                # walk-forward folds from the final holdout.
                fold_results.append((fold_summary, fold_type))
                fold_predictions.append((fold_pred_dict, fold_type))

                print(f"  Fold {fold_idx} ({fold_type}) completed")

            print(f"\n  Aggregating {len(fold_results)} folds...")
            detailed_df, avg_df = save_walk_forward_results(
                fold_results, fold_predictions,
                target_case['name'], task
            )
            all_results[f"{target_case['name']}_{split_method['name']}"] = avg_df




Experiment: direction x walk_forward

Walk-Forward Configuration
Total: 1752 days
Pre-final: 1474 days | Final holdout: 278 days
Target: 4 walk-forward + 1 final holdout = 5 folds

Fold 1 (walk_forward)
  Train:  600d  2020-12-19 ~ 2022-08-10
  Val:     60d  2022-08-11 ~ 2022-10-09
  Test:    60d  2022-10-10 ~ 2022-12-08

Fold 2 (walk_forward)
  Train:  840d  2020-12-19 ~ 2023-04-07
  Val:     60d  2023-04-08 ~ 2023-06-06
  Test:    60d  2023-06-07 ~ 2023-08-05

Fold 3 (walk_forward)
  Train: 1080d  2020-12-19 ~ 2023-12-03
  Val:     60d  2023-12-04 ~ 2024-02-01
  Test:    60d  2024-02-02 ~ 2024-04-01

Fold 4 (walk_forward)
  Train: 1320d  2020-12-19 ~ 2024-07-30
  Val:     60d  2024-07-31 ~ 2024-09-28
  Test:    60d  2024-09-29 ~ 2024-11-27

Fold 5 (final_holdout)
  Train: 1414d  2020-12-19 ~ 2024-11-01
  Val:     60d  2024-11-02 ~ 2024-12-31
  Test:   278d  2025-01-01 ~ 2025-10-05

Created 5 folds total


Processing Fold 1 (walk_forward)

[Feature Selection for Fold 1]
Training data

KeyboardInterrupt: 