In [1]:
import pandas as pd
import numpy as np
import os
import pandas_ta as ta    
from sklearn.preprocessing import RobustScaler, StandardScaler
from datetime import datetime, timedelta
from sklearn.feature_selection import SelectKBest, mutual_info_regression, RFE
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, GradientBoostingRegressor
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_absolute_percentage_error, r2_score, accuracy_score, mean_squared_error
import warnings
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.vector_ar.var_model import VAR
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score # 분류 모델에 roc_auc_score 추가
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import (AdaBoostClassifier, ExtraTreesClassifier, 
                              BaggingClassifier, GradientBoostingClassifier,
                              StackingClassifier, VotingClassifier)
from sklearn.tree import DecisionTreeClassifier
from catboost import CatBoostClassifier
from tensorflow.keras.layers import SimpleRNN, Add, Activation
from pytorch_tabnet.tab_model import TabNetClassifier
from tensorflow.keras.layers import Lambda 
from tensorflow.keras.layers import Permute
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore')

# ============================================================================ 
# 1. 날짜 파싱 및 CSV 로드 함수
# ============================================================================ 
def standardize_date_column(df,file_name):
    """날짜 컬럼 자동 탐지 + datetime 통일 + tz 제거 + 시각 제거"""

    date_cols = [col for col in df.columns if 'date' in col.lower()]
    if not date_cols:
        print("[Warning] 날짜 컬럼을 찾을 수 없습니다.")
        return df
    date_col = date_cols[0]
    

    if date_col != 'date':
        df.rename(columns={date_col: 'date'}, inplace=True)
    

    if file_name == 'eth_onchain.csv':
        df['date'] = pd.to_datetime(df['date'], format='%y-%m-%d', errors='coerce')
    else:
        df['date'] = pd.to_datetime(df['date'], errors='coerce', infer_datetime_format=True)
    
    #print(df.shape)
    df = df.dropna(subset=['date'])
    #print(df.shape)
    df['date'] = df['date'].dt.normalize()  
    if pd.api.types.is_datetime64tz_dtype(df['date']):
        df['date'] = df['date'].dt.tz_convert(None)
    else:
        df['date'] = df['date'].dt.tz_localize(None)
    #print(df.shape)
    return df

def load_and_standardize_data(filepath):

    df = pd.read_csv(filepath)
    df = standardize_date_column(df,filepath)
    return df
# ============================================================================ 
# 2. 데이터 로딩
# ============================================================================ 
DATA_DIR = './macro_data'

def load_from_macro_data(filename):
    return load_and_standardize_data(os.path.join(DATA_DIR, filename))

macro_df = load_from_macro_data('macro_crypto_data.csv')
news_df = load_from_macro_data('news_data.csv')
eth_onchain_df = load_from_macro_data('eth_onchain.csv')
fear_greed_df = load_from_macro_data('fear_greed.csv')
usdt_eth_mcap_df = load_from_macro_data('usdt_eth_mcap.csv')
aave_tvl_df = load_from_macro_data('aave_eth_tvl.csv')
lido_tvl_df = load_from_macro_data('lido_eth_tvl.csv')
makerdao_tvl_df = load_from_macro_data('makerdao_eth_tvl.csv')
eth_chain_tvl_df = load_from_macro_data('eth_chain_tvl.csv')
eth_funding_df = load_from_macro_data('eth_funding_rate.csv')
sp500_df = load_from_macro_data('SP500.csv')
vix_df = load_from_macro_data('VIX.csv')
gold_df = load_from_macro_data('GOLD.csv')
dxy_df = load_from_macro_data('DXY.csv')

# ============================================================================ 
# 3. 기준 날짜 설정 (Lido TVL 시작일 기준)
# ============================================================================ 
train_start_date = pd.to_datetime('2020-12-19')
lookback_start_date = train_start_date - timedelta(days=200)
end_date= pd.to_datetime('2025-10-06')

# ============================================================================ 
# 4. 뉴스 감성 피처 생성 
# ============================================================================ 
def create_sentiment_features(news_df):
    """
    한국어 뉴스 감성 지표 생성
    출처: "Cryptocurrency Price Prediction Model Based on Sentiment Analysis" (2024)
    """
    
    sentiment_agg = news_df.groupby('date').agg(
        # ===== 기본 통계 =====
        sentiment_mean=('label', 'mean'),
        sentiment_std=('label', 'std'),
        news_count=('label', 'count'),
        positive_ratio=('label', lambda x: (x == 1).sum() / len(x)),
        negative_ratio=('label', lambda x: (x == -1).sum() / len(x)),
        
        # ===== 추가 지표 =====
        # 1. 극단 감성 카운트
        extreme_positive_count=('label', lambda x: (x == 1).sum()),
        extreme_negative_count=('label', lambda x: (x == -1).sum()),
        
        # 2. 총 감성 점수
        sentiment_sum=('label', 'sum'),
    ).reset_index()
    
    sentiment_agg = sentiment_agg.fillna(0)
    
    # ===== 파생 지표 계산 =====
    
    # 1. Sentiment Polarity 
    sentiment_agg['sentiment_polarity'] = (
        sentiment_agg['positive_ratio'] - sentiment_agg['negative_ratio']
    )
    
    # 2. Sentiment Intensity (감성 강도) 
    sentiment_agg['sentiment_intensity'] = (
        sentiment_agg['positive_ratio'] + sentiment_agg['negative_ratio']
    )
    
    # 3. Sentiment Disagreement 
    sentiment_agg['sentiment_disagreement'] = (
        sentiment_agg['positive_ratio'] * sentiment_agg['negative_ratio']
    )
    
    # 4. Bull/Bear Ratio 
    sentiment_agg['bull_bear_ratio'] = (
        sentiment_agg['positive_ratio'] / (sentiment_agg['negative_ratio'] + 1e-10)
    )
    
    # 5. Weighted Sentiment 
    sentiment_agg['weighted_sentiment'] = (
        sentiment_agg['sentiment_mean'] * np.log1p(sentiment_agg['news_count'])
    )
    
    # 6. Extremity Index 
    sentiment_agg['extremity_index'] = (
        (sentiment_agg['extreme_positive_count'] + sentiment_agg['extreme_negative_count']) / 
        (sentiment_agg['news_count'] + 1e-10)
    )
    
    # ===== 시계열 파생 지표 (이동 평균) =====
    
    for window in [3, 7, 14]:
        # 감성 이동 평균
        sentiment_agg[f'sentiment_ma{window}'] = (
            sentiment_agg['sentiment_mean'].rolling(window=window, min_periods=1).mean()
        )
        
        # 감성 변동성 (이동 표준편차)
        sentiment_agg[f'sentiment_volatility_{window}'] = (
            sentiment_agg['sentiment_mean'].rolling(window=window, min_periods=1).std()
        )
    
    # 7. Sentiment Trend 
    sentiment_agg['sentiment_trend'] = sentiment_agg['sentiment_mean'].diff()
    
    # 8. Sentiment Acceleration
    sentiment_agg['sentiment_acceleration'] = sentiment_agg['sentiment_trend'].diff()
    
    # 9. News Volume Change
    sentiment_agg['news_volume_change'] = sentiment_agg['news_count'].pct_change()
    
    # 10. News Volume MA 
    for window in [7, 14]:
        sentiment_agg[f'news_volume_ma{window}'] = (
            sentiment_agg['news_count'].rolling(window=window, min_periods=1).mean()
        )
    
    print(f"✓ 감성 지표 생성 완료: {sentiment_agg.shape[1] - 1}개 (date 제외)")
    sentiment_agg = sentiment_agg.fillna(0)
    
    return sentiment_agg


sentiment_features = create_sentiment_features(news_df)



# ============================================================================ 
# 5. 데이터 병합
# ============================================================================ 
def add_prefix(df, prefix):
    df.columns = [prefix + '_' + col if col != 'date' else col for col in df.columns]
    return df

eth_onchain_df = add_prefix(eth_onchain_df, 'eth')
fear_greed_df = add_prefix(fear_greed_df, 'fg')
usdt_eth_mcap_df = add_prefix(usdt_eth_mcap_df, 'usdt')
aave_tvl_df = add_prefix(aave_tvl_df, 'aave')
lido_tvl_df = add_prefix(lido_tvl_df, 'lido')
makerdao_tvl_df = add_prefix(makerdao_tvl_df, 'makerdao')
eth_chain_tvl_df = add_prefix(eth_chain_tvl_df, 'chain')
eth_funding_df = add_prefix(eth_funding_df, 'funding')
sp500_df = add_prefix(sp500_df, 'sp500')
vix_df = add_prefix(vix_df, 'vix')
gold_df = add_prefix(gold_df, 'gold')
dxy_df = add_prefix(dxy_df, 'dxy')

date_range = pd.date_range(start=lookback_start_date, end=end_date, freq='D')
df_merged = pd.DataFrame(date_range, columns=['date'])

dataframes_to_merge = [
    macro_df, sentiment_features, eth_onchain_df, fear_greed_df, usdt_eth_mcap_df,
    aave_tvl_df, lido_tvl_df, makerdao_tvl_df, eth_chain_tvl_df,
    eth_funding_df, sp500_df, vix_df, gold_df, dxy_df
]

# 1. 외부 데이터 Merge 후
for df_to_merge in dataframes_to_merge:
    df_merged = pd.merge(df_merged, df_to_merge, on='date', how='left')

# 2. 감성 지표 결측 처리 (0)
sentiment_cols = [col for col in df_merged.columns 
                 if any(x in col for x in ['sentiment', 'news', 'ext', 'bull_bear','positive','negative','extreme'])]

print(f"\n감성 지표 결측 처리:")
for col in sentiment_cols:
    missing_before = df_merged[col].isnull().sum()
    if missing_before > 0:
        df_merged[col] = df_merged[col].fillna(0)
        print(f"  {col}: {missing_before}개 → 0 (데이터 없음 = 중립)")

# 3. 외부 변수 FFill (bfill 절대 금지!)
external_cols = [col for col in df_merged.columns 
                if any(x in col for x in ['eth_', 'fg_', 'usdt_', 'aave_', 'lido_', 
                                         'makerdao_', 'chain_', 'funding_',
                                         'sp500_', 'vix_', 'gold_', 'dxy_'])]

print(f"\n외부 변수 FFill 처리:")
missing_before = df_merged[external_cols].isnull().sum().sum()
df_merged[external_cols] = df_merged[external_cols].fillna(method='ffill')
missing_after = df_merged[external_cols].isnull().sum().sum()
print(f"  {missing_before:,} → {missing_after:,}개 (FFill)")

# 4. Lookback 기간 제거
print(f"\nLookback 기간 제거:")
before = len(df_merged)
df_merged = df_merged[df_merged['date'] >= lookback_start_date].reset_index(drop=True)
print(f"  {before} → {len(df_merged)}행")

remaining_missing = df_merged[external_cols].isnull().sum().sum()
if remaining_missing > 0:
    print(f"\n초기 결측치 처리:")
    print(f"  남은 결측: {remaining_missing}개 → 0")
    df_merged[external_cols] = df_merged[external_cols].fillna(0)

# 6. Lookback 기간 동안 모두 NaN인 컬럼 제거
lookback_df = df_merged[df_merged['date'] < train_start_date]
cols_to_drop = [col for col in lookback_df.columns 
               if lookback_df[col].isnull().all() and col != 'date']

if cols_to_drop:
    print(f"\nLookback 기간 완전 결측 컬럼 제거:")
    print(f"  {cols_to_drop}")
    df_merged = df_merged.drop(columns=cols_to_drop)

print(f"\n✓ 최종 데이터: {df_merged.shape}")
print(f"  날짜: {df_merged['date'].min().date()} ~ {df_merged['date'].max().date()}")
print(f"  결측: {df_merged.isnull().sum().sum()}개")

2025-10-20 23:53:25.645324: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-10-20 23:53:25.645365: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-10-20 23:53:25.646532: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-10-20 23:53:25.652949: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


✓ 감성 지표 생성 완료: 25개 (date 제외)

감성 지표 결측 처리:
  sentiment_mean: 39개 → 0 (데이터 없음 = 중립)
  sentiment_std: 39개 → 0 (데이터 없음 = 중립)
  news_count: 39개 → 0 (데이터 없음 = 중립)
  positive_ratio: 39개 → 0 (데이터 없음 = 중립)
  negative_ratio: 39개 → 0 (데이터 없음 = 중립)
  extreme_positive_count: 39개 → 0 (데이터 없음 = 중립)
  extreme_negative_count: 39개 → 0 (데이터 없음 = 중립)
  sentiment_sum: 39개 → 0 (데이터 없음 = 중립)
  sentiment_polarity: 39개 → 0 (데이터 없음 = 중립)
  sentiment_intensity: 39개 → 0 (데이터 없음 = 중립)
  sentiment_disagreement: 39개 → 0 (데이터 없음 = 중립)
  bull_bear_ratio: 39개 → 0 (데이터 없음 = 중립)
  weighted_sentiment: 39개 → 0 (데이터 없음 = 중립)
  extremity_index: 39개 → 0 (데이터 없음 = 중립)
  sentiment_ma3: 39개 → 0 (데이터 없음 = 중립)
  sentiment_volatility_3: 39개 → 0 (데이터 없음 = 중립)
  sentiment_ma7: 39개 → 0 (데이터 없음 = 중립)
  sentiment_volatility_7: 39개 → 0 (데이터 없음 = 중립)
  sentiment_ma14: 39개 → 0 (데이터 없음 = 중립)
  sentiment_volatility_14: 39개 → 0 (데이터 없음 = 중립)
  sentiment_trend: 39개 → 0 (데이터 없음 = 중립)
  sentiment_acceleration: 39개 → 0 (데이터 없음 = 중립)
  news_volum

In [2]:
news_df.head(10)

Unnamed: 0,date,news,label
0,2020-01-01,"세계 최대 암호화폐인 비트코인(Bitcoin, BTC)은 전일대비 1.54% 하락한...",-1
1,2020-01-01,업비트 암호화폐(가상화폐) 거래소 오전 9시 25분(한국시간) 기준으로 비트코인은 ...,-1
2,2020-01-02,"지난 24시간 동안 세계 최대 암호화폐인 비트코인(Bitcoin, BTC)은 단기 ...",0
3,2020-01-02,업비트 암호화폐(가상화폐) 거래소 오전 9시 50분(한국시간) 기준으로 비트코인은 ...,0
4,2020-01-03,이더리움(Ethereum) 네트워크가 '빙하기'를 늦추기 위한 긴급 하드포크 ‘뮤어...,1
5,2020-01-03,"지난 24시간 동안 세계 최대 암호화폐인 비트코인(Bitcoin, BTC)은 곰(b...",-1
6,2020-01-03,업비트 암호화폐(가상화폐) 거래소 오전 9시 30분(한국시간) 기준으로 비트코인은 ...,-1
7,2020-01-04,패스트푸드 대기업인 버거킹이 베네수엘라 지점에서 비트코인(BTC) 결제를 지원하고 ...,1
8,2020-01-04,"지난 24시간 동안 세계 최대 암호화폐인 비트코인(Bitcoin, BTC)은 일시적...",1
9,2020-01-04,업비트 암호화폐(가상화폐) 거래소 1월 4일(한국시간) 오전 10시 20분 기준으로...,1


In [3]:
# ============================================================================
# 결측치 상세 분석
# ============================================================================

print("\n" + "="*80)
print("결측치 상세 분석")
print("="*80)

# 1. 컬럼별 결측치 확인
missing_summary = df_merged.isnull().sum()
missing_cols = missing_summary[missing_summary > 0].sort_values(ascending=False)

if len(missing_cols) > 0:
    print(f"\n[1] 결측치가 있는 컬럼 ({len(missing_cols)}개):")
    for col, count in missing_cols.items():
        pct = (count / len(df_merged)) * 100
        print(f"  {col}: {count}개 ({pct:.2f}%)")
    
    # 2. 결측치 발생 위치 확인 (날짜별)
    print(f"\n[2] 결측치 발생 날짜 범위:")
    
    for col in missing_cols.index[:5]:  # 상위 5개만
        null_dates = df_merged[df_merged[col].isnull()]['date']
        if len(null_dates) > 0:
            print(f"\n  {col}:")
            print(f"    첫 결측: {null_dates.min().date()}")
            print(f"    마지막 결측: {null_dates.max().date()}")
            
            # Lookback 기간 내 결측인지 확인
            lookback_nulls = null_dates[null_dates < train_start_date]
            train_nulls = null_dates[null_dates >= train_start_date]
            
            print(f"    Lookback 기간 내: {len(lookback_nulls)}개")
            print(f"    Train 기간 내: {len(train_nulls)}개")
            
            if len(train_nulls) > 0:
                print(f"    ⚠ Train 기간에 결측 존재!")
    
    # 3. 행별 결측치 분포
    print(f"\n[3] 행별 결측치 분포:")
    missing_per_row = df_merged.isnull().sum(axis=1)
    print(f"  최대 결측 컬럼 수: {missing_per_row.max()}개/행")
    print(f"  결측이 있는 행: {(missing_per_row > 0).sum()}개")
    
    # 결측이 많은 행 확인
    if missing_per_row.max() > 0:
        worst_rows = missing_per_row.nlargest(5)
        print(f"\n  결측이 많은 상위 5개 행:")
        for idx, count in worst_rows.items():
            date_val = df_merged.loc[idx, 'date']
            print(f"    날짜 {date_val.date()}: {count}개 컬럼 결측")
    
    # 4. Train 시작 날짜 이후 결측치만 추출
    train_period_df = df_merged[df_merged['date'] >= train_start_date]
    train_missing = train_period_df.isnull().sum()
    train_missing_cols = train_missing[train_missing > 0].sort_values(ascending=False)
    
    print(f"\n[4] Train 기간 이후 ({train_start_date.date()}~) 결측:")
    if len(train_missing_cols) > 0:
        print(f"  ⚠ Train 기간에 {train_missing_cols.sum()}개 결측 발견!")
        for col, count in train_missing_cols.items():
            print(f"    {col}: {count}개")
    else:
        print(f"  ✓ Train 기간에는 결측 없음!")
    
    # 5. Lookback 기간만의 결측치
    lookback_period_df = df_merged[df_merged['date'] < train_start_date]
    lookback_missing = lookback_period_df.isnull().sum()
    lookback_missing_cols = lookback_missing[lookback_missing > 0].sort_values(ascending=False)
    
    print(f"\n[5] Lookback 기간만 ({lookback_start_date.date()}~{train_start_date.date()}) 결측:")
    if len(lookback_missing_cols) > 0:
        print(f"  Lookback에만 {lookback_missing_cols.sum()}개 결측")
        for col, count in lookback_missing_cols.head(10).items():
            print(f"    {col}: {count}개")
    else:
        print(f"  ✓ Lookback 기간에도 결측 없음!")

else:
    print("\n✓ 결측치 없음!")

print("="*80)



결측치 상세 분석

[1] 결측치가 있는 컬럼 (10개):
  AVAX_Open: 110개 (5.63%)
  AVAX_High: 110개 (5.63%)
  AVAX_Low: 110개 (5.63%)
  AVAX_Close: 110개 (5.63%)
  AVAX_Volume: 110개 (5.63%)
  DOT_Open: 79개 (4.05%)
  DOT_High: 79개 (4.05%)
  DOT_Low: 79개 (4.05%)
  DOT_Close: 79개 (4.05%)
  DOT_Volume: 79개 (4.05%)

[2] 결측치 발생 날짜 범위:

  AVAX_Open:
    첫 결측: 2020-06-02
    마지막 결측: 2020-09-21
    Lookback 기간 내: 110개
    Train 기간 내: 0개

  AVAX_High:
    첫 결측: 2020-06-02
    마지막 결측: 2020-09-21
    Lookback 기간 내: 110개
    Train 기간 내: 0개

  AVAX_Low:
    첫 결측: 2020-06-02
    마지막 결측: 2020-09-21
    Lookback 기간 내: 110개
    Train 기간 내: 0개

  AVAX_Close:
    첫 결측: 2020-06-02
    마지막 결측: 2020-09-21
    Lookback 기간 내: 110개
    Train 기간 내: 0개

  AVAX_Volume:
    첫 결측: 2020-06-02
    마지막 결측: 2020-09-21
    Lookback 기간 내: 110개
    Train 기간 내: 0개

[3] 행별 결측치 분포:
  최대 결측 컬럼 수: 10개/행
  결측이 있는 행: 112개

  결측이 많은 상위 5개 행:
    날짜 2020-06-02: 10개 컬럼 결측
    날짜 2020-06-03: 10개 컬럼 결측
    날짜 2020-06-04: 10개 컬럼 결측
    날짜 2020-06-05: 10개 컬럼 결

In [4]:

def add_indicator_to_df(df_ta, indicator):
    """pandas_ta 지표 결과를 DataFrame에 안전하게 추가"""
    if indicator is None:
        return

    if isinstance(indicator, pd.DataFrame) and not indicator.empty:
        for col in indicator.columns:
            df_ta[col] = indicator[col]
    elif isinstance(indicator, pd.Series) and not indicator.empty:
        colname = indicator.name if indicator.name else 'Unnamed'
        df_ta[colname] = indicator

def safe_add(df_ta, func, *args, **kwargs):
    """지표 생성 시 오류 방지를 위한 래퍼 함수"""
    try:
        result = func(*args, **kwargs)
        add_indicator_to_df(df_ta, result)
        return True
    except Exception as e:
        func_name = func.__name__ if hasattr(func, '__name__') else str(func)
        print(f"    ⚠ {func_name.upper()} 생성 실패: {str(e)[:50]}")
        return False

def calculate_technical_indicators(df):
    """
    출처: 
    - "CryptoPulse: Short-Term Cryptocurrency Forecasting" (2024)
    - "Enhancing Price Prediction in Cryptocurrency Using Transformer" (2024)
    - "Bitcoin Trend Prediction with Attention-Based Deep Learning" (2024)
    """
    #print("\n=== 기술적 지표 생성 중 ===")
    df = df.sort_values('date').reset_index(drop=True)
    df_ta = df.copy()

    close = df['ETH_Close']
    high = df.get('ETH_High', close)
    low = df.get('ETH_Low', close)
    volume = df.get('ETH_Volume', pd.Series(index=df.index, data=1))
    open_ = df.get('ETH_Open', close)

    try:
        # ===== [핵심] MOMENTUM INDICATORS =====
        
        # RSI (필수)
        df_ta['RSI_14'] = ta.rsi(close, length=14)
        df_ta['RSI_30'] = ta.rsi(close, length=30)
        df_ta['RSI_200'] = ta.rsi(close, length=200)  # 장기 RSI 추가
        
        # MACD (필수 - top feature importance)
        safe_add(df_ta, ta.macd, close, fast=12, slow=26, signal=9)
        
        # Stochastic Oscillator (%K, %D - 논문에서 핵심 지표)
        safe_add(df_ta, ta.stoch, high, low, close, k=14, d=3)
        safe_add(df_ta, ta.stoch, high, low, close, k=30, d=3)  # 30일 추가
        safe_add(df_ta, ta.stoch, high, low, close, k=200, d=3)  # 200일 추가
        
        # Williams %R
        df_ta['WILLR_14'] = ta.willr(high, low, close, length=14)
        
        # ROC (Rate of Change)
        df_ta['ROC_10'] = ta.roc(close, length=10)
        df_ta['ROC_20'] = ta.roc(close, length=20)
        
        # MOM (Momentum - 다양한 기간)
        df_ta['MOM_10'] = ta.mom(close, length=10)
        df_ta['MOM_30'] = ta.mom(close, length=30) 
        
        # CCI (Commodity Channel Index)
        df_ta['CCI_14'] = ta.cci(high, low, close, length=14)
        df_ta['CCI_20'] = ta.cci(high, low, close, length=20)
        df_ta['CCI_50'] = ta.cci(high, low, close, length=50)
        df_ta['CCI_SIGNAL'] = (df_ta['CCI_20'] > 100).astype(int)
      
        # TSI (True Strength Index)
        safe_add(df_ta, ta.tsi, close, fast=13, slow=25, signal=13)

        
        # =====  Ichimoku Cloud (암호화폐 트렌드 분석에 효과적) =====
        try:
            ichimoku = ta.ichimoku(high, low, close)
            if ichimoku is not None and isinstance(ichimoku, tuple):
                ichimoku_df = ichimoku[0]
                if ichimoku_df is not None:
                    for col in ichimoku_df.columns:
                        df_ta[col] = ichimoku_df[col]
        except Exception as e:
            print(f"    ⚠ ICHIMOKU 생성 실패")

        # ===== [핵심] OVERLAP INDICATORS =====
        
        # SMA (필수! - Golden/Death Cross)
        df_ta['SMA_10'] = ta.sma(close, length=10)
        df_ta['SMA_20'] = ta.sma(close, length=20)
        df_ta['SMA_50'] = ta.sma(close, length=50)
        df_ta['SMA_200'] = ta.sma(close, length=200)
        
        # EMA (필수!)
        df_ta['EMA_12'] = ta.ema(close, length=12)
        df_ta['EMA_26'] = ta.ema(close, length=26)
        df_ta['EMA_50'] = ta.ema(close, length=50)
        df_ta['EMA_200'] = ta.ema(close, length=200) 
        
        # TEMA (Triple EMA - 논문에서 high importance)
        df_ta['TEMA_10'] = ta.tema(close, length=10)
        df_ta['TEMA_30'] = ta.tema(close, length=30) 
        
        # WMA (Weighted Moving Average)
        df_ta['WMA_10'] = ta.wma(close, length=10)
        df_ta['WMA_20'] = ta.wma(close, length=20)  
        
        # HMA (Hull Moving Average)
        df_ta['HMA_9'] = ta.hma(close, length=9)
        
        # DEMA (Double EMA)
        df_ta['DEMA_10'] = ta.dema(close, length=10)
        
        
        # VWMA (Volume Weighted)
        df_ta['VWMA_20'] = ta.vwma(close, volume, length=20)
        
        # 가격 조합
        df_ta['HL2'] = ta.hl2(high, low)
        df_ta['HLC3'] = ta.hlc3(high, low, close)
        df_ta['OHLC4'] = ta.ohlc4(open_, high, low, close)

        # ===== [핵심] VOLATILITY INDICATORS =====
        
        # Bollinger Bands (필수 )
        safe_add(df_ta, ta.bbands, close, length=20, std=2)
        safe_add(df_ta, ta.bbands, close, length=50, std=2)  
        
        # ATR 
        df_ta['ATR_7'] = ta.atr(high, low, close, length=7)
        df_ta['ATR_14'] = ta.atr(high, low, close, length=14)
        df_ta['ATR_21'] = ta.atr(high, low, close, length=21) 
        
        # NATR (Normalized ATR)
        df_ta['NATR_14'] = ta.natr(high, low, close, length=14)
        
        # True Range
        try:
            tr = ta.true_range(high, low, close)
            if isinstance(tr, pd.Series) and not tr.empty:
                df_ta['TRUERANGE'] = tr
            elif isinstance(tr, pd.DataFrame) and not tr.empty:
                df_ta['TRUERANGE'] = tr.iloc[:, 0]
        except:
            pass
        
        # Keltner Channel
        safe_add(df_ta, ta.kc, high, low, close, length=20)
        
        # Donchian Channel 
        try:
            dc = ta.donchian(high, low, lower_length=20, upper_length=20)
            if dc is not None and isinstance(dc, pd.DataFrame) and not dc.empty:
                for col in dc.columns:
                    df_ta[col] = dc[col]
        except:
            pass
        
        atr_10 = ta.atr(high, low, close, length=10)
        hl2_calc = (high + low) / 2
        upper_band = hl2_calc + (3 * atr_10)
        lower_band = hl2_calc - (3 * atr_10)
        
        df_ta['SUPERTREND'] = 0
        for i in range(1, len(df_ta)):
            if close.iloc[i] > upper_band.iloc[i-1]:
                df_ta.loc[df_ta.index[i], 'SUPERTREND'] = 1
            elif close.iloc[i] < lower_band.iloc[i-1]:
                df_ta.loc[df_ta.index[i], 'SUPERTREND'] = -1
            else:
                df_ta.loc[df_ta.index[i], 'SUPERTREND'] = df_ta['SUPERTREND'].iloc[i-1]

        
        
        # ===== [핵심] VOLUME INDICATORS =====
        
        # OBV (필수)
        df_ta['OBV'] = ta.obv(close, volume)
        
        # AD (Accumulation/Distribution)
        df_ta['AD'] = ta.ad(high, low, close, volume)
        
        # ADOSC
        df_ta['ADOSC_3_10'] = ta.adosc(high, low, close, volume, fast=3, slow=10)
        
        # MFI (Money Flow Index)
        df_ta['MFI_14'] = ta.mfi(high, low, close, volume, length=14)
        
        # CMF (Chaikin Money Flow - 논문에서 중요 지표)
        df_ta['CMF_20'] = ta.cmf(high, low, close, volume, length=20)
        
        # EFI (Elder Force Index)
        df_ta['EFI_13'] = ta.efi(close, volume, length=13)
        
        # EOM (Ease of Movement)
        safe_add(df_ta, ta.eom, high, low, close, volume, length=14)
        
        # VWAP (Volume Weighted Average Price) 
        try:
            df_ta['VWAP'] = ta.vwap(high, low, close, volume)
        except:
            pass

        # ===== TREND INDICATORS =====
        
        # ADX 
        safe_add(df_ta, ta.adx, high, low, close, length=14)
        
        # Aroon 
        try:
            aroon = ta.aroon(high, low, length=25)
            if aroon is not None and isinstance(aroon, pd.DataFrame):
                for col in aroon.columns:
                    df_ta[col] = aroon[col]
        except:
            pass
        
        # PSAR
        try:
            psar = ta.psar(high, low, close)
            if psar is not None:
                if isinstance(psar, pd.DataFrame) and not psar.empty:
                    for col in psar.columns:
                        df_ta[col] = psar[col]
                elif isinstance(psar, pd.Series) and not psar.empty:
                    df_ta[psar.name] = psar
        except:
            pass
        
        # Vortex
        safe_add(df_ta, ta.vortex, high, low, close, length=14)
        
        # DPO (Detrended Price Oscillator)
        try:
            df_ta['DPO_20'] = ta.dpo(close, length=20)
        except:
            pass

        # ===== 파생 지표 =====
        
        # 가격 변화율 
        df_ta['PRICE_CHANGE'] = close.pct_change()
        df_ta['PRICE_CHANGE_2'] = close.pct_change(periods=2)
        df_ta['PRICE_CHANGE_5'] = close.pct_change(periods=5)
        df_ta['PRICE_CHANGE_10'] = close.pct_change(periods=10) 
        
        # 변동성 (Rolling Std)
        df_ta['VOLATILITY_5'] = close.pct_change().rolling(window=5).std()
        df_ta['VOLATILITY_10'] = close.pct_change().rolling(window=10).std()
        df_ta['VOLATILITY_20'] = close.pct_change().rolling(window=20).std()
        df_ta['VOLATILITY_30'] = close.pct_change().rolling(window=30).std() 
        
        # 모멘텀 (Price Ratio)
        df_ta['MOMENTUM_5'] = close / close.shift(5) - 1
        df_ta['MOMENTUM_10'] = close / close.shift(10) - 1
        df_ta['MOMENTUM_20'] = close / close.shift(20) - 1
        df_ta['MOMENTUM_30'] = close / close.shift(30) - 1  
        
        # 이동평균 대비 위치 
        df_ta['PRICE_VS_SMA10'] = close / df_ta['SMA_10'] - 1
        df_ta['PRICE_VS_SMA20'] = close / df_ta['SMA_20'] - 1
        df_ta['PRICE_VS_SMA50'] = close / df_ta['SMA_50'] - 1
        df_ta['PRICE_VS_SMA200'] = close / df_ta['SMA_200'] - 1
        df_ta['PRICE_VS_EMA12'] = close / df_ta['EMA_12'] - 1 
        df_ta['PRICE_VS_EMA26'] = close / df_ta['EMA_26'] - 1  
        
        # 크로스 신호 
        df_ta['SMA_CROSS_SIGNAL'] = (df_ta['SMA_10'] > df_ta['SMA_20']).astype(int)
        df_ta['SMA_GOLDEN_CROSS'] = (df_ta['SMA_50'] > df_ta['SMA_200']).astype(int) 
        df_ta['EMA_CROSS_SIGNAL'] = (df_ta['EMA_12'] > df_ta['EMA_26']).astype(int)
        
        # 거래량 지표
        df_ta['VOLUME_SMA_20'] = ta.sma(volume, length=20)
        df_ta['VOLUME_RATIO'] = volume / (df_ta['VOLUME_SMA_20'] + 1e-10)
        df_ta['VOLUME_CHANGE'] = volume.pct_change()
        df_ta['VOLUME_CHANGE_5'] = volume.pct_change(periods=5)  
        
        # Range 지표
        df_ta['HIGH_LOW_RANGE'] = (high - low) / (close + 1e-10)
        df_ta['HIGH_CLOSE_RANGE'] = np.abs(high - close.shift()) / (close + 1e-10)
        df_ta['CLOSE_LOW_RANGE'] = (close - low) / (close + 1e-10)
        
        # 일중 가격 위치 
        df_ta['INTRADAY_POSITION'] = (close - low) / ((high - low) + 1e-10)  
        
        # Linear Regression Slope
        try:
            df_ta['SLOPE_5'] = ta.linreg(close, length=5, slope=True)
            df_ta['SLOPE_10'] = ta.linreg(close, length=10, slope=True)
            df_ta['LINREG_14'] = ta.linreg(close, length=14)
        except:
            df_ta['SLOPE_5'] = close.rolling(window=5).apply(
                lambda x: np.polyfit(np.arange(len(x)), x, 1)[0] if len(x) == 5 else np.nan, raw=True
            )
            df_ta['SLOPE_10'] = close.rolling(window=10).apply(
                lambda x: np.polyfit(np.arange(len(x)), x, 1)[0] if len(x) == 10 else np.nan, raw=True
            )
        
        # Increasing/Decreasing 신호
        df_ta['INC_1'] = (close > close.shift(1)).astype(int)
        df_ta['DEC_1'] = (close < close.shift(1)).astype(int)
        df_ta['INC_3'] = (close > close.shift(3)).astype(int)
        df_ta['INC_5'] = (close > close.shift(5)).astype(int)  
        
        # BOP 
        df_ta['BOP'] = (close - open_) / ((high - low) + 1e-10)
        df_ta['BOP'] = df_ta['BOP'].fillna(0)
        
        # ===== 고급 파생 지표 =====
        
        # Bollinger Bands 관련 파생
        if 'BBL_20' in df_ta.columns and 'BBU_20' in df_ta.columns and 'BBM_20' in df_ta.columns:
            df_ta['BB_WIDTH'] = (df_ta['BBU_20'] - df_ta['BBL_20']) / (df_ta['BBM_20'] + 1e-8)
            df_ta['BB_POSITION'] = (close - df_ta['BBL_20']) / (df_ta['BBU_20'] - df_ta['BBL_20'] + 1e-8)
        else:
            print(f"    ⚠ Bollinger Bands 컬럼 미발견")
        
        # RSI 파생 (Overbought/Oversold)
        df_ta['RSI_OVERBOUGHT'] = (df_ta['RSI_14'] > 70).astype(int)
        df_ta['RSI_OVERSOLD'] = (df_ta['RSI_14'] < 30).astype(int)
        
        # MACD 히스토그램 변화율
        if 'MACDh_12_26_9' in df_ta.columns:
            df_ta['MACD_HIST_CHANGE'] = df_ta['MACDh_12_26_9'].diff()
        
        # Volume Profile (상대적 거래량 강도)
        df_ta['VOLUME_STRENGTH'] = volume / volume.rolling(window=50).mean()
        
        # Price Acceleration (2차 미분)
        df_ta['PRICE_ACCELERATION'] = close.pct_change().diff()
        
        # Gap (시가-전일종가)
        df_ta['GAP'] = (open_ - close.shift(1)) / (close.shift(1) + 1e-10)
        
        df_ta['ROLLING_MAX_20'] = close.rolling(window=20).max()
        df_ta['ROLLING_MIN_20'] = close.rolling(window=20).min()
        df_ta['DISTANCE_FROM_HIGH'] = (df_ta['ROLLING_MAX_20'] - close) / (df_ta['ROLLING_MAX_20'] + 1e-10)
        df_ta['DISTANCE_FROM_LOW'] = (close - df_ta['ROLLING_MIN_20']) / (close + 1e-10)

        # Realized Volatility 
        ret_squared = close.pct_change() ** 2
        df_ta['RV_5'] = ret_squared.rolling(5).sum()
        df_ta['RV_20'] = ret_squared.rolling(20).sum()
        df_ta['RV_RATIO'] = df_ta['RV_5'] / (df_ta['RV_20'] + 1e-10)
        
        # Fibonacci Pivots 
        high_20 = high.rolling(20).max()
        low_20 = low.rolling(20).min()
        diff = high_20 - low_20
        
        df_ta['FIB_0'] = high_20
        df_ta['FIB_236'] = high_20 - 0.236 * diff
        df_ta['FIB_382'] = high_20 - 0.382 * diff
        df_ta['FIB_500'] = high_20 - 0.500 * diff
        df_ta['FIB_618'] = high_20 - 0.618 * diff
        df_ta['FIB_1'] = low_20
        
        #Directional Change Events 
        df_ta['DC_EVENT'] = 0
        df_ta['DC_TYPE'] = 0
        
        threshold = 0.05
        last_extreme = close.iloc[0]
        last_type = 0
        
        for i in range(1, len(df_ta)):
            price = close.iloc[i]
            change = (price - last_extreme) / last_extreme
            
            if last_type <= 0 and change >= threshold:
                df_ta.loc[df_ta.index[i], 'DC_EVENT'] = 1
                df_ta.loc[df_ta.index[i], 'DC_TYPE'] = 1
                last_extreme = price
                last_type = 1
            elif last_type >= 0 and change <= -threshold:
                df_ta.loc[df_ta.index[i], 'DC_EVENT'] = 1
                df_ta.loc[df_ta.index[i], 'DC_TYPE'] = -1
                last_extreme = price
                last_type = -1
        
        
        added = df_ta.shape[1] - df.shape[1]

                
    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()

    return df_ta


def add_enhanced_cross_crypto_features(df):
    df_enhanced = df.copy()

    df_enhanced['eth_return'] = df['ETH_Close'].pct_change()
    df_enhanced['btc_return'] = df['BTC_Close'].pct_change()

    for lag in [1, 2, 3, 5, 10]:
        df_enhanced[f'btc_return_lag{lag}'] = df_enhanced['btc_return'].shift(lag)

    for window in [3, 7, 14, 30, 60]:
        df_enhanced[f'eth_btc_corr_{window}d'] = (
            df_enhanced['eth_return'].rolling(window).corr(df_enhanced['btc_return'])
        )

    eth_vol = df_enhanced['eth_return'].abs()
    btc_vol = df_enhanced['btc_return'].abs()

    for window in [7, 14, 30]:
        df_enhanced[f'eth_btc_volcorr_{window}d'] = eth_vol.rolling(window).corr(btc_vol)
        df_enhanced[f'eth_btc_volcorr_sq_{window}d'] = (
            (df_enhanced['eth_return']**2).rolling(window).corr(df_enhanced['btc_return']**2)
        )

    df_enhanced['btc_eth_strength_ratio'] = (
        df_enhanced['btc_return'] / (df_enhanced['eth_return'].abs() + 1e-8)
    )
    df_enhanced['btc_eth_strength_ratio_7d'] = (
        df_enhanced['btc_eth_strength_ratio'].rolling(7).mean()
    )

    alt_returns = []
    for coin in ['BNB', 'XRP', 'SOL', 'ADA']:
        if f'{coin}_Close' in df.columns:
            alt_returns.append(df[f'{coin}_Close'].pct_change())

    if alt_returns:
        market_return = pd.concat(
            alt_returns + [df_enhanced['eth_return'], df_enhanced['btc_return']], axis=1
        ).mean(axis=1)
        df_enhanced['btc_dominance'] = df_enhanced['btc_return'] / (market_return + 1e-8)

    for window in [30, 60, 90]:
        covariance = df_enhanced['eth_return'].rolling(window).cov(df_enhanced['btc_return'])
        btc_variance = df_enhanced['btc_return'].rolling(window).var()
        df_enhanced[f'eth_btc_beta_{window}d'] = covariance / (btc_variance + 1e-8)

    df_enhanced['eth_btc_spread'] = df_enhanced['eth_return'] - df_enhanced['btc_return']
    df_enhanced['eth_btc_spread_ma7'] = df_enhanced['eth_btc_spread'].rolling(7).mean()
    df_enhanced['eth_btc_spread_std7'] = df_enhanced['eth_btc_spread'].rolling(7).std()

    btc_vol_ma = btc_vol.rolling(30).mean()
    high_vol_mask = btc_vol > btc_vol_ma

    df_enhanced['eth_btc_corr_highvol'] = np.nan
    df_enhanced['eth_btc_corr_lowvol'] = np.nan

    for i in range(30, len(df_enhanced)):
        window_data = df_enhanced.iloc[i-30:i]
        high_vol_data = window_data[high_vol_mask.iloc[i-30:i]]
        low_vol_data = window_data[~high_vol_mask.iloc[i-30:i]]

        if len(high_vol_data) > 5:
            df_enhanced.loc[df_enhanced.index[i], 'eth_btc_corr_highvol'] = (
                high_vol_data['eth_return'].corr(high_vol_data['btc_return'])
            )
        if len(low_vol_data) > 5:
            df_enhanced.loc[df_enhanced.index[i], 'eth_btc_corr_lowvol'] = (
                low_vol_data['eth_return'].corr(low_vol_data['btc_return'])
            )

    return df_enhanced


def remove_raw_prices_and_transform(df):
    df_transformed = df.copy()

    if 'eth_log_return' not in df_transformed.columns:
        df_transformed['eth_log_return'] = np.log(df['ETH_Close'] / df['ETH_Close'].shift(1))
    if 'eth_intraday_range' not in df_transformed.columns:
        df_transformed['eth_intraday_range'] = (df['ETH_High'] - df['ETH_Low']) / (df['ETH_Close'] + 1e-8)
    if 'eth_body_ratio' not in df_transformed.columns:
        df_transformed['eth_body_ratio'] = (df['ETH_Close'] - df['ETH_Open']) / (df['ETH_Close'] + 1e-8)
    if 'eth_close_position' not in df_transformed.columns:
        df_transformed['eth_close_position'] = (
            (df['ETH_Close'] - df['ETH_Low']) / (df['ETH_High'] - df['ETH_Low'] + 1e-8)
        )

    if 'BTC_Close' in df_transformed.columns:
        if 'btc_log_return' not in df_transformed.columns:
            df_transformed['btc_log_return'] = np.log(df['BTC_Close'] / df['BTC_Close'].shift(1))
        for period in [5, 10, 20, 30]:
            col_name = f'btc_return_{period}d'
            if col_name not in df_transformed.columns:
                df_transformed[col_name] = np.log(df['BTC_Close'] / df['BTC_Close'].shift(period)).fillna(0)
        for period in [7, 14, 30]:
            col_name = f'btc_volatility_{period}d'
            if col_name not in df_transformed.columns:
                df_transformed[col_name] = (
                    df_transformed['btc_log_return'].rolling(period, min_periods=max(3, period//3)).std()
                ).fillna(0)
        if 'btc_intraday_range' not in df_transformed.columns:
            df_transformed['btc_intraday_range'] = (df['BTC_High'] - df['BTC_Low']) / (df['BTC_Close'] + 1e-8)
        if 'btc_body_ratio' not in df_transformed.columns:
            df_transformed['btc_body_ratio'] = (df['BTC_Close'] - df['BTC_Open']) / (df['BTC_Close'] + 1e-8)

        if 'BTC_Volume' in df.columns:
            btc_volume = df['BTC_Volume']
            if 'btc_volume_change' not in df_transformed.columns:
                df_transformed['btc_volume_change'] = btc_volume.pct_change().fillna(0)
            if 'btc_volume_ratio_20d' not in df_transformed.columns:
                volume_ma20 = btc_volume.rolling(20, min_periods=5).mean()
                df_transformed['btc_volume_ratio_20d'] = (btc_volume / (volume_ma20 + 1e-8)).fillna(1)
            if 'btc_volume_volatility_30d' not in df_transformed.columns:
                df_transformed['btc_volume_volatility_30d'] = (
                    btc_volume.pct_change().rolling(30, min_periods=10).std()
                ).fillna(0)
            if 'btc_obv' not in df_transformed.columns:
                btc_close = df['BTC_Close']
                obv = np.where(btc_close > btc_close.shift(1), btc_volume,
                               np.where(btc_close < btc_close.shift(1), -btc_volume, 0))
                df_transformed['btc_obv'] = pd.Series(obv, index=df.index).cumsum().fillna(0)
            if 'btc_volume_price_corr_30d' not in df_transformed.columns:
                df_transformed['btc_volume_price_corr_30d'] = (
                    btc_volume.pct_change().rolling(30, min_periods=10).corr(
                        df_transformed['btc_log_return']
                    )
                ).fillna(0)

    altcoins = ['BNB', 'XRP', 'SOL', 'ADA', 'DOGE', 'AVAX', 'DOT']
    for coin in altcoins:
        if f'{coin}_Close' in df_transformed.columns:
            col_name = f'{coin.lower()}_return'
            if col_name not in df_transformed.columns:
                df_transformed[col_name] = np.log(df[f'{coin}_Close'] / df[f'{coin}_Close'].shift(1)).fillna(0)
            vol_col = f'{coin.lower()}_volatility_30d'
            if vol_col not in df_transformed.columns:
                df_transformed[vol_col] = (
                    df_transformed[col_name].rolling(30, min_periods=10).std()
                ).fillna(0)
            if f'{coin}_Volume' in df.columns:
                coin_volume = df[f'{coin}_Volume']
                volume_change_col = f'{coin.lower()}_volume_change'
                if volume_change_col not in df_transformed.columns:
                    df_transformed[volume_change_col] = coin_volume.pct_change().fillna(0)
                volume_ratio_col = f'{coin.lower()}_volume_ratio_20d'
                if volume_ratio_col not in df_transformed.columns:
                    volume_ma20 = coin_volume.rolling(20, min_periods=5).mean()
                    df_transformed[volume_ratio_col] = (coin_volume / (volume_ma20 + 1e-8)).fillna(1)

    if 'ETH_Volume' in df.columns and 'BTC_Volume' in df.columns:
        eth_volume = df['ETH_Volume']
        btc_volume = df['BTC_Volume']
        if 'eth_btc_volume_corr_30d' not in df_transformed.columns:
            df_transformed['eth_btc_volume_corr_30d'] = (
                eth_volume.pct_change().rolling(30, min_periods=10).corr(
                    btc_volume.pct_change()
                )
            ).fillna(0)
        if 'eth_btc_volume_ratio' not in df_transformed.columns:
            df_transformed['eth_btc_volume_ratio'] = (
                eth_volume / (btc_volume + 1e-8)
            ).fillna(0)
        if 'eth_btc_volume_ratio_ma30' not in df_transformed.columns:
            df_transformed['eth_btc_volume_ratio_ma30'] = (
                df_transformed['eth_btc_volume_ratio'].rolling(30, min_periods=10).mean()
            ).fillna(0)

    remove_patterns = ['_Close', '_Open', '_High', '_Low', '_Volume']
    cols_to_remove = [
        col for col in df_transformed.columns
        if any(p in col for p in remove_patterns)
        and not any(d in col.lower() for d in ['_lag', '_position', '_ratio', '_range', '_change', '_corr', '_volatility', '_obv'])
    ]
    df_transformed.drop(cols_to_remove, axis=1, inplace=True)

    return_cols = [
        col for col in df_transformed.columns
        if 'return' in col.lower() and 'next' not in col
    ]
    if return_cols:
        df_transformed[return_cols] = df_transformed[return_cols].fillna(0)

    return df_transformed


In [5]:
# ============================================================================
# 2. Lag 적용
# ============================================================================
def apply_lag_features(df, news_lag=2, onchain_lag=1):
    """
    Lag 피처 적용 (원본 유지 + lag 추가)
    
    핵심 원칙:
    1. 원본(lag0) 피처는 그대로 유지
    2. lag1, lag2 피처를 추가로 생성
    3. 이동평균/차분은 lag 불필요 (이미 과거 참조)
    4. 이벤트는 lag 없음 (당일 반영)
    
    출처: "Seeing Beyond Noise" (2024), scikit-learn
    """
    df_lagged = df.copy()
    
    # ===== Lag 적용 대상: 원본 감성 지표만 =====
    raw_sentiment_cols = [
        'sentiment_mean', 'sentiment_std', 'sentiment_sum',
        'news_count', 'positive_ratio', 'negative_ratio',
        'sentiment_polarity', 'sentiment_intensity', 
        'sentiment_disagreement', 'bull_bear_ratio',
        'weighted_sentiment', 'extremity_index',
        'extreme_positive_count', 'extreme_negative_count'
    ]
    
    # ===== Lag 제외: 이동평균, 차분 (이미 과거 참조) =====
    no_lag_patterns = [
        '_ma', '_volatility_', '_trend', '_acceleration', 
        '_volume_change', '_volume_ma'
    ]
    
    # ===== 온체인 데이터 =====
    onchain_cols = [col for col in df.columns if any(keyword in col.lower() 
                    for keyword in ['eth_tx', 'eth_active', 'eth_new', 
                                  'eth_large', 'eth_token', 'eth_contract',
                                  'eth_avg_gas', 'eth_total_gas', 
                                  'eth_avg_block'])]
    
    # ===== 기타 외부 변수 =====
    other_cols = [col for col in df.columns if any(keyword in col.lower() 
                  for keyword in ['tvl', 'funding', 'lido_', 'aave_', 'makerdao_', 
                                'chain_', 'usdt_', 'sp500_', 'vix_', 'gold_', 'dxy_', 'fg_'])]
    
    # ===== 제외 컬럼 =====
    exclude_cols = ['ETH_Close', 'ETH_High', 'ETH_Low', 'ETH_Open','date']
    exclude_cols.extend([col for col in df.columns if 'event_' in col or 'period_' in col])
    exclude_cols.extend([col for col in df.columns if '_lag' in col])
    
    lag_count = 0
    
    # ===== 1. 원본 감성 지표에만 lag 적용 =====
    for col in raw_sentiment_cols:
        if col in df.columns:
            is_derived = any(pattern in col for pattern in no_lag_patterns)
            
            if not is_derived:
                for lag in range(1, news_lag):
                    new_col = f"{col}_lag{lag}"
                    df_lagged[new_col] = df[col].shift(lag)
                    lag_count += 1
    
    # ===== 2. 온체인 lag =====
    onchain_lag_count = 0
    for col in onchain_cols:
        if col not in exclude_cols:
            df_lagged[f"{col}_lag1"] = df[col].shift(onchain_lag)
            onchain_lag_count += 1
    
    # ===== 3. 기타 외부 변수 lag  =====
    other_lag_count = 0
    for col in other_cols:
        if col not in exclude_cols:
            df_lagged[f"{col}_lag1"] = df[col].shift(1)
            other_lag_count += 1
    
    total_lag = lag_count + onchain_lag_count + other_lag_count
    
    return df_lagged


def add_price_lag_features_first(df):
    """
    과거 가격을 피처로 추가 
    """
    df_new = df.copy()
    close = df['ETH_Close']
    high = df['ETH_High']
    low = df['ETH_Low']
    volume = df['ETH_Volume']
    
    # 과거 종가 
    for lag in [1, 2, 3, 5, 7, 14, 21, 30]:
        df_new[f'close_lag{lag}'] = close.shift(lag)
    
    # 과거 고가/저가
    for lag in [1, 2, 3, 5, 7]:
        df_new[f'high_lag{lag}'] = high.shift(lag)
        df_new[f'low_lag{lag}'] = low.shift(lag)
    
    # 과거 거래량
    for lag in [1, 2, 3, 5, 7]:
        df_new[f'volume_lag{lag}'] = volume.shift(lag)
    
    # 과거 수익률
    for lag in [1, 2, 3, 5, 7]:
        df_new[f'return_lag{lag}'] = close.pct_change(periods=lag).shift(1)
    
    # 과거 가격 비율
    for lag in [1, 7, 30]:
        df_new[f'close_ratio_lag{lag}'] = close / close.shift(lag)
    
    added = df_new.shape[1] - df.shape[1]
    
    return df_new


# ============================================================================
# 3. 타겟 변수 생성
# ============================================================================

def create_targets(df):
    """타겟 변수 생성"""
    df_target = df.copy()
    close = df['ETH_Close']

    # 내일 종가
    next_close = close.shift(-1)
    
    # 오늘 → 내일 로그 수익률
    df_target['next_log_return'] = np.log(next_close / close)
    
    # 오늘 → 내일 방향성
    df_target['next_direction'] = (next_close > close).astype(int)
    
    # 내일 실제 종가
    df_target['next_close'] = next_close   
    
    return df_target



In [6]:
def add_temporal_cyclic_features(df):
    """
    시간 주기성 특징 추가 
    
    Reference:
    - "The Importance of Time-Based Cyclic Features" (2025)
    - "Feature engineering for time-series data" (Statsig, 2025)
    """
    df_temporal = df.copy()
    
    # 기본 시간 특징
    df_temporal['day_of_week'] = df['date'].dt.dayofweek
    df_temporal['day_of_month'] = df['date'].dt.day
    df_temporal['month'] = df['date'].dt.month
    df_temporal['quarter'] = df['date'].dt.quarter
    df_temporal['week_of_year'] = df['date'].dt.isocalendar().week
    
    # 월말/월초 효과 
    df_temporal['is_month_start'] = (df['date'].dt.is_month_start).astype(int)
    df_temporal['is_month_end'] = (df['date'].dt.is_month_end).astype(int)
    df_temporal['is_quarter_start'] = (df['date'].dt.is_quarter_start).astype(int)
    df_temporal['is_quarter_end'] = (df['date'].dt.is_quarter_end).astype(int)
    
    # 주말 효과 
    df_temporal['is_weekend'] = (df['date'].dt.dayofweek >= 5).astype(int)
    
    # Cyclical Encoding (Sine/Cosine for periodicity)
    df_temporal['day_of_week_sin'] = np.sin(2 * np.pi * df_temporal['day_of_week'] / 7)
    df_temporal['day_of_week_cos'] = np.cos(2 * np.pi * df_temporal['day_of_week'] / 7)
    df_temporal['month_sin'] = np.sin(2 * np.pi * df_temporal['month'] / 12)
    df_temporal['month_cos'] = np.cos(2 * np.pi * df_temporal['month'] / 12)
    df_temporal['day_of_month_sin'] = np.sin(2 * np.pi * df_temporal['day_of_month'] / 31)
    df_temporal['day_of_month_cos'] = np.cos(2 * np.pi * df_temporal['day_of_month'] / 31)
    
    added = df_temporal.shape[1] - df.shape[1]
    
    return df_temporal


def add_interaction_features(df):
    """
    고차원 상호작용 특징 추가
    
    Reference:
    - "Optimizing Forecast Accuracy" (2025): Momentum × Volatility 상호작용 중요
    - "Causal Feature Engineering" (2023): 특징 조합이 단일 특징보다 예측력 높음
    """
    df_interact = df.copy()
    
    # 1. RSI × Volume
    if 'RSI_14' in df.columns and 'VOLUME_RATIO' in df.columns:
        df_interact['RSI_Volume_Strength'] = df['RSI_14'] * df['VOLUME_RATIO']
    
    # 2. Bollinger Band Position × Sentiment
    if 'BB_POSITION' in df.columns and 'sentiment_polarity' in df.columns:
        df_interact['BB_Sentiment_Consensus'] = df['BB_POSITION'] * df['sentiment_polarity']
    
    # 3. VIX × ETH Volatility
    if 'vix_VIX' in df.columns and 'VOLATILITY_20' in df.columns:
        df_interact['VIX_ETH_Vol_Cross'] = df['vix_VIX'] * df['VOLATILITY_20']
    
    # 4. MACD × Volume
    if 'MACD_12_26_9' in df.columns and 'VOLUME_RATIO' in df.columns:
        df_interact['MACD_Volume_Momentum'] = df['MACD_12_26_9'] * df['VOLUME_RATIO']
    
    # 5. BTC Return × ETH-BTC Correlation
    if 'btc_return' in df.columns and 'eth_btc_corr_30d' in df.columns:
        df_interact['BTC_Weighted_Impact'] = df['btc_return'] * df['eth_btc_corr_30d']
    
    # 6. Sentiment × News Volume
    if 'sentiment_polarity' in df.columns and 'news_count' in df.columns:
        df_interact['Sentiment_Volume_Intensity'] = df['sentiment_polarity'] * np.log1p(df['news_count'])
    
    # 7. ATR × Volume Ratio
    if 'ATR_14' in df.columns and 'VOLUME_RATIO' in df.columns:
        df_interact['Liquidity_Risk'] = df['ATR_14'] * (1 / (df['VOLUME_RATIO'] + 1e-8))
    
    # 8. RSI Overbought × High Volume
    if 'RSI_OVERBOUGHT' in df.columns and 'VOLUME_RATIO' in df.columns:
        df_interact['Overbought_High_Volume'] = df['RSI_OVERBOUGHT'] * (df['VOLUME_RATIO'] > 1.5).astype(int)
    
    # 9. Golden Cross × Positive Sentiment
    if 'SMA_GOLDEN_CROSS' in df.columns and 'sentiment_polarity' in df.columns:
        df_interact['Golden_Sentiment_Align'] = df['SMA_GOLDEN_CROSS'] * (df['sentiment_polarity'] > 0).astype(int)
    
    # 10. Price Acceleration × Momentum
    if 'PRICE_ACCELERATION' in df.columns and 'MOMENTUM_10' in df.columns:
        df_interact['Acceleration_Momentum'] = df['PRICE_ACCELERATION'] * df['MOMENTUM_10']
    
    added = df_interact.shape[1] - df.shape[1]
    
    return df_interact


def add_volatility_regime_features(df):
    """
    변동성 체제 특징 추가
    
    Reference:
    - "Intraday trading of cryptocurrencies" (2023): 변동성 체제별 예측 정확도 차이 존재

    """
    df_regime = df.copy()
    
    if 'VOLATILITY_20' in df.columns:
        # 1. 고변동성 vs 저변동성 
        vol_median = df['VOLATILITY_20'].rolling(60, min_periods=20).median()
        df_regime['vol_regime_high'] = (df['VOLATILITY_20'] > vol_median).astype(int)
        
        # 2. 변동성 급증 이벤트
        vol_mean = df['VOLATILITY_20'].rolling(30, min_periods=10).mean()
        vol_std = df['VOLATILITY_20'].rolling(30, min_periods=10).std()
        df_regime['vol_spike'] = (df['VOLATILITY_20'] > vol_mean + 2 * vol_std).astype(int)
        
        # 3. 변동성 백분위수
        df_regime['vol_percentile_90d'] = df['VOLATILITY_20'].rolling(90, min_periods=30).apply(
            lambda x: (x.iloc[-1] > x).sum() / len(x) if len(x) > 0 else 0.5
        )
        
        # 4. 변동성 추세
        df_regime['vol_trend'] = df['VOLATILITY_20'].pct_change(5)
        
        # 5. 변동성 체제 지속기간
        df_regime['vol_regime_duration'] = df_regime.groupby(
            (df_regime['vol_regime_high'] != df_regime['vol_regime_high'].shift()).cumsum()
        ).cumcount() + 1

    added = df_regime.shape[1] - df.shape[1]
    
    return df_regime


def add_normalized_price_lags(df):
    """
    정규화된 가격 Lag 특징 추가 (분류 모델용)
    
    Reference:
    - "Financial Forecasting with ML: Price vs Return" (2021)
    - 분류 문제에서 절대 가격보다 비율이 2-3배 더 예측력 높음
    """
    df_norm = df.copy()
    
    if 'ETH_Close' in df.columns:
        current_close = df['ETH_Close']
    else:
        return df_norm
    
    # 1. 가격 Lag를 현재 가격 대비 비율로 변환
    lag_cols = [col for col in df.columns if 'close_lag' in col and col.replace('close_lag', '').isdigit()]
    
    for col in lag_cols:
        lag_num = col.replace('close_lag', '')
        df_norm[f'close_lag{lag_num}_ratio'] = df[col] / (current_close + 1e-8)
        
        next_lag_col = f'close_lag{int(lag_num)+1}'
        if next_lag_col in df.columns:
            df_norm[f'close_lag{lag_num}_logret'] = np.log(df[col] / (df[next_lag_col] + 1e-8))
    
    # 2. High/Low Lag를 Close 대비 비율
    for col in df.columns:
        if 'high_lag' in col:
            lag_num = col.replace('high_lag', '')
            df_norm[f'high_lag{lag_num}_ratio'] = df[col] / (current_close + 1e-8)
        
        if 'low_lag' in col:
            lag_num = col.replace('low_lag', '')
            df_norm[f'low_lag{lag_num}_ratio'] = df[col] / (current_close + 1e-8)
    
    added = df_norm.shape[1] - df.shape[1]

    return df_norm


def add_cumulative_streak_features(df):
    """
    누적 및 연속 패턴 특징 추가
    
    Reference:
    - "Feature engineering for time-series" (2025): 연속 패턴은 모멘텀 지속성 예측에 핵심
    """
    df_cum = df.copy()
    
    if 'eth_log_return' in df.columns:
        returns = df['eth_log_return']
        
        # 1. 연속 상승 일수
        df_cum['consecutive_up_days'] = (returns > 0).astype(int).groupby(
            (returns <= 0).cumsum()
        ).cumsum()
        
        # 2. 연속 하락 일수
        df_cum['consecutive_down_days'] = (returns < 0).astype(int).groupby(
            (returns >= 0).cumsum()
        ).cumsum()
        
        # 3. 최근 20일 내 최대 연속 상승
        df_cum['max_consecutive_up_20d'] = df_cum['consecutive_up_days'].rolling(20, min_periods=5).max()
        
        # 4. 최근 20일 내 최대 연속 하락
        df_cum['max_consecutive_down_20d'] = df_cum['consecutive_down_days'].rolling(20, min_periods=5).max()
        
        # 5. 누적 수익률 (20일)
        df_cum['cumulative_return_20d'] = returns.rolling(20, min_periods=5).sum()
        
        # 6. 상승/하락 비율 (20일 내)
        df_cum['up_down_ratio_20d'] = (
            (returns > 0).rolling(20, min_periods=5).sum() / 
            ((returns < 0).rolling(20, min_periods=5).sum() + 1e-8)
        )

    added = df_cum.shape[1] - df.shape[1]
    
    return df_cum


def add_percentile_features(df):
    """

    Reference:
    - "Optimizing Forecast Accuracy" (2025): 백분위수 특징이 상대적 위치 파악에 효과적
    """
    df_pct = df.copy()
    
    # 1. 가격 백분위수 (250일)
    if 'ETH_Close' in df.columns:
        df_pct['price_percentile_250d'] = df['ETH_Close'].rolling(250, min_periods=60).apply(
            lambda x: (x.iloc[-1] > x).sum() / len(x) if len(x) > 0 else 0.5
        )
    
    # 2. 거래량 백분위수 (90일)
    if 'ETH_Volume' in df.columns:
        df_pct['volume_percentile_90d'] = df['ETH_Volume'].rolling(90, min_periods=30).apply(
            lambda x: (x.iloc[-1] > x).sum() / len(x) if len(x) > 0 else 0.5
        )
    
    # 3. RSI 백분위수 (60일)
    if 'RSI_14' in df.columns:
        df_pct['RSI_percentile_60d'] = df['RSI_14'].rolling(60, min_periods=20).apply(
            lambda x: (x.iloc[-1] > x).sum() / len(x) if len(x) > 0 else 0.5
        )
    
    added = df_pct.shape[1] - df.shape[1]
    
    return df_pct


def handle_missing_values_paper_based(df_clean, train_start_date, is_train=True, train_stats=None):
    """
    암호화폐 시계열 결측치 처리
    
    참고문헌:
    1. "Quantifying Cryptocurrency Unpredictability" (2025)

    2. "Time Series Data Forecasting" 
    
    3. "Dealing with Leaky Missing Data in Production" (2021)

    """
    
    # ===== 1. Lookback 제거 =====
    if isinstance(train_start_date, str):
        train_start_date = pd.to_datetime(train_start_date)
    
    before = len(df_clean)
    df_clean = df_clean[df_clean['date'] >= train_start_date].reset_index(drop=True)
    
    # ===== 2. Feature 컬럼 선택 =====
    target_cols = ['next_log_return', 'next_direction', 'next_close']
    feature_cols = [col for col in df_clean.columns 
                   if col not in target_cols + ['date']]
    
    # ===== 3. 결측 확인 =====
    missing_before = df_clean[feature_cols].isnull().sum().sum()
    
    # ===== 4. FFill → 0 =====
    df_clean[feature_cols] = df_clean[feature_cols].fillna(method='ffill')
    df_clean[feature_cols] = df_clean[feature_cols].fillna(0)
    
    missing_after = df_clean[feature_cols].isnull().sum().sum()
    
    # ===== 5. 무한대 처리 =====
    inf_count = 0
    for col in feature_cols:
        if np.isinf(df_clean[col]).sum() > 0:
            inf_count += np.isinf(df_clean[col]).sum()
            df_clean[col] = df_clean[col].replace([np.inf, -np.inf], np.nan)
            df_clean[col] = df_clean[col].fillna(method='ffill').fillna(0)
    
    # ===== 6. 최종 확인 =====
    final_missing = df_clean[feature_cols].isnull().sum().sum()
    
    if final_missing > 0:
        df_clean[feature_cols] = df_clean[feature_cols].fillna(0)
    
    
    if is_train:
        return df_clean, {}
    else:
        return df_clean

In [7]:

from sklearn.feature_selection import mutual_info_classif, mutual_info_regression, RFE
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from lightgbm import LGBMClassifier, LGBMRegressor
import numpy as np
from collections import Counter

def select_features_multi_target(X_train, y_train, target_type='direction', top_n=40):
    """
    Multi-Target Feature Selection
    
    5가지 케이스별 최적화된 feature selection:
    1. direction (분류)
    2. return (회귀)  
    3. price (회귀)
    4. direction_return (혼합)
    5. direction_price (혼합)
    
    Reference:
    - "Multi-target HSIC-Lasso" (2024)
    - "Feature selection for multi-target regression" (2021)
    """

    
    if target_type == 'direction':
        # 순수 분류
        selected, stats = select_features_verified(
            X_train, 
            y_train['next_direction'], 
            task='class', 
            top_n=top_n
        )
        
    elif target_type == 'return':
        # 순수 회귀 (수익률)
        selected, stats = select_features_verified(
            X_train, 
            y_train['next_log_return'], 
            task='reg', 
            top_n=top_n
        )
        
    elif target_type == 'price':
        # 순수 회귀 (가격)
        selected, stats = select_features_verified(
            X_train, 
            y_train['next_close'], 
            task='reg', 
            top_n=top_n
        )
        
    elif target_type == 'direction_return':
        # 혼합: 분류 + 회귀 (방향 + 수익률)
        print("\n[Hybrid] Direction (50%) + Return (50%)")
        
        # 각각 절반씩 선택
        dir_features, dir_stats = select_features_verified(
            X_train, 
            y_train['next_direction'], 
            task='class', 
            top_n=top_n // 2,
            verbose=False
        )
        
        ret_features, ret_stats = select_features_verified(
            X_train, 
            y_train['next_log_return'], 
            task='reg', 
            top_n=top_n // 2,
            verbose=False
        )
        
        # 합집합으로 결합 (중복 제거)
        selected = list(dict.fromkeys(dir_features + ret_features))
        
        # 부족하면 MI 스코어 높은 순으로 추가
        if len(selected) < top_n:
            all_mi_scores = {**dir_stats['mi_scores'], **ret_stats['mi_scores']}
            sorted_features = sorted(all_mi_scores.items(), key=lambda x: x[1], reverse=True)
            
            for feat, _ in sorted_features:
                if feat not in selected:
                    selected.append(feat)
                    if len(selected) >= top_n:
                        break
        
        # 너무 많으면 자르기
        selected = selected[:top_n]
        
        stats = {
            'dir_stats': dir_stats,
            'ret_stats': ret_stats,
            'overlap': len(set(dir_features) & set(ret_features))
        }
        
        
    elif target_type == 'direction_price':
        # 혼합: 분류 + 회귀 (방향 + 가격)
        print("\n[Hybrid] Direction (50%) + Price (50%)")
        
        dir_features, dir_stats = select_features_verified(
            X_train, 
            y_train['next_direction'], 
            task='class', 
            top_n=top_n // 2,
            verbose=False
        )
        
        price_features, price_stats = select_features_verified(
            X_train, 
            y_train['next_close'], 
            task='reg', 
            top_n=top_n // 2,
            verbose=False
        )
        
        selected = list(dict.fromkeys(dir_features + price_features))
        
        if len(selected) < top_n:
            all_mi_scores = {**dir_stats['mi_scores'], **price_stats['mi_scores']}
            sorted_features = sorted(all_mi_scores.items(), key=lambda x: x[1], reverse=True)
            
            for feat, _ in sorted_features:
                if feat not in selected:
                    selected.append(feat)
                    if len(selected) >= top_n:
                        break
        
        selected = selected[:top_n]
        
        stats = {
            'dir_stats': dir_stats,
            'price_stats': price_stats,
            'overlap': len(set(dir_features) & set(price_features))
        }
        
    else:
        raise ValueError(f"Unknown target_type: {target_type}")
    
    print("선택된 지표들")
    print(", ".join(selected))
    return selected, stats


def select_features_verified(X_train, y_train, task='class', top_n=40, verbose=True):
    """
    검증된 Feature Selection 방법 (2025 연구 기반)
    
    핵심 원칙:
    1. 하이퍼파라미터 튜닝 없이 기본 파라미터 사용
    2. MI + RFE + RF Importance 앙상블
    3. 빠른 실행 속도
    
    Reference:
    - "Optimizing Forecast Accuracy in Cryptocurrency Markets" (2025)
    - "Feature Selection After Split" (Reddit, 2022)
    
    """
    
    if task == 'class':
        mi_scores = mutual_info_classif(X_train, y_train, random_state=42)
    else:
        mi_scores = mutual_info_regression(X_train, y_train, random_state=42)
    
    mi_idx = np.argsort(mi_scores)[::-1][:top_n]
    mi_features = X_train.columns[mi_idx].tolist()
    
    
    # 기본 파라미터만 사용 
    if task == 'class':
        estimator = LGBMClassifier(
            n_estimators=100,
            learning_rate=0.05,
            max_depth=5,
            random_state=42,
            verbose=-1
        )
    else:
        estimator = LGBMRegressor(
            n_estimators=100,
            learning_rate=0.05,
            max_depth=5,
            random_state=42,
            verbose=-1
        )
    
    rfe = RFE(
        estimator=estimator,
        n_features_to_select=top_n,
        step=0.1,  # 10%씩 제거
        verbose=0
    )
    
    rfe.fit(X_train, y_train)
    rfe_features = X_train.columns[rfe.support_].tolist()

    
    if task == 'class':
        rf_model = RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            random_state=42,
            n_jobs=-1
        )
    else:
        rf_model = RandomForestRegressor(
            n_estimators=100,
            max_depth=10,
            random_state=42,
            n_jobs=-1
        )
    
    rf_model.fit(X_train, y_train)
    rf_importances = rf_model.feature_importances_
    rf_idx = np.argsort(rf_importances)[::-1][:top_n]
    rf_features = X_train.columns[rf_idx].tolist()
    all_features = mi_features + rfe_features + rf_features
    feature_votes = Counter(all_features)
    selected_features = [feat for feat, _ in feature_votes.most_common(top_n)]

    if len(selected_features) < top_n:
        remaining = top_n - len(selected_features)
        for feat in mi_features:
            if feat not in selected_features:
                selected_features.append(feat)
                remaining -= 1
                if remaining == 0:
                    break
    
    return selected_features, {
        'mi_features': mi_features,
        'rfe_features': rfe_features,
        'rf_features': rf_features,
        'feature_votes': feature_votes,
        'mi_scores': dict(zip(X_train.columns, mi_scores)),
        'rf_importances': dict(zip(X_train.columns, rf_importances))
    }

# ============================================================================
# 전체 파이프라인 
# ============================================================================

def build_complete_pipeline_corrected(df_raw, train_start_date, 
                                     method='tvt', target_type='direction', **kwargs):
    """

    1. Feature Engineering (전체 데이터)
    2. Target 생성 (전체 데이터)  
    3. Train/Val/Test Split
    4. Missing Value Handling 
    5. Feature Selection 
    6. Scaling (Train에서만 Fit)
    
    Reference:
    - "Feature Selection After Split" (Stack Overflow, 2019)
    - "Scaling After Feature Selection" (Reddit, 2023)
    """
    
    df = df_raw.copy()
    
    # Target 생성 
    df = create_targets(df)
    
    # 과거 가격 Lag
    df = add_price_lag_features_first(df)
    
    # 기술적 지표
    df = calculate_technical_indicators(df)
    
    # 시간 주기성
    df = add_temporal_cyclic_features(df)
    
    # BTC-ETH 교차 특징
    df = add_enhanced_cross_crypto_features(df)
    
    # 변동성 체제
    df = add_volatility_regime_features(df)
    
    # 상호작용 특징
    df = add_interaction_features(df)
    
    # 누적/연속 특징
    df = add_cumulative_streak_features(df)
    
    # 백분위수 특징
    df = add_percentile_features(df)
    
    # 정규화 가격 Lag
    df = add_normalized_price_lags(df)
    
    # Raw 가격 제거
    df = remove_raw_prices_and_transform(df)
    
    # Lag 적용 (감성, 온체인)
    df = apply_lag_features(df, news_lag=2, onchain_lag=1)


    # 1. 원본 VIX 확인
    if 'vix_VIX' in df.columns:
        vix_missing = df['vix_VIX'].isnull().sum()

    # 2. VOLATILITY_20 확인
    if 'VOLATILITY_20' in df.columns:
        vol_missing = df['VOLATILITY_20'].isnull().sum()

    # 3. 상호작용 특징 확인
    if 'VIX_ETH_Vol_Cross' in df.columns:
        cross_missing = df['VIX_ETH_Vol_Cross'].isnull().sum()


    # 4. Lag 적용 후 확인
    if 'VIX_ETH_Vol_Cross_lag1' in df.columns:
        cross_lag_missing = df['VIX_ETH_Vol_Cross_lag1'].isnull().sum()


    pd.set_option('display.max_columns', None)
    column_list = df.columns.tolist()
    df = df.iloc[:-1]  
    split_kwargs = {}
    if method == 'tvt':
        if 'train_ratio' in kwargs:
            split_kwargs['train_ratio'] = kwargs['train_ratio']
        if 'val_ratio' in kwargs:
            split_kwargs['val_ratio'] = kwargs['val_ratio']
        splits = split_tvt_method(df, train_start_date, **split_kwargs)
    elif method == 'walk_forward':
        if 'n_splits' in kwargs:
            split_kwargs['n_splits'] = kwargs['n_splits']
        if 'initial_train_size' in kwargs:
            split_kwargs['initial_train_size'] = kwargs['initial_train_size']
        if 'test_size' in kwargs:
            split_kwargs['test_size'] = kwargs['test_size']
        splits = split_walk_forward_method(df, train_start_date, **split_kwargs)
    else:
        raise ValueError(f"Unknown method: {method}")
    
    # ===================================================================
    # PHASE 3: 각 Split에 대해 Missing/Selection/Scaling 수행
    # ===================================================================

    
    if method == 'tvt':
            result = process_single_split(
        splits, 
        target_type=target_type,  
        top_n=40                 
        )
    else:
            result = [
        process_single_split(
            fold, 
            target_type=target_type,  
            top_n=40,
            fold_idx=i+1
        ) 
        for i, fold in enumerate(splits)
        ]
    return result


# ============================================================================
# Split 함수들 
# ============================================================================

def split_tvt_method(df, train_start_date, train_ratio=0.7, val_ratio=0.15):
    """TVT 분할 (결측치 처리 X, 단순 분할만)"""
    
    df_period = df[df['date'] >= train_start_date].copy()
    
    n = len(df_period)
    train_end = int(n * train_ratio)
    val_end = int(n * (train_ratio + val_ratio))
    
    train_df = df_period.iloc[:train_end].copy()
    val_df = df_period.iloc[train_end:val_end].copy()
    test_df = df_period.iloc[val_end:].copy()
    
    print(f"  Train: {len(train_df)} ({train_df['date'].min().date()} ~ {train_df['date'].max().date()})")
    print(f"  Val:   {len(val_df)} ({val_df['date'].min().date()} ~ {val_df['date'].max().date()})")
    print(f"  Test:  {len(test_df)} ({test_df['date'].min().date()} ~ {test_df['date'].max().date()})")
    
    return {'train': train_df, 'val': val_df, 'test': test_df}


def split_walk_forward_method(df, train_start_date, 
                              n_splits=None,
                              initial_train_size=600, 
                              val_size=60,      
                              test_size=60,
                              lookback=30):     
    """
    Walk-Forward 분할 (Anchored/Expanding Window)
    
    설정:
    - Initial Train: 600일
    - Val: 60일
    - Test: 60일
    - Step: 60일
    - n_splits: None이면 데이터 최대 활용하여 자동 계산
    
    Reference:
    - "Optimizing Forecast Accuracy in Cryptocurrency" (2025)
    - Anchored Window: Train이 매 Fold마다 확장
    """
    
    df_period = df[df['date'] >= train_start_date].copy()
    df_period = df_period.sort_values('date').reset_index(drop=True)
    
    step = 60
    
    if n_splits is None:
        total_data = len(df_period)
        min_required = initial_train_size + val_size + test_size
        remaining = total_data - min_required
        n_splits = (remaining // step) + 1
        print(f"Auto-calculated n_splits: {n_splits} (from {total_data} days)")
    
    folds = []
    
    print(f"\n{'='*80}")
    print(f"Walk-Forward Configuration")
    print(f"{'='*80}")
    print(f"Total data: {len(df_period)} days")
    print(f"Train={initial_train_size}d, Val={val_size}d, Test={test_size}d, Step={step}d")
    print(f"Lookback={lookback}d, Val sequences: {val_size - lookback}")
    print(f"Target folds: {n_splits}")
    print(f"{'='*80}\n")
    
    for fold_idx in range(n_splits):
        train_end_idx = initial_train_size + (fold_idx * step)
        val_start_idx = train_end_idx
        val_end_idx = val_start_idx + val_size
        test_start_idx = val_end_idx
        test_end_idx = test_start_idx + test_size
        
        if test_end_idx > len(df_period):
            print(f"Insufficient data: Fold {fold_idx+1} stopped (need {test_end_idx}, have {len(df_period)})")
            break
        
        train_fold = df_period.iloc[:train_end_idx].copy()
        val_fold = df_period.iloc[val_start_idx:val_end_idx].copy()
        test_fold = df_period.iloc[test_start_idx:test_end_idx].copy()
        
        assert train_fold['date'].max() < val_fold['date'].min(), "Train/Val overlap detected!"
        assert val_fold['date'].max() < test_fold['date'].min(), "Val/Test overlap detected!"
        
        print(f"Fold {fold_idx + 1:2d}:")
        print(f"  Train: {len(train_fold):4d}d  ({train_fold['date'].min().date()} ~ {train_fold['date'].max().date()})")
        print(f"  Val:   {len(val_fold):4d}d  ({val_fold['date'].min().date()} ~ {val_fold['date'].max().date()})")
        print(f"  Test:  {len(test_fold):4d}d  ({test_fold['date'].min().date()} ~ {test_fold['date'].max().date()})")
        
        folds.append({
            'train': train_fold,
            'val': val_fold,
            'test': test_fold,
            'fold_idx': fold_idx + 1
        })
    
    print(f"\n{'='*80}")
    print(f"Summary: {len(folds)} folds generated")
    print(f"Total test days: {len(folds) * test_size}")
    print(f"Test coverage: {folds[0]['test']['date'].min().date()} ~ {folds[-1]['test']['date'].max().date()}")
    print(f"Data utilization: {(test_end_idx/len(df_period)*100):.1f}%")
    print(f"{'='*80}\n")
    
    return folds



# ============================================================================
# 핵심: 각 Split 처리 
# ============================================================================

def process_single_split(split_data, target_type='direction', top_n=40, fold_idx=None):
    """
    개선된 전처리 파이프라인
    
    변경사항:
    1. GridSearchCV 제거 (feature selection 단계에서)
    2. 검증된 MI+RFE+RF 앙상블 사용
    3. Multi-target 지원
    """
    
    train_df = split_data['train']
    val_df = split_data['val']
    test_df = split_data['test']
    
    # ===== 1. 결측치 처리 =====
    
    train_processed, missing_stats = handle_missing_values_paper_based(
        train_df.copy(),
        train_start_date=train_df['date'].min(),
        is_train=True
    )
    
    val_processed = handle_missing_values_paper_based(
        val_df.copy(),
        train_start_date=val_df['date'].min(),
        is_train=False,
        train_stats=missing_stats
    )
    
    test_processed = handle_missing_values_paper_based(
        test_df.copy(),
        train_start_date=test_df['date'].min(),
        is_train=False,
        train_stats=missing_stats
    )
    
    target_cols = ['next_log_return', 'next_direction', 'next_close']
    
    train_processed = train_processed.dropna(subset=target_cols).reset_index(drop=True)
    val_processed = val_processed.dropna(subset=target_cols).reset_index(drop=True)
    test_processed = test_processed.dropna(subset=target_cols).reset_index(drop=True)

    
    feature_cols = [col for col in train_processed.columns 
                   if col not in target_cols + ['date']]
    
    X_train = train_processed[feature_cols]
    y_train = train_processed[target_cols]
    
    X_val = val_processed[feature_cols]
    y_val = val_processed[target_cols]
    
    X_test = test_processed[feature_cols]
    y_test = test_processed[target_cols]

    
    selected_features, selection_stats = select_features_multi_target(
        X_train, 
        y_train, 
        target_type=target_type, 
        top_n=top_n
    )
    
    X_train_sel = X_train[selected_features]
    X_val_sel = X_val[selected_features]
    X_test_sel = X_test[selected_features]
    
    robust_scaler = RobustScaler()
    standard_scaler = StandardScaler()
    
    X_train_robust = robust_scaler.fit_transform(X_train_sel)
    X_val_robust = robust_scaler.transform(X_val_sel)
    X_test_robust = robust_scaler.transform(X_test_sel)
    
    X_train_standard = standard_scaler.fit_transform(X_train_sel)
    X_val_standard = standard_scaler.transform(X_val_sel)
    X_test_standard = standard_scaler.transform(X_test_sel)
    
    # ===== 6. 결과 패키징 =====
    result = {
        'train': {
            'X_robust': X_train_robust,
            'X_standard': X_train_standard,
            'X_raw': X_train_sel,
            'y': y_train.reset_index(drop=True), 
            'dates': train_df['date'].reset_index(drop=True) 
        },
        'val': {
            'X_robust': X_val_robust,
            'X_standard': X_val_standard,
            'X_raw': X_val_sel,
            'y': y_val.reset_index(drop=True), 
            'dates': val_df['date'].reset_index(drop=True)  
        },
        'test': {
            'X_robust': X_test_robust,
            'X_standard': X_test_standard,
            'X_raw': X_test_sel,
            'y': y_test.reset_index(drop=True),  
            'dates': test_df['date'].reset_index(drop=True)  
        },
        'stats': {
            'robust_scaler': robust_scaler,
            'standard_scaler': standard_scaler,
            'selected_features': selected_features,
            'selection_stats': selection_stats,
            'target_type': target_type,
            'target_cols': target_cols
        }
    }
    
    return result



In [8]:
# ============================================================================
# 논문 기반 암호화폐 예측 모델 
# ============================================================================


import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from lightgbm import LGBMClassifier, LGBMRegressor
from xgboost import XGBClassifier, XGBRegressor
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout, Bidirectional, Conv1D, MaxPooling1D, Flatten, Input, Concatenate
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC, SVR
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier, LGBMRegressor, early_stopping
from xgboost import XGBClassifier, XGBRegressor
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
    LSTM, GRU, Dense, Dropout, Bidirectional, Conv1D, MaxPooling1D, 
    Flatten, Input, Concatenate, BatchNormalization, Attention, Add,
    MultiHeadAttention, LayerNormalization, GlobalAveragePooling1D,
    ConvLSTM2D, Reshape, TimeDistributed, RepeatVector
)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2


# ============================================================================
# DirectionModels (15개 모델)
# ============================================================================

class DirectionModels:
    """방향 예측 모델 (15개 논문 기반)"""
    
    @staticmethod
    def random_forest(X_train, y_train):
        """1. Random Forest - 과적합 방지"""
        model = RandomForestClassifier(
            n_estimators=200, max_depth=15, min_samples_split=10,
            min_samples_leaf=4, max_features='sqrt', random_state=42, n_jobs=-1
        )
        model.fit(X_train, y_train)
        return model
    
    @staticmethod
    def lightgbm(X_train, y_train, X_val, y_val):
        """
        2. LightGBM
        Reference: "Cryptocurrency Price Prediction" (IEEE 2024)
        """
        model = LGBMClassifier(
            n_estimators=200, max_depth=7, learning_rate=0.05, num_leaves=31,
            subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1,
            min_child_samples=20, random_state=42, verbose=-1
        )
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)],
                 callbacks=[early_stopping(50, verbose=False)])
        return model
    
    @staticmethod
    def xgboost(X_train, y_train, X_val, y_val):
        """
        3. XGBoost
        Reference: "Cryptocurrency Value Prediction with Boosting" (2022)
        """
        model = XGBClassifier(
            n_estimators=200, max_depth=7, learning_rate=0.05,
            subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=1.0,
            min_child_weight=3, gamma=0.1, random_state=42, eval_metric='logloss'
        )
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
        return model
    
    @staticmethod
    def svm(X_train, y_train):
        """
        4. Support Vector Classifier
        Reference: "Support Vector Regression to Improve ETH Prediction" (2025)
        R²: 0.9985
        """
        model = SVC(
            kernel='rbf', C=100, gamma='scale', random_state=42, probability=True
        )
        model.fit(X_train, y_train)
        return model
    
    @staticmethod
    def lstm(X_train, y_train, X_val, y_val, input_shape):
        """
        5. LSTM
        Reference: "AI-based model for cryptocurrency prediction" (2025)
        R²: 97.44% (Ethereum)
        """
        model = Sequential([
            LSTM(128, activation='tanh', return_sequences=True, 
                 input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            LSTM(64, activation='tanh', kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                     loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0)
        return model
    
    @staticmethod
    def bilstm(X_train, y_train, X_val, y_val, input_shape):
        """
        6. Bidirectional LSTM
        Reference: "Predicting Bitcoin Prices Using Deep Learning" (2025)
        R²: 0.98, MSE: 0.001183
        """
        model = Sequential([
            Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01)), 
                         input_shape=input_shape),
            BatchNormalization(),
            Dropout(0.3),
            Bidirectional(LSTM(64, kernel_regularizer=l2(0.01))),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                     loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def gru(X_train, y_train, X_val, y_val, input_shape):
        """
        7. GRU
        Reference: "Comparative Analysis of LSTM and GRU for ETH" (2025)
        RMSE: 0.0234, R²: 0.9442
        """
        model = Sequential([
            GRU(128, activation='tanh', return_sequences=True, 
                input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            GRU(64, activation='tanh', kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                     loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def stacked_lstm(X_train, y_train, X_val, y_val, input_shape):
        """
        8. Stacked LSTM (3 layers)
        Reference: "LSTM-Driven Cryptocurrency Forecasting" (2024)
        """
        model = Sequential([
            LSTM(128, return_sequences=True, input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            LSTM(96, return_sequences=True, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            LSTM(64, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def cnn_lstm(X_train, y_train, X_val, y_val, input_shape):
        """
        9. CNN-LSTM
        Reference: "Application of CNN-BiLSTM for ETH" (2025)
        MAPE: 2.8546%, R²: 0.9415
        """
        model = Sequential([
            Conv1D(64, 3, activation='relu', padding='same', 
                   input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            MaxPooling1D(2),
            Dropout(0.2),
            Conv1D(32, 3, activation='relu', padding='same', kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.2),
            LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            LSTM(64, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def cnn_gru(X_train, y_train, X_val, y_val, input_shape):
        """
        10. CNN-GRU
        Reference: "Deep Learning Algorithms for Crypto" (ACM 2024)
        """
        model = Sequential([
            Conv1D(64, 3, activation='relu', padding='same', 
                   input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            MaxPooling1D(2),
            Dropout(0.2),
            Conv1D(32, 3, activation='relu', padding='same', kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.2),
            GRU(128, return_sequences=True, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            GRU(64, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def cnn_bilstm(X_train, y_train, X_val, y_val, input_shape):
        """
        11. CNN-BiLSTM
        Reference: "Application of CNN-BiLSTM for ETH" (2025)
        MAPE: 2.8546%, R²: 0.9415
        """
        model = Sequential([
            Conv1D(64, 3, activation='relu', padding='same', 
                   input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            MaxPooling1D(2),
            Dropout(0.2),
            Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))),
            BatchNormalization(),
            Dropout(0.3),
            Bidirectional(LSTM(64, kernel_regularizer=l2(0.01))),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def lstm_attention(X_train, y_train, X_val, y_val, input_shape):
        """
        12. LSTM with Self-Attention
        Reference: "Optimized EWT-Seq2Seq-LSTM with Attention" (2023)
        """
        inputs = Input(shape=input_shape)
        lstm_out = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        lstm_out = BatchNormalization()(lstm_out)
        lstm_out = Dropout(0.3)(lstm_out)
        lstm_out = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(lstm_out)
        lstm_out = BatchNormalization()(lstm_out)
        lstm_out = Dropout(0.3)(lstm_out)
        attention = Attention()([lstm_out, lstm_out])
        combined = Add()([lstm_out, attention])
        pooled = GlobalAveragePooling1D()(combined)
        dense = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(pooled)
        dense = BatchNormalization()(dense)
        dense = Dropout(0.3)(dense)
        outputs = Dense(1, activation='sigmoid')(dense)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def transformer(X_train, y_train, X_val, y_val, input_shape):
        """
        13. Transformer
        Reference: "Transformer-based approach for ETH" (2024)
        """
        inputs = Input(shape=input_shape)
        attn_output = MultiHeadAttention(num_heads=8, key_dim=64, dropout=0.1)(inputs, inputs)
        attn_output = Dropout(0.1)(attn_output)
        x = LayerNormalization(epsilon=1e-6)(inputs + attn_output)
        ff = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(x)
        ff = Dropout(0.1)(ff)
        ff = Dense(input_shape[1], kernel_regularizer=l2(0.01))(ff)
        x = LayerNormalization(epsilon=1e-6)(x + ff)
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=50, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def tcn(X_train, y_train, X_val, y_val, input_shape):
        """
        14. Temporal Convolutional Network (TCN)
        Reference: "Utilising TCN for Cryptocurrency Forecasting" (2024)
        Superior to LSTM with lower complexity
        """
        inputs = Input(shape=input_shape)
        # TCN with dilated causal convolutions
        x = inputs
        for dilation_rate in [1, 2, 4, 8]:
            conv = Conv1D(64, 3, padding='causal', dilation_rate=dilation_rate,
                         activation='relu', kernel_regularizer=l2(0.01))(x)
            conv = BatchNormalization()(conv)
            conv = Dropout(0.2)(conv)
            x = Add()([x, conv]) if x.shape[-1] == 64 else conv
        
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def dtw_lstm(X_train, y_train, X_val, y_val, input_shape):
        """
        15. DTW-LSTM (Dynamic Time Warping + LSTM)
        Reference: "Application of DTW on ETH Prediction" (2024)
        23.4% better than baseline LSTM
        """
        # DTW-enhanced LSTM (simplified)
        model = Sequential([
            LSTM(128, return_sequences=True, input_shape=input_shape, 
                 kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            LSTM(96, return_sequences=True, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            LSTM(64, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def tabnet(X_train, y_train, X_val, y_val):
        """
        16. TabNet Classifier
        Reference: "TabNet: Attentive Interpretable Tabular Learning" (2019)
        암호화폐 예측에서 특징 중요도 해석 가능 (2024)
        """
        model = TabNetClassifier(
            n_d=64, n_a=64, n_steps=5,
            gamma=1.5, n_independent=2, n_shared=2,
            lambda_sparse=1e-4, momentum=0.3,
            mask_type='entmax', optimizer_fn=torch.optim.Adam,
            optimizer_params=dict(lr=2e-2),
            scheduler_params={"step_size": 50, "gamma": 0.9},
            scheduler_fn=torch.optim.lr_scheduler.StepLR,
            verbose=0, seed=42
        )
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            max_epochs=100, patience=20,
            batch_size=256, virtual_batch_size=128
        )
        return model
    
    @staticmethod
    def informer(X_train, y_train, X_val, y_val, input_shape):
        """
        17. Informer (Efficient Transformer)
        Reference: "Informer in Algorithmic Investment on Bitcoin" (2025)
        비트코인 고빈도 데이터에서 GMADL loss로 우수한 성과
        R²: 0.98+, 5분봉 데이터에서 buy-hold 전략 능가
        """
        inputs = Input(shape=input_shape)
        
        # ProbSparse Self-Attention (simplified)
        x = inputs
        for _ in range(2):
            # Multi-head attention with reduced complexity
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            # Distilling layer (halving)
            x = Conv1D(input_shape[1], 1, activation='relu', 
                      kernel_regularizer=l2(0.01))(x)
            x = MaxPooling1D(2, padding='same')(x)
        
        # Decoder
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def nbeats(X_train, y_train, X_val, y_val, input_shape):
        """
        18. N-BEATS (Neural Basis Expansion Analysis)
        Reference: "Bitcoin Price Prediction Using N-BEATS" (2025)
        R²: 0.9998, MAE: 0.00240 - 비트코인 예측에서 LSTM 능가
        """
        inputs = Input(shape=input_shape)
        
        # Stack 1: Trend
        x1 = Flatten()(inputs)
        for _ in range(4):
            x1 = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x1)
            x1 = BatchNormalization()(x1)
            x1 = Dropout(0.2)(x1)
        trend_forecast = Dense(64, activation='linear', kernel_regularizer=l2(0.01))(x1)
        
        # Stack 2: Seasonality
        x2 = Flatten()(inputs)
        for _ in range(4):
            x2 = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x2)
            x2 = BatchNormalization()(x2)
            x2 = Dropout(0.2)(x2)
        season_forecast = Dense(64, activation='linear', kernel_regularizer=l2(0.01))(x2)
        
        # Ensemble forecasts
        combined = Add()([trend_forecast, season_forecast])
        combined = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(combined)
        combined = Dropout(0.3)(combined)
        outputs = Dense(1, activation='sigmoid')(combined)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def temporal_fusion_transformer(X_train, y_train, X_val, y_val, input_shape):
        """
        19. Temporal Fusion Transformer (TFT)
        Reference: "Adaptive TFT for Cryptocurrency Prediction" (2025)
        ETH-USDT 10분봉에서 LSTM 대비 큰 성능 향상
        """
        inputs = Input(shape=input_shape)
        
        # Variable Selection Network
        x = Flatten()(inputs)
        var_weights = Dense(input_shape[0] * input_shape[1], activation='softmax',
                           kernel_regularizer=l2(0.01))(x)
        var_weights = Reshape(input_shape)(var_weights)
        selected = Multiply()([inputs, var_weights])
        
        # LSTM processing
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(selected)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        
        # Multi-head attention
        attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
        x = Add()([x, attn])
        x = LayerNormalization(epsilon=1e-6)(x)
        
        # Output
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def performer(X_train, y_train, X_val, y_val, input_shape):
        inputs = Input(shape=input_shape)

        # Projection layer to match dimensions
        x = Dense(128, kernel_regularizer=l2(0.01))(inputs)  
        x = LayerNormalization(epsilon=1e-6)(x)

        # Performer blocks
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn) 

            ff = Dense(256, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(128, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def patchtst(X_train, y_train, X_val, y_val, input_shape, patch_len=16, stride=8):
        """
        21. PatchTST (Patch Time Series Transformer)
        Reference: "Neural Foundations of Crypto Predictions" (2024)

        """
        inputs = Input(shape=input_shape)
        
        # Patching: 시계열을 패치로 분할
        x = inputs
        num_patches = (input_shape[0] - patch_len) // stride + 1
        
        # Patch embedding
        patches = []
        for i in range(0, input_shape[0] - patch_len + 1, stride):
            patch = Lambda(lambda z: z[:, i:i+patch_len, :])(x)
            patch = Flatten()(patch)
            patch = Dense(128, kernel_regularizer=l2(0.01))(patch)
            patches.append(patch)
        
        if len(patches) > 1:
            x = tf.stack(patches, axis=1)
        else:
            x = tf.expand_dims(patches[0], axis=1)
        
        # Transformer encoder with channel independence
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Dense(256, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(128, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        # Prediction head
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def autoformer(X_train, y_train, X_val, y_val, input_shape):
        """
        22. Autoformer (Decomposition Transformer with Auto-Correlation)
        Reference: "Autoformer: Decomposition Transformers" (2021)

        """
        inputs = Input(shape=input_shape)
        
        # Series Decomposition
        x = inputs
        # Moving average decomposition (trend)
        trend = tf.keras.layers.AveragePooling1D(pool_size=25, strides=1, 
                                                  padding='same')(x)
        seasonal = tf.subtract(x, trend)
        
        # Auto-Correlation mechanism on seasonal component
        x = seasonal
        for _ in range(2):
            # Auto-correlation (simplified with correlation-based attention)
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            # Feed-forward with decomposition
            ff = Dense(128, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(input_shape[1], kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        # Combine seasonal and trend
        seasonal_out = GlobalAveragePooling1D()(x)
        trend_out = GlobalAveragePooling1D()(trend)
        combined = Concatenate()([seasonal_out, trend_out])
        
        combined = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(combined)
        combined = BatchNormalization()(combined)
        combined = Dropout(0.3)(combined)
        outputs = Dense(1, activation='sigmoid')(combined)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def itransformer(X_train, y_train, X_val, y_val, input_shape):
        """
        23. iTransformer (Inverted Transformer)
        Reference: "Neural Foundations of Crypto Predictions" (2024)

        """
        inputs = Input(shape=input_shape)
        
        # Inverted: 변수를 토큰으로, 시간을 임베딩 차원으로
        # (batch, time, features) -> (batch, features, time)
        x = tf.transpose(inputs, perm=[0, 2, 1])
        
        # Embed each variate (now treated as tokens)
        x = Dense(64, kernel_regularizer=l2(0.01))(x)
        x = LayerNormalization(epsilon=1e-6)(x)
        
        # Transformer blocks on variate dimension
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=16, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Dense(128, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(64, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        # Aggregate
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def ethervoyant(X_train, y_train, X_val, y_val, input_shape):
        """
        24. EtherVoyant (Specialized Ethereum Forecasting Model)
        Reference: "Empowering Global Ethereum Price Prediction" (2024)

        """
        inputs = Input(shape=input_shape)
        
        # Multi-scale CNN feature extraction
        conv1 = Conv1D(64, 3, activation='relu', padding='same', 
                      kernel_regularizer=l2(0.01))(inputs)
        conv1 = BatchNormalization()(conv1)
        conv1 = Dropout(0.2)(conv1)
        
        conv2 = Conv1D(64, 5, activation='relu', padding='same', 
                      kernel_regularizer=l2(0.01))(inputs)
        conv2 = BatchNormalization()(conv2)
        conv2 = Dropout(0.2)(conv2)
        
        # Concatenate multi-scale features
        x = Concatenate()([conv1, conv2])
        x = MaxPooling1D(2)(x)
        
        # Bidirectional LSTM for temporal dependencies
        x = Bidirectional(LSTM(128, return_sequences=True, 
                              kernel_regularizer=l2(0.01)))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        
        # Multi-head self-attention
        attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
        x = Add()([x, attn])
        x = LayerNormalization(epsilon=1e-6)(x)
        
        # Final LSTM
        x = Bidirectional(LSTM(64, kernel_regularizer=l2(0.01)))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        
        # Output
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0)
        return model
    
    @staticmethod
    def vmd_hybrid(X_train, y_train, X_val, y_val, input_shape):
        """
        25. VMD-PatchTST Hybrid (Variational Mode Decomposition)
        Reference: "Enhanced Forecasting with VMD-PatchTST" (2024)

        """
        # 1. 입력을 32차원으로 먼저 투영 
        x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)

        # 2. 주파수 대역 분해
        low_freq = AveragePooling1D(pool_size=5, strides=1, padding='same')(x)
        low_freq = Conv1D(32, 3, activation='relu', padding='same',
                         kernel_regularizer=l2(0.01))(low_freq)

        # Medium frequency 
        mid_freq = x - low_freq
        mid_freq = Conv1D(32, 3, activation='relu', padding='same',
                         kernel_regularizer=l2(0.01))(mid_freq)

        # High frequency 
        high_freq = x - low_freq - mid_freq
        high_freq = Conv1D(32, 3, activation='relu', padding='same',
                          kernel_regularizer=l2(0.01))(high_freq)

        # 3. 모든 주파수 대역 결합
        x = Concatenate()([low_freq, mid_freq, high_freq])  # (batch, time, 96)
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)

        # 4. PatchTST-style Transformer 처리
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)

            ff = Dense(128, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(96, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)

        # 5. 출력 레이어
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', 
                     metrics=['accuracy'])

        early_stop = EarlyStopping(monitor='val_loss', patience=20, 
                                  restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)

        return model

    @staticmethod
    def logistic_regression(X_train, y_train):
        """
        26. Logistic Regression
        Reference: "Forecasting mid-price movement of Bitcoin futures" (2021)
        """
        model = LogisticRegression(
            C=1.0, penalty='l2', solver='lbfgs', 
            max_iter=1000, random_state=42, n_jobs=-1
        )
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def naive_bayes(X_train, y_train):
        """
        27. Gaussian Naive Bayes
        Reference: "Forecasting mid-price movement of Bitcoin futures" (2021)
        """
        model = GaussianNB()
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def knn(X_train, y_train):
        """
        28. K-Nearest Neighbors
        Reference: "Forecasting mid-price movement of Bitcoin futures" (2021)
        """
        model = KNeighborsClassifier(
            n_neighbors=7, weights='distance', 
            metric='minkowski', p=2, n_jobs=-1
        )
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def adaboost(X_train, y_train):
        """
        29. AdaBoost Classifier
        Reference: "Comparative Analysis of Ensemble-Based Models" (2025)
        """
        model = AdaBoostClassifier(
            n_estimators=200, learning_rate=0.5, 
            algorithm='SAMME', random_state=42
        )
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def catboost(X_train, y_train, X_val, y_val):
        """
        30. CatBoost Classifier
        Reference: "TRX Cryptocurrency Prediction" (2023)
        """
        model = CatBoostClassifier(
            iterations=200, learning_rate=0.1, depth=7,
            l2_leaf_reg=3, border_count=128, 
            random_seed=42, verbose=False
        )
        model.fit(X_train, y_train, eval_set=(X_val, y_val), 
                 early_stopping_rounds=50, verbose=False)
        return model

    @staticmethod
    def decision_tree(X_train, y_train):
        """
        31. Decision Tree Classifier
        Reference: "Comparative Analysis of Ensemble-Based Models" (2025)
        """
        model = DecisionTreeClassifier(
            max_depth=15, min_samples_split=10, 
            min_samples_leaf=4, random_state=42
        )
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def extra_trees(X_train, y_train):
        """
        32. Extra Trees Classifier
        Reference: "Enhancing financial product forecasting" (2025)
        """
        model = ExtraTreesClassifier(
            n_estimators=200, max_depth=15, 
            min_samples_split=10, min_samples_leaf=4,
            max_features='sqrt', random_state=42, n_jobs=-1
        )
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def bagging(X_train, y_train):
        """
        33. Bagging Classifier
        Reference: "Enhancing financial product forecasting" (2025)
        """
        base_estimator = DecisionTreeClassifier(max_depth=10, random_state=42)
        model = BaggingClassifier(
            estimator=base_estimator, n_estimators=100, 
            max_samples=0.8, max_features=0.8, 
            random_state=42, n_jobs=-1
        )
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def gradient_boosting(X_train, y_train):
        """
        34. Gradient Boosting Classifier
        Reference: "Comparative Analysis of Ensemble-Based Models" (2025)
        """
        model = GradientBoostingClassifier(
            n_estimators=200, learning_rate=0.1, 
            max_depth=7, subsample=0.8, 
            min_samples_split=10, random_state=42
        )
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def simple_rnn(X_train, y_train, X_val, y_val, input_shape):
        """
        35. Simple RNN
        Reference: "Utilizing RNN for Real-time Cryptocurrency" (2024)
        """
        model = Sequential([
            SimpleRNN(128, activation='tanh', return_sequences=True,
                     input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            SimpleRNN(64, activation='tanh', kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def mlp(X_train, y_train, X_val, y_val):
        """
        36. Multi-Layer Perceptron
        Reference: "Deep neural networks for cryptocurrencies" (2019)
        Reference: "Comparative Analysis Ensemble-Based Models" (2025)
        """
        input_dim=X_train.shape[1]
        model = Sequential([
            Dense(256, activation='relu', input_dim=input_dim, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0)
        return model

    @staticmethod
    def emd_lstm(X_train, y_train, X_val, y_val, input_shape):
        """
        37. EMD-LSTM (Empirical Mode Decomposition + LSTM)
        Reference: "EMD-LSTM for Cryptocurrency Price Forecasting" (2025)
        """
        inputs = Input(shape=input_shape)

        # Simulate EMD by multi-scale feature extraction
        # Low frequency (trend)
        low_freq = tf.keras.layers.AveragePooling1D(pool_size=5, strides=1, padding='same')(inputs)
        low_freq = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(low_freq)

        # High frequency (detail)
        high_freq = inputs - tf.keras.layers.AveragePooling1D(pool_size=5, strides=1, padding='same')(inputs)
        high_freq = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(high_freq)

        # Combine decomposed features
        x = Concatenate()([low_freq, high_freq])
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = LSTM(64, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def hybrid_lstm_gru(X_train, y_train, X_val, y_val, input_shape):
        """
        38. Hybrid LSTM-GRU
        Reference: "Cryptocurrency price prediction through integrated forecasting" (2024)
        Reference: "Development of cryptocurrency price prediction model" (2025)
        """
        model = Sequential([
            LSTM(128, return_sequences=True, input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            GRU(96, return_sequences=True, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            GRU(32, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def parallel_cnn(X_train, y_train, X_val, y_val, input_shape):
        """
        39. Parallel CNN
        Reference: "Time series prediction for cryptocurrency with transformer and parallel CNN" (2025)
        """
        inputs = Input(shape=input_shape)

        # Branch 1: Small kernel
        conv1 = Conv1D(64, 3, activation='relu', padding='same', kernel_regularizer=l2(0.01))(inputs)
        conv1 = BatchNormalization()(conv1)
        conv1 = MaxPooling1D(2)(conv1)
        conv1 = Dropout(0.2)(conv1)

        # Branch 2: Medium kernel
        conv2 = Conv1D(64, 5, activation='relu', padding='same', kernel_regularizer=l2(0.01))(inputs)
        conv2 = BatchNormalization()(conv2)
        conv2 = MaxPooling1D(2)(conv2)
        conv2 = Dropout(0.2)(conv2)

        # Branch 3: Large kernel
        conv3 = Conv1D(64, 7, activation='relu', padding='same', kernel_regularizer=l2(0.01))(inputs)
        conv3 = BatchNormalization()(conv3)
        conv3 = MaxPooling1D(2)(conv3)
        conv3 = Dropout(0.2)(conv3)

        # Merge branches
        x = Concatenate()([conv1, conv2, conv3])
        x = Conv1D(128, 3, activation='relu', padding='same', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def stacking_ensemble(X_train, y_train, X_val, y_val):
        """
        40. Stacking Ensemble Classifier
        Reference: "Stacking Ensemble Deep Learning for Bitcoin" (2022)
        Reference: "Comparative Analysis of Ensemble-Based Models" (2025)
        In thesis, Accuracy: 81.80%, F1: 81.49%, AUC: 88.43%
        """
        # Base learners
        base_learners = [
            ('rf', RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)),
            ('xgb', XGBClassifier(n_estimators=100, max_depth=5, learning_rate=0.1, random_state=42)),
            ('lgbm', LGBMClassifier(n_estimators=100, max_depth=5, learning_rate=0.1, random_state=42, verbose=-1))
        ]

        # Meta learner
        meta_learner = LogisticRegression(max_iter=1000, random_state=42)

        model = StackingClassifier(
            estimators=base_learners, 
            final_estimator=meta_learner, 
            cv=5, n_jobs=-1
        )
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def voting_hard(X_train, y_train):
        """
        41. Voting Classifier (Hard Voting)
        Reference: "Detecting Anomalies in Blockchain" (2024)
        Accuracy: 97%, 다수결 투표 방식
        """
        estimators = [
            ('rf', RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)),
            ('xgb', XGBClassifier(n_estimators=100, random_state=42)),
            ('lgbm', LGBMClassifier(n_estimators=100, random_state=42, verbose=-1)),
            ('svm', SVC(kernel='rbf', C=1.0, random_state=42, probability=True))
        ]

        model = VotingClassifier(estimators=estimators, voting='hard', n_jobs=-1)
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def voting_soft(X_train, y_train):
        """
        42. Voting Classifier (Soft Voting)
        Reference: "Enhancing blockchain transaction classification" (2025)
        확률 기반 소프트 투표 방식
        """
        estimators = [
            ('rf', RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)),
            ('xgb', XGBClassifier(n_estimators=100, random_state=42)),
            ('lgbm', LGBMClassifier(n_estimators=100, random_state=42, verbose=-1)),
            ('lr', LogisticRegression(max_iter=1000, random_state=42))
        ]

        model = VotingClassifier(estimators=estimators, voting='soft', n_jobs=-1)
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def lstm_xgboost_hybrid(X_train, y_train, X_val, y_val, input_shape):
        """
        43. LSTM + XGBoost Hybrid
        Reference: "CRYPTO PRICE PREDICTION USING LSTM+XGBOOST" (2025)
        """
        # Stage 1: LSTM feature extraction
        inputs = Input(shape=input_shape)
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        x = LSTM(64, return_sequences=False, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        lstm_features = Dropout(0.3)(x)

        # Dense layers for feature transformation
        x = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(lstm_features)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def residual_lstm(X_train, y_train, X_val, y_val, input_shape):
        """
        44. Residual LSTM
        Reference: "Deep Learning for Cryptocurrency" (2024)
        """
        inputs = Input(shape=input_shape)

        # First LSTM block with residual
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        # Second LSTM block with residual
        lstm_out = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        lstm_out = BatchNormalization()(lstm_out)
        x = Add()([x, lstm_out])  # Residual connection
        x = Dropout(0.3)(x)

        # Final LSTM
        x = LSTM(64, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def wavenet(X_train, y_train, X_val, y_val, input_shape):
        """
        45. WaveNet-style Dilated Causal CNN
        Reference: "Advanced Time Series Models for Crypto" (2024)
        """
        inputs = Input(shape=input_shape)

        x = inputs
        skip_connections = []

        # Dilated causal convolutions
        for dilation_rate in [1, 2, 4, 8, 16, 32]:
            # Dilated conv
            conv = Conv1D(64, 2, padding='causal', dilation_rate=dilation_rate,
                         activation='relu', kernel_regularizer=l2(0.01))(x)
            conv = BatchNormalization()(conv)
            conv = Dropout(0.2)(conv)

            # Skip connection
            skip = Conv1D(64, 1, kernel_regularizer=l2(0.01))(conv)
            skip_connections.append(skip)

            # Residual connection
            res = Conv1D(64, 1, kernel_regularizer=l2(0.01))(conv)
            if x.shape[-1] != 64:
                x = Conv1D(64, 1, kernel_regularizer=l2(0.01))(x)
            x = Add()([x, res])

        # Aggregate skip connections
        x = Add()(skip_connections)
        x = Activation('relu')(x)
        x = GlobalAveragePooling1D()(x)

        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    
# ============================================================================
# RegressionModels (15개 모델)
# ============================================================================

class RegressionModels:
    """회귀 모델 (15개 논문 기반)"""
    
    @staticmethod
    def random_forest_reg(X_train, y_train):
        """1. Random Forest Regressor"""
        model = RandomForestRegressor(
            n_estimators=200, max_depth=15, min_samples_split=10,
            min_samples_leaf=4, max_features='sqrt', random_state=42, n_jobs=-1
        )
        model.fit(X_train, y_train)
        return model
    
    @staticmethod
    def lightgbm_reg(X_train, y_train, X_val, y_val):
        """
        2. LightGBM Regressor
        Reference: "Beyond Conventional Methods" (2025)
        """
        model = LGBMRegressor(
            n_estimators=200, max_depth=7, learning_rate=0.05, num_leaves=31,
            subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1,
            min_child_samples=20, random_state=42, verbose=-1
        )
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)],
                 callbacks=[early_stopping(50, verbose=False)])
        return model
    
    @staticmethod
    def xgboost_reg(X_train, y_train, X_val, y_val):
        """
        3. XGBoost Regressor
        Reference: "Beyond Conventional Methods" (2025)
        """
        model = XGBRegressor(
            n_estimators=200, max_depth=7, learning_rate=0.05,
            subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=1.0,
            min_child_weight=3, gamma=0.1, random_state=42
        )
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
        return model
    
    @staticmethod
    def svr(X_train, y_train):
        """
        4. Support Vector Regressor
        Reference: "SVR to Improve ETH Prediction" (2025)
        R²: 0.9985, MSE: 2137.97
        """
        model = SVR(kernel='linear', C=100, epsilon=1, gamma='scale')
        model.fit(X_train, y_train)
        return model
    
    @staticmethod
    def lstm_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        5. LSTM Regressor
        Reference: "AI-based model" (2025) - R²: 97.44%
        """
        model = Sequential([
            LSTM(128, activation='tanh', return_sequences=True, 
                 input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            LSTM(64, activation='tanh', kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def bilstm_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        6. BiLSTM Regressor
        Reference: "Predicting Bitcoin" (2025) - R²: 0.98
        """
        model = Sequential([
            Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01)), 
                         input_shape=input_shape),
            BatchNormalization(),
            Dropout(0.3),
            Bidirectional(LSTM(64, kernel_regularizer=l2(0.01))),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def gru(X_train, y_train, X_val, y_val, input_shape):
        """
        7. GRU Regressor
        Reference: "Comparative Analysis" (2025) - RMSE: 0.0234
        """
        model = Sequential([
            GRU(128, activation='tanh', return_sequences=True, 
                input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            GRU(64, activation='tanh', kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def stacked_lstm_reg(X_train, y_train, X_val, y_val, input_shape):
        """8. Stacked LSTM (3 layers)"""
        model = Sequential([
            LSTM(128, return_sequences=True, input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            LSTM(96, return_sequences=True, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            LSTM(64, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def cnn_lstm(X_train, y_train, X_val, y_val, input_shape):
        """
        9. CNN-LSTM
        Reference: "Application of CNN-BiLSTM" (2025)
        """
        model = Sequential([
            Conv1D(64, 3, activation='relu', padding='same', 
                   input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            MaxPooling1D(2),
            Dropout(0.2),
            Conv1D(32, 3, activation='relu', padding='same', kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.2),
            LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            LSTM(64, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def cnn_gru(X_train, y_train, X_val, y_val, input_shape):
        """10. CNN-GRU"""
        model = Sequential([
            Conv1D(64, 3, activation='relu', padding='same', 
                   input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            MaxPooling1D(2),
            Dropout(0.2),
            GRU(128, return_sequences=True, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            GRU(64, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def cnn_bilstm(X_train, y_train, X_val, y_val, input_shape):
        """
        11. CNN-BiLSTM
        Reference: "Application of CNN-BiLSTM" (2025)
        MAPE: 2.8546%, R²: 0.9415
        """
        model = Sequential([
            Conv1D(64, 3, activation='relu', padding='same', 
                   input_shape=input_shape, kernel_regularizer=l2(0.01)),
            BatchNormalization(),
            MaxPooling1D(2),
            Dropout(0.2),
            Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))),
            BatchNormalization(),
            Dropout(0.3),
            Bidirectional(LSTM(64, kernel_regularizer=l2(0.01))),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def seq2seq(X_train, y_train, X_val, y_val, input_shape):
        """
        12. Seq2Seq (Encoder-Decoder)
        Reference: "Bitcoin price prediction using LSTM autoencoder" (2024)
        """
        # Encoder
        encoder_inputs = Input(shape=input_shape)
        encoder = LSTM(128, return_state=True, kernel_regularizer=l2(0.01))
        encoder_outputs, state_h, state_c = encoder(encoder_inputs)
        encoder_states = [state_h, state_c]
        
        # Decoder
        decoder_inputs = RepeatVector(1)(encoder_outputs)
        decoder_lstm = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))
        decoder_outputs = decoder_lstm(decoder_inputs, initial_state=encoder_states)
        decoder_outputs = Dropout(0.3)(decoder_outputs)
        decoder_dense = TimeDistributed(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))
        decoder_outputs = decoder_dense(decoder_outputs)
        decoder_outputs = Flatten()(decoder_outputs)
        outputs = Dense(1)(decoder_outputs)
        
        model = Model(encoder_inputs, outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def wavenet(X_train, y_train, X_val, y_val, input_shape):
        """
        13. WaveNet
        Reference: "Bitcoin price prediction using WaveNets" (2019)
        """
        inputs = Input(shape=input_shape)
        x = inputs
        # WaveNet-style dilated convolutions
        for dilation_rate in [1, 2, 4, 8, 16]:
            tanh_out = Conv1D(32, 2, padding='causal', dilation_rate=dilation_rate,
                             activation='tanh', kernel_regularizer=l2(0.01))(x)
            sigmoid_out = Conv1D(32, 2, padding='causal', dilation_rate=dilation_rate,
                                activation='sigmoid', kernel_regularizer=l2(0.01))(x)
            z = tf.keras.layers.Multiply()([tanh_out, sigmoid_out])
            z = Conv1D(32, 1, kernel_regularizer=l2(0.01))(z)
            x = Add()([x, z]) if x.shape[-1] == 32 else z
        
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1)(x)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def tcn_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        14. Temporal Convolutional Network
        Reference: "Utilising TCN for Cryptocurrency" (2024)
        """
        inputs = Input(shape=input_shape)
        x = inputs
        for dilation_rate in [1, 2, 4, 8]:
            conv = Conv1D(64, 3, padding='causal', dilation_rate=dilation_rate,
                         activation='relu', kernel_regularizer=l2(0.01))(x)
            conv = BatchNormalization()(conv)
            conv = Dropout(0.2)(conv)
            x = Add()([x, conv]) if x.shape[-1] == 64 else conv
        
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1)(x)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def transformer_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        15. Transformer Regressor
        Reference: "Transformer-based approach for ETH" (2024)
        """
        inputs = Input(shape=input_shape)
        attn_output = MultiHeadAttention(num_heads=8, key_dim=64, dropout=0.1)(inputs, inputs)
        attn_output = Dropout(0.1)(attn_output)
        x = LayerNormalization(epsilon=1e-6)(inputs + attn_output)
        ff = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(x)
        ff = Dropout(0.1)(ff)
        ff = Dense(input_shape[1], kernel_regularizer=l2(0.01))(ff)
        x = LayerNormalization(epsilon=1e-6)(x + ff)
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1)(x)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=50, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    @staticmethod
    def tabnet_reg(X_train, y_train, X_val, y_val):
        """
        16. TabNet Regressor
        Reference: "TabNet for Cryptocurrency Forecasting" (2024)
        해석 가능한 특징 선택으로 암호화폐 가격 예측
        """
        model = TabNetRegressor(
            n_d=64, n_a=64, n_steps=5,
            gamma=1.5, n_independent=2, n_shared=2,
            lambda_sparse=1e-4, momentum=0.3,
            mask_type='entmax', optimizer_fn=torch.optim.Adam,
            optimizer_params=dict(lr=2e-2),
            scheduler_params={"step_size": 50, "gamma": 0.9},
            scheduler_fn=torch.optim.lr_scheduler.StepLR,
            verbose=0, seed=42
        )
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            max_epochs=100, patience=20,
            batch_size=256, virtual_batch_size=128
        )
        return model
    
    @staticmethod
    def informer_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        17. Informer Regressor
        Reference: "Informer on High Frequency Bitcoin Data" (2025)
        5분봉 비트코인 데이터에서 Buy-and-Hold 전략 능가
        """
        inputs = Input(shape=input_shape)
        
        x = inputs
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            x = Conv1D(input_shape[1], 1, activation='relu', 
                      kernel_regularizer=l2(0.01))(x)
            x = MaxPooling1D(2, padding='same')(x)
        
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1)(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def nbeats_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        18. N-BEATS Regressor
        Reference: "N-BEATS Perceiver for Crypto Portfolio" (2024)
        암호화폐 포트폴리오 예측에서 정확도와 견고성 향상
        """
        inputs = Input(shape=input_shape)
        
        # Trend stack
        x1 = Flatten()(inputs)
        for _ in range(4):
            x1 = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x1)
            x1 = BatchNormalization()(x1)
            x1 = Dropout(0.2)(x1)
        trend = Dense(64, activation='linear', kernel_regularizer=l2(0.01))(x1)
        
        # Seasonality stack
        x2 = Flatten()(inputs)
        for _ in range(4):
            x2 = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x2)
            x2 = BatchNormalization()(x2)
            x2 = Dropout(0.2)(x2)
        season = Dense(64, activation='linear', kernel_regularizer=l2(0.01))(x2)
        
        combined = Add()([trend, season])
        combined = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(combined)
        combined = Dropout(0.3)(combined)
        outputs = Dense(1)(combined)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def tft_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        19. Temporal Fusion Transformer Regressor
        Reference: "Adaptive TFT for Cryptocurrency" (2025)
        동적 subseries와 패턴 기반 분류로 단기 예측 강화
        """
        inputs = Input(shape=input_shape)
        
        x = Flatten()(inputs)
        var_weights = Dense(input_shape[0] * input_shape[1], activation='softmax',
                           kernel_regularizer=l2(0.01))(x)
        var_weights = Reshape(input_shape)(var_weights)
        selected = Multiply()([inputs, var_weights])
        
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(selected)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        
        attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
        x = Add()([x, attn])
        x = LayerNormalization(epsilon=1e-6)(x)
        
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1)(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def performer_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        20. Performer Regressor
        Reference: "Performer with BiLSTM for Crypto" (2024)
        시간별·일별 주요 암호화폐 예측에서 기존 방법 능가
        """
        inputs = Input(shape=input_shape)
        
        x = inputs
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=64, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Bidirectional(LSTM(64, return_sequences=True, 
                                   kernel_regularizer=l2(0.01)))(x)
            ff = Dropout(0.1)(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1)(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def patchtst_reg(X_train, y_train, X_val, y_val, input_shape, patch_len=16, stride=8):
        """
        21. PatchTST Regressor
        Reference: "Crypto Predictions with PatchTST" (2024)
        이더리움 LLM 예측에서 GPT-2, Llama 다음으로 우수
        """
        inputs = Input(shape=input_shape)
        
        x = inputs
        num_patches = (input_shape[0] - patch_len) // stride + 1
        
        patches = []
        for i in range(0, input_shape[0] - patch_len + 1, stride):
            patch = Lambda(lambda z: z[:, i:i+patch_len, :])(x)
            patch = Flatten()(patch)
            patch = Dense(128, kernel_regularizer=l2(0.01))(patch)
            patches.append(patch)
        
        if len(patches) > 1:
            x = tf.stack(patches, axis=1)
        else:
            x = tf.expand_dims(patches[0], axis=1)
        
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Dense(256, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(128, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1)(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def autoformer_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        22. Autoformer Regressor
        Reference: "Autoformer for Long-term Forecasting" (2021)
        시계열 분해로 트렌드와 계절성 동시 포착
        """
        inputs = Input(shape=input_shape)
        
        x = inputs
        trend = tf.keras.layers.AveragePooling1D(pool_size=25, strides=1, 
                                                  padding='same')(x)
        seasonal = tf.subtract(x, trend)
        
        x = seasonal
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Dense(128, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(input_shape[1], kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        seasonal_out = GlobalAveragePooling1D()(x)
        trend_out = GlobalAveragePooling1D()(trend)
        combined = Concatenate()([seasonal_out, trend_out])
        
        combined = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(combined)
        combined = Dropout(0.3)(combined)
        outputs = Dense(1)(combined)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def itransformer_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        23. iTransformer Regressor
        Reference: "iTransformer Crypto Forecasting" (2024)
        변수 간 관계를 시간보다 우선시하여 처리
        """
        inputs = Input(shape=input_shape)
        
        x = tf.transpose(inputs, perm=[0, 2, 1])
        x = Dense(64, kernel_regularizer=l2(0.01))(x)
        x = LayerNormalization(epsilon=1e-6)(x)
        
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=16, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Dense(128, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(64, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1)(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def ethervoyant_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        24. EtherVoyant Regressor
        Reference: "EtherVoyant: State-of-the-art ETH Forecasting" (2024)
        이더리움 가격 예측 전용 최신 모델
        """
        inputs = Input(shape=input_shape)
        
        conv1 = Conv1D(64, 3, activation='relu', padding='same', 
                      kernel_regularizer=l2(0.01))(inputs)
        conv1 = BatchNormalization()(conv1)
        conv1 = Dropout(0.2)(conv1)
        
        conv2 = Conv1D(64, 5, activation='relu', padding='same', 
                      kernel_regularizer=l2(0.01))(inputs)
        conv2 = BatchNormalization()(conv2)
        conv2 = Dropout(0.2)(conv2)
        
        x = Concatenate()([conv1, conv2])
        x = MaxPooling1D(2)(x)
        
        x = Bidirectional(LSTM(128, return_sequences=True, 
                              kernel_regularizer=l2(0.01)))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        
        attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
        x = Add()([x, attn])
        x = LayerNormalization(epsilon=1e-6)(x)
        
        x = Bidirectional(LSTM(64, kernel_regularizer=l2(0.01)))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1)(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0)
        return model
    
    @staticmethod
    def vmd_hybrid_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        25. VMD-PatchTST Hybrid Regressor
        Reference: "VMD-PatchTST for Stock/Crypto Forecasting" (2024)
        변동성 분해 + Transformer로 정확도 향상
        """
        inputs = Input(shape=input_shape)
        
        low_freq = tf.keras.layers.AveragePooling1D(pool_size=5, strides=1, 
                                                     padding='same')(inputs)
        low_freq = Conv1D(32, 3, activation='relu', padding='same',
                         kernel_regularizer=l2(0.01))(low_freq)
        
        mid_freq = inputs - low_freq
        mid_freq = Conv1D(32, 3, activation='relu', padding='same',
                         kernel_regularizer=l2(0.01))(mid_freq)
        
        high_freq = inputs - low_freq - mid_freq
        high_freq = Conv1D(32, 3, activation='relu', padding='same',
                          kernel_regularizer=l2(0.01))(high_freq)
        
        x = Concatenate()([low_freq, mid_freq, high_freq])
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)
        
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Dense(128, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(96, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        x = GlobalAveragePooling1D()(x)
        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1)(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model
    
    @staticmethod
    def dtw_lstm_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        26. DTW-LSTM Regressor
        Reference: "Application of Dynamic Time Warping on ETH Prediction" (2024)
        DTW로 유사 패턴 식별 후 LSTM 예측 - 기존 LSTM 대비 23.4% 향상
        """
        inputs = Input(shape=input_shape)

        # DTW-inspired feature extraction using Conv1D with multiple dilation rates
        dtw_features = []
        for dilation in [1, 2, 4, 8]:
            conv = Conv1D(32, 3, dilation_rate=dilation, padding='causal',
                         activation='relu', kernel_regularizer=l2(0.01))(inputs)
            conv = BatchNormalization()(conv)
            dtw_features.append(conv)

        x = Concatenate()(dtw_features)
        x = Dropout(0.2)(x)

        # LSTM layers
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = LSTM(64, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1)(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def attention_lstm_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        27. Attention-LSTM Regressor
        Reference: "ETH Gas Price Prediction with Attention" (2024)
        어텐션 메커니즘으로 중요 시점 강조
        """
        inputs = Input(shape=input_shape)

        # LSTM layers
        lstm_out = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        lstm_out = BatchNormalization()(lstm_out)
        lstm_out = Dropout(0.3)(lstm_out)

        # Attention mechanism
        attention = Dense(1, activation='tanh')(lstm_out)
        attention = Flatten()(attention)
        attention = Activation('softmax')(attention)
        attention = RepeatVector(128)(attention)
        attention = Permute([2, 1])(attention)

        # Apply attention
        attended = Multiply()([lstm_out, attention])
        attended = Lambda(lambda x: tf.reduce_sum(x, axis=1))(attended)

        attended = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(attended)
        attended = BatchNormalization()(attended)
        attended = Dropout(0.3)(attended)

        attended = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(attended)
        attended = Dropout(0.2)(attended)
        outputs = Dense(1)(attended)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def dual_attention_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        28. Dual Attention Mechanism Regressor
        Reference: "Dual Attention Mechanism for Crypto Trend" (2024)
        시계열 어텐션 + 특징 어텐션 결합으로 20% 성능 향상
        """
        inputs = Input(shape=input_shape)

        # Feature attention
        feature_attention = Dense(input_shape[1], activation='sigmoid',
                                 kernel_regularizer=l2(0.01))(inputs)
        feature_attended = Multiply()([inputs, feature_attention])

        # Temporal processing
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(feature_attended)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        # Temporal attention
        temporal_attention = Dense(1, activation='tanh')(x)
        temporal_attention = Flatten()(temporal_attention)
        temporal_attention = Activation('softmax')(temporal_attention)
        temporal_attention = RepeatVector(128)(temporal_attention)
        temporal_attention = Permute([2, 1])(temporal_attention)

        x = Multiply()([x, temporal_attention])
        x = Lambda(lambda z: tf.reduce_sum(z, axis=1))(x)

        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1)(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def cross_correlation_lstm_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        29. Cross-Correlation LSTM Regressor
        Reference: "Crypto Volatility Prediction via Cross-Correlation" (2024)
        가격·수익률·변동성 간 상호상관으로 특징 선택
        """
        inputs = Input(shape=input_shape)

        # Feature correlation learning
        x = Conv1D(64, 1, activation='relu', kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)

        # Multi-scale temporal processing
        branch1 = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        branch2 = GRU(64, return_sequences=True, kernel_regularizer=l2(0.01))(x)

        merged = Concatenate()([branch1, branch2])
        merged = BatchNormalization()(merged)
        merged = Dropout(0.3)(merged)

        merged = LSTM(64, kernel_regularizer=l2(0.01))(merged)
        merged = BatchNormalization()(merged)
        merged = Dropout(0.3)(merged)

        merged = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(merged)
        merged = Dropout(0.2)(merged)
        outputs = Dense(1)(merged)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def gradient_optimized_lstm_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        30. Gradient-Optimized LSTM Regressor
        Reference: "Gradient-Specific Optimization for Bitcoin" (2024)
        그래디언트 특화 최적화로 예측 성능 향상
        """
        inputs = Input(shape=input_shape)

        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = LSTM(96, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = LSTM(64, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)

        x = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1)(x)

        model = Model(inputs=inputs, outputs=outputs)

        # Gradient-specific optimizer with custom learning rate schedule
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1.0)
        model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, 
                                     min_lr=1e-6, verbose=0)

        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, 
                 callbacks=[early_stop, reduce_lr], verbose=0)
        return model

    @staticmethod
    def ensemble_stacking_reg(X_train, y_train, X_val, y_val):
        """
        31. Ensemble Stacking Regressor
        Reference: "Ensemble Bitcoin Price Prediction" (2025)
        스태킹 앙상블 - 81.8% 정확도, 다양한 모델 결합
        """
        from sklearn.ensemble import StackingRegressor
        from sklearn.linear_model import Ridge

        # Base models
        base_models = [
            ('rf', RandomForestRegressor(n_estimators=100, max_depth=10, 
                                        min_samples_split=10, random_state=42, n_jobs=-1)),
            ('lgbm', LGBMRegressor(n_estimators=100, max_depth=5, learning_rate=0.05,
                                  num_leaves=31, random_state=42, verbose=-1)),
            ('xgb', XGBRegressor(n_estimators=100, max_depth=5, learning_rate=0.05,
                                random_state=42))
        ]

        # Meta-learner
        meta_model = Ridge(alpha=1.0)

        # Stacking ensemble
        model = StackingRegressor(
            estimators=base_models,
            final_estimator=meta_model,
            cv=5,
            n_jobs=-1
        )

        model.fit(X_train, y_train)
        return model

    @staticmethod
    def ensemble_voting_reg(X_train, y_train, X_val, y_val):
        """
        32. Ensemble Voting Regressor
        Reference: "Crypto Trading with Ensemble Methods" (2025)
        보팅 앙상블로 리스크 완화, Sharpe ratio 0.28 달성
        """
        from sklearn.ensemble import VotingRegressor

        # Diverse base models
        estimators = [
            ('rf', RandomForestRegressor(n_estimators=100, max_depth=10,
                                        random_state=42, n_jobs=-1)),
            ('lgbm', LGBMRegressor(n_estimators=100, max_depth=5,
                                  learning_rate=0.05, random_state=42, verbose=-1)),
            ('xgb', XGBRegressor(n_estimators=100, max_depth=5,
                                learning_rate=0.05, random_state=42)),
            ('svr', SVR(kernel='rbf', C=100, epsilon=0.1))
        ]

        model = VotingRegressor(estimators=estimators, n_jobs=-1)
        model.fit(X_train, y_train)
        return model

    @staticmethod
    def lstm_xgboost_hybrid_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        33. LSTM-XGBoost Hybrid Regressor
        Reference: "LSTM+XGBoost Crypto Price Prediction" (2025)
        LSTM 특징 추출 + XGBoost 예측
        """
        # LSTM feature extraction
        inputs = Input(shape=input_shape)

        lstm_out = LSTM(64, return_sequences=False, kernel_regularizer=l2(0.01))(inputs)
        lstm_out = BatchNormalization()(lstm_out)
        lstm_out = Dropout(0.3)(lstm_out)

        feature_model = Model(inputs=inputs, outputs=lstm_out)
        feature_model.compile(optimizer='adam', loss='mse')

        # Extract features
        X_train_features = feature_model.predict(X_train, verbose=0)
        X_val_features = feature_model.predict(X_val, verbose=0)

        # XGBoost on extracted features
        xgb_model = XGBRegressor(
            n_estimators=200, max_depth=7, learning_rate=0.05,
            subsample=0.8, colsample_bytree=0.8,
            reg_alpha=0.1, reg_lambda=1.0,
            random_state=42
        )
        xgb_model.fit(X_train_features, y_train,
                     eval_set=[(X_val_features, y_val)],
                     verbose=False)

        # Return both models (need custom wrapper for prediction)
        class HybridModel:
            def __init__(self, feature_extractor, predictor):
                self.feature_extractor = feature_extractor
                self.predictor = predictor

            def predict(self, X):
                features = self.feature_extractor.predict(X, verbose=0)
                return self.predictor.predict(features)

        return HybridModel(feature_model, xgb_model)

    @staticmethod
    def residual_lstm_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        34. Residual LSTM Regressor
        Reference: "Deep Residual Networks for Time Series" (2024)
        잔차 연결로 깊은 네트워크 학습 안정화
        """
        inputs = Input(shape=input_shape)

        # First LSTM block
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        # Residual block 1
        residual1 = x
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Add()([x, residual1])
        x = Dropout(0.3)(x)

        # Residual block 2
        residual2 = x
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Add()([x, residual2])
        x = Dropout(0.3)(x)

        # Final layers
        x = LSTM(64, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        x = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1)(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model

    @staticmethod
    def multiscale_cnn_lstm_reg(X_train, y_train, X_val, y_val, input_shape):
        """
        35. Multi-Scale CNN-LSTM Regressor
        Reference: "Multi-Scale Feature Extraction for Crypto" (2024)
        다중 스케일 특징 추출로 다양한 시간 패턴 포착
        """
        inputs = Input(shape=input_shape)

        # Multi-scale CNN branches
        branch1 = Conv1D(32, 3, padding='same', activation='relu',
                        kernel_regularizer=l2(0.01))(inputs)
        branch1 = BatchNormalization()(branch1)
        branch1 = MaxPooling1D(2)(branch1)

        branch2 = Conv1D(32, 5, padding='same', activation='relu',
                        kernel_regularizer=l2(0.01))(inputs)
        branch2 = BatchNormalization()(branch2)
        branch2 = MaxPooling1D(2)(branch2)

        branch3 = Conv1D(32, 7, padding='same', activation='relu',
                        kernel_regularizer=l2(0.01))(inputs)
        branch3 = BatchNormalization()(branch3)
        branch3 = MaxPooling1D(2)(branch3)

        # Merge branches
        merged = Concatenate()([branch1, branch2, branch3])
        merged = Dropout(0.2)(merged)

        # LSTM processing
        merged = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(merged)
        merged = BatchNormalization()(merged)
        merged = Dropout(0.3)(merged)

        merged = LSTM(64, kernel_regularizer=l2(0.01))(merged)
        merged = BatchNormalization()(merged)
        merged = Dropout(0.3)(merged)

        merged = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(merged)
        merged = Dropout(0.2)(merged)
        outputs = Dense(1)(merged)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=100, batch_size=32, callbacks=[early_stop], verbose=0)
        return model


# ============================================================================
# MultiTaskModels (15개 모델 - 논문 기반)
# ============================================================================

class MultiTaskModels:
    """멀티태스크 학습 모델 15개 (논문 기반)"""
    
    @staticmethod
    def hard_sharing_lstm(X_train, y_train_dir, y_train_ret, 
                          X_val, y_val_dir, y_val_ret, input_shape):
        """
        1. Hard Parameter Sharing LSTM
        Reference: "Cryptocurrency price prediction and portfolio optimization" (2025)
        Most common MTL approach
        """
        inputs = Input(shape=input_shape)
        
        # Shared layers (Hard sharing)
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        shared = LSTM(64, kernel_regularizer=l2(0.01))(x)
        shared = BatchNormalization()(shared)
        
        # Task-specific heads
        dir_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = BatchNormalization()(dir_head)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = BatchNormalization()(ret_head)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def bilstm_mtl(X_train, y_train_dir, y_train_ret, 
                   X_val, y_val_dir, y_val_ret, input_shape):
        """
        2. BiLSTM Multi-Task (Hard Sharing)
        Reference: "Multi-iTR" (2025) - R²: 0.98
        """
        inputs = Input(shape=input_shape)
        x = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01)))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        shared = Bidirectional(LSTM(64, kernel_regularizer=l2(0.01)))(x)
        shared = BatchNormalization()(shared)
        
        dir_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = BatchNormalization()(dir_head)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = BatchNormalization()(ret_head)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def gru_mtl(X_train, y_train_dir, y_train_ret, 
                X_val, y_val_dir, y_val_ret, input_shape):
        """
        3. GRU Multi-Task
        Reference: "Comparative Analysis of LSTM and GRU for ETH" (2025)
        """
        inputs = Input(shape=input_shape)
        x = GRU(128, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        shared = GRU(64, kernel_regularizer=l2(0.01))(x)
        shared = BatchNormalization()(shared)
        
        dir_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def soft_sharing_lstm(X_train, y_train_dir, y_train_ret, 
                         X_val, y_val_dir, y_val_ret, input_shape):
        """
        4. Soft Parameter Sharing LSTM
        Reference: "Task's Choice: Pruning-Based Feature Sharing" (2022)
        Each task has its own network, regularized to be similar
        """
        inputs = Input(shape=input_shape)
        
        # Task 1 network (Direction)
        dir_lstm1 = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01), name='dir_lstm1')(inputs)
        dir_lstm1 = BatchNormalization()(dir_lstm1)
        dir_lstm1 = Dropout(0.3)(dir_lstm1)
        dir_lstm2 = LSTM(64, kernel_regularizer=l2(0.01), name='dir_lstm2')(dir_lstm1)
        dir_lstm2 = BatchNormalization()(dir_lstm2)
        
        # Task 2 network (Return)
        ret_lstm1 = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01), name='ret_lstm1')(inputs)
        ret_lstm1 = BatchNormalization()(ret_lstm1)
        ret_lstm1 = Dropout(0.3)(ret_lstm1)
        ret_lstm2 = LSTM(64, kernel_regularizer=l2(0.01), name='ret_lstm2')(ret_lstm1)
        ret_lstm2 = BatchNormalization()(ret_lstm2)
        
        # Task-specific outputs
        dir_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(dir_lstm2)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(ret_lstm2)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def cross_stitch_mtl(X_train, y_train_dir, y_train_ret, 
                        X_val, y_val_dir, y_val_ret, input_shape):
        """
        5. Cross-Stitch Networks
        Reference: "Cross-stitch Networks for Multi-task Learning" (CVPR 2016)
        Learns optimal combination of shared and task-specific representations
        """
        inputs = Input(shape=input_shape)
        
        # Task 1 branch
        dir_lstm1 = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        
        # Task 2 branch
        ret_lstm1 = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        
        # Cross-stitch unit (linear combination)
        # α_dir * dir_lstm1 + β_dir * ret_lstm1 -> new_dir
        # α_ret * ret_lstm1 + β_ret * dir_lstm1 -> new_ret
        concat = Concatenate()([dir_lstm1, ret_lstm1])
        
        # Learn cross-stitch weights
        dir_cross = Dense(64, kernel_regularizer=l2(0.01))(concat)
        dir_cross = BatchNormalization()(dir_cross)
        dir_cross = Dropout(0.3)(dir_cross)
        
        ret_cross = Dense(64, kernel_regularizer=l2(0.01))(concat)
        ret_cross = BatchNormalization()(ret_cross)
        ret_cross = Dropout(0.3)(ret_cross)
        
        # Second layer
        dir_lstm2 = LSTM(64, kernel_regularizer=l2(0.01))(dir_cross)
        ret_lstm2 = LSTM(64, kernel_regularizer=l2(0.01))(ret_cross)
        
        # Outputs
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_lstm2)
        ret_output = Dense(1, name='return')(ret_lstm2)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def mmoe_mtl(X_train, y_train_dir, y_train_ret, 
                X_val, y_val_dir, y_val_ret, input_shape, num_experts=3):
        """
        6. Multi-gate Mixture-of-Experts (MMoE)
        Reference: "Modeling Task Relationships in Multi-task Learning" (Google, KDD 2018)
        Explicitly learns task relationships from data
        """
        inputs = Input(shape=input_shape)
        
        # Expert networks
        experts = []
        for i in range(num_experts):
            expert = LSTM(64, return_sequences=False, 
                         kernel_regularizer=l2(0.01), 
                         name=f'expert_{i}')(inputs)
            expert = BatchNormalization()(expert)
            experts.append(expert)
        
        # Stack experts
        experts_stacked = tf.stack(experts, axis=1)  # (batch, num_experts, 64)
        
        # Gate for direction task
        dir_gate_input = LSTM(32, kernel_regularizer=l2(0.01))(inputs)
        dir_gate = Dense(num_experts, activation='softmax', 
                        kernel_regularizer=l2(0.01), 
                        name='dir_gate')(dir_gate_input)
        dir_gate = tf.expand_dims(dir_gate, -1)  # (batch, num_experts, 1)
        
        # Weighted sum for direction
        dir_weighted = tf.reduce_sum(experts_stacked * dir_gate, axis=1)
        dir_weighted = Dropout(0.3)(dir_weighted)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_weighted)
        
        # Gate for return task
        ret_gate_input = LSTM(32, kernel_regularizer=l2(0.01))(inputs)
        ret_gate = Dense(num_experts, activation='softmax', 
                        kernel_regularizer=l2(0.01), 
                        name='ret_gate')(ret_gate_input)
        ret_gate = tf.expand_dims(ret_gate, -1)
        
        # Weighted sum for return
        ret_weighted = tf.reduce_sum(experts_stacked * ret_gate, axis=1)
        ret_weighted = Dropout(0.3)(ret_weighted)
        ret_output = Dense(1, name='return')(ret_weighted)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def cnn_lstm_mtl(X_train, y_train_dir, y_train_ret, 
                    X_val, y_val_dir, y_val_ret, input_shape):
        """
        7. CNN-LSTM Multi-Task
        Reference: "Bitcoin Price Direction Forecasting" (2024)
        CNN-LSTM for direction + price prediction
        """
        inputs = Input(shape=input_shape)
        
        # Shared CNN layers
        x = Conv1D(64, 3, activation='relu', padding='same', 
                  kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = MaxPooling1D(2)(x)
        x = Dropout(0.2)(x)
        
        # Shared LSTM layers
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        shared = LSTM(64, kernel_regularizer=l2(0.01))(x)
        shared = BatchNormalization()(shared)
        
        # Task-specific heads
        dir_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def cnn_gru_mtl(X_train, y_train_dir, y_train_ret, 
                   X_val, y_val_dir, y_val_ret, input_shape):
        """
        8. CNN-GRU Multi-Task
        Faster than CNN-LSTM while maintaining performance
        """
        inputs = Input(shape=input_shape)
        
        x = Conv1D(64, 3, activation='relu', padding='same', kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = MaxPooling1D(2)(x)
        x = Dropout(0.2)(x)
        
        x = GRU(128, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        shared = GRU(64, kernel_regularizer=l2(0.01))(x)
        shared = BatchNormalization()(shared)
        
        dir_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def transformer_mtl(X_train, y_train_dir, y_train_ret,
                       X_val, y_val_dir, y_val_ret, input_shape):
        """
        9. Transformer Multi-Task
        Reference: "Multi-iTR" (2025) - R²: 0.98
        """
        inputs = Input(shape=input_shape)
        attn_output = MultiHeadAttention(num_heads=8, key_dim=64, dropout=0.1)(inputs, inputs)
        attn_output = Dropout(0.1)(attn_output)
        x = LayerNormalization(epsilon=1e-6)(inputs + attn_output)
        ff = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(x)
        ff = Dropout(0.1)(ff)
        ff = Dense(input_shape[1], kernel_regularizer=l2(0.01))(ff)
        x = LayerNormalization(epsilon=1e-6)(x + ff)
        shared = GlobalAveragePooling1D()(x)
        
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = BatchNormalization()(dir_head)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = BatchNormalization()(ret_head)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=50, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def stacked_lstm_mtl(X_train, y_train_dir, y_train_ret, 
                        X_val, y_val_dir, y_val_ret, input_shape):
        """
        10. Stacked LSTM Multi-Task (3 layers)
        Deep shared representation learning
        """
        inputs = Input(shape=input_shape)
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        x = LSTM(96, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        shared = LSTM(64, kernel_regularizer=l2(0.01))(x)
        shared = BatchNormalization()(shared)
        
        dir_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def attention_mtl(X_train, y_train_dir, y_train_ret, 
                     X_val, y_val_dir, y_val_ret, input_shape):
        """
        11. LSTM-Attention Multi-Task
        Reference: "Representation Learning for Financial Time Series" (2024)
        """
        inputs = Input(shape=input_shape)
        lstm_out = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        lstm_out = BatchNormalization()(lstm_out)
        lstm_out = Dropout(0.3)(lstm_out)
        lstm_out = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(lstm_out)
        lstm_out = BatchNormalization()(lstm_out)
        
        # Self-attention
        attention = Attention()([lstm_out, lstm_out])
        combined = Add()([lstm_out, attention])
        shared = GlobalAveragePooling1D()(combined)
        
        dir_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def tcn_mtl(X_train, y_train_dir, y_train_ret, 
               X_val, y_val_dir, y_val_ret, input_shape):
        """
        12. Temporal Convolutional Network Multi-Task
        Reference: "Utilising TCN for Cryptocurrency Forecasting" (2024)
        """
        inputs = Input(shape=input_shape)
        x = inputs
        for dilation_rate in [1, 2, 4, 8]:
            conv = Conv1D(64, 3, padding='causal', dilation_rate=dilation_rate,
                         activation='relu', kernel_regularizer=l2(0.01))(x)
            conv = BatchNormalization()(conv)
            conv = Dropout(0.2)(conv)
            x = Add()([x, conv]) if x.shape[-1] == 64 else conv
        
        shared = GlobalAveragePooling1D()(x)
        
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def hierarchical_mtl(X_train, y_train_dir, y_train_ret, 
                        X_val, y_val_dir, y_val_ret, input_shape):
        """
        13. Hierarchical Multi-Task Learning
        Reference: "Deep Multitask Learning with Progressive Parameter Sharing" (2023)
        Progressive sharing from low to high layers
        """
        inputs = Input(shape=input_shape)
        
        # Low-level shared features
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        
        # Mid-level: partially shared
        dir_mid = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        ret_mid = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        
        # High-level: task-specific
        dir_high = LSTM(32, kernel_regularizer=l2(0.01))(dir_mid)
        ret_high = LSTM(32, kernel_regularizer=l2(0.01))(ret_mid)
        
        dir_head = Dense(16, activation='relu', kernel_regularizer=l2(0.01))(dir_high)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(16, activation='relu', kernel_regularizer=l2(0.01))(ret_high)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def weighted_loss_mtl(X_train, y_train_dir, y_train_ret, 
                         X_val, y_val_dir, y_val_ret, input_shape):
        """
        14. Adaptive Loss Weighting Multi-Task
        Reference: "Multi-Task Transformer with Adaptive Cross-Entropy Loss" (2022)
        Automatically balances task learning
        """
        inputs = Input(shape=input_shape)
        x = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01)))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        shared = Bidirectional(LSTM(64, kernel_regularizer=l2(0.01)))(x)
        shared = BatchNormalization()(shared)
        
        dir_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        
        # Adaptive loss weights (learned during training)
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.5, 'return': 1.0},  # Give more weight to direction
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0
        )
        return model
    
    @staticmethod
    def ensemble_mtl(X_train, y_train_dir, y_train_ret, 
                    X_val, y_val_dir, y_val_ret, input_shape):
        """
        15. Ensemble Multi-Task Learning
        Reference: "Ensemble-based models for cryptocurrency trading" (2024)
        Multiple shared layers with ensemble output
        """
        inputs = Input(shape=input_shape)
        
        # Multiple shared branches
        branch1 = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        branch1 = BatchNormalization()(branch1)
        branch1 = Dropout(0.3)(branch1)
        branch1 = LSTM(32, kernel_regularizer=l2(0.01))(branch1)
        
        branch2 = GRU(64, return_sequences=True, kernel_regularizer=l2(0.01))(inputs)
        branch2 = BatchNormalization()(branch2)
        branch2 = Dropout(0.3)(branch2)
        branch2 = GRU(32, kernel_regularizer=l2(0.01))(branch2)
        
        # Ensemble
        shared = Concatenate()([branch1, branch2])
        shared = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)
        
        dir_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    @staticmethod
    def tabnet_mtl(X_train, y_train_dir, y_train_ret, 
                   X_val, y_val_dir, y_val_ret):
        """
        16. TabNet Multi-Task (Pseudo MTL)
        Reference: "TabNet for Multi-Task Learning" (2024)
        해석 가능한 특징 중요도와 멀티태스크 학습 결합
        Note: TabNet은 native MTL 미지원, 두 모델 앙상블로 구현
        """
        # Direction model
        dir_model = TabNetClassifier(
            n_d=64, n_a=64, n_steps=5, gamma=1.5,
            lambda_sparse=1e-4, optimizer_params=dict(lr=2e-2),
            verbose=0, seed=42
        )
        dir_model.fit(X_train, y_train_dir, eval_set=[(X_val, y_val_dir)],
                     max_epochs=100, patience=20, batch_size=256)
        
        # Return model
        ret_model = TabNetRegressor(
            n_d=64, n_a=64, n_steps=5, gamma=1.5,
            lambda_sparse=1e-4, optimizer_params=dict(lr=2e-2),
            verbose=0, seed=42
        )
        ret_model.fit(X_train, y_train_ret, eval_set=[(X_val, y_val_ret)],
                     max_epochs=100, patience=20, batch_size=256)
        
        return {'direction': dir_model, 'return': ret_model}
    
    @staticmethod
    def informer_mtl(X_train, y_train_dir, y_train_ret,
                    X_val, y_val_dir, y_val_ret, input_shape):
        """
        17. Informer Multi-Task
        Reference: "Informer for Bitcoin Trading" (2025)
        고빈도 데이터에서 방향과 수익률 동시 예측
        """
        inputs = Input(shape=input_shape)
        
        # Shared Informer layers
        x = inputs
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            x = Conv1D(input_shape[1], 1, activation='relu', 
                      kernel_regularizer=l2(0.01))(x)
            x = MaxPooling1D(2, padding='same')(x)
        
        shared = GlobalAveragePooling1D()(x)
        shared = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(shared)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)
        
        # Task-specific heads
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def nbeats_mtl(X_train, y_train_dir, y_train_ret,
                  X_val, y_val_dir, y_val_ret, input_shape):
        """
        18. N-BEATS Multi-Task
        Reference: "N-BEATS Perceiver" (2024)
        트렌드와 계절성 분해로 암호화폐 포트폴리오 예측
        """
        inputs = Input(shape=input_shape)
        flat = Flatten()(inputs)
        
        # Shared trend analysis
        trend = flat
        for _ in range(3):
            trend = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(trend)
            trend = BatchNormalization()(trend)
            trend = Dropout(0.2)(trend)
        
        # Shared seasonality analysis
        season = flat
        for _ in range(3):
            season = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(season)
            season = BatchNormalization()(season)
            season = Dropout(0.2)(season)
        
        # Combine
        shared = Concatenate()([trend, season])
        shared = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(shared)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)
        
        # Task heads
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def tft_mtl(X_train, y_train_dir, y_train_ret,
               X_val, y_val_dir, y_val_ret, input_shape):
        """
        19. Temporal Fusion Transformer MTL
        Reference: "Adaptive TFT for Cryptocurrency" (2025)
        변수 선택 네트워크로 중요 특징 자동 선택
        """
        inputs = Input(shape=input_shape)
        
        # Variable selection
        x = Flatten()(inputs)
        var_weights = Dense(input_shape[0] * input_shape[1], activation='softmax',
                           kernel_regularizer=l2(0.01))(x)
        var_weights = Reshape(input_shape)(var_weights)
        selected = Multiply()([inputs, var_weights])
        
        # Shared processing
        x = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(selected)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        
        attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
        x = Add()([x, attn])
        x = LayerNormalization(epsilon=1e-6)(x)
        
        shared = GlobalAveragePooling1D()(x)
        shared = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(shared)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)
        
        # Task heads
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0
        )
        return model
    
    @staticmethod
    def performer_mtl(X_train, y_train_dir, y_train_ret,
                     X_val, y_val_dir, y_val_ret, input_shape):
        """
        20. Performer Multi-Task
        Reference: "Performer with BiLSTM" (2024)
        효율적 어텐션 메커니즘으로 계산 비용 절감
        """
        inputs = Input(shape=input_shape)
        
        # Shared Performer layers
        x = inputs
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=64, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Bidirectional(LSTM(64, return_sequences=True, 
                                   kernel_regularizer=l2(0.01)))(x)
            ff = Dropout(0.1)(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        shared = GlobalAveragePooling1D()(x)
        shared = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(shared)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)
        
        # Task heads
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model

    @staticmethod
    def patchtst_mtl(X_train, y_train_dir, y_train_ret,
                    X_val, y_val_dir, y_val_ret, input_shape, patch_len=16, stride=8):
        """
        21. PatchTST Multi-Task
        Reference: "PatchTST for Multi-output Forecasting" (2024)
        채널 독립성으로 멀티태스크 효율성 향상
        """
        inputs = Input(shape=input_shape)
        
        x = inputs
        patches = []
        for i in range(0, input_shape[0] - patch_len + 1, stride):
            patch = Lambda(lambda z: z[:, i:i+patch_len, :])(x)
            patch = Flatten()(patch)
            patch = Dense(128, kernel_regularizer=l2(0.01))(patch)
            patches.append(patch)
        
        if len(patches) > 1:
            x = tf.stack(patches, axis=1)
        else:
            x = tf.expand_dims(patches[0], axis=1)
        
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Dense(256, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(128, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        shared = GlobalAveragePooling1D()(x)
        shared = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(shared)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)
        
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def autoformer_mtl(X_train, y_train_dir, y_train_ret,
                      X_val, y_val_dir, y_val_ret, input_shape):
        """
        22. Autoformer Multi-Task
        Reference: "Autoformer Decomposition" (2021)
        시계열 분해를 멀티태스크에 적용
        """
        inputs = Input(shape=input_shape)
        
        x = inputs
        trend = tf.keras.layers.AveragePooling1D(pool_size=25, strides=1, 
                                                  padding='same')(x)
        seasonal = tf.subtract(x, trend)
        
        x = seasonal
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Dense(128, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(input_shape[1], kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        seasonal_out = GlobalAveragePooling1D()(x)
        trend_out = GlobalAveragePooling1D()(trend)
        shared = Concatenate()([seasonal_out, trend_out])
        shared = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(shared)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)
        
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def itransformer_mtl(X_train, y_train_dir, y_train_ret,
                        X_val, y_val_dir, y_val_ret, input_shape):
        """
        23. iTransformer Multi-Task
        Reference: "iTransformer Multivariate Forecasting" (2024)
        변수 간 관계 우선 처리로 멀티태스크 향상
        """
        inputs = Input(shape=input_shape)
        
        x = tf.transpose(inputs, perm=[0, 2, 1])
        x = Dense(64, kernel_regularizer=l2(0.01))(x)
        x = LayerNormalization(epsilon=1e-6)(x)
        
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=16, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Dense(128, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(64, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        shared = GlobalAveragePooling1D()(x)
        shared = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(shared)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)
        
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def ethervoyant_mtl(X_train, y_train_dir, y_train_ret,
                       X_val, y_val_dir, y_val_ret, input_shape):
        """
        24. EtherVoyant Multi-Task
        Reference: "EtherVoyant ETH Prediction" (2024)
        이더리움 전용 멀티태스크 아키텍처
        """
        inputs = Input(shape=input_shape)
        
        conv1 = Conv1D(64, 3, activation='relu', padding='same', 
                      kernel_regularizer=l2(0.01))(inputs)
        conv1 = BatchNormalization()(conv1)
        conv1 = Dropout(0.2)(conv1)
        
        conv2 = Conv1D(64, 5, activation='relu', padding='same', 
                      kernel_regularizer=l2(0.01))(inputs)
        conv2 = BatchNormalization()(conv2)
        conv2 = Dropout(0.2)(conv2)
        
        x = Concatenate()([conv1, conv2])
        x = MaxPooling1D(2)(x)
        
        x = Bidirectional(LSTM(128, return_sequences=True, 
                              kernel_regularizer=l2(0.01)))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        
        attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
        x = Add()([x, attn])
        x = LayerNormalization(epsilon=1e-6)(x)
        
        shared = Bidirectional(LSTM(64, kernel_regularizer=l2(0.01)))(x)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)
        shared = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(shared)
        
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0
        )
        return model
    
    @staticmethod
    def vmd_hybrid_mtl(X_train, y_train_dir, y_train_ret,
                      X_val, y_val_dir, y_val_ret, input_shape):
        """
        25. VMD-Hybrid Multi-Task
        Reference: "VMD Decomposition for Volatility" (2024)
        변동성 분해로 방향과 수익률 동시 예측
        """
        inputs = Input(shape=input_shape)
        
        low_freq = tf.keras.layers.AveragePooling1D(pool_size=5, strides=1, 
                                                     padding='same')(inputs)
        low_freq = Conv1D(32, 3, activation='relu', padding='same',
                         kernel_regularizer=l2(0.01))(low_freq)
        
        mid_freq = inputs - low_freq
        mid_freq = Conv1D(32, 3, activation='relu', padding='same',
                         kernel_regularizer=l2(0.01))(mid_freq)
        
        high_freq = inputs - low_freq - mid_freq
        high_freq = Conv1D(32, 3, activation='relu', padding='same',
                          kernel_regularizer=l2(0.01))(high_freq)
        
        x = Concatenate()([low_freq, mid_freq, high_freq])
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)
        
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)
            
            ff = Dense(128, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(96, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
        
        shared = GlobalAveragePooling1D()(x)
        shared = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(shared)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)
        
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)
        
        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)
        
        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model
    
    @staticmethod
    def snas_mtl(X_train, y_train_dir, y_train_ret,
                X_val, y_val_dir, y_val_ret, input_shape):
        """
        26. SNAS (Stochastic Neural Architecture Search) Multi-Task
        Reference: "Multi-Task Time Series Forecasting With Shared Attention" (2021)
        태스크 간 공유 어텐션으로 아키텍처 자동 탐색
        """
        inputs = Input(shape=input_shape)

        # Shared Attention Module
        shared_attn = MultiHeadAttention(num_heads=8, key_dim=64, dropout=0.1)(inputs, inputs)
        shared_attn = Dropout(0.1)(shared_attn)
        x = LayerNormalization(epsilon=1e-6)(inputs + shared_attn)

        # Parallel Task-Specific Encoders
        dir_encoder = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        dir_encoder = BatchNormalization()(dir_encoder)
        dir_encoder = Dropout(0.3)(dir_encoder)
        dir_features = LSTM(64, kernel_regularizer=l2(0.01))(dir_encoder)

        ret_encoder = LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01))(x)
        ret_encoder = BatchNormalization()(ret_encoder)
        ret_encoder = Dropout(0.3)(ret_encoder)
        ret_features = LSTM(64, kernel_regularizer=l2(0.01))(ret_encoder)

        # Task outputs
        dir_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(dir_features)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)

        ret_head = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(ret_features)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)

        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model

    @staticmethod
    def dlinear_mtl(X_train, y_train_dir, y_train_ret,
                   X_val, y_val_dir, y_val_ret, input_shape):
        """
        27. DLinear Multi-Task
        Reference: "Are Transformers Effective for Time Series Forecasting?" (2023)
        단순 선형 레이어로 트렌드/계절성 분해 후 예측
        """
        inputs = Input(shape=input_shape)

        # Decomposition using moving average
        trend = tf.keras.layers.AveragePooling1D(pool_size=25, strides=1, padding='same')(inputs)
        seasonal = tf.subtract(inputs, trend)

        # Flatten for linear layers
        trend_flat = Flatten()(trend)
        seasonal_flat = Flatten()(seasonal)

        # Shared feature extraction
        trend_features = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(trend_flat)
        trend_features = BatchNormalization()(trend_features)
        trend_features = Dropout(0.3)(trend_features)

        seasonal_features = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(seasonal_flat)
        seasonal_features = BatchNormalization()(seasonal_features)
        seasonal_features = Dropout(0.3)(seasonal_features)

        shared = Concatenate()([trend_features, seasonal_features])
        shared = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(shared)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)

        # Task-specific heads
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)

        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)

        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model

    @staticmethod
    def fedformer_mtl(X_train, y_train_dir, y_train_ret,
                     X_val, y_val_dir, y_val_ret, input_shape):
        """
        28. FEDformer Multi-Task
        Reference: "FEDformer: Frequency Enhanced Decomposed Transformer" (2022)
        주파수 도메인에서 시계열 분석 수행
        """
        inputs = Input(shape=input_shape)

        # Frequency domain processing (approximation using FFT concepts)
        # Low frequency components
        low_freq = tf.keras.layers.AveragePooling1D(pool_size=5, strides=1, padding='same')(inputs)
        low_freq = Conv1D(64, 3, activation='relu', padding='same', kernel_regularizer=l2(0.01))(low_freq)

        # High frequency components
        high_freq = inputs - low_freq
        high_freq = Conv1D(64, 3, activation='relu', padding='same', kernel_regularizer=l2(0.01))(high_freq)

        # Combine frequency components
        x = Concatenate()([low_freq, high_freq])
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)

        # Transformer blocks
        for _ in range(2):
            attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)

            ff = Dense(256, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(128, kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)

        shared = GlobalAveragePooling1D()(x)
        shared = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(shared)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)

        # Task heads
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)

        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)

        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model

    @staticmethod
    def units_mtl(X_train, y_train_dir, y_train_ret,
                 X_val, y_val_dir, y_val_ret, input_shape):
        """
        29. UniTS Multi-Task
        Reference: "UniTS: A Unified Multi-Task Time Series Model" (NeurIPS 2024)
        Task tokenization으로 예측/생성 태스크 통합
        """
        inputs = Input(shape=input_shape)

        # Task tokens (learnable embeddings)
        dir_task_token = tf.Variable(tf.random.normal([1, 1, input_shape[1]]), trainable=True)
        ret_task_token = tf.Variable(tf.random.normal([1, 1, input_shape[1]]), trainable=True)

        # Expand task tokens to batch size
        batch_size = tf.shape(inputs)[0]
        dir_token_expanded = tf.tile(dir_task_token, [batch_size, 1, 1])
        ret_token_expanded = tf.tile(ret_task_token, [batch_size, 1, 1])

        # Concatenate task tokens with input
        dir_input = Concatenate(axis=1)([dir_token_expanded, inputs])
        ret_input = Concatenate(axis=1)([ret_token_expanded, inputs])

        # Shared transformer encoder
        def transformer_block(x):
            attn = MultiHeadAttention(num_heads=8, key_dim=64, dropout=0.1)(x, x)
            attn = Dropout(0.1)(attn)
            x = LayerNormalization(epsilon=1e-6)(x + attn)

            ff = Dense(256, activation='gelu', kernel_regularizer=l2(0.01))(x)
            ff = Dropout(0.1)(ff)
            ff = Dense(input_shape[1], kernel_regularizer=l2(0.01))(ff)
            x = LayerNormalization(epsilon=1e-6)(x + ff)
            return x

        dir_features = transformer_block(dir_input)
        dir_features = GlobalAveragePooling1D()(dir_features)

        ret_features = transformer_block(ret_input)
        ret_features = GlobalAveragePooling1D()(ret_features)

        # Task-specific outputs
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(dir_features)
        dir_head = BatchNormalization()(dir_head)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)

        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(ret_features)
        ret_head = BatchNormalization()(ret_head)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)

        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer='adam',
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.0, 'return': 1.0},
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop], verbose=0
        )
        return model

    @staticmethod
    def metarl_crypto_mtl(X_train, y_train_dir, y_train_ret,
                         X_val, y_val_dir, y_val_ret, input_shape):
        """
        30. Meta-RL Crypto Multi-Task
        Reference: "Meta-Learning Reinforcement Learning for Crypto-Return Prediction" (2025)
        메타학습과 강화학습을 결합한 암호화폐 예측
        """
        inputs = Input(shape=input_shape)

        # Meta-learning feature extractor
        x = Conv1D(64, 3, activation='relu', padding='same', kernel_regularizer=l2(0.01))(inputs)
        x = BatchNormalization()(x)
        x = MaxPooling1D(2)(x)
        x = Dropout(0.2)(x)

        # Multi-modal processing
        x = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01)))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        # Attention mechanism for meta-learning
        attn = MultiHeadAttention(num_heads=4, key_dim=32, dropout=0.1)(x, x)
        x = Add()([x, attn])
        x = LayerNormalization(epsilon=1e-6)(x)

        shared = Bidirectional(LSTM(64, kernel_regularizer=l2(0.01)))(x)
        shared = BatchNormalization()(shared)
        shared = Dropout(0.3)(shared)

        # Multi-objective reward design
        dir_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        dir_head = BatchNormalization()(dir_head)
        dir_head = Dropout(0.3)(dir_head)
        dir_output = Dense(1, activation='sigmoid', name='direction')(dir_head)

        ret_head = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(shared)
        ret_head = BatchNormalization()(ret_head)
        ret_head = Dropout(0.3)(ret_head)
        ret_output = Dense(1, name='return')(ret_head)

        model = Model(inputs=inputs, outputs=[dir_output, ret_output])
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
            loss={'direction': 'binary_crossentropy', 'return': 'mse'},
            loss_weights={'direction': 1.2, 'return': 0.8},  # Adaptive weighting
            metrics={'direction': 'accuracy', 'return': 'mae'}
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
        model.fit(
            X_train, {'direction': y_train_dir, 'return': y_train_ret},
            validation_data=(X_val, {'direction': y_val_dir, 'return': y_val_ret}),
            epochs=100, batch_size=32, callbacks=[early_stop, reduce_lr], verbose=0
        )
        return model



In [9]:
class ModelEvaluator:
    """모델 평가 및 백테스팅 (Task별 전략 구현)"""
    
    def __init__(self):
        self.results = []
    
    def _predict_model(self, model, X):
        pred = model.predict(X)
        if isinstance(pred, list):
            # 멀티태스크: 각 output별로 반환
            return [p.squeeze() for p in pred] 
        else:
            if len(pred.shape) > 1 and pred.shape[1] == 1:
                pred = pred.squeeze()
            return pred

    
    def evaluate_classification_model(self, model, X_train, y_train, X_val, y_val, 
                                     X_test, y_test, test_returns, test_dates, model_name,
                                     is_deep_learning=False):
        """분류 모델 평가 - Binary Signal 전략"""
        from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
        
        # 예측
        train_pred = self._predict_model(model, X_train)
        val_pred = self._predict_model(model, X_val)
        test_pred = self._predict_model(model, X_test)
        if is_deep_learning:
            test_pred_proba = test_pred
            # 멀티태스크: 분류 output만 선택
            if isinstance(train_pred, list):
                train_pred = train_pred[0]
                val_pred = val_pred[0]
                test_pred = test_pred[0]
            train_pred = (train_pred > 0.5).astype(int).ravel()
            val_pred = (val_pred > 0.5).astype(int).ravel()
            test_pred = (test_pred > 0.5).astype(int).ravel()
        else:
            test_pred_proba = None
        
        # 분류 지표
        train_acc = accuracy_score(y_train, train_pred)
        val_acc = accuracy_score(y_val, val_pred)
        test_acc = accuracy_score(y_test, test_pred)
        
        test_prec = precision_score(y_test, test_pred, zero_division=0)
        test_rec = recall_score(y_test, test_pred, zero_division=0)
        test_f1 = f1_score(y_test, test_pred, zero_division=0)
        test_roc_auc = roc_auc_score(y_test, test_pred)
        
        # 백테스팅 - Binary Signal 전략
        backtest = self._backtest_classification(
            test_pred, test_pred_proba, test_returns, test_dates
        )
        
        self.results.append({
            'Model': model_name,
            'Train_Accuracy': train_acc,
            'Val_Accuracy': val_acc,
            'Test_Accuracy': test_acc,
            'Test_Precision': test_prec,
            'Test_Recall': test_rec,
            'Test_F1': test_f1,
            'Test_AUC_ROC': test_roc_auc,
            **backtest
        })
        
        return self.results[-1]
    
    def evaluate_regression_model(self, model, X_train, y_train, X_val, y_val,
                                  X_test, y_test, test_returns, test_dates, model_name,
                                  is_deep_learning=False, task_output_index=None): 
        """회귀 모델 평가 - Proportional Position Sizing 전략"""

        # 예측
        if is_deep_learning and task_output_index is not None:
            train_pred = self._predict_model(model, X_train)[task_output_index]
            val_pred = self._predict_model(model, X_val)[task_output_index]
            test_pred = self._predict_model(model, X_test)[task_output_index]
        else:
            train_pred = self._predict_model(model, X_train)
            val_pred = self._predict_model(model, X_val)
            test_pred = self._predict_model(model, X_test)

        train_rmse = np.sqrt(mean_squared_error(y_train, train_pred))
        val_rmse = np.sqrt(mean_squared_error(y_val, val_pred))
        test_rmse = np.sqrt(mean_squared_error(y_test, test_pred))
        
        test_mae = mean_absolute_error(y_test, test_pred)
        test_r2 = r2_score(y_test, test_pred)
        
        # MAPE 계산 (0으로 나누기 방지)
        mask = y_test != 0
        if mask.sum() > 0:
            test_mape = np.mean(np.abs((y_test[mask] - test_pred[mask]) / y_test[mask])) * 100
        else:
            test_mape = np.nan
        
        # 방향 예측 정확도
        test_pred_direction = (test_pred > 0).astype(int)
        test_true_direction = (y_test > 0).astype(int)
        direction_acc = accuracy_score(test_true_direction, test_pred_direction)
        
        # 백테스팅 - Proportional Position Sizing 전략
        backtest = self._backtest_regression(
            test_pred, test_returns, test_dates
        )
        
        self.results.append({
            'Model': model_name,
            'Train_RMSE': train_rmse,
            'Val_RMSE': val_rmse,
            'Test_RMSE': test_rmse,
            'Test_MAE': test_mae,
            'Test_R2': test_r2,
            'Test_MAPE': test_mape,
            'Direction_Accuracy': direction_acc,
            **backtest
        })
        
        return self.results[-1]
    
    
    def _predict_model(self, model, X):
        pred = model.predict(X)
        #print(f"[DEBUG] _predict_model - Raw prediction shape: {np.shape(pred)}")

        if isinstance(pred, list):
            cleaned = []
            for i, p in enumerate(pred):
                if isinstance(p, np.ndarray):
                    #print(f"[DEBUG] _predict_model - pred[{i}] shape: {p.shape}")
                    cleaned.append(p.squeeze() if p.shape[-1] == 1 else p)
                else:
                    cleaned.append(p)
            return cleaned
        else:
            return pred.squeeze() if pred.shape[-1] == 1 else pred

    def evaluate_multitask_model(self, model, X_test, y_test_list, test_returns, test_dates, 
                                  model_name, task_types=['classification', 'regression']):

        preds = model.predict(X_test)

        if isinstance(preds, list):
            preds = [p.squeeze() if p.shape[-1] == 1 else p for p in preds]
        else:
            preds = [preds.squeeze()]

        for i, (pred, y_true, task_type) in enumerate(zip(preds, y_test_list, task_types)):

            if pred.ndim == 1 or (pred.ndim == 2 and pred.shape[1] == 1):
                single_pred = pred.squeeze()
                single_true = y_true.squeeze()
                suffix = f"{model_name}_task{i+1}"

                if task_type == 'classification':
                    self.evaluate_classification_model(
                        model, X_test, single_true, X_test, single_true, X_test, single_true,
                        test_returns, test_dates, f"{suffix}_cls", is_deep_learning=True
                    )
                else:
                    self.evaluate_regression_model(
                        model, X_test, single_true, X_test, single_true, X_test, single_true,
                        test_returns, test_dates, f"{suffix}_reg", is_deep_learning=True,task_output_index=i
                    )

            elif pred.ndim == 2 and pred.shape[1] > 1:
                for j in range(pred.shape[1]):
                    single_pred = pred[:, j]
                    if y_true.ndim == 2:
                        single_true = y_true[:, j]
                    else:
                        single_true = y_true  

                    suffix = f"{model_name}_task{i+1}_col{j+1}"

                    if task_type == 'classification':
                        self.evaluate_classification_model(
                            model, X_test, single_true, X_test, single_true, X_test, single_true,
                            test_returns, test_dates, f"{suffix}_cls", is_deep_learning=True
                        )
                    else:
                        self.evaluate_regression_model(
                            model, X_test, single_true, X_test, single_true, X_test, single_true,
                            test_returns, test_dates, f"{suffix}_reg", is_deep_learning=True
                        )

        backtest = self._backtest_multitask(
            preds, y_test_list, test_returns, test_dates, task_types
        )

        self.results.append({
            'Model': f"{model_name}_combined",
            **backtest
        })

        return self.results[-1]

    
    def _backtest_classification(self, predictions, probabilities, returns, dates, 
                                 trading_cost=0.0004, slippage_func=None, confidence_threshold=0.6):
        """
        분류 모델 백테스팅 전략

        Reference:
        - Piparo (2025): "Backtesting Expected Shortfall in Cryptocurrencies" - SSRN 5296678
        - Mettalex (2025): "CEX vs DEX Trading" - CEX 평균 거래비용 0.04%

        Fixes:
        - Look-ahead bias prevention with np.roll
        - Adaptive slippage based on volatility
        """
        if slippage_func is None:
            # 동적 슬리피지: 변동성 고려
            realized_vol = pd.Series(returns).rolling(30, min_periods=1).std().values
            slippage_func = lambda pos_change, vol: 0.0001 * np.sqrt(abs(pos_change)) * np.clip(vol / 0.02, 0.5, 3.0)

        if isinstance(predictions, list):
            pred_bin = predictions[0]  
        else:
            pred_bin = predictions
        if isinstance(probabilities, list):
            proba = probabilities[0]
        else:
            proba = probabilities

        positions = np.where(pred_bin == 1, 1, 0)
        positions = np.roll(positions, 1)  # Look-ahead bias 방지
        positions[0] = 0

        position_changes = np.abs(np.diff(positions, prepend=0))

        # 변동성 기반 슬리피지 계산
        if callable(slippage_func):
            try:
                slippage_costs = slippage_func(position_changes, realized_vol)
            except:
                slippage_costs = slippage_func(position_changes)
        else:
            slippage_costs = 0.0001 * np.sqrt(position_changes)

        trading_costs = position_changes * trading_cost + slippage_costs
        strategy_returns = positions * returns - trading_costs

        basic_metrics = self._calculate_metrics(strategy_returns, positions, position_changes, returns)

        if proba is not None:
            high_confidence = (proba > confidence_threshold) | (proba < (1 - confidence_threshold))
            confident_positions = np.where(
                high_confidence & (proba > 0.5), 1, 0
            )
            confident_positions = np.roll(confident_positions, 1)
            confident_positions[0] = 0

            position_changes_conf = np.abs(np.diff(confident_positions, prepend=0))

            try:
                slippage_costs_conf = slippage_func(position_changes_conf, realized_vol)
            except:
                slippage_costs_conf = slippage_func(position_changes_conf)

            trading_costs_conf = position_changes_conf * trading_cost + slippage_costs_conf
            confident_returns = confident_positions * returns - trading_costs_conf

            conf_metrics = self._calculate_metrics(confident_returns, confident_positions, position_changes_conf, returns)

            return {
                'Total_Return(%)': basic_metrics['Total_Return(%)'],
                'Sharpe': basic_metrics['Sharpe'],
                'Sortino': basic_metrics['Sortino'],
                'Calmar': basic_metrics['Calmar'],
                'Max_Drawdown(%)': basic_metrics['Max_Drawdown(%)'],
                'CVaR_95(%)': basic_metrics['CVaR_95(%)'],
                'Win_Rate(%)': basic_metrics['Win_Rate(%)'],
                'Total_Trades': basic_metrics['Total_Trades'],
                'Confident_Return(%)': conf_metrics['Total_Return(%)'],
                'Confident_Sharpe': conf_metrics['Sharpe'],
                'Profit_Factor': basic_metrics.get('Profit_Factor', 0),
                'Confident_Trades': conf_metrics['Total_Trades']
            }
        else:
            return basic_metrics


    def _backtest_regression(self, predictions, returns, dates, trading_cost=0.0004,
                            slippage_func=None, volatility_target=0.30):
        """
        회귀 모델 백테스팅 전략

        Reference:
        - Mettalex (2025): "Perpetual Futures Trading CEX vs DEX" - 거래비용 및 슬리피지
        - Piparo (2025): Expected Shortfall 백테스팅 - 암호화폐 극단 손실 특성
        - Dudek (2024): "Forecasting Crypto Volatility" - 변동성 특성
        - SpeedBot (2025): "Position Sizing Techniques" - Kelly Criterion 적용

        Fixes:
        - Corrected Kelly Criterion formula
        - Reduced rolling window from 60 to 30 days
        - More conservative Kelly fraction clipping (0.10 instead of 0.25)
        - Adaptive slippage model
        """
        # 변동성 계산 (30일 윈도우로 단축)
        realized_vol = pd.Series(returns).rolling(30, min_periods=1).std().values

        if slippage_func is None:
            # 동적 슬리피지: 변동성이 높을수록 슬리피지 증가
            slippage_func = lambda pos_change, vol: 0.0001 * np.sqrt(abs(pos_change)) * np.clip(vol / 0.02, 0.5, 3.0)

        # === 1. Directional Strategy (기본) ===
        directional_positions = np.where(predictions > 0, 1, 0)
        directional_positions = np.roll(directional_positions, 1)
        directional_positions[0] = 0

        position_changes_dir = np.abs(np.diff(directional_positions, prepend=0))
        try:
            slippage_costs_dir = slippage_func(position_changes_dir, realized_vol)
        except:
            slippage_costs_dir = slippage_func(position_changes_dir)

        trading_costs_dir = position_changes_dir * trading_cost + slippage_costs_dir
        directional_returns = directional_positions * returns - trading_costs_dir

        dir_metrics = self._calculate_metrics(directional_returns, directional_positions, position_changes_dir, returns)

        # === 2. Proportional Strategy with Kelly Criterion ===
        pred_mean = np.mean(predictions)
        pred_std = np.std(predictions)

        # Kelly Criterion 계산 (수정된 공식)
        win_rate = np.mean((predictions > 0) == (returns > 0))
        avg_win = np.mean(returns[returns > 0]) if np.sum(returns > 0) > 0 else 0.01
        avg_loss = abs(np.mean(returns[returns < 0])) if np.sum(returns < 0) > 0 else 0.01
        win_loss_ratio = avg_win / avg_loss

        # 올바른 Kelly Criterion 공식: f = (p * b - q) / b
        # p = win_rate, q = 1 - win_rate, b = win_loss_ratio
        kelly_fraction = (win_rate * win_loss_ratio - (1 - win_rate)) / win_loss_ratio if win_loss_ratio > 0 else 0
        kelly_fraction = np.clip(kelly_fraction, 0, 0.10)  # 더 보수적으로 10% 제한

        # 신호 강도 계산
        if pred_std > 0:
            normalized_pred = (predictions - pred_mean) / pred_std
            signal_strength = 1 / (1 + np.exp(-normalized_pred))
        else:
            signal_strength = np.where(predictions > 0, 0.5, 0)

        # Kelly Criterion 기반 포지션 사이징 (스케일링 조정)
        proportional_positions = signal_strength * kelly_fraction * 10  # 10배 스케일링
        proportional_positions = np.clip(proportional_positions, 0, 1)
        proportional_positions = np.roll(proportional_positions, 1)
        proportional_positions[0] = 0

        position_changes_prop = np.abs(np.diff(proportional_positions, prepend=0))
        try:
            slippage_costs_prop = slippage_func(position_changes_prop, realized_vol)
        except:
            slippage_costs_prop = slippage_func(position_changes_prop)

        trading_costs_prop = position_changes_prop * trading_cost + slippage_costs_prop
        proportional_returns = proportional_positions * returns - trading_costs_prop

        prop_metrics = self._calculate_metrics(proportional_returns, proportional_positions, position_changes_prop, returns)

        # === 3. Volatility-Scaled Strategy ===
        rolling_window = 30  # 60일 -> 30일로 단축

        # EWMA 방식으로 변경 (더 반응성 있게)
        ewma_vol = pd.Series(returns).ewm(span=20, min_periods=rolling_window).std().values

        annualized_vol = ewma_vol * np.sqrt(365)

        vol_scalar = np.where(
            (annualized_vol > 0) & (~np.isnan(annualized_vol)), 
            volatility_target / annualized_vol, 
            1
        )
        vol_scalar = np.clip(vol_scalar, 0.3, 1.5)

        vol_scaled_positions = proportional_positions * vol_scalar
        vol_scaled_positions = np.clip(vol_scaled_positions, 0, 1.0)

        vol_scaled_positions[:rolling_window] = 0

        position_changes_vol = np.abs(np.diff(vol_scaled_positions, prepend=0))
        try:
            slippage_costs_vol = slippage_func(position_changes_vol, realized_vol)
        except:
            slippage_costs_vol = slippage_func(position_changes_vol)

        trading_costs_vol = position_changes_vol * trading_cost + slippage_costs_vol
        vol_scaled_returns = vol_scaled_positions * returns - trading_costs_vol

        vol_metrics = self._calculate_metrics(vol_scaled_returns, vol_scaled_positions, position_changes_vol, returns)

        return {
            'Directional_Return(%)': dir_metrics['Total_Return(%)'],
            'Directional_Sharpe': dir_metrics['Sharpe'],

            'Total_Return(%)': prop_metrics['Total_Return(%)'],
            'Sharpe': prop_metrics['Sharpe'],
            'Sortino': prop_metrics['Sortino'],
            'Calmar': prop_metrics['Calmar'],
            'Max_Drawdown(%)': prop_metrics['Max_Drawdown(%)'],
            'CVaR_95(%)': prop_metrics['CVaR_95(%)'],
            'Win_Rate(%)': prop_metrics['Win_Rate(%)'],
            'Total_Trades': prop_metrics['Total_Trades'],

            'VolScaled_Return(%)': vol_metrics['Total_Return(%)'],
            'VolScaled_Sharpe': vol_metrics['Sharpe'],
            'Profit_Factor': prop_metrics.get('Profit_Factor', 0),
            'Kelly_Fraction': kelly_fraction
        }


    def _backtest_multitask(self, predictions, y_test_list, returns, dates, task_types, 
                           trading_cost=0.0004, slippage_func=None):
        """
        멀티태스크 모델 백테스팅

        Reference:
        - Mettalex (2025): CEX 평균 거래비용 0.04%

        Fixes:
        - Adaptive slippage model
        """
        realized_vol = pd.Series(returns).rolling(30, min_periods=1).std().values

        if slippage_func is None:
            slippage_func = lambda pos_change, vol: 0.0001 * np.sqrt(abs(pos_change)) * np.clip(vol / 0.02, 0.5, 3.0)

        classification_pred = None
        regression_pred = None
        for pred, task_type in zip(predictions, task_types):
            if task_type == 'classification':
                classification_pred = (pred > 0.5).astype(float).ravel()
            elif task_type == 'regression':
                regression_pred = pred.ravel()

        if regression_pred is not None:
            reg_mean = np.mean(regression_pred)
            reg_std = np.std(regression_pred)
            if reg_std > 0:
                normalized_reg = (regression_pred - reg_mean) / reg_std
                signal_strength = 1 / (1 + np.exp(-normalized_reg))
            else:
                signal_strength = np.where(regression_pred > 0, 0.5, 0)
            regression_positions = signal_strength
        else:
            regression_positions = None

        if classification_pred is not None and regression_positions is not None:
            and_positions = classification_pred * regression_positions
            or_positions = np.clip(classification_pred + regression_positions, 0, 1)
            mul_positions = classification_pred * signal_strength
        else:
            and_positions = or_positions = mul_positions = None

        results = {}
        if classification_pred is not None:
            results['direction'] = self._backtest_strategy(classification_pred, returns, dates, trading_cost, slippage_func, realized_vol)
        if regression_positions is not None:
            results['regression'] = self._backtest_strategy(regression_positions, returns, dates, trading_cost, slippage_func, realized_vol)
        if and_positions is not None:
            results['and'] = self._backtest_strategy(and_positions, returns, dates, trading_cost, slippage_func, realized_vol)
        if or_positions is not None:
            results['or'] = self._backtest_strategy(or_positions, returns, dates, trading_cost, slippage_func, realized_vol)
        if mul_positions is not None:
            results['mul'] = self._backtest_strategy(mul_positions, returns, dates, trading_cost, slippage_func, realized_vol)

        best_key = max(results, key=lambda k: results[k]['Sharpe'])
        best_result = results[best_key]
        best_result['Best_Strategy'] = best_key

        return best_result


    def _backtest_strategy(self, positions, returns, dates, trading_cost, slippage_func, realized_vol):
        """
        멀티태스크용 헬퍼 함수

        Fixes:
        - Added volatility-aware slippage
        """
        positions = np.roll(positions, 1)
        positions[0] = 0

        position_changes = np.abs(np.diff(positions, prepend=0))

        try:
            slippage_costs = slippage_func(position_changes, realized_vol)
        except:
            slippage_costs = slippage_func(position_changes)

        trading_costs = position_changes * trading_cost + slippage_costs
        strategy_returns = positions * returns - trading_costs

        return self._calculate_metrics(strategy_returns, positions, position_changes, returns)


    def _calculate_metrics(self, strategy_returns, positions, position_changes, returns=None):
        """
        공통 지표 계산

        Reference:
        - Piparo (2025): "Backtesting Expected Shortfall" - CVaR 계산 필수
        - Mettalex (2025): 암호화폐 극단 손실 특성

        Fixes:
        - CVaR annualization corrected: sqrt(365) -> 365
        - Sharpe ratio uses 365 days (crypto trades 24/7)
        """

        cumulative_returns = (1 + strategy_returns).cumprod()
        total_return = (cumulative_returns[-1] - 1) * 100

        # Sharpe Ratio (암호화폐는 365일 기준)
        if len(strategy_returns) > 0 and np.std(strategy_returns) > 0:
            sharpe = np.mean(strategy_returns) / np.std(strategy_returns) * np.sqrt(365)
        else:
            sharpe = 0

        # Sortino Ratio
        downside_returns = strategy_returns[strategy_returns < 0]
        if len(downside_returns) > 0 and np.std(downside_returns) > 0:
            sortino = np.mean(strategy_returns) / np.std(downside_returns) * np.sqrt(365)
        else:
            sortino = 0

        # Maximum Drawdown
        cummax = np.maximum.accumulate(cumulative_returns)
        drawdown = (cumulative_returns - cummax) / cummax
        max_dd = drawdown.min() * 100 if len(drawdown) > 0 else 0

        # Calmar Ratio
        days = len(strategy_returns)
        if days > 0 and max_dd != 0:
            annualized_return = ((cumulative_returns[-1]) ** (365 / days) - 1) * 100
            calmar = annualized_return / abs(max_dd)
        else:
            calmar = 0

        # CVaR (Conditional Value at Risk) - 수정된 연율화
        if len(strategy_returns) > 0:
            var_95 = np.percentile(strategy_returns, 5)
            # CVaR는 평균값이므로 365를 곱함 (sqrt 아님!)
            cvar_95 = np.mean(strategy_returns[strategy_returns <= var_95]) * 365 * 100
        else:
            cvar_95 = 0

        # Trade-based Win Rate and Profit Factor
        trade_returns = []
        in_position = False
        entry_idx = 0
        entry_value = 1.0
        position_threshold = 0.01

        for i in range(len(positions)):
            if positions[i] > position_threshold and not in_position:
                in_position = True
                entry_idx = i
                entry_value = cumulative_returns[i] if i > 0 else 1.0

            elif positions[i] <= position_threshold and in_position:
                exit_value = cumulative_returns[i]
                trade_return = (exit_value / entry_value - 1) if entry_value > 0 else 0
                trade_returns.append(trade_return)
                in_position = False

        if in_position and len(cumulative_returns) > 0:
            exit_value = cumulative_returns[-1]
            trade_return = (exit_value / entry_value - 1) if entry_value > 0 else 0
            trade_returns.append(trade_return)

        # Win Rate 계산
        if len(trade_returns) > 0:
            winning_trades = sum(1 for r in trade_returns if r > 0)
            total_trades = len(trade_returns)
            win_rate = (winning_trades / total_trades * 100) if total_trades > 0 else 0
        else:
            # Fallback: 일별 수익 기반
            winning_days = np.sum(strategy_returns > 0)
            losing_days = np.sum(strategy_returns < 0)
            total_trades = int(np.sum(position_changes > 0))
            win_rate = (winning_days / (winning_days + losing_days) * 100) if (winning_days + losing_days) > 0 else 0
            total_trades = max(total_trades, 1)

        # Profit Factor
        gross_profit = np.sum(strategy_returns[strategy_returns > 0])
        gross_loss = abs(np.sum(strategy_returns[strategy_returns < 0]))
        profit_factor = (gross_profit / gross_loss) if gross_loss > 0 else 0

        return {
            'Total_Return(%)': total_return,
            'Sharpe': sharpe,
            'Sortino': sortino,
            'Calmar': calmar,
            'Max_Drawdown(%)': max_dd,
            'CVaR_95(%)': cvar_95,
            'Win_Rate(%)': win_rate,
            'Total_Trades': len(trade_returns) if len(trade_returns) > 0 else total_trades,
            'Profit_Factor': profit_factor
        }


    def get_summary_dataframe(self):
        return pd.DataFrame(self.results)

    
    

In [10]:
# ============================================================================
# 모델 설정 정의
# ============================================================================

ML_MODELS_CLASSIFICATION = [
    {'index': 1, 'name': 'RandomForest', 'func': DirectionModels.random_forest, 'needs_val': False},
    {'index': 2, 'name': 'LightGBM', 'func': DirectionModels.lightgbm, 'needs_val': True},
    {'index': 3, 'name': 'XGBoost', 'func': DirectionModels.xgboost, 'needs_val': True},
    {'index': 4, 'name': 'SVM', 'func': DirectionModels.svm, 'needs_val': False},
    {'index': 5, 'name': 'LogisticRegression', 'func': DirectionModels.logistic_regression, 'needs_val': False},
    {'index': 6, 'name': 'NaiveBayes', 'func': DirectionModels.naive_bayes, 'needs_val': False},
    {'index': 7, 'name': 'KNN', 'func': DirectionModels.knn, 'needs_val': False},
    {'index': 8, 'name': 'AdaBoost', 'func': DirectionModels.adaboost, 'needs_val': False},
    {'index': 9, 'name': 'CatBoost', 'func': DirectionModels.catboost, 'needs_val': True},
    {'index': 10, 'name': 'DecisionTree', 'func': DirectionModels.decision_tree, 'needs_val': False},
    {'index': 11, 'name': 'ExtraTrees', 'func': DirectionModels.extra_trees, 'needs_val': False},
    {'index': 12, 'name': 'Bagging', 'func': DirectionModels.bagging, 'needs_val': False},
    {'index': 13, 'name': 'GradientBoosting', 'func': DirectionModels.gradient_boosting, 'needs_val': False},
    {'index': 14, 'name': 'TabNet', 'func': DirectionModels.tabnet, 'needs_val': True},
    {'index': 15, 'name': 'StackingEnsemble', 'func': DirectionModels.stacking_ensemble, 'needs_val': True},
    {'index': 16, 'name': 'VotingHard', 'func': DirectionModels.voting_hard, 'needs_val': False},
    {'index': 17, 'name': 'VotingSoft', 'func': DirectionModels.voting_soft, 'needs_val': False},
    {'index': 18, 'name': 'MLP', 'func': DirectionModels.mlp, 'needs_val': True},
]

# ======================== 시계열/딥러닝/트랜스포머/하이브리드 ===========================
DL_MODELS_CLASSIFICATION = [
    {'index': 19, 'name': 'LSTM', 'func': DirectionModels.lstm},
    {'index': 20, 'name': 'BiLSTM', 'func': DirectionModels.bilstm},
    {'index': 21, 'name': 'GRU', 'func': DirectionModels.gru},
    {'index': 22, 'name': 'Stacked_LSTM', 'func': DirectionModels.stacked_lstm},
    {'index': 23, 'name': 'CNN_LSTM', 'func': DirectionModels.cnn_lstm},
    {'index': 24, 'name': 'CNN_GRU', 'func': DirectionModels.cnn_gru},
    {'index': 25, 'name': 'CNN_BiLSTM', 'func': DirectionModels.cnn_bilstm},
    {'index': 26, 'name': 'LSTM_Attention', 'func': DirectionModels.lstm_attention},
    {'index': 27, 'name': 'Transformer', 'func': DirectionModels.transformer},
    {'index': 28, 'name': 'TCN', 'func': DirectionModels.tcn},
    {'index': 29, 'name': 'DTW_LSTM', 'func': DirectionModels.dtw_lstm},
    {'index': 30, 'name': 'Informer', 'func': DirectionModels.informer},
    {'index': 31, 'name': 'NBEATS', 'func': DirectionModels.nbeats},
    {'index': 32, 'name': 'TFT', 'func': DirectionModels.temporal_fusion_transformer},
    {'index': 33, 'name': 'Performer', 'func': DirectionModels.performer},
    {'index': 34, 'name': 'PatchTST', 'func': DirectionModels.patchtst},
    {'index': 35, 'name': 'Autoformer', 'func': DirectionModels.autoformer},
    {'index': 36, 'name': 'iTransformer', 'func': DirectionModels.itransformer},
    {'index': 37, 'name': 'EtherVoyant', 'func': DirectionModels.ethervoyant},
    {'index': 38, 'name': 'VMD_Hybrid', 'func': DirectionModels.vmd_hybrid},
    {'index': 39, 'name': 'SimpleRNN', 'func': DirectionModels.simple_rnn},
    {'index': 40, 'name': 'EMD_LSTM', 'func': DirectionModels.emd_lstm},
    {'index': 41, 'name': 'Hybrid_LSTM_GRU', 'func': DirectionModels.hybrid_lstm_gru},
    {'index': 42, 'name': 'Parallel_CNN', 'func': DirectionModels.parallel_cnn},
    {'index': 43, 'name': 'LSTM_XGBoost_Hybrid', 'func': DirectionModels.lstm_xgboost_hybrid},
    {'index': 44, 'name': 'Residual_LSTM', 'func': DirectionModels.residual_lstm},
    {'index': 45, 'name': 'WaveNet', 'func': DirectionModels.wavenet},
]


####################################### 멀티 테스크 모델 #####################

DL_MODELS_MULTITASK_ENSEMBLE = [
    {'index': 1, 'name': 'HardSharing_LSTM_MTL', 'func': MultiTaskModels.hard_sharing_lstm, 'outputs': ['direction', 'return']},
    {'index': 2, 'name': 'BiLSTM_MTL', 'func': MultiTaskModels.bilstm_mtl, 'outputs': ['direction', 'return']},
    {'index': 3, 'name': 'GRU_MTL', 'func': MultiTaskModels.gru_mtl, 'outputs': ['direction', 'return']},
    {'index': 4, 'name': 'SoftSharing_LSTM_MTL', 'func': MultiTaskModels.soft_sharing_lstm, 'outputs': ['direction', 'return']},
    {'index': 5, 'name': 'CrossStitch_MTL', 'func': MultiTaskModels.cross_stitch_mtl, 'outputs': ['direction', 'return']},
    {'index': 6, 'name': 'MMoE_MTL', 'func': MultiTaskModels.mmoe_mtl, 'outputs': ['direction', 'return']},
    {'index': 7, 'name': 'CNN_LSTM_MTL', 'func': MultiTaskModels.cnn_lstm_mtl, 'outputs': ['direction', 'price']},
    {'index': 8, 'name': 'CNN_GRU_MTL', 'func': MultiTaskModels.cnn_gru_mtl, 'outputs': ['direction', 'return']},
    {'index': 9, 'name': 'Transformer_MTL', 'func': MultiTaskModels.transformer_mtl, 'outputs': ['direction', 'return']},
    {'index': 10, 'name': 'Stacked_LSTM_MTL', 'func': MultiTaskModels.stacked_lstm_mtl, 'outputs': ['direction', 'return']},
    {'index': 11, 'name': 'Attention_MTL', 'func': MultiTaskModels.attention_mtl, 'outputs': ['direction', 'return']},
    {'index': 12, 'name': 'TCN_MTL', 'func': MultiTaskModels.tcn_mtl, 'outputs': ['direction', 'return']},
    {'index': 13, 'name': 'Hierarchical_MTL', 'func': MultiTaskModels.hierarchical_mtl, 'outputs': ['direction', 'return']},
    {'index': 14, 'name': 'WeightedLoss_MTL', 'func': MultiTaskModels.weighted_loss_mtl, 'outputs': ['direction', 'return']},
    {'index': 15, 'name': 'Ensemble_MTL', 'func': MultiTaskModels.ensemble_mtl, 'outputs': ['direction', 'return']},
    {'index': 16, 'name': 'TabNet_MTL', 'func': MultiTaskModels.tabnet_mtl, 'outputs': ['direction', 'return'],'needs_val':True},
    {'index': 17, 'name': 'Informer_MTL', 'func': MultiTaskModels.informer_mtl, 'outputs': ['direction', 'return']},
    {'index': 18, 'name': 'NBEATS_MTL', 'func': MultiTaskModels.nbeats_mtl, 'outputs': ['direction', 'return']},
    {'index': 19, 'name': 'TFT_MTL', 'func': MultiTaskModels.tft_mtl, 'outputs': ['direction', 'return']},
    {'index': 20, 'name': 'Performer_MTL', 'func': MultiTaskModels.performer_mtl, 'outputs': ['direction', 'return']},
    {'index': 21, 'name': 'PatchTST_MTL', 'func': MultiTaskModels.patchtst_mtl, 'outputs': ['direction', 'return']},
    {'index': 22, 'name': 'Autoformer_MTL', 'func': MultiTaskModels.autoformer_mtl, 'outputs': ['direction', 'return']},
    {'index': 23, 'name': 'iTransformer_MTL', 'func': MultiTaskModels.itransformer_mtl, 'outputs': ['direction', 'return']},
    {'index': 24, 'name': 'EtherVoyant_MTL', 'func': MultiTaskModels.ethervoyant_mtl, 'outputs': ['direction', 'return']},
    {'index': 25, 'name': 'VMD_Hybrid_MTL', 'func': MultiTaskModels.vmd_hybrid_mtl, 'outputs': ['direction', 'return']},
    {'index': 26, 'name': 'SNAS_MTL', 'func': MultiTaskModels.snas_mtl, 'outputs': ['direction', 'return']},
    {'index': 27, 'name': 'DLinear_MTL', 'func': MultiTaskModels.dlinear_mtl, 'outputs': ['direction', 'return']},
    {'index': 28, 'name': 'FEDformer_MTL', 'func': MultiTaskModels.fedformer_mtl, 'outputs': ['direction', 'return']},
    {'index': 29, 'name': 'UniTS_MTL', 'func': MultiTaskModels.units_mtl, 'outputs': ['direction', 'return']},
    {'index': 30, 'name': 'MetaRL_Crypto_MTL', 'func': MultiTaskModels.metarl_crypto_mtl, 'outputs': ['direction', 'return']}
]

##############################   회귀 모델 ################################


# ML 모델 설정 (1-4번)
ML_MODELS_REGRESSION = [
    {'index': 1, 'name': 'RandomForest_Reg', 'func': RegressionModels.random_forest_reg, 'needs_val': False},
    {'index': 2, 'name': 'LightGBM_Reg', 'func': RegressionModels.lightgbm_reg, 'needs_val': True},
    {'index': 3, 'name': 'XGBoost_Reg', 'func': RegressionModels.xgboost_reg, 'needs_val': True},
    {'index': 4, 'name': 'SVR', 'func': RegressionModels.svr, 'needs_val': False}
]

# 딥러닝 모델 설정 (5-35번)
DL_MODELS_REGRESSION = [
    # 기본 RNN 모델 (5-8)
    {'index': 5, 'name': 'LSTM_Reg', 'func': RegressionModels.lstm_reg},
    {'index': 6, 'name': 'BiLSTM_Reg', 'func': RegressionModels.bilstm_reg},
    {'index': 7, 'name': 'GRU', 'func': RegressionModels.gru},
    {'index': 8, 'name': 'Stacked_LSTM_Reg', 'func': RegressionModels.stacked_lstm_reg},
    
    # CNN-RNN 하이브리드 (9-11)
    {'index': 9, 'name': 'CNN-LSTM', 'func': RegressionModels.cnn_lstm},
    {'index': 10, 'name': 'CNN-GRU', 'func': RegressionModels.cnn_gru},
    {'index': 11, 'name': 'CNN-BiLSTM', 'func': RegressionModels.cnn_bilstm},
    
    # 고급 시퀀스 모델 (12-14)
    {'index': 12, 'name': 'Seq2Seq', 'func': RegressionModels.seq2seq},
    {'index': 13, 'name': 'WaveNet', 'func': RegressionModels.wavenet},
    {'index': 14, 'name': 'TCN_Reg', 'func': RegressionModels.tcn_reg},
    
    # Transformer 기반 (15-25)
    {'index': 15, 'name': 'Transformer_Reg', 'func': RegressionModels.transformer_reg},
    {'index': 16, 'name': 'TabNet_Reg', 'func': RegressionModels.tabnet_reg,'needs_val':True},
    {'index': 17, 'name': 'Informer_Reg', 'func': RegressionModels.informer_reg},
    {'index': 18, 'name': 'NBEATS_Reg', 'func': RegressionModels.nbeats_reg},
    {'index': 19, 'name': 'TFT_Reg', 'func': RegressionModels.tft_reg},
    {'index': 20, 'name': 'Performer_Reg', 'func': RegressionModels.performer_reg},
    {'index': 21, 'name': 'PatchTST_Reg', 'func': RegressionModels.patchtst_reg},
    {'index': 22, 'name': 'Autoformer_Reg', 'func': RegressionModels.autoformer_reg},
    {'index': 23, 'name': 'iTransformer_Reg', 'func': RegressionModels.itransformer_reg},
    {'index': 24, 'name': 'EtherVoyant_Reg', 'func': RegressionModels.ethervoyant_reg},
    {'index': 25, 'name': 'VMD_Hybrid_Reg', 'func': RegressionModels.vmd_hybrid_reg},
    
    # 신규 추가 모델 (26-35)
    {'index': 26, 'name': 'DTW_LSTM_Reg', 'func': RegressionModels.dtw_lstm_reg},
    {'index': 27, 'name': 'Attention_LSTM_Reg', 'func': RegressionModels.attention_lstm_reg},
    {'index': 28, 'name': 'Dual_Attention_Reg', 'func': RegressionModels.dual_attention_reg},
    {'index': 29, 'name': 'Cross_Correlation_LSTM', 'func': RegressionModels.cross_correlation_lstm_reg},
    {'index': 30, 'name': 'Gradient_Optimized_LSTM', 'func': RegressionModels.gradient_optimized_lstm_reg},
    {'index': 31, 'name': 'Ensemble_Stacking_Reg', 'func': RegressionModels.ensemble_stacking_reg},
    {'index': 32, 'name': 'Ensemble_Voting_Reg', 'func': RegressionModels.ensemble_voting_reg},
    {'index': 33, 'name': 'LSTM_XGBoost_Hybrid', 'func': RegressionModels.lstm_xgboost_hybrid_reg},
    {'index': 34, 'name': 'Residual_LSTM_Reg', 'func': RegressionModels.residual_lstm_reg},
    {'index': 35, 'name': 'MultiScale_CNN_LSTM', 'func': RegressionModels.multiscale_cnn_lstm_reg}
]


In [11]:
def train_all_models(X_train, y_train, X_val, y_val, X_test, y_test,
                    test_returns, test_dates, evaluator, lookback=30,
                    ml_models=None, dl_models=None, task='classification'):
    print("\n" + "="*80)
    print(f"{task.capitalize()} 모델 학습 시작 (총 {len(ml_models) + len(dl_models)}개 모델)")
    print("="*80)
    trainer = ModelTrainer(evaluator, lookback)

    # ML 모델
    print(f"\n[Part 1/2] Machine Learning 모델 ({len(ml_models)}개)")
    print("-" * 80)
    ml_success_count = 0
    for model_config in ml_models:
        success = trainer.train_ml_model(
            model_config, X_train, y_train, X_val, y_val,
            X_test, y_test, test_returns, test_dates, task=task
        )
        if success:
            ml_success_count += 1
    print(f"\n✓ ML 모델 완료: {ml_success_count}/{len(ml_models)}개 성공")

    # DL 모델
    print(f"\n[Part 2/2] Deep Learning/시계열 모델 ({len(dl_models)}개)")
    print("-" * 80)
    print(f"\n시퀀스 데이터 생성 중 (lookback={lookback})...")
    trainer = ModelTrainer(evaluator, lookback)
    X_train_seq, y_train_seq = trainer.create_sequences(X_train, y_train, lookback)
    X_val_seq, y_val_seq = trainer.create_sequences(X_val, y_val, lookback)
    X_test_seq, y_test_seq = trainer.create_sequences(X_test, y_test, lookback)
    test_returns_seq = test_returns[lookback:]
    test_dates_seq = test_dates[lookback:]
    input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])
    print(f"  ✓ Train shape: {X_train_seq.shape}")
    print(f"  ✓ Val shape: {X_val_seq.shape}")
    print(f"  ✓ Test shape: {X_test_seq.shape}")
    print(f"  ✓ Input shape: {input_shape}\n")
    dl_success_count = 0
    for model_config in dl_models:
        if model_config['name'] in ['TabNet', 'TabNet_Reg', 'Ensemble_Stacking', 'Ensemble_Voting']:
            success = trainer.train_ml_model(
                model_config, X_train, y_train, X_val, y_val,
                X_test, y_test, test_returns, test_dates, task=task
            )
        else:
            if 'outputs' in model_config and len(model_config['outputs']) > 1:
                y_train_list = [y_train_seq[:, i] for i in range(y_train_seq.shape[1])]
                y_val_list = [y_val_seq[:, i] for i in range(y_val_seq.shape[1])]
                y_test_list = [y_test_seq[:, i] for i in range(y_test_seq.shape[1])]
                success = trainer.train_dl_multitask_model(
                    model_config, X_train_seq, y_train_list, X_val_seq, y_val_list,
                    X_test_seq, y_test_list, test_returns_seq, test_dates_seq, input_shape
                )
            else:
                success = trainer.train_dl_model(
                    model_config, X_train_seq, y_train_seq, X_val_seq, y_val_seq,
                    X_test_seq, y_test_seq, test_returns_seq, test_dates_seq, input_shape, task=task
                )
        if success:
            dl_success_count += 1
    print(f"\n✓ DL 모델 완료: {dl_success_count}/{len(dl_models)}개 성공")
    total_success = ml_success_count + dl_success_count
    total_models = len(ml_models) + len(dl_models)
    print("\n" + "="*80)
    print(f"전체 학습 완료: {total_success}/{total_models}개 모델 성공")
    print("="*80)
    return total_success

def train_models_for_fold(fold_idx, X_train, y_train, X_val, y_val,
                          X_test, y_test, test_returns, test_dates,
                          evaluator, all_fold_results, lookback=30,
                          ml_models=None, dl_models=None, task='classification'):
    print(f"\n{'='*80}")
    print(f"Fold {fold_idx + 1} - {task.capitalize()} 모델 학습")
    print(f"{'='*80}")
    success_count = train_all_models(
        X_train, y_train, X_val, y_val, X_test, y_test,
        test_returns, test_dates, evaluator, lookback,
        ml_models=ml_models, dl_models=dl_models, task=task
    )
    fold_summary = evaluator.get_summary_dataframe()
    fold_summary['Fold'] = fold_idx + 1
    all_fold_results.append(fold_summary)
    print(f"\n✓ Fold {fold_idx + 1} 완료 ({success_count}개 모델)")
    return fold_summary


class ModelTrainer:
    """모델 학습 및 평가를 위한 통합 클래스 (분류/회귀 공통)"""
    def __init__(self, evaluator, lookback=30):
        self.evaluator = evaluator
        self.lookback = lookback

    @staticmethod
    def create_sequences(X, y, lookback):
        Xs, ys = [], []
        for i in range(lookback, len(X)):
            Xs.append(X[i-lookback:i])
            # DataFrame이면 .iloc, array면 직접 인덱싱
            ys.append(y.iloc[i] if hasattr(y, 'iloc') else y[i])
        return np.array(Xs), np.array(ys)

    def train_ml_model(self, model_config, X_train, y_train, X_val, y_val,
                       X_test, y_test, test_returns, test_dates, task='classification'):
        try:
            print(f"  [{model_config['index']}] {model_config['name']}...")
            if model_config.get('needs_val', False):
                model = model_config['func'](X_train, y_train, X_val, y_val)
            else:
                model = model_config['func'](X_train, y_train)
            if task == 'classification':
                self.evaluator.evaluate_classification_model(
                    model, X_train, y_train, X_val, y_val, X_test, y_test,
                    test_returns, test_dates, model_config['name']
                )
            else:
                self.evaluator.evaluate_regression_model(
                    model, X_train, y_train, X_val, y_val, X_test, y_test,
                    test_returns, test_dates, model_config['name']
                )
            return True
        except Exception as e:
            import traceback
            print(f"    ⚠ {model_config['name']} 스킵: {type(e).__name__}: {str(e)}")
            print(f"    상세: {traceback.format_exc()}")
            return False

    def train_dl_model(self, model_config, X_train_seq, y_train_seq,
                       X_val_seq, y_val_seq, X_test_seq, y_test_seq,
                       test_returns_seq, test_dates_seq, input_shape, task='classification'):
        try:
            print(f"  [{model_config['index']}] {model_config['name']}...")
            model = model_config['func'](
                X_train_seq, y_train_seq, X_val_seq, y_val_seq, input_shape
            )
            if task == 'classification':
                self.evaluator.evaluate_classification_model(
                    model, X_train_seq, y_train_seq, X_val_seq, y_val_seq,
                    X_test_seq, y_test_seq, test_returns_seq, test_dates_seq,
                    model_config['name'], is_deep_learning=True
                )
            else:
                self.evaluator.evaluate_regression_model(
                    model, X_train_seq, y_train_seq, X_val_seq, y_val_seq,
                    X_test_seq, y_test_seq, test_returns_seq, test_dates_seq,
                    model_config['name'], is_deep_learning=True
                )
            return True
        except Exception as e:
            import traceback
            print(f"    ⚠ {model_config['name']} 스킵: {type(e).__name__}: {str(e)}")
            print(f"    상세: {traceback.format_exc()}")
            return False
        
    def train_dl_multitask_model(self, model_config, X_train_seq, y_train_list,
                                 X_val_seq, y_val_list, X_test_seq, y_test_list,
                                 test_returns_seq, test_dates_seq, input_shape):
        try:
            print(f"  [{model_config['index']}] {model_config['name']} (멀티태스크)...")
            # unpack y_train_list 등은 [y1, y2] 형태
            model = model_config['func'](
                X_train_seq, *y_train_list, X_val_seq, *y_val_list, input_shape
            )
            # 평가: outputs에 따라 자동 분기
            self.evaluator.evaluate_multitask_model(
                model, X_test_seq, y_test_list, test_returns_seq, test_dates_seq, model_config['name']
            )
            return True
        except Exception as e:
            import traceback
            print(f"    ⚠ {model_config['name']} 스킵: {type(e).__name__}: {str(e)}")
            print(f"    상세: {traceback.format_exc()}")
            return False
        

In [12]:
target_cases = [
    {'name': 'direction', 'target_type': 'direction', 'outputs': ['next_direction']}
#     {'name': 'return', 'target_type': 'return', 'outputs': ['next_log_return']},
#     {'name': 'direction_return', 'target_type': 'direction_return', 'outputs': ['next_direction', 'next_log_return']},  
#     {'name': 'price', 'target_type': 'price', 'outputs': ['next_close']},
#     {'name': 'direction_price', 'target_type': 'direction_price', 'outputs': ['next_direction', 'next_close']}
]

split_methods = [
    {'name': 'walk_forward', 'method': 'walk_forward'},
    {'name': 'tvt', 'method': 'tvt'}
]


RESULT_DIR = "model_results"
os.makedirs(RESULT_DIR, exist_ok=True)

def save_walk_forward_results(all_fold_results, target_name, task):
    
    detailed_results = []
    for fold_idx, fold_df in enumerate(all_fold_results, start=1):
        fold_df_copy = fold_df.copy()
        fold_df_copy.insert(0, 'Fold', fold_idx)
        detailed_results.append(fold_df_copy)
    
    detailed_df = pd.concat(detailed_results, ignore_index=True)
    
    if 'Test_Accuracy' in detailed_df.columns:
        detailed_df = detailed_df.sort_values(
            by=['Fold', 'Test_Accuracy'], 
            ascending=[True, False]
        ).reset_index(drop=True)
    
    detailed_path = os.path.join(RESULT_DIR, f"{target_name}_walk_forward_detailed.csv")
    detailed_df.to_csv(detailed_path, index=False, encoding='utf-8-sig')
    print(f"Saved: {detailed_path}")
    
    numeric_cols = detailed_df.select_dtypes(include=[np.number]).columns
    numeric_cols = [col for col in numeric_cols if col != 'Fold']
    
    avg_results = []
    for model in detailed_df['Model'].unique():
        model_data = detailed_df[detailed_df['Model'] == model]
        avg_row = {'Model': model}
        
        for col in numeric_cols:
            if col in model_data.columns:
                avg_row[col] = model_data[col].mean()
                avg_row[f'{col}_Std'] = model_data[col].std()
        
        avg_results.append(avg_row)
    
    avg_df = pd.DataFrame(avg_results)
    
    # Average: Test_Accuracy 기준 정렬
    if 'Test_Accuracy' in avg_df.columns:
        avg_df = avg_df.sort_values(by='Test_Accuracy', ascending=False).reset_index(drop=True)
    
    avg_path = os.path.join(RESULT_DIR, f"{target_name}_walk_forward.csv")
    avg_df.to_csv(avg_path, index=False, encoding='utf-8-sig')
    print(f"Saved: {avg_path}")
    
    return detailed_df, avg_df


def save_summary_csv(summary_df, target_name, split_name, task):
    
    if task == 'classification':
        metric_cols = ['Model', 'Train_Accuracy', 'Val_Accuracy', 'Test_Accuracy', 
                       'Test_Precision', 'Test_Recall', 'Test_F1', 'Test_AUC_ROC']
        backtest_cols = ['Model', 'Total_Return(%)', 'Sharpe', 'Sortino', 'Calmar',
                         'Max_Drawdown(%)', 'Win_Rate(%)', 'Total_Trades', 'Profit_Factor']
        if 'Confident_Return(%)' in summary_df.columns:
            backtest_cols += ['Confident_Return(%)', 'Confident_Sharpe', 'Confident_Trades']
            
    elif task == 'regression':
        metric_cols = ['Model', 'Train_RMSE', 'Val_RMSE', 'Test_RMSE', 
                       'Train_MAE', 'Val_MAE', 'Test_MAE', 'Test_R2', 'Test_MAPE', 'Direction_Accuracy']
        backtest_cols = ['Model', 'Directional_Return(%)', 'Directional_Sharpe',
                         'Total_Return(%)', 'Sharpe', 'Sortino', 'Calmar',
                         'Max_Drawdown(%)', 'Win_Rate(%)', 'Total_Trades', 'Profit_Factor',
                         'VolScaled_Return(%)', 'VolScaled_Sharpe']
                         
    elif task == 'multitask':
        metric_cols = ['Model', 'Train_Accuracy', 'Val_Accuracy', 'Test_Accuracy', 'Test_Precision', 
                       'Test_Recall', 'Test_F1', 'Train_RMSE', 'Val_RMSE', 'Test_RMSE', 
                       'Test_MAE', 'Test_R2', 'Direction_Accuracy']
        backtest_cols = ['Model', 'Total_Return(%)', 'Sharpe', 'Sortino', 'Calmar',
                         'Max_Drawdown(%)', 'Win_Rate(%)', 'Total_Trades', 'Profit_Factor']
        if 'Directional_Return(%)' in summary_df.columns:
            backtest_cols += ['Directional_Return(%)', 'Directional_Sharpe']
        if 'VolScaled_Return(%)' in summary_df.columns:
            backtest_cols += ['VolScaled_Return(%)', 'VolScaled_Sharpe']
        if 'Confident_Return(%)' in summary_df.columns:
            backtest_cols += ['Confident_Return(%)', 'Confident_Sharpe', 'Confident_Trades']
    
    available_cols = [col for col in metric_cols + backtest_cols if col in summary_df.columns]
    save_df = summary_df[["Model"] + [col for col in available_cols if col != "Model"]]
    
    # Test_Accuracy 기준 정렬 (분류/멀티태스크) 또는 Test_RMSE 기준 정렬 (회귀)
    if 'Test_Accuracy' in save_df.columns:
        save_df = save_df.sort_values(by='Test_Accuracy', ascending=False).reset_index(drop=True)
    elif 'Test_RMSE' in save_df.columns:
        save_df = save_df.sort_values(by='Test_RMSE', ascending=True).reset_index(drop=True)
    
    filename = f"{target_name}_{split_name}.csv"
    file_path = os.path.join(RESULT_DIR, filename)
    save_df.to_csv(file_path, index=False, encoding='utf-8-sig')
    print(f"Saved: {file_path}")


In [None]:
# ============================================================================
# Import Libraries
# ============================================================================

import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
    mean_squared_error, mean_absolute_error, r2_score
)

# ============================================================================
# Scikit-learn ML 모델
# ============================================================================
from sklearn.ensemble import (
    RandomForestClassifier, RandomForestRegressor,
    AdaBoostClassifier,
    ExtraTreesClassifier, ExtraTreesRegressor,
    BaggingClassifier, BaggingRegressor,
    GradientBoostingClassifier, GradientBoostingRegressor,
    StackingClassifier, StackingRegressor,
    VotingClassifier, VotingRegressor
)
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.svm import SVC, SVR
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

# Boosting Libraries
from lightgbm import LGBMClassifier, LGBMRegressor
from xgboost import XGBClassifier, XGBRegressor
from catboost import CatBoostClassifier, CatBoostRegressor

try:
    from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
    TABNET_AVAILABLE = True
except ImportError:
    TABNET_AVAILABLE = False
    print("Warning: pytorch-tabnet not installed. TabNet models will be skipped.")

# ============================================================================
# TensorFlow/Keras 딥러닝 레이어
# ============================================================================
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
    Input, Dense, LSTM, GRU, Bidirectional, SimpleRNN,
    Conv1D, MaxPooling1D, GlobalAveragePooling1D, GlobalMaxPooling1D,
    Dropout, BatchNormalization, LayerNormalization,
    Flatten, Concatenate, Add, Multiply,
    MultiHeadAttention, Attention,
    AveragePooling1D
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# ============================================================================
# PyTorch 
# ============================================================================
try:
    import torch
    import torch.nn as nn
    PYTORCH_AVAILABLE = True
except ImportError:
    PYTORCH_AVAILABLE = False
    print("Warning: PyTorch not installed. Some models may not work.")

all_results = {}

for target_case in target_cases:
    for split_method in split_methods:
        print(f"\n{'='*80}")
        print(f"Experiment: {target_case['name']} x {split_method['name']}")
        print(f"{'='*80}")
        
        result = build_complete_pipeline_corrected(
            df_merged, train_start_date,
            method=split_method['method'],
            target_type=target_case['target_type'],
        )
        
        if split_method['method'] == 'tvt':
            X_train = result['train']['X_robust']
            X_val = result['val']['X_robust']
            X_test = result['test']['X_robust']
            test_returns = result['test']['y']['next_log_return'].values  
            test_dates = result['test']['dates'].values 
            
            if len(target_case['outputs']) == 1:
                y_train = result['train']['y'][target_case['outputs'][0]].values
                y_val = result['val']['y'][target_case['outputs'][0]].values
                y_test = result['test']['y'][target_case['outputs'][0]].values
                ml_models = ML_MODELS_REGRESSION if target_case['target_type'] in ['return', 'price'] else ML_MODELS_CLASSIFICATION
                dl_models = DL_MODELS_REGRESSION if target_case['target_type'] in ['return', 'price'] else DL_MODELS_CLASSIFICATION
                task = 'regression' if target_case['target_type'] in ['return', 'price'] else 'classification'
            else:
                y_train = result['train']['y'][target_case['outputs']].values
                y_val = result['val']['y'][target_case['outputs']].values
                y_test = result['test']['y'][target_case['outputs']].values
                ml_models = []
                dl_models = DL_MODELS_MULTITASK_ENSEMBLE
                task = 'multitask'
            
            evaluator = ModelEvaluator()
            train_all_models(
                X_train, y_train, X_val, y_val, X_test, y_test,
                test_returns, test_dates, evaluator,
                ml_models=ml_models, dl_models=dl_models, task=task
            )
            summary_df = evaluator.get_summary_dataframe()
            all_results[f"{target_case['name']}_{split_method['name']}"] = summary_df
            save_summary_csv(summary_df, target_case['name'], split_method['name'], task)
            
        else:
            fold_results = []
            
            for fold_idx, fold in enumerate(result, start=1):
                print(f"\n  Processing Fold {fold_idx}/{len(result)}")
                
                X_train = fold['train']['X_robust']
                X_val = fold['val']['X_robust']
                X_test = fold['test']['X_robust']
                test_returns = fold['test']['y']['next_log_return'].values  
                test_dates = fold['test']['dates'].values  
                
                if len(target_case['outputs']) == 1:
                    y_train = fold['train']['y'][target_case['outputs'][0]].values  
                    y_val = fold['val']['y'][target_case['outputs'][0]].values
                    y_test = fold['test']['y'][target_case['outputs'][0]].values
                    ml_models = ML_MODELS_REGRESSION if target_case['target_type'] in ['return', 'price'] else ML_MODELS_CLASSIFICATION
                    dl_models = DL_MODELS_REGRESSION if target_case['target_type'] in ['return', 'price'] else DL_MODELS_CLASSIFICATION
                    task = 'regression' if target_case['target_type'] in ['return', 'price'] else 'classification'
                else:
                    y_train = fold['train']['y'][target_case['outputs']].values
                    y_val = fold['val']['y'][target_case['outputs']].values
                    y_test = fold['test']['y'][target_case['outputs']].values
                    ml_models = []
                    dl_models = DL_MODELS_MULTITASK_ENSEMBLE
                    task = 'multitask'
                
                evaluator = ModelEvaluator()
                train_all_models(
                    X_train, y_train, X_val, y_val, X_test, y_test,
                    test_returns, test_dates, evaluator,
                    ml_models=ml_models, dl_models=dl_models, task=task
                )
                fold_summary = evaluator.get_summary_dataframe()
                fold_results.append(fold_summary)
                print(f"  Fold {fold_idx} completed")
            
            print(f"\n  Aggregating {len(fold_results)} folds...")
            detailed_df, avg_df = save_walk_forward_results(
                fold_results, target_case['name'], task
            )
            all_results[f"{target_case['name']}_{split_method['name']}"] = avg_df




Experiment: direction x walk_forward
Auto-calculated n_splits: 18 (from 1752 days)

Walk-Forward Configuration
Total data: 1752 days
Train=600d, Val=60d, Test=60d, Step=60d
Lookback=30d, Val sequences: 30
Target folds: 18

Fold  1:
  Train:  600d  (2020-12-19 ~ 2022-08-10)
  Val:     60d  (2022-08-11 ~ 2022-10-09)
  Test:    60d  (2022-10-10 ~ 2022-12-08)
Fold  2:
  Train:  660d  (2020-12-19 ~ 2022-10-09)
  Val:     60d  (2022-10-10 ~ 2022-12-08)
  Test:    60d  (2022-12-09 ~ 2023-02-06)
Fold  3:
  Train:  720d  (2020-12-19 ~ 2022-12-08)
  Val:     60d  (2022-12-09 ~ 2023-02-06)
  Test:    60d  (2023-02-07 ~ 2023-04-07)
Fold  4:
  Train:  780d  (2020-12-19 ~ 2023-02-06)
  Val:     60d  (2023-02-07 ~ 2023-04-07)
  Test:    60d  (2023-04-08 ~ 2023-06-06)
Fold  5:
  Train:  840d  (2020-12-19 ~ 2023-04-07)
  Val:     60d  (2023-04-08 ~ 2023-06-06)
  Test:    60d  (2023-06-07 ~ 2023-08-05)
Fold  6:
  Train:  900d  (2020-12-19 ~ 2023-06-06)
  Val:     60d  (2023-06-07 ~ 2023-08-05)
  Test: 

선택된 지표들
DPO_20, eth_btc_corr_3d, btc_return_lag5, high_lag5_ratio, eth_btc_corr_7d, MACDH_12_26_9, volume_lag5, sentiment_acceleration, eth_avg_gas_price, GAP, btc_return_lag1, btc_dominance, BB_Sentiment_Consensus, Acceleration_Momentum, bnb_return, sol_return, doge_volume_ratio_20d, eth_btc_volume_ratio, eth_large_eth_transfers_lag1, price_percentile_250d, HIGH_CLOSE_RANGE, EMA_CROSS_SIGNAL, btc_intraday_range, bull_bear_ratio, RSI_percentile_60d, eth_avg_block_difficulty, bull_bear_ratio_lag1, PRICE_VS_SMA10, sentiment_sum, eth_intraday_range, HIGH_LOW_RANGE, DISTANCE_FROM_LOW, VTXM_14, eth_btc_spread, eth_btc_volcorr_30d, vol_regime_duration, funding_fundingRate_lag1, BTC_Weighted_Impact, VOLUME_CHANGE, CCI_14
선택된 지표들
DPO_20, VOLUME_CHANGE, eth_btc_corr_3d, btc_return_lag5, HIGH_CLOSE_RANGE, eth_btc_corr_7d, CCI_14, HIGH_LOW_RANGE, vol_trend, close_lag2_logret, xrp_volume_change, eth_active_addresses, eth_contract_events, btc_return_lag1, btc_return_lag2, btc_dominance, eth_btc_bet

2025-10-20 23:56:24.280949: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


    ⚠ MLP 스킵: ValueError: Classification metrics can't handle a mix of binary and continuous targets
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 111, in train_ml_model
    self.evaluator.evaluate_classification_model(
  File "/tmp/ipykernel_1210809/1057377665.py", line 42, in evaluate_classification_model
    train_acc = accuracy_score(y_train, train_pred)
  File "/raid/invigoworks/anaconda3/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 218, in wrapper
    return func(*args, **kwargs)
  File "/raid/invigoworks/anaconda3/lib/python3.10/site-packages/sklearn/metrics/_classification.py", line 359, in accuracy_score
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
  File "/raid/invigoworks/anaconda3/lib/python3.10/site-packages/sklearn/metrics/_classification.py", line 106, in _check_targets
    raise ValueError(
ValueError: Classification metrics can't handle a mix of binary and continuous targets


✓ ML

W0000 00:00:1760972634.976916 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760972636.518351 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ



W0000 00:00:1760972695.710796 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 1 completed

  Processing Fold 2/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
----------------------------------------------

W0000 00:00:1760974050.026256 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760974051.597230 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ



W0000 00:00:1760974104.844309 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 2 completed

  Processing Fold 3/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
----------------------------------------------

W0000 00:00:1760975657.958003 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760975659.615596 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ



W0000 00:00:1760975740.574761 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 3 completed

  Processing Fold 4/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
----------------------------------------------

W0000 00:00:1760977343.455063 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760977345.158222 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ



W0000 00:00:1760977417.348062 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 4 completed

  Processing Fold 5/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
----------------------------------------------

W0000 00:00:1760979124.742638 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760979126.724358 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 6/26 [=====>........................] - ETA: 0s 

W0000 00:00:1760979208.548971 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 5 completed

  Processing Fold 6/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
----------------------------------------------

W0000 00:00:1760981269.989101 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760981271.927106 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 6/28 [=====>........................] - ETA: 0s 

W0000 00:00:1760981342.258585 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 6 completed

  Processing Fold 7/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
----------------------------------------------

W0000 00:00:1760983437.679643 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760983439.744354 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 5/30 [====>.........................] - ETA: 0s 

W0000 00:00:1760983543.281684 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 7 completed

  Processing Fold 8/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
----------------------------------------------

W0000 00:00:1760985827.892197 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760985830.055616 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 5/31 [===>..........................] - ETA: 0s 

W0000 00:00:1760985910.567429 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 8 completed

  Processing Fold 9/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
----------------------------------------------

W0000 00:00:1760988167.943317 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760988170.298119 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 5/33 [===>..........................] - ETA: 0s 

W0000 00:00:1760988310.697573 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 9 completed

  Processing Fold 10/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
---------------------------------------------

W0000 00:00:1760990809.579981 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760990811.786303 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 6/35 [====>.........................] - ETA: 0s 

W0000 00:00:1760990925.165851 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 10 completed

  Processing Fold 11/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
--------------------------------------------

W0000 00:00:1760993692.228298 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760993694.548174 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 5/37 [===>..........................] - ETA: 0s 

W0000 00:00:1760993798.170229 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 11 completed

  Processing Fold 12/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
--------------------------------------------

W0000 00:00:1760996271.080364 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1760996273.691145 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 5/39 [==>...........................] - ETA: 0s 

W0000 00:00:1760996413.813977 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 15 completed

  Processing Fold 16/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
--------------------------------------------------------------------------------
  [1] RandomForest...
  [2] LightGBM...
  [3] XGBoost...
  [4] SVM...
  [5] LogisticRegression...
  [6] NaiveBayes...
  [7] KNN...
  [8] AdaBoost...
  [9] CatBoost...
  [10] DecisionTree...
  [11] ExtraTrees...
  [12] Bagging...
  [13] GradientBoosting...
  [14] TabNet...

Early stopping occurred at epoch 29 with best_epoch = 9 and best_val_0_auc = 0.66667
  [15] StackingEnsemble...
  [16] VotingHard...
  [17] VotingSoft...
  [18] MLP...
    ⚠ MLP 스킵: ValueError: Classification metrics can't handle a mix of binary and continuous targets
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", 

W0000 00:00:1761008763.151814 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1761008766.120099 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 5/46 [==>...........................] - ETA: 0s 

W0000 00:00:1761008918.100150 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 16 completed

  Processing Fold 17/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
--------------------------------------------

W0000 00:00:1761012366.423174 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1761012369.263054 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 5/48 [==>...........................] - ETA: 0s 

W0000 00:00:1761012514.338051 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 17 completed

  Processing Fold 18/18

Classification 모델 학습 시작 (총 45개 모델)

[Part 1/2] Machine Learning 모델 (18개)
--------------------------------------------

W0000 00:00:1761015951.122755 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1761015953.647871 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 6/50 [==>...........................] - ETA: 0s 

W0000 00:00:1761016068.839158 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
  Fold 18 completed

  Aggregating 18 folds...
Saved: model_results/direction_walk_forward_detailed.csv
Saved: model_results/direction_walk_forward.csv

Experiment

  [24] CNN_GRU...
  [25] CNN_BiLSTM...
  [26] LSTM_Attention...


W0000 00:00:1761019168.862825 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
W0000 00:00:1761019171.390841 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environ

 5/38 [==>...........................] - ETA: 0s 

W0000 00:00:1761019289.771805 1210809 op_level_cost_estimator.cc:699] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2100 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 31719424 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


  [27] Transformer...
  [28] TCN...
  [29] DTW_LSTM...
  [30] Informer...
  [31] NBEATS...
  [32] TFT...
  [33] Performer...
  [34] PatchTST...
  [35] Autoformer...
  [36] iTransformer...
  [37] EtherVoyant...
  [38] VMD_Hybrid...
    ⚠ VMD_Hybrid 스킵: NameError: name 'inputs' is not defined
    상세: Traceback (most recent call last):
  File "/tmp/ipykernel_1210809/987515945.py", line 132, in train_dl_model
    model = model_config['func'](
  File "/tmp/ipykernel_1210809/2720413276.py", line 780, in vmd_hybrid
    x = Conv1D(32, 1, padding='same', kernel_regularizer=l2(0.01))(inputs)
NameError: name 'inputs' is not defined

  [39] SimpleRNN...
  [40] EMD_LSTM...
  [41] Hybrid_LSTM_GRU...
  [42] Parallel_CNN...
  [43] LSTM_XGBoost_Hybrid...
  [44] Residual_LSTM...
  [45] WaveNet...

✓ DL 모델 완료: 26/27개 성공

전체 학습 완료: 43/45개 모델 성공
Saved: model_results/direction_tvt.csv

Experiment: return x walk_forward
Auto-calculated n_splits: 18 (from 1752 days)

Walk-Forward Configuration
Total data: 175

선택된 지표들
DPO_20, btc_eth_strength_ratio, ADOSC_3_10, eth_contract_events, btc_return_lag1, btc_return_20d, eth_large_eth_transfers, volume_lag5, return_lag2, return_lag3, CCI_14, PRICE_CHANGE_5, PRICE_VS_SMA10, CLOSE_LOW_RANGE, GAP, high_lag3_ratio, low_lag3_ratio, btc_return_10d, ada_volume_change, doge_return, eth_large_eth_transfers_lag1, bnb_return, ISA_9, aave_aave_eth_tvl_lag1, makerdao_makerdao_eth_tvl, sentiment_volatility_7, gold_GOLD_lag1, chain_eth_chain_tvl_lag1, ISB_26, KCL_20, news_volume_ma7, eth_close_position, INTRADAY_POSITION, month_cos, high_lag2, high_lag1_ratio, close_lag1, makerdao_makerdao_eth_tvl_lag1, extreme_positive_count_lag1, eth_btc_beta_90d
선택된 지표들
DPO_20, btc_eth_strength_ratio, volume_lag5, Acceleration_Momentum, btc_return_lag1, eth_contract_events, chain_eth_chain_tvl, btc_volatility_7d, ADOSC_3_10, close_lag5_ratio, btc_return_20d, eth_large_eth_transfers, return_lag2, return_lag3, PRICE_CHANGE_5, CLOSE_LOW_RANGE, GAP, btc_return_lag3, btc_dominance,

선택된 지표들
DPO_20, eth_large_eth_transfers, volume_lag5, sentiment_ma14, volume_lag1, return_lag3, return_lag5, ADOSC_3_10, PRICE_VS_SMA10, GAP, eth_btc_spread_ma7, close_lag2_logret, close_lag3_ratio, high_lag2_ratio, high_lag3_ratio, low_lag5_ratio, btc_return_10d, doge_return, eth_large_eth_transfers_lag1, VOLATILITY_10, lido_lido_eth_tvl_lag1, VOLATILITY_20, usdt_totalCirculatingUSD, BBM_50, EMA_50, usdt_totalCirculating, RV_20, BTC_Weighted_Impact, btc_volatility_30d, news_volume_ma7, KCL_20, Liquidity_Risk, eth_total_gas_used_lag1, ISB_26, volume_lag3, PRICE_CHANGE_2, PRICE_VS_SMA200, close_lag2_ratio, eth_contract_events, aave_aave_eth_tvl_lag1
선택된 지표들
DPO_20, eth_large_eth_transfers, low_lag7_ratio, dxy_DXY_lag1, volume_lag5, PRICE_VS_SMA20, return_lag3, return_lag5, ADOSC_3_10, PRICE_VS_SMA10, SLOPE_5, BB_WIDTH, GAP, btc_return_lag2, btc_return_lag3, Acceleration_Momentum, close_lag3_ratio, low_lag2_ratio, high_lag3_ratio, low_lag3_ratio, high_lag5_ratio, sol_return, eth_large_et

In [None]:
print(type(y_train),type(y_val),type(y_test),type(X_train),type(X_val),type(X_test))

In [None]:
y_train,y_val,y_test,X_train,X_val,X_test


In [None]:
print(summary_df)