In [12]:
import pandas as pd
import numpy as np
import os
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

data_dir = "../model_results/predictions/direction_walk_forward"
output_dir = "backtest_results_enhanced"

class EnhancedCryptoBacktester:
    def __init__(self, initial_capital=10000, trading_fee=0.001, slippage=0.0005, 
                 risk_free_rate=0.0, min_trades_threshold=30):
        self.initial_capital = float(initial_capital)
        self.trading_fee = float(trading_fee)
        self.slippage = float(slippage)
        self.risk_free_rate = float(risk_free_rate)
        self.min_trades = min_trades_threshold

    def calculate_kelly_fraction(self, trades_history):
        if len(trades_history) < 10:
            return 0.05
        profits = np.array([t['net_profit'] for t in trades_history])
        wins = profits[profits > 0]
        losses = profits[profits < 0]
        if len(wins) == 0 or len(losses) == 0:
            return 0.05
        win_rate = len(wins) / len(profits)
        avg_win = np.mean(wins)
        avg_loss = np.abs(np.mean(losses))
        if avg_loss == 0 or np.isnan(avg_loss):
            return 0.05
        win_loss_ratio = avg_win / avg_loss
        kelly = (win_rate * win_loss_ratio - (1 - win_rate)) / win_loss_ratio
        kelly_fraction = max(0.01, min(kelly * 0.5, 0.2))
        if np.isnan(kelly_fraction) or kelly_fraction <= 0:
            return 0.01
        return kelly_fraction

    def get_position_fraction(self, row, idx, df, trades, strategy_type, kelly_lookback, vol_lookback, mom_lookback):
        if strategy_type == 'aggressive':
            return 0.3
        elif strategy_type == 'neutral':
            return min(row['max_proba'], 1.0) * 0.15
        elif strategy_type == 'conservative':
            return 0.05
        elif strategy_type == 'kelly' and len(trades) >= 20:
            recent_trades = trades[-kelly_lookback:]
            return self.calculate_kelly_fraction(recent_trades)
        elif strategy_type == 'ultra_aggressive':
            return 0.8
        elif strategy_type == 'fixed_50':
            return 0.5
        elif strategy_type == 'fixed_01':
            return 0.01
        elif strategy_type == 'volatility_scaled':
            start_idx = max(0, idx - vol_lookback)
            recent_returns = df.iloc[start_idx:idx]['actual_return']
            vol = np.std(recent_returns) if len(recent_returns) > 1 else 0.01
            return min(0.3, max(0.01, 0.1 / (vol + 1e-6)))
        elif strategy_type == 'inverse_volatility':
            start_idx = max(0, idx - vol_lookback)
            recent_returns = df.iloc[start_idx:idx]['actual_return']
            vol = np.std(recent_returns) if len(recent_returns) > 1 else 0.01
            return min(0.3, max(0.01, 0.2 / (vol + 1e-6)))
        elif strategy_type == 'trend_following':
            start_idx = max(0, idx - mom_lookback)
            recent_returns = df.iloc[start_idx:idx]['actual_return']
            trend = np.sum(recent_returns)
            if trend > 0:
                return 0.3
            elif trend < 0:
                return 0.05
            else:
                return 0.1
        elif strategy_type == 'momentum':
            start_idx = max(0, idx - mom_lookback)
            recent_returns = df.iloc[start_idx:idx]['actual_return']
            mom = np.sum(recent_returns)
            return min(0.3, max(0.01, 0.1 + 0.2 * (mom / (mom_lookback * 0.01))))
        else:
            return 0.1

    def backtest_strategy(self, df, confidence_threshold=0.0, strategy_type='kelly', kelly_lookback=50, vol_lookback=20, mom_lookback=10):
        df = df.copy()
        df = df.sort_values('date').reset_index(drop=True)
        capital = self.initial_capital
        equity_curve = []
        trades = []
        daily_returns = []
        for idx, row in df.iterrows():
            prev_capital = capital
            if row['confidence'] < confidence_threshold or capital <= 0:
                equity_curve.append(capital)
                daily_returns.append(0.0)
                continue
            position_fraction = self.get_position_fraction(row, idx, df, trades, strategy_type, kelly_lookback, vol_lookback, mom_lookback)
            position_fraction = max(0.01, min(position_fraction, 0.9))
            position_size = capital * position_fraction
            if position_size <= 0:
                equity_curve.append(capital)
                daily_returns.append(0.0)
                continue
            predicted_direction = int(row['pred_direction'])
            actual_return = float(row['actual_return'])
            gross_return = actual_return if predicted_direction == 1 else -actual_return
            gross_profit = position_size * gross_return
            trading_cost = position_size * (self.trading_fee + self.slippage) * 2
            net_profit = gross_profit - trading_cost
            capital = max(0, capital + net_profit)
            daily_return = (capital - prev_capital) / prev_capital if prev_capital > 0 else 0
            equity_curve.append(capital)
            daily_returns.append(daily_return)
            trade = {
                'date': row['date'],
                'pred_direction': predicted_direction,
                'actual_return': actual_return,
                'position_fraction': position_fraction,
                'position_size': position_size,
                'gross_profit': gross_profit,
                'trading_cost': trading_cost,
                'net_profit': net_profit,
                'capital': capital,
                'correct': bool(row['correct']),
                'confidence': float(row['confidence']),
                'max_proba': float(row['max_proba'])
            }
            trades.append(trade)
            if capital <= 0:
                break
        return equity_curve, trades, daily_returns

    def calculate_statistical_significance(self, trades, daily_returns):
        if len(trades) < self.min_trades:
            return {
                'sufficient_trades': False,
                'num_trades': len(trades),
                't_statistic': np.nan,
                'p_value': np.nan,
                'is_significant': False,
                'is_very_significant': False,
                'confidence_95_lower': np.nan,
                'confidence_95_upper': np.nan,
                'warning': f'Insufficient trades: {len(trades)} < {self.min_trades}'
            }
        returns_array = np.array(daily_returns)
        returns_array = returns_array[returns_array != 0]
        if len(returns_array) < 2:
            return {
                'sufficient_trades': True,
                'num_trades': len(trades),
                't_statistic': np.nan,
                'p_value': np.nan,
                'is_significant': False,
                'is_very_significant': False,
                'confidence_95_lower': np.nan,
                'confidence_95_upper': np.nan,
                'warning': 'Insufficient return data'
            }
        t_stat, p_value = stats.ttest_1samp(returns_array, 0)
        mean_return = np.mean(returns_array)
        std_error = stats.sem(returns_array)
        confidence_interval = stats.t.interval(0.95, len(returns_array)-1, 
                                               loc=mean_return, scale=std_error)
        is_very_significant = abs(t_stat) > 3.0 and p_value < 0.05
        is_significant = abs(t_stat) > 2.0 and p_value < 0.05
        return {
            'sufficient_trades': True,
            'num_trades': len(trades),
            't_statistic': float(t_stat),
            'p_value': float(p_value),
            'is_significant': is_significant,
            'is_very_significant': is_very_significant,
            'confidence_95_lower': float(confidence_interval[0]),
            'confidence_95_upper': float(confidence_interval[1]),
            'warning': None
        }

    def calculate_performance_metrics(self, equity_curve, trades, daily_returns):
        if len(trades) == 0:
            return self._empty_metrics()
        final_capital = equity_curve[-1] if equity_curve else self.initial_capital
        total_return = final_capital - self.initial_capital
        total_return_pct = (total_return / self.initial_capital) * 100
        num_trades = len(trades)
        profits = np.array([t['net_profit'] for t in trades])
        winning_trades = profits[profits > 0]
        losing_trades = profits[profits < 0]
        num_wins = len(winning_trades)
        num_losses = len(losing_trades)
        win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0
        avg_profit_per_trade = np.mean(profits)
        total_wins = np.sum(winning_trades) if num_wins > 0 else 0
        total_losses = np.abs(np.sum(losing_trades)) if num_losses > 0 else 0
        profit_factor = (total_wins / total_losses) if total_losses > 0 else 0
        avg_win = np.mean(winning_trades) if num_wins > 0 else 0
        avg_loss = np.abs(np.mean(losing_trades)) if num_losses > 0 else 0
        if avg_loss > 0:
            expectancy = (win_rate/100 * avg_win) - ((100-win_rate)/100 * avg_loss)
        else:
            expectancy = avg_profit_per_trade
        equity_array = np.array(equity_curve)
        running_max = np.maximum.accumulate(equity_array)
        drawdown_array = equity_array - running_max
        drawdown_pct_array = (drawdown_array / running_max) * 100
        max_drawdown_abs = np.min(drawdown_array)
        max_drawdown_pct = np.abs(np.min(drawdown_pct_array))
        returns_array = np.array(daily_returns)
        returns_array = returns_array[returns_array != 0]
        if len(returns_array) > 1 and np.std(returns_array) > 0:
            mean_return = np.mean(returns_array)
            std_return = np.std(returns_array, ddof=1)
            annual_mean = mean_return * 252
            annual_std = std_return * np.sqrt(252)
            sharpe_ratio = (annual_mean - self.risk_free_rate) / annual_std
        else:
            sharpe_ratio = 0.0
        downside_returns = returns_array[returns_array < 0]
        if len(downside_returns) > 1:
            downside_std = np.std(downside_returns, ddof=1) * np.sqrt(252)
            sortino_ratio = (np.mean(returns_array) * 252 - self.risk_free_rate) / downside_std
        else:
            sortino_ratio = 0.0
        num_days = len(equity_curve)
        years = num_days / 252
        if years > 0 and final_capital > 0 and self.initial_capital > 0:
            annual_return_pct = (np.power(final_capital / self.initial_capital, 1/years) - 1) * 100
        else:
            annual_return_pct = 0.0
        calmar_ratio = (annual_return_pct / max_drawdown_pct) if max_drawdown_pct > 0 else 0
        recovery_factor = (total_return / abs(max_drawdown_abs)) if max_drawdown_abs < 0 else 0
        consecutive_wins = 0
        consecutive_losses = 0
        max_consecutive_wins = 0
        max_consecutive_losses = 0
        for profit in profits:
            if profit > 0:
                consecutive_wins += 1
                consecutive_losses = 0
                max_consecutive_wins = max(max_consecutive_wins, consecutive_wins)
            else:
                consecutive_losses += 1
                consecutive_wins = 0
                max_consecutive_losses = max(max_consecutive_losses, consecutive_losses)
        significance = self.calculate_statistical_significance(trades, daily_returns)
        return {
            'initial_capital': self.initial_capital,
            'final_capital': final_capital,
            'total_return': total_return,
            'total_return_pct': total_return_pct,
            'annual_return_pct': annual_return_pct,
            'num_trades': num_trades,
            'num_wins': num_wins,
            'num_losses': num_losses,
            'win_rate': win_rate,
            'avg_profit_per_trade': avg_profit_per_trade,
            'avg_win': avg_win,
            'avg_loss': avg_loss,
            'expectancy': expectancy,
            'profit_factor': profit_factor,
            'max_drawdown': max_drawdown_abs,
            'max_drawdown_pct': max_drawdown_pct,
            'sharpe_ratio': sharpe_ratio,
            'sortino_ratio': sortino_ratio,
            'calmar_ratio': calmar_ratio,
            'recovery_factor': recovery_factor,
            'max_consecutive_wins': max_consecutive_wins,
            'max_consecutive_losses': max_consecutive_losses,
            'num_days': num_days,
            'years': years,
            'sufficient_trades': significance['sufficient_trades'],
            't_statistic': significance['t_statistic'],
            'p_value': significance['p_value'],
            'is_significant': significance['is_significant'],
            'is_very_significant': significance.get('is_very_significant', False),
            'confidence_95_lower': significance['confidence_95_lower'],
            'confidence_95_upper': significance['confidence_95_upper'],
            'stat_warning': significance['warning']
        }

    def _empty_metrics(self):
        return {
            'initial_capital': self.initial_capital,
            'final_capital': self.initial_capital,
            'total_return': 0.0,
            'total_return_pct': 0.0,
            'annual_return_pct': 0.0,
            'num_trades': 0,
            'num_wins': 0,
            'num_losses': 0,
            'win_rate': 0.0,
            'avg_profit_per_trade': 0.0,
            'avg_win': 0.0,
            'avg_loss': 0.0,
            'expectancy': 0.0,
            'profit_factor': 0.0,
            'max_drawdown': 0.0,
            'max_drawdown_pct': 0.0,
            'sharpe_ratio': 0.0,
            'sortino_ratio': 0.0,
            'calmar_ratio': 0.0,
            'recovery_factor': 0.0,
            'max_consecutive_wins': 0,
            'max_consecutive_losses': 0,
            'num_days': 0,
            'years': 0.0,
            'sufficient_trades': False,
            't_statistic': np.nan,
            'p_value': np.nan,
            'is_significant': False,
            'is_very_significant': False,
            'confidence_95_lower': np.nan,
            'confidence_95_upper': np.nan,
            'stat_warning': 'No trades'
        }
def run_comprehensive_backtest(data_dir, output_dir='backtest_results_enhanced', 
                               confidence_thresholds=None, strategy_types=None):
    os.makedirs(output_dir, exist_ok=True)
    if not os.path.exists(data_dir):
        print(f"ERROR: Directory not found: {data_dir}")
        return None
    csv_files = [f for f in os.listdir(data_dir) if f.endswith('_all_folds.csv')]
    if len(csv_files) == 0:
        print(f"ERROR: No CSV files found in {data_dir}")
        return None
    
    
    if confidence_thresholds is None:
        confidence_thresholds = [0.0, 0.05, 0.1, 0.15, 0.2]
        
        
    if strategy_types is None:
        strategy_types = [
            'kelly', 'aggressive', 'neutral', 'conservative',
            'ultra_aggressive', 'fixed_50', 'fixed_01',
            'volatility_scaled', 'inverse_volatility',
            'trend_following', 'momentum'
        ]

    print(f"\nProcessing {len(csv_files)} models...")
    print(f"Confidence thresholds: {confidence_thresholds}")
    print(f"Strategy types: {strategy_types}")
    print(f"Initial capital: $10,000")
    print(f"Trading costs: 0.3% round-trip\n")
    all_results = []
    failed_models = []
    for csv_file in sorted(csv_files):
        model_name = csv_file.replace('_all_folds.csv', '')
        try:
            file_path = os.path.join(data_dir, csv_file)
            df = pd.read_csv(file_path)
            if not validate_dataframe(df, model_name):
                failed_models.append(model_name)
                continue
            df['date'] = pd.to_datetime(df['date'])
            df = df.sort_values('date').reset_index(drop=True)
            for strategy_type in strategy_types:
                for threshold in confidence_thresholds:
                    backtester = EnhancedCryptoBacktester(
                        initial_capital=10000,
                        trading_fee=0.001,
                        slippage=0.0005,
                        risk_free_rate=0.0,
                        min_trades_threshold=30
                    )
                    equity_curve, trades, daily_returns = backtester.backtest_strategy(
                        df, 
                        confidence_threshold=threshold,
                        strategy_type=strategy_type,
                        kelly_lookback=50
                    )
                    metrics = backtester.calculate_performance_metrics(
                        equity_curve, trades, daily_returns
                    )
                    result = {
                        'model': model_name,
                        'strategy_type': strategy_type,
                        'confidence_threshold': threshold,
                        'total_predictions': len(df),
                        'base_accuracy': df['correct'].mean() * 100,
                        'date_start': df['date'].min(),
                        'date_end': df['date'].max(),
                        **metrics
                    }
                    all_results.append(result)
        except Exception as e:
            print(f"{model_name} failed: {e}")
            failed_models.append(model_name)
            continue
    if failed_models:
        print(f"Failed models: {', '.join(failed_models)}\n")
    if not all_results:
        print("ERROR: No successful backtests")
        return None
    results_df = pd.DataFrame(all_results)
    results_file = os.path.join(output_dir, 'backtest_results.csv')
    results_df.to_csv(results_file, index=False)
    print(f"Results saved: {results_file}\n")
    return results_df

def print_summary_dashboard(results_df, output_dir='backtest_results_enhanced'):
    for strategy_type in results_df['strategy_type'].unique():
        print(f"\n{'='*100}")
        print(f"STRATEGY: {strategy_type.upper()}")
        print(f"{'='*100}\n")
        analysis_data = results_df[
            (results_df['confidence_threshold'] == 0.1) & 
            (results_df['strategy_type'] == strategy_type)
        ].copy()
        significant_models = analysis_data[
            (analysis_data['is_significant'] == True) & 
            (analysis_data['sufficient_trades'] == True)
        ].copy()
        print(f"Total Models: {len(analysis_data)} | Significant: {len(significant_models)}\n")
        excellent = significant_models[significant_models['total_return'] > 0].copy()
        excellent['grade'] = excellent.apply(lambda x: 
            'A+' if (x['total_return_pct'] > 50 and x['sharpe_ratio'] > 3.5) else
            'A' if (x['total_return_pct'] > 30 and x['sharpe_ratio'] > 2.5) else
            'B' if (x['total_return_pct'] > 15 and x['sharpe_ratio'] > 1.5) else 'C', axis=1)
        top_10 = excellent.nlargest(10, 'total_return')
        print(f"{'Rank':<6} {'Model':<22} {'Grade':<7} {'Profit($)':<13} {'Return%':<9} {'Annual%':<9} {'Sharpe':<8} {'MaxDD%':<8} {'WinRate':<9} {'PF':<6} {'Exp':<8} {'Trades':<8} {'t-stat':<8} {'Acc%':<7}")
        print(f"{'-'*140}")
        for i, (idx, row) in enumerate(top_10.iterrows(), 1):
            print(f"{i:<6} {row['model']:<22} {row['grade']:<7} "
                  f"${row['total_return']:>10,.0f}  "
                  f"{row['total_return_pct']:>7.1f}%  "
                  f"{row['annual_return_pct']:>7.1f}%  "
                  f"{row['sharpe_ratio']:>6.2f}  "
                  f"{row['max_drawdown_pct']:>6.1f}%  "
                  f"{row['win_rate']:>7.1f}%  "
                  f"{row['profit_factor']:>4.2f}  "
                  f"{row['expectancy']:>6.2f}  "
                  f"{int(row['num_trades']):>6}  "
                  f"{row['t_statistic']:>7.2f}  "
                  f"{row['base_accuracy']:>5.1f}%")
        print(f"\n{'='*100}")
    return results_df


def print_fold_summary(data_dir, model_name, strategy_type, output_dir='backtest_results_enhanced'):
    file_path = os.path.join(data_dir, f"{model_name}_all_folds.csv")
    if not os.path.exists(file_path):
        return None
    df = pd.read_csv(file_path)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date').reset_index(drop=True)
    if 'fold' not in df.columns:
        return None
    print(f"\n{'-'*80}")
    print(f"FOLD ANALYSIS: {model_name} | STRATEGY: {strategy_type.upper()}")
    print(f"{'-'*80}\n")
    fold_results = []
    for fold in sorted(df['fold'].unique()):
        fold_data = df[df['fold'] == fold].copy()
        backtester = EnhancedCryptoBacktester(initial_capital=10000, min_trades_threshold=30)
        equity_curve, trades, daily_returns = backtester.backtest_strategy(
            fold_data, confidence_threshold=0.1, strategy_type=strategy_type
        )
        metrics = backtester.calculate_performance_metrics(equity_curve, trades, daily_returns)
        status = 'VERY_SIG' if metrics['is_very_significant'] else 'SIG' if metrics['is_significant'] else 'NOT_SIG'
        print(f"Fold {fold} | {fold_data['date'].min().date()} to {fold_data['date'].max().date()}")
        print(f"  {len(fold_data)} days | {int(metrics['num_trades'])} trades | "
              f"${metrics['total_return']:>8,.0f} ({metrics['total_return_pct']:>5.1f}%) | "
              f"Sharpe {metrics['sharpe_ratio']:>5.2f} | Win {metrics['win_rate']:>5.1f}% | "
              f"{status} (t={metrics['t_statistic']:.2f})\n")
        fold_result = {
            'fold': fold,
            'date_start': fold_data['date'].min(),
            'date_end': fold_data['date'].max(),
            'num_days': len(fold_data),
            'accuracy': fold_data['correct'].mean() * 100,
            **metrics
        }
        fold_results.append(fold_result)
    if len(fold_results) > 1:
        returns = [m['annual_return_pct'] for m in fold_results]
        mean_return = np.mean(returns)
        std_return = np.std(returns)
        consistency = (1 / (1 + abs(std_return / mean_return) if mean_return != 0 else 1)) * 100
        print(f"{'-'*80}")
        print(f"Walk-Forward Efficiency: {consistency:.1f}% | Avg Annual Return: {mean_return:.1f}% (±{std_return:.1f}%)")
        print(f"{'-'*80}\n")
    return fold_results

results_df = run_comprehensive_backtest(data_dir, output_dir)
if results_df is not None:
    print_summary_dashboard(results_df, output_dir)
    for strategy_type in ['kelly', 'aggressive', 'neutral', 'conservative', 'ultra_aggressive', 'fixed_50', 'fixed_01', 'volatility_scaled', 'inverse_volatility', 'trend_following', 'momentum']:
        top_models = results_df[
            (results_df['confidence_threshold'] == 0.1) & 
            (results_df['strategy_type'] == strategy_type)
        ].nlargest(3, 'total_return')
        print(f"\n{'='*100}")
        print(f"TOP 3 MODELS FOLD-BY-FOLD ANALYSIS ({strategy_type.upper()})")
        print(f"{'='*100}")
        for idx, row in top_models.iterrows():
            print_fold_summary(data_dir, row['model'], strategy_type, output_dir)




Processing 26 models...
Confidence thresholds: [0.0, 0.05, 0.1, 0.15, 0.2]
Strategy types: ['kelly', 'aggressive', 'neutral', 'conservative', 'ultra_aggressive', 'fixed_50', 'fixed_01', 'volatility_scaled', 'inverse_volatility', 'trend_following', 'momentum']
Initial capital: $10,000
Trading costs: 0.3% round-trip

Results saved: backtest_results_enhanced/backtest_results.csv


STRATEGY: KELLY

Total Models: 26 | Significant: 20

Rank   Model                  Grade   Profit($)     Return%   Annual%   Sharpe   MaxDD%   WinRate   PF     Exp      Trades   t-stat   Acc%   
--------------------------------------------------------------------------------------------------------------------------------------------
1      AdaBoost               A+      $     8,743     87.4%     35.7%    4.77     2.3%     59.9%  2.32   24.15     362     5.72   62.9%
2      VotingSoft             A+      $     7,097     71.0%     29.8%    3.81     3.8%     57.1%  2.14   16.02     443     5.05   62.0%
3      Gra

Fold 5 | 2025-01-01 to 2025-10-05
  278 days | 256 trades | $   4,342 ( 43.4%) | Sharpe  4.09 | Win  59.4% | VERY_SIG (t=4.12)

--------------------------------------------------------------------------------
Walk-Forward Efficiency: 54.8% | Avg Annual Return: 23.0% (±18.9%)
--------------------------------------------------------------------------------


TOP 3 MODELS FOLD-BY-FOLD ANALYSIS (AGGRESSIVE)

--------------------------------------------------------------------------------
FOLD ANALYSIS: XGBoost | STRATEGY: AGGRESSIVE
--------------------------------------------------------------------------------

Fold 1 | 2022-10-10 to 2022-12-08
  60 days | 55 trades | $   1,225 ( 12.3%) | Sharpe  2.39 | Win  52.7% | NOT_SIG (t=1.12)

Fold 2 | 2023-06-07 to 2023-08-05
  60 days | 55 trades | $     616 (  6.2%) | Sharpe  2.96 | Win  56.4% | NOT_SIG (t=1.38)

Fold 3 | 2024-02-02 to 2024-04-01
  60 days | 58 trades | $   2,368 ( 23.7%) | Sharpe  6.14 | Win  56.9% | SIG (t=2.95)

Fold 4 | 202

Fold 5 | 2025-01-01 to 2025-10-05
  278 days | 246 trades | $  60,804 (608.0%) | Sharpe  4.29 | Win  58.5% | VERY_SIG (t=4.24)

--------------------------------------------------------------------------------
Walk-Forward Efficiency: 59.2% | Avg Annual Return: 414.5% (±286.0%)
--------------------------------------------------------------------------------


--------------------------------------------------------------------------------
FOLD ANALYSIS: VotingSoft | STRATEGY: ULTRA_AGGRESSIVE
--------------------------------------------------------------------------------

Fold 1 | 2022-10-10 to 2022-12-08
  60 days | 55 trades | $   2,885 ( 28.8%) | Sharpe  2.14 | Win  50.9% | NOT_SIG (t=1.00)

Fold 2 | 2023-06-07 to 2023-08-05
  60 days | 51 trades | $   1,989 ( 19.9%) | Sharpe  3.64 | Win  60.8% | NOT_SIG (t=1.64)

Fold 3 | 2024-02-02 to 2024-04-01
  60 days | 57 trades | $   6,751 ( 67.5%) | Sharpe  6.47 | Win  57.9% | VERY_SIG (t=3.08)

Fold 4 | 2024-09-29 to 2024-11-27
  60 days |

Fold 5 | 2025-01-01 to 2025-10-05
  278 days | 246 trades | $  11,447 (114.5%) | Sharpe  4.29 | Win  58.5% | VERY_SIG (t=4.24)

--------------------------------------------------------------------------------
Walk-Forward Efficiency: 68.1% | Avg Annual Return: 82.4% (±38.6%)
--------------------------------------------------------------------------------


--------------------------------------------------------------------------------
FOLD ANALYSIS: VotingSoft | STRATEGY: VOLATILITY_SCALED
--------------------------------------------------------------------------------

Fold 1 | 2022-10-10 to 2022-12-08
  60 days | 55 trades | $   1,101 ( 11.0%) | Sharpe  2.14 | Win  50.9% | NOT_SIG (t=1.00)

Fold 2 | 2023-06-07 to 2023-08-05
  60 days | 51 trades | $     721 (  7.2%) | Sharpe  3.64 | Win  60.8% | NOT_SIG (t=1.64)

Fold 3 | 2024-02-02 to 2024-04-01
  60 days | 57 trades | $   2,182 ( 21.8%) | Sharpe  6.47 | Win  57.9% | VERY_SIG (t=3.08)

Fold 4 | 2024-09-29 to 2024-11-27
  60 days | 

Fold 5 | 2025-01-01 to 2025-10-05
  278 days | 256 trades | $   5,685 ( 56.8%) | Sharpe  3.57 | Win  59.4% | VERY_SIG (t=3.60)

--------------------------------------------------------------------------------
Walk-Forward Efficiency: 60.9% | Avg Annual Return: 48.4% (±31.0%)
--------------------------------------------------------------------------------


TOP 3 MODELS FOLD-BY-FOLD ANALYSIS (MOMENTUM)

--------------------------------------------------------------------------------
FOLD ANALYSIS: AdaBoost | STRATEGY: MOMENTUM
--------------------------------------------------------------------------------

Fold 1 | 2022-10-10 to 2022-12-08
  60 days | 35 trades | $     759 (  7.6%) | Sharpe  4.10 | Win  57.1% | NOT_SIG (t=1.53)

Fold 2 | 2023-06-07 to 2023-08-05
  60 days | 5 trades | $      94 (  0.9%) | Sharpe 16.41 | Win 100.0% | NOT_SIG (t=nan)

Fold 3 | 2024-02-02 to 2024-04-01
  60 days | 56 trades | $   1,490 ( 14.9%) | Sharpe  6.37 | Win  57.1% | VERY_SIG (t=3.00)

Fold 4 | 202

In [13]:
import pandas as pd
import numpy as np
import os
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

data_dir = "../model_results/predictions/direction_walk_forward"
output_dir = "backtest_results_enhanced"

def validate_dataframe(df, model_name):
    required_cols = ['date', 'pred_direction', 'actual_return', 'correct', 'confidence', 'max_proba']
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        print(f"{model_name}: Missing columns {missing}")
        return False
    if len(df) < 10:
        print(f"{model_name}: Insufficient data ({len(df)} rows)")
        return False
    if df['actual_return'].isna().all() or df['pred_direction'].isna().all():
        print(f"{model_name}: Critical columns contain all NaN values")
        return False
    return True

class EnhancedCryptoBacktester:
    def __init__(self, initial_capital=10000, trading_fee=0.001, slippage=0.0005, 
                 risk_free_rate=0.0, min_trades_threshold=30):
        self.initial_capital = float(initial_capital)
        self.trading_fee = float(trading_fee)
        self.slippage = float(slippage)
        self.risk_free_rate = float(risk_free_rate)
        self.min_trades = min_trades_threshold
        self.total_cost_rate = (self.trading_fee + self.slippage) * 2

    def calculate_kelly_fraction(self, profits):
        if len(profits) < 10:
            return 0.05
        profits = np.array(profits)
        wins = profits[profits > 0]
        losses = profits[profits < 0]
        if len(wins) == 0 or len(losses) == 0 or np.mean(losses) == 0:
            return 0.05
        win_rate = len(wins) / len(profits)
        avg_win = np.mean(wins)
        avg_loss = np.abs(np.mean(losses))
        win_loss_ratio = avg_win / avg_loss
        kelly = (win_rate * win_loss_ratio - (1 - win_rate)) / win_loss_ratio
        kelly_fraction = np.clip(kelly * 0.5, 0.01, 0.2)
        return kelly_fraction if not np.isnan(kelly_fraction) else 0.01

    def calculate_position_fractions_vectorized(self, df, strategy_type, kelly_lookback=50, 
                                                vol_lookback=20, mom_lookback=10):
        n = len(df)
        fractions = np.zeros(n)
        
        if strategy_type == 'aggressive':
            fractions[:] = 0.3
        elif strategy_type == 'neutral':
            fractions = np.clip(df['max_proba'].values, 0, 1.0) * 0.15
        elif strategy_type == 'conservative':
            fractions[:] = 0.05
        elif strategy_type == 'ultra_aggressive':
            fractions[:] = 0.8
        elif strategy_type == 'fixed_50':
            fractions[:] = 0.5
        elif strategy_type == 'fixed_01':
            fractions[:] = 0.01
        elif strategy_type in ['volatility_scaled', 'inverse_volatility']:
            returns = df['actual_return'].values
            vol = pd.Series(returns).rolling(vol_lookback, min_periods=1).std().values
            vol = np.where(vol == 0, 0.01, vol)
            if strategy_type == 'volatility_scaled':
                fractions = np.clip(0.1 / (vol + 1e-6), 0.01, 0.3)
            else:
                fractions = np.clip(0.2 / (vol + 1e-6), 0.01, 0.3)
        elif strategy_type in ['trend_following', 'momentum']:
            returns = df['actual_return'].values
            rolling_sum = pd.Series(returns).rolling(mom_lookback, min_periods=1).sum().values
            if strategy_type == 'trend_following':
                fractions = np.where(rolling_sum > 0, 0.3, np.where(rolling_sum < 0, 0.05, 0.1))
            else:
                fractions = np.clip(0.1 + 0.2 * (rolling_sum / (mom_lookback * 0.01)), 0.01, 0.3)
        elif strategy_type == 'kelly':
            fractions[:] = 0.1
        else:
            fractions[:] = 0.1
            
        return np.clip(fractions, 0.01, 0.9)

    def backtest_strategy(self, df, confidence_threshold=0.0, strategy_type='kelly', 
                         kelly_lookback=50, vol_lookback=20, mom_lookback=10):
        df = df.copy().sort_values('date').reset_index(drop=True)
        n = len(df)
        
        valid_mask = (df['confidence'].values >= confidence_threshold)
        pred_direction = df['pred_direction'].values.astype(int)
        actual_return = df['actual_return'].values.astype(float)
        
        position_fractions = self.calculate_position_fractions_vectorized(
            df, strategy_type, kelly_lookback, vol_lookback, mom_lookback
        )
        
        capital = self.initial_capital
        equity_curve = np.zeros(n)
        daily_returns = np.zeros(n)
        trades = []
        recent_profits = []
        
        for i in range(n):
            if not valid_mask[i] or capital <= 0:
                equity_curve[i] = capital
                continue
            
            if strategy_type == 'kelly' and len(recent_profits) >= 20:
                position_fraction = self.calculate_kelly_fraction(recent_profits[-kelly_lookback:])
            else:
                position_fraction = position_fractions[i]
            
            position_size = capital * position_fraction
            direction = pred_direction[i]
            ret = actual_return[i]
            
            gross_return = ret if direction == 1 else -ret
            gross_profit = position_size * gross_return
            trading_cost = position_size * self.total_cost_rate
            net_profit = gross_profit - trading_cost
            
            prev_capital = capital
            capital = max(0, capital + net_profit)
            
            equity_curve[i] = capital
            daily_returns[i] = (capital - prev_capital) / prev_capital if prev_capital > 0 else 0
            
            recent_profits.append(net_profit)
            
            trades.append({
                'date': df.iloc[i]['date'],
                'pred_direction': direction,
                'actual_return': ret,
                'position_fraction': position_fraction,
                'position_size': position_size,
                'gross_profit': gross_profit,
                'trading_cost': trading_cost,
                'net_profit': net_profit,
                'capital': capital,
                'correct': bool(df.iloc[i]['correct']),
                'confidence': float(df.iloc[i]['confidence']),
                'max_proba': float(df.iloc[i]['max_proba'])
            })
            
            if capital <= 0:
                equity_curve[i+1:] = 0
                break
        
        return equity_curve, trades, daily_returns

    def calculate_statistical_significance(self, trades, daily_returns):
        if len(trades) < self.min_trades:
            return {
                'sufficient_trades': False,
                'num_trades': len(trades),
                't_statistic': np.nan,
                'p_value': np.nan,
                'is_significant': False,
                'is_very_significant': False,
                'confidence_95_lower': np.nan,
                'confidence_95_upper': np.nan,
                'warning': f'Insufficient trades: {len(trades)} < {self.min_trades}'
            }
        
        returns_array = np.array(daily_returns)
        returns_array = returns_array[returns_array != 0]
        
        if len(returns_array) < 2:
            return {
                'sufficient_trades': True,
                'num_trades': len(trades),
                't_statistic': np.nan,
                'p_value': np.nan,
                'is_significant': False,
                'is_very_significant': False,
                'confidence_95_lower': np.nan,
                'confidence_95_upper': np.nan,
                'warning': 'Insufficient return data'
            }
        
        t_stat, p_value = stats.ttest_1samp(returns_array, 0)
        mean_return = np.mean(returns_array)
        std_error = stats.sem(returns_array)
        confidence_interval = stats.t.interval(
            0.95, len(returns_array)-1, loc=mean_return, scale=std_error
        )
        
        is_very_significant = abs(t_stat) > 3.0 and p_value < 0.05
        is_significant = abs(t_stat) > 2.0 and p_value < 0.05
        
        return {
            'sufficient_trades': True,
            'num_trades': len(trades),
            't_statistic': float(t_stat),
            'p_value': float(p_value),
            'is_significant': is_significant,
            'is_very_significant': is_very_significant,
            'confidence_95_lower': float(confidence_interval[0]),
            'confidence_95_upper': float(confidence_interval[1]),
            'warning': None
        }

    def calculate_performance_metrics(self, equity_curve, trades, daily_returns):
        if len(trades) == 0:
            return self._empty_metrics()
        
        final_capital = equity_curve[-1] if len(equity_curve) > 0 else self.initial_capital
        total_return = final_capital - self.initial_capital
        total_return_pct = (total_return / self.initial_capital) * 100
        
        profits = np.array([t['net_profit'] for t in trades])
        winning_trades = profits[profits > 0]
        losing_trades = profits[profits < 0]
        
        num_trades = len(trades)
        num_wins = len(winning_trades)
        num_losses = len(losing_trades)
        win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0
        
        avg_profit_per_trade = np.mean(profits)
        total_wins = np.sum(winning_trades) if num_wins > 0 else 0
        total_losses = np.abs(np.sum(losing_trades)) if num_losses > 0 else 0
        profit_factor = (total_wins / total_losses) if total_losses > 0 else 0
        
        avg_win = np.mean(winning_trades) if num_wins > 0 else 0
        avg_loss = np.abs(np.mean(losing_trades)) if num_losses > 0 else 0
        
        if avg_loss > 0:
            expectancy = (win_rate/100 * avg_win) - ((100-win_rate)/100 * avg_loss)
        else:
            expectancy = avg_profit_per_trade
        
        equity_array = np.array(equity_curve)
        running_max = np.maximum.accumulate(equity_array)
        drawdown_array = equity_array - running_max
        drawdown_pct_array = np.divide(drawdown_array, running_max, 
                                       out=np.zeros_like(drawdown_array), 
                                       where=running_max!=0) * 100
        
        max_drawdown_abs = np.min(drawdown_array)
        max_drawdown_pct = np.abs(np.min(drawdown_pct_array))
        
        returns_array = np.array(daily_returns)
        returns_array = returns_array[returns_array != 0]
        
        if len(returns_array) > 1 and np.std(returns_array) > 0:
            mean_return = np.mean(returns_array)
            std_return = np.std(returns_array, ddof=1)
            annual_mean = mean_return * 252
            annual_std = std_return * np.sqrt(252)
            sharpe_ratio = (annual_mean - self.risk_free_rate) / annual_std if annual_std > 0 else 0
        else:
            sharpe_ratio = 0.0
        
        downside_returns = returns_array[returns_array < 0]
        if len(downside_returns) > 1:
            downside_std = np.std(downside_returns, ddof=1) * np.sqrt(252)
            sortino_ratio = ((np.mean(returns_array) * 252 - self.risk_free_rate) / downside_std) if downside_std > 0 else 0
        else:
            sortino_ratio = 0.0
        
        num_days = len(equity_curve)
        years = num_days / 252
        
        if years > 0 and final_capital > 0 and self.initial_capital > 0:
            annual_return_pct = (np.power(final_capital / self.initial_capital, 1/years) - 1) * 100
        else:
            annual_return_pct = 0.0
        
        calmar_ratio = (annual_return_pct / max_drawdown_pct) if max_drawdown_pct > 0 else 0
        recovery_factor = (total_return / abs(max_drawdown_abs)) if max_drawdown_abs < 0 else 0
        
        consecutive_wins = 0
        consecutive_losses = 0
        max_consecutive_wins = 0
        max_consecutive_losses = 0
        
        for profit in profits:
            if profit > 0:
                consecutive_wins += 1
                consecutive_losses = 0
                max_consecutive_wins = max(max_consecutive_wins, consecutive_wins)
            else:
                consecutive_losses += 1
                consecutive_wins = 0
                max_consecutive_losses = max(max_consecutive_losses, consecutive_losses)
        
        significance = self.calculate_statistical_significance(trades, daily_returns)
        
        return {
            'initial_capital': self.initial_capital,
            'final_capital': final_capital,
            'total_return': total_return,
            'total_return_pct': total_return_pct,
            'annual_return_pct': annual_return_pct,
            'num_trades': num_trades,
            'num_wins': num_wins,
            'num_losses': num_losses,
            'win_rate': win_rate,
            'avg_profit_per_trade': avg_profit_per_trade,
            'avg_win': avg_win,
            'avg_loss': avg_loss,
            'expectancy': expectancy,
            'profit_factor': profit_factor,
            'max_drawdown': max_drawdown_abs,
            'max_drawdown_pct': max_drawdown_pct,
            'sharpe_ratio': sharpe_ratio,
            'sortino_ratio': sortino_ratio,
            'calmar_ratio': calmar_ratio,
            'recovery_factor': recovery_factor,
            'max_consecutive_wins': max_consecutive_wins,
            'max_consecutive_losses': max_consecutive_losses,
            'num_days': num_days,
            'years': years,
            'sufficient_trades': significance['sufficient_trades'],
            't_statistic': significance['t_statistic'],
            'p_value': significance['p_value'],
            'is_significant': significance['is_significant'],
            'is_very_significant': significance.get('is_very_significant', False),
            'confidence_95_lower': significance['confidence_95_lower'],
            'confidence_95_upper': significance['confidence_95_upper'],
            'stat_warning': significance['warning']
        }

    def _empty_metrics(self):
        return {
            'initial_capital': self.initial_capital,
            'final_capital': self.initial_capital,
            'total_return': 0.0,
            'total_return_pct': 0.0,
            'annual_return_pct': 0.0,
            'num_trades': 0,
            'num_wins': 0,
            'num_losses': 0,
            'win_rate': 0.0,
            'avg_profit_per_trade': 0.0,
            'avg_win': 0.0,
            'avg_loss': 0.0,
            'expectancy': 0.0,
            'profit_factor': 0.0,
            'max_drawdown': 0.0,
            'max_drawdown_pct': 0.0,
            'sharpe_ratio': 0.0,
            'sortino_ratio': 0.0,
            'calmar_ratio': 0.0,
            'recovery_factor': 0.0,
            'max_consecutive_wins': 0,
            'max_consecutive_losses': 0,
            'num_days': 0,
            'years': 0.0,
            'sufficient_trades': False,
            't_statistic': np.nan,
            'p_value': np.nan,
            'is_significant': False,
            'is_very_significant': False,
            'confidence_95_lower': np.nan,
            'confidence_95_upper': np.nan,
            'stat_warning': 'No trades'
        }

def run_comprehensive_backtest(data_dir, output_dir='backtest_results_enhanced', 
                               confidence_thresholds=None, strategy_types=None):
    os.makedirs(output_dir, exist_ok=True)
    
    if not os.path.exists(data_dir):
        print(f"ERROR: Directory not found: {data_dir}")
        return None
    
    csv_files = [f for f in os.listdir(data_dir) if f.endswith('_all_folds.csv')]
    if len(csv_files) == 0:
        print(f"ERROR: No CSV files found in {data_dir}")
        return None
    
    if confidence_thresholds is None:
        confidence_thresholds = [0.0, 0.05, 0.1, 0.15, 0.2]
    
    if strategy_types is None:
        strategy_types = [
            'kelly', 'aggressive', 'neutral', 'conservative',
            'ultra_aggressive', 'fixed_50', 'fixed_01',
            'volatility_scaled', 'inverse_volatility',
            'trend_following', 'momentum'
        ]
    
    print(f"Processing {len(csv_files)} models...")
    print(f"Confidence thresholds: {confidence_thresholds}")
    print(f"Strategy types: {strategy_types}")
    print(f"Initial capital: $10,000")
    print(f"Trading costs: 0.3% round-trip")
    
    all_results = []
    failed_models = []
    
    for csv_file in sorted(csv_files):
        model_name = csv_file.replace('_all_folds.csv', '')
        
        try:
            file_path = os.path.join(data_dir, csv_file)
            df = pd.read_csv(file_path)
            
            if not validate_dataframe(df, model_name):
                failed_models.append(model_name)
                continue
            
            df['date'] = pd.to_datetime(df['date'])
            df = df.sort_values('date').reset_index(drop=True)
            
            base_accuracy = df['correct'].mean() * 100
            date_start = df['date'].min()
            date_end = df['date'].max()
            total_predictions = len(df)
            
            for strategy_type in strategy_types:
                for threshold in confidence_thresholds:
                    backtester = EnhancedCryptoBacktester(
                        initial_capital=10000,
                        trading_fee=0.001,
                        slippage=0.0005,
                        risk_free_rate=0.0,
                        min_trades_threshold=30
                    )
                    
                    equity_curve, trades, daily_returns = backtester.backtest_strategy(
                        df, 
                        confidence_threshold=threshold,
                        strategy_type=strategy_type,
                        kelly_lookback=50
                    )
                    
                    metrics = backtester.calculate_performance_metrics(
                        equity_curve, trades, daily_returns
                    )
                    
                    result = {
                        'model': model_name,
                        'strategy_type': strategy_type,
                        'confidence_threshold': threshold,
                        'total_predictions': total_predictions,
                        'base_accuracy': base_accuracy,
                        'date_start': date_start,
                        'date_end': date_end,
                        **metrics
                    }
                    all_results.append(result)
        
        except Exception as e:
            print(f"{model_name} failed: {e}")
            failed_models.append(model_name)
            continue
    
    if failed_models:
        print(f"Failed models: {', '.join(failed_models)}")
    
    if not all_results:
        print("ERROR: No successful backtests")
        return None
    
    results_df = pd.DataFrame(all_results)
    results_file = os.path.join(output_dir, 'backtest_results.csv')
    results_df.to_csv(results_file, index=False)
    print(f"Results saved: {results_file}")
    
    return results_df

def print_summary_dashboard(results_df, output_dir='backtest_results_enhanced'):
    for strategy_type in results_df['strategy_type'].unique():
        print(f"{'='*100}")
        print(f"STRATEGY: {strategy_type.upper()}")
        print(f"{'='*100}")
        
        analysis_data = results_df[
            (results_df['confidence_threshold'] == 0.1) & 
            (results_df['strategy_type'] == strategy_type)
        ].copy()
        
        significant_models = analysis_data[
            (analysis_data['is_significant'] == True) & 
            (analysis_data['sufficient_trades'] == True)
        ].copy()
        
        print(f"Total Models: {len(analysis_data)} | Significant: {len(significant_models)}")
        
        excellent = significant_models[significant_models['total_return'] > 0].copy()
        excellent['grade'] = excellent.apply(lambda x: 
            'A+' if (x['total_return_pct'] > 50 and x['sharpe_ratio'] > 3.5) else
            'A' if (x['total_return_pct'] > 30 and x['sharpe_ratio'] > 2.5) else
            'B' if (x['total_return_pct'] > 15 and x['sharpe_ratio'] > 1.5) else 'C', axis=1)
        
        top_10 = excellent.nlargest(10, 'total_return')
        
        print(f"{'Rank':<6} {'Model':<22} {'Grade':<7} {'Profit($)':<13} {'Return%':<9} {'Annual%':<9} {'Sharpe':<8} {'MaxDD%':<8} {'WinRate':<9} {'PF':<6} {'Exp':<8} {'Trades':<8} {'t-stat':<8} {'Acc%':<7}")
        print(f"{'-'*140}")
        
        for i, (idx, row) in enumerate(top_10.iterrows(), 1):
            print(f"{i:<6} {row['model']:<22} {row['grade']:<7} "
                  f"${row['total_return']:>10,.0f}  "
                  f"{row['total_return_pct']:>7.1f}%  "
                  f"{row['annual_return_pct']:>7.1f}%  "
                  f"{row['sharpe_ratio']:>6.2f}  "
                  f"{row['max_drawdown_pct']:>6.1f}%  "
                  f"{row['win_rate']:>7.1f}%  "
                  f"{row['profit_factor']:>4.2f}  "
                  f"{row['expectancy']:>6.2f}  "
                  f"{int(row['num_trades']):>6}  "
                  f"{row['t_statistic']:>7.2f}  "
                  f"{row['base_accuracy']:>5.1f}%")
        
        print(f"{'='*100}")
    
    return results_df

def print_fold_summary(data_dir, model_name, strategy_type, output_dir='backtest_results_enhanced'):
    file_path = os.path.join(data_dir, f"{model_name}_all_folds.csv")
    if not os.path.exists(file_path):
        return None
    
    df = pd.read_csv(file_path)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date').reset_index(drop=True)
    
    if 'fold' not in df.columns:
        return None
    
    print(f"{'-'*80}")
    print(f"FOLD ANALYSIS: {model_name} | STRATEGY: {strategy_type.upper()}")
    print(f"{'-'*80}")
    
    fold_results = []
    
    for fold in sorted(df['fold'].unique()):
        fold_data = df[df['fold'] == fold].copy()
        
        backtester = EnhancedCryptoBacktester(initial_capital=10000, min_trades_threshold=30)
        equity_curve, trades, daily_returns = backtester.backtest_strategy(
            fold_data, confidence_threshold=0.1, strategy_type=strategy_type
        )
        
        metrics = backtester.calculate_performance_metrics(equity_curve, trades, daily_returns)
        
        status = 'VERY_SIG' if metrics['is_very_significant'] else 'SIG' if metrics['is_significant'] else 'NOT_SIG'
        
        print(f"Fold {fold} | {fold_data['date'].min().date()} to {fold_data['date'].max().date()}")
        print(f"  {len(fold_data)} days | {int(metrics['num_trades'])} trades | "
              f"${metrics['total_return']:>8,.0f} ({metrics['total_return_pct']:>5.1f}%) | "
              f"Sharpe {metrics['sharpe_ratio']:>5.2f} | Win {metrics['win_rate']:>5.1f}% | "
              f"{status} (t={metrics['t_statistic']:.2f})")
        
        fold_result = {
            'fold': fold,
            'date_start': fold_data['date'].min(),
            'date_end': fold_data['date'].max(),
            'num_days': len(fold_data),
            'accuracy': fold_data['correct'].mean() * 100,
            **metrics
        }
        fold_results.append(fold_result)
    
    if len(fold_results) > 1:
        returns = np.array([m['annual_return_pct'] for m in fold_results])
        mean_return = np.mean(returns)
        std_return = np.std(returns)
        consistency = (1 / (1 + abs(std_return / mean_return) if mean_return != 0 else 1)) * 100
        
        print(f"{'-'*80}")
        print(f"Walk-Forward Efficiency: {consistency:.1f}% | Avg Annual Return: {mean_return:.1f}% (±{std_return:.1f}%)")
        print(f"{'-'*80}")
    
    return fold_results

results_df = run_comprehensive_backtest(data_dir, output_dir)
if results_df is not None:
    print_summary_dashboard(results_df, output_dir)
    
    for strategy_type in ['kelly', 'aggressive', 'neutral', 'conservative', 'ultra_aggressive', 
                          'fixed_50', 'fixed_01', 'volatility_scaled', 'inverse_volatility', 
                          'trend_following', 'momentum']:
        top_models = results_df[
            (results_df['confidence_threshold'] == 0.1) & 
            (results_df['strategy_type'] == strategy_type)
        ].nlargest(3, 'total_return')
        
        print(f"{'='*100}")
        print(f"TOP 3 MODELS FOLD-BY-FOLD ANALYSIS ({strategy_type.upper()})")
        print(f"{'='*100}")
        
        for idx, row in top_models.iterrows():
            print_fold_summary(data_dir, row['model'], strategy_type, output_dir)


Processing 26 models...
Confidence thresholds: [0.0, 0.05, 0.1, 0.15, 0.2]
Strategy types: ['kelly', 'aggressive', 'neutral', 'conservative', 'ultra_aggressive', 'fixed_50', 'fixed_01', 'volatility_scaled', 'inverse_volatility', 'trend_following', 'momentum']
Initial capital: $10,000
Trading costs: 0.3% round-trip
Results saved: backtest_results_enhanced/backtest_results.csv
STRATEGY: KELLY
Total Models: 26 | Significant: 20
Rank   Model                  Grade   Profit($)     Return%   Annual%   Sharpe   MaxDD%   WinRate   PF     Exp      Trades   t-stat   Acc%   
--------------------------------------------------------------------------------------------------------------------------------------------
1      AdaBoost               A+      $     8,743     87.4%     35.7%    4.77     2.3%     59.9%  2.32   24.15     362     5.72   62.9%
2      VotingSoft             A+      $     7,097     71.0%     29.8%    3.81     3.8%     57.1%  2.14   16.02     443     5.05   62.0%
3      GradientB

--------------------------------------------------------------------------------
FOLD ANALYSIS: VotingSoft | STRATEGY: KELLY
--------------------------------------------------------------------------------
Fold 1 | 2022-10-10 to 2022-12-08
  60 days | 55 trades | $     257 (  2.6%) | Sharpe  1.46 | Win  50.9% | NOT_SIG (t=0.68)
Fold 2 | 2023-06-07 to 2023-08-05
  60 days | 51 trades | $     163 (  1.6%) | Sharpe  2.24 | Win  60.8% | NOT_SIG (t=1.01)
Fold 3 | 2024-02-02 to 2024-04-01
  60 days | 57 trades | $   1,134 ( 11.3%) | Sharpe  5.84 | Win  57.9% | SIG (t=2.78)
Fold 4 | 2024-09-29 to 2024-11-27
  60 days | 44 trades | $     517 (  5.2%) | Sharpe  4.62 | Win  59.1% | NOT_SIG (t=1.93)
Fold 5 | 2025-01-01 to 2025-10-05
  278 days | 236 trades | $   4,011 ( 40.1%) | Sharpe  4.30 | Win  57.2% | VERY_SIG (t=4.16)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 59.8% | Avg Annual Return: 26.9% (±18.1%)
---------------------------

Fold 5 | 2025-01-01 to 2025-10-05
  278 days | 236 trades | $   3,067 ( 30.7%) | Sharpe  4.61 | Win  57.2% | VERY_SIG (t=4.47)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 72.3% | Avg Annual Return: 23.2% (±8.9%)
--------------------------------------------------------------------------------
TOP 3 MODELS FOLD-BY-FOLD ANALYSIS (CONSERVATIVE)
--------------------------------------------------------------------------------
FOLD ANALYSIS: XGBoost | STRATEGY: CONSERVATIVE
--------------------------------------------------------------------------------
Fold 1 | 2022-10-10 to 2022-12-08
  60 days | 55 trades | $     203 (  2.0%) | Sharpe  2.39 | Win  52.7% | NOT_SIG (t=1.12)
Fold 2 | 2023-06-07 to 2023-08-05
  60 days | 55 trades | $     101 (  1.0%) | Sharpe  2.96 | Win  56.4% | NOT_SIG (t=1.38)
Fold 3 | 2024-02-02 to 2024-04-01
  60 days | 58 trades | $     365 (  3.6%) | Sharpe  6.14 | Win  56.9% | SIG (t=2.95)
Fold 4 | 2024-09-

Fold 3 | 2024-02-02 to 2024-04-01
  60 days | 57 trades | $   3,859 ( 38.6%) | Sharpe  6.47 | Win  57.9% | VERY_SIG (t=3.08)
Fold 4 | 2024-09-29 to 2024-11-27
  60 days | 44 trades | $   2,647 ( 26.5%) | Sharpe  5.14 | Win  59.1% | SIG (t=2.15)
Fold 5 | 2025-01-01 to 2025-10-05
  278 days | 236 trades | $  24,571 (245.7%) | Sharpe  4.56 | Win  57.2% | VERY_SIG (t=4.41)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 67.3% | Avg Annual Return: 166.8% (±81.2%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: AdaBoost | STRATEGY: FIXED_50
--------------------------------------------------------------------------------
Fold 1 | 2022-10-10 to 2022-12-08
  60 days | 35 trades | $   2,725 ( 27.3%) | Sharpe  4.76 | Win  57.1% | NOT_SIG (t=1.77)
Fold 2 | 2023-06-07 to 2023-08-05
  60 days | 5 trades | $     623 

Fold 2 | 2023-06-07 to 2023-08-05
  60 days | 51 trades | $     322 (  3.2%) | Sharpe  2.03 | Win  60.8% | NOT_SIG (t=0.91)
Fold 3 | 2024-02-02 to 2024-04-01
  60 days | 57 trades | $   1,361 ( 13.6%) | Sharpe  5.36 | Win  57.9% | SIG (t=2.55)
Fold 4 | 2024-09-29 to 2024-11-27
  60 days | 44 trades | $   1,598 ( 16.0%) | Sharpe  6.32 | Win  59.1% | SIG (t=2.64)
Fold 5 | 2025-01-01 to 2025-10-05
  278 days | 236 trades | $   6,280 ( 62.8%) | Sharpe  4.23 | Win  57.2% | VERY_SIG (t=4.09)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 62.3% | Avg Annual Return: 48.4% (±29.3%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: AdaBoost | STRATEGY: TREND_FOLLOWING
--------------------------------------------------------------------------------
Fold 1 | 2022-10-10 to 2022-12-08
  60 days | 35 trades | $     77

In [4]:
import pandas as pd
import numpy as np
import os
from scipy import stats
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

timestamp = datetime.now().strftime("%Y-%m-%d")
RESULT_DIR = os.path.join("../model_results", timestamp)
data_dir = os.path.join(RESULT_DIR, "predictions", "direction_walk_forward")
output_dir = os.path.join(RESULT_DIR, "backtest_results")

def validate_dataframe(df, model_name):
    required_cols = ['date', 'pred_direction', 'actual_return', 'correct', 'confidence', 'max_proba']
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        print(f"{model_name}: Missing columns {missing}")
        return False
    if len(df) < 10:
        print(f"{model_name}: Insufficient data ({len(df)} rows)")
        return False
    if df['actual_return'].isna().all() or df['pred_direction'].isna().all():
        print(f"{model_name}: Critical columns contain all NaN values")
        return False
    return True

class ReinforcementLearningPositionSizer:
    def __init__(self, learning_rate=0.1, epsilon=0.2, gamma=0.9):
        self.lr = learning_rate
        self.epsilon = epsilon
        self.gamma = gamma
        self.q_table = {}
        self.actions = [0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]
        
    def get_state(self, recent_returns, consecutive_wins, consecutive_losses, capital_ratio):
        recent_trend = 'up' if np.mean(recent_returns[-5:]) > 0 else 'down'
        momentum = 'strong' if consecutive_wins >= 3 else 'weak' if consecutive_losses >= 3 else 'neutral'
        capital_state = 'high' if capital_ratio > 1.2 else 'low' if capital_ratio < 0.9 else 'normal'
        return (recent_trend, momentum, capital_state)
    
    def choose_action(self, state):
        if state not in self.q_table:
            self.q_table[state] = {a: 0.0 for a in self.actions}
        
        if np.random.random() < self.epsilon:
            return np.random.choice(self.actions)
        else:
            return max(self.q_table[state], key=self.q_table[state].get)
    
    def update_q_value(self, state, action, reward, next_state):
        if state not in self.q_table:
            self.q_table[state] = {a: 0.0 for a in self.actions}
        if next_state not in self.q_table:
            self.q_table[next_state] = {a: 0.0 for a in self.actions}
        
        current_q = self.q_table[state][action]
        max_next_q = max(self.q_table[next_state].values())
        new_q = current_q + self.lr * (reward + self.gamma * max_next_q - current_q)
        self.q_table[state][action] = new_q

class EnhancedCryptoBacktester:
    def __init__(self, initial_capital=10000, trading_fee=0.0015, slippage=0.0015, 
                 risk_free_rate=0.0, min_trades_threshold=30, min_order_size=10):
        self.initial_capital = float(initial_capital)
        self.trading_fee = float(trading_fee)
        self.base_slippage = float(slippage)
        self.risk_free_rate = float(risk_free_rate)
        self.min_trades = min_trades_threshold
        self.min_order = min_order_size
        self.total_cost_rate = (self.trading_fee + self.base_slippage) * 2

    def calculate_kelly_fraction(self, profits):
        if len(profits) < 10:
            return 0.05
        profits = np.array(profits)
        wins = profits[profits > 0]
        losses = profits[profits < 0]
        if len(wins) == 0 or len(losses) == 0 or np.mean(losses) == 0:
            return 0.05
        win_rate = len(wins) / len(profits)
        avg_win = np.mean(wins)
        avg_loss = np.abs(np.mean(losses))
        win_loss_ratio = avg_win / avg_loss
        kelly = (win_rate * win_loss_ratio - (1 - win_rate)) / win_loss_ratio
        kelly_fraction = np.clip(kelly * 0.5, 0.01, 0.2)
        return kelly_fraction if not np.isnan(kelly_fraction) else 0.01

    def calculate_position_fractions_vectorized(self, df, strategy_type, kelly_lookback=50, 
                                                vol_lookback=20, mom_lookback=10):
        n = len(df)
        fractions = np.zeros(n)

        if strategy_type == 'aggressive':
            fractions[:] = 0.3
        elif strategy_type == 'neutral':
            fractions = np.clip(df['max_proba'].values, 0, 1.0) * 0.15
        elif strategy_type == 'conservative':
            fractions[:] = 0.05
        elif strategy_type == 'ultra_aggressive':
            fractions[:] = 0.8
        elif strategy_type == 'fixed_50':
            fractions[:] = 0.5
        elif strategy_type == 'fixed_01':
            fractions[:] = 0.01
        elif strategy_type in ['volatility_scaled', 'inverse_volatility']:
            returns = df['actual_return'].values
            vol = pd.Series(returns).rolling(vol_lookback, min_periods=1).std().values
            vol = np.where(vol == 0, 0.01, vol)
            if strategy_type == 'volatility_scaled':
                fractions = np.clip(0.1 / (vol + 1e-6), 0.01, 0.3)
            else:
                fractions = np.clip(0.2 / (vol + 1e-6), 0.01, 0.3)
        elif strategy_type in ['trend_following', 'momentum']:
            returns = df['actual_return'].values
            rolling_sum = pd.Series(returns).rolling(mom_lookback, min_periods=1).sum().values
            if strategy_type == 'trend_following':
                fractions = np.where(rolling_sum > 0, 0.3, np.where(rolling_sum < 0, 0.05, 0.1))
            else:
                fractions = np.clip(0.1 + 0.2 * (rolling_sum / (mom_lookback * 0.01)), 0.01, 0.3)
        elif strategy_type == 'reinforcement_learning':
            fractions[:] = 0.1
        elif strategy_type == 'kelly':
            fractions[:] = 0.1
        else:
            fractions[:] = 0.1

        return np.clip(fractions, 0.01, 0.9)

    def backtest_strategy(self, df, confidence_threshold=0.0, strategy_type='kelly', 
                         kelly_lookback=50, vol_lookback=20, mom_lookback=10):
        df = df.copy().sort_values('date').reset_index(drop=True)
        n = len(df)

        valid_mask = (df['confidence'].values >= confidence_threshold)
        pred_direction = df['pred_direction'].values.astype(int)
        actual_return = df['actual_return'].values.astype(float)

        if strategy_type == 'reinforcement_learning':
            rl_agent = ReinforcementLearningPositionSizer(
                learning_rate=0.1, epsilon=0.15, gamma=0.9
            )
            use_rl = True
        else:
            position_fractions = self.calculate_position_fractions_vectorized(
                df, strategy_type, kelly_lookback, vol_lookback, mom_lookback
            )
            use_rl = False

        capital = self.initial_capital
        equity_curve = np.zeros(n)
        daily_returns = np.zeros(n)
        trades = []
        recent_profits = []
        consecutive_wins = 0
        consecutive_losses = 0

        for i in range(n):
            if capital < self.min_order:
                equity_curve[i] = capital
                continue
                
            if not valid_mask[i]:
                equity_curve[i] = capital
                continue

            if use_rl:
                recent_returns = actual_return[max(0, i-10):i] if i > 0 else [0]
                capital_ratio = capital / self.initial_capital
                state = rl_agent.get_state(recent_returns, consecutive_wins, consecutive_losses, capital_ratio)
                position_fraction = rl_agent.choose_action(state)
            elif strategy_type == 'kelly' and len(recent_profits) >= 20:
                position_fraction = self.calculate_kelly_fraction(recent_profits[-kelly_lookback:])
            else:
                position_fraction = position_fractions[i]

            position_size = capital * position_fraction
            
            if position_size < self.min_order:
                equity_curve[i] = capital
                continue

            if i >= 20:
                recent_returns_vol = actual_return[i-20:i]
                volatility = np.std(recent_returns_vol)
                if volatility > 0.03:
                    dynamic_slippage = self.base_slippage * 2
                elif volatility > 0.02:
                    dynamic_slippage = self.base_slippage * 1.5
                else:
                    dynamic_slippage = self.base_slippage
                trading_cost = position_size * ((self.trading_fee + dynamic_slippage) * 2)
            else:
                trading_cost = position_size * self.total_cost_rate

            direction = pred_direction[i]
            ret = actual_return[i]

            gross_return = ret if direction == 1 else -ret
            gross_profit = position_size * gross_return
            net_profit = gross_profit - trading_cost

            prev_capital = capital
            capital = max(0, capital + net_profit)

            equity_curve[i] = capital
            daily_returns[i] = (capital - prev_capital) / prev_capital if prev_capital > 0 else 0

            if use_rl and i > 0:
                reward = net_profit / position_size if position_size > 0 else 0
                next_recent_returns = actual_return[max(0, i-9):i+1]
                next_capital_ratio = capital / self.initial_capital
                next_state = rl_agent.get_state(next_recent_returns, consecutive_wins, consecutive_losses, next_capital_ratio)
                rl_agent.update_q_value(state, position_fraction, reward, next_state)

            recent_profits.append(net_profit)

            if net_profit > 0:
                consecutive_wins += 1
                consecutive_losses = 0
            else:
                consecutive_losses += 1
                consecutive_wins = 0

            trades.append({
                'date': df.iloc[i]['date'],
                'pred_direction': direction,
                'actual_return': ret,
                'position_fraction': position_fraction,
                'position_size': position_size,
                'gross_profit': gross_profit,
                'trading_cost': trading_cost,
                'net_profit': net_profit,
                'capital': capital,
                'correct': bool(df.iloc[i]['correct']),
                'confidence': float(df.iloc[i]['confidence']),
                'max_proba': float(df.iloc[i]['max_proba'])
            })

            if capital <= 0:
                equity_curve[i+1:] = 0
                break

        return equity_curve, trades, daily_returns
    
    def calculate_statistical_significance(self, trades, daily_returns):
        if len(trades) < self.min_trades:
            return {
                'sufficient_trades': False,
                'num_trades': len(trades),
                't_statistic': np.nan,
                'p_value': np.nan,
                'is_significant': False,
                'is_very_significant': False,
                'confidence_95_lower': np.nan,
                'confidence_95_upper': np.nan,
                'warning': f'Insufficient trades: {len(trades)} < {self.min_trades}'
            }
        
        returns_array = np.array(daily_returns)
        returns_array = returns_array[returns_array != 0]
        
        if len(returns_array) < 2:
            return {
                'sufficient_trades': True,
                'num_trades': len(trades),
                't_statistic': np.nan,
                'p_value': np.nan,
                'is_significant': False,
                'is_very_significant': False,
                'confidence_95_lower': np.nan,
                'confidence_95_upper': np.nan,
                'warning': 'Insufficient return data'
            }
        
        t_stat, p_value = stats.ttest_1samp(returns_array, 0)
        mean_return = np.mean(returns_array)
        std_error = stats.sem(returns_array)
        confidence_interval = stats.t.interval(
            0.95, len(returns_array)-1, loc=mean_return, scale=std_error
        )
        
        is_very_significant = abs(t_stat) > 3.0 and p_value < 0.05
        is_significant = abs(t_stat) > 2.0 and p_value < 0.05
        
        return {
            'sufficient_trades': True,
            'num_trades': len(trades),
            't_statistic': float(t_stat),
            'p_value': float(p_value),
            'is_significant': is_significant,
            'is_very_significant': is_very_significant,
            'confidence_95_lower': float(confidence_interval[0]),
            'confidence_95_upper': float(confidence_interval[1]),
            'warning': None
        }

    def calculate_performance_metrics(self, equity_curve, trades, daily_returns):
        if len(trades) == 0:
            return self._empty_metrics()
        
        final_capital = equity_curve[-1] if len(equity_curve) > 0 else self.initial_capital
        total_return = final_capital - self.initial_capital
        total_return_pct = (total_return / self.initial_capital) * 100
        
        profits = np.array([t['net_profit'] for t in trades])
        winning_trades = profits[profits > 0]
        losing_trades = profits[profits < 0]
        
        num_trades = len(trades)
        num_wins = len(winning_trades)
        num_losses = len(losing_trades)
        win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0
        
        avg_profit_per_trade = np.mean(profits)
        total_wins = np.sum(winning_trades) if num_wins > 0 else 0
        total_losses = np.abs(np.sum(losing_trades)) if num_losses > 0 else 0
        profit_factor = (total_wins / total_losses) if total_losses > 0 else 0
        
        avg_win = np.mean(winning_trades) if num_wins > 0 else 0
        avg_loss = np.abs(np.mean(losing_trades)) if num_losses > 0 else 0
        
        if avg_loss > 0:
            expectancy = (win_rate/100 * avg_win) - ((100-win_rate)/100 * avg_loss)
        else:
            expectancy = avg_profit_per_trade
        
        equity_array = np.array(equity_curve)
        running_max = np.maximum.accumulate(equity_array)
        drawdown_array = equity_array - running_max
        drawdown_pct_array = np.divide(drawdown_array, running_max, 
                                       out=np.zeros_like(drawdown_array), 
                                       where=running_max!=0) * 100
        
        max_drawdown_abs = np.min(drawdown_array)
        max_drawdown_pct = np.abs(np.min(drawdown_pct_array))
        
        returns_array = np.array(daily_returns)
        returns_array = returns_array[returns_array != 0]
        
        if len(returns_array) > 1 and np.std(returns_array) > 0:
            mean_return = np.mean(returns_array)
            std_return = np.std(returns_array, ddof=1)
            annual_mean = mean_return * 252
            annual_std = std_return * np.sqrt(252)
            sharpe_ratio = (annual_mean - self.risk_free_rate) / annual_std if annual_std > 0 else 0
        else:
            sharpe_ratio = 0.0
        
        downside_returns = returns_array[returns_array < 0]
        if len(downside_returns) > 1:
            downside_std = np.std(downside_returns, ddof=1) * np.sqrt(252)
            sortino_ratio = ((np.mean(returns_array) * 252 - self.risk_free_rate) / downside_std) if downside_std > 0 else 0
        else:
            sortino_ratio = 0.0
        
        num_days = len(equity_curve)
        years = num_days / 252
        
        if years > 0 and final_capital > 0 and self.initial_capital > 0:
            annual_return_pct = (np.power(final_capital / self.initial_capital, 1/years) - 1) * 100
        else:
            annual_return_pct = 0.0
        
        calmar_ratio = (annual_return_pct / max_drawdown_pct) if max_drawdown_pct > 0 else 0
        recovery_factor = (total_return / abs(max_drawdown_abs)) if max_drawdown_abs < 0 else 0
        
        consecutive_wins = 0
        consecutive_losses = 0
        max_consecutive_wins = 0
        max_consecutive_losses = 0
        
        for profit in profits:
            if profit > 0:
                consecutive_wins += 1
                consecutive_losses = 0
                max_consecutive_wins = max(max_consecutive_wins, consecutive_wins)
            else:
                consecutive_losses += 1
                consecutive_wins = 0
                max_consecutive_losses = max(max_consecutive_losses, consecutive_losses)
        
        significance = self.calculate_statistical_significance(trades, daily_returns)
        
        return {
            'initial_capital': self.initial_capital,
            'final_capital': final_capital,
            'total_return': total_return,
            'total_return_pct': total_return_pct,
            'annual_return_pct': annual_return_pct,
            'num_trades': num_trades,
            'num_wins': num_wins,
            'num_losses': num_losses,
            'win_rate': win_rate,
            'avg_profit_per_trade': avg_profit_per_trade,
            'avg_win': avg_win,
            'avg_loss': avg_loss,
            'expectancy': expectancy,
            'profit_factor': profit_factor,
            'max_drawdown': max_drawdown_abs,
            'max_drawdown_pct': max_drawdown_pct,
            'sharpe_ratio': sharpe_ratio,
            'sortino_ratio': sortino_ratio,
            'calmar_ratio': calmar_ratio,
            'recovery_factor': recovery_factor,
            'max_consecutive_wins': max_consecutive_wins,
            'max_consecutive_losses': max_consecutive_losses,
            'num_days': num_days,
            'years': years,
            'sufficient_trades': significance['sufficient_trades'],
            't_statistic': significance['t_statistic'],
            'p_value': significance['p_value'],
            'is_significant': significance['is_significant'],
            'is_very_significant': significance.get('is_very_significant', False),
            'confidence_95_lower': significance['confidence_95_lower'],
            'confidence_95_upper': significance['confidence_95_upper'],
            'stat_warning': significance['warning']
        }

    def _empty_metrics(self):
        return {
            'initial_capital': self.initial_capital,
            'final_capital': self.initial_capital,
            'total_return': 0.0,
            'total_return_pct': 0.0,
            'annual_return_pct': 0.0,
            'num_trades': 0,
            'num_wins': 0,
            'num_losses': 0,
            'win_rate': 0.0,
            'avg_profit_per_trade': 0.0,
            'avg_win': 0.0,
            'avg_loss': 0.0,
            'expectancy': 0.0,
            'profit_factor': 0.0,
            'max_drawdown': 0.0,
            'max_drawdown_pct': 0.0,
            'sharpe_ratio': 0.0,
            'sortino_ratio': 0.0,
            'calmar_ratio': 0.0,
            'recovery_factor': 0.0,
            'max_consecutive_wins': 0,
            'max_consecutive_losses': 0,
            'num_days': 0,
            'years': 0.0,
            'sufficient_trades': False,
            't_statistic': np.nan,
            'p_value': np.nan,
            'is_significant': False,
            'is_very_significant': False,
            'confidence_95_lower': np.nan,
            'confidence_95_upper': np.nan,
            'stat_warning': 'No trades'
        }

def run_comprehensive_backtest(data_dir, output_dir, 
                               confidence_thresholds=None, strategy_types=None):
    os.makedirs(output_dir, exist_ok=True)
    
    if not os.path.exists(data_dir):
        print(f"ERROR: Directory not found: {data_dir}")
        return None
    
    csv_files = [f for f in os.listdir(data_dir) if f.endswith('_all_folds.csv')]
    if len(csv_files) == 0:
        print(f"ERROR: No CSV files found in {data_dir}")
        return None
    
    if confidence_thresholds is None:
        confidence_thresholds = [0.0, 0.05, 0.1, 0.15, 0.2]
    
    if strategy_types is None:
        strategy_types = [
            'kelly', 'aggressive', 'neutral', 'conservative',
            'ultra_aggressive', 'fixed_50', 'fixed_01',
            'volatility_scaled', 'inverse_volatility',
            'trend_following', 'momentum', 'reinforcement_learning'
        ]
    
    print(f"Data directory: {data_dir}")
    print(f"Output directory: {output_dir}")
    print(f"Processing {len(csv_files)} models...")
    print(f"Confidence thresholds: {confidence_thresholds}")
    print(f"Strategy types: {strategy_types}")
    
    all_results = []
    failed_models = []
    
    for csv_file in sorted(csv_files):
        model_name = csv_file.replace('_all_folds.csv', '')
        
        try:
            file_path = os.path.join(data_dir, csv_file)
            df = pd.read_csv(file_path)
            
            if not validate_dataframe(df, model_name):
                failed_models.append(model_name)
                continue
            
            df['date'] = pd.to_datetime(df['date'])
            df = df.sort_values('date').reset_index(drop=True)
            
            base_accuracy = df['correct'].mean() * 100
            date_start = df['date'].min()
            date_end = df['date'].max()
            total_predictions = len(df)
            
            for strategy_type in strategy_types:
                for threshold in confidence_thresholds:
                    backtester = EnhancedCryptoBacktester(
                        initial_capital=10000000,  
                        trading_fee=0.0005,         
                        slippage=0.0015,          
                        min_order_size=5000,        
                        risk_free_rate=0.0,
                        min_trades_threshold=30
                    )
                    
                    equity_curve, trades, daily_returns = backtester.backtest_strategy(
                        df, 
                        confidence_threshold=threshold,
                        strategy_type=strategy_type,
                        kelly_lookback=50
                    )
                    
                    metrics = backtester.calculate_performance_metrics(
                        equity_curve, trades, daily_returns
                    )
                    
                    result = {
                        'model': model_name,
                        'strategy_type': strategy_type,
                        'confidence_threshold': threshold,
                        'total_predictions': total_predictions,
                        'base_accuracy': base_accuracy,
                        'date_start': date_start,
                        'date_end': date_end,
                        **metrics
                    }
                    all_results.append(result)
        
        except Exception as e:
            print(f"{model_name} failed: {e}")
            failed_models.append(model_name)
            continue
    
    if failed_models:
        print(f"Failed models: {', '.join(failed_models)}")
    
    if not all_results:
        print("ERROR: No successful backtests")
        return None
    
    results_df = pd.DataFrame(all_results)
    results_file = os.path.join(output_dir, 'backtest_results.csv')
    results_df.to_csv(results_file, index=False)
    print(f"Results saved: {results_file}")
    
    return results_df, output_dir

def print_summary_dashboard(results_df, output_dir):
    for strategy_type in results_df['strategy_type'].unique():
        print(f"{'='*100}")
        print(f"STRATEGY: {strategy_type.upper()}")
        print(f"{'='*100}")
        
        analysis_data = results_df[
            (results_df['confidence_threshold'] == 0.1) & 
            (results_df['strategy_type'] == strategy_type)
        ].copy()
        
        significant_models = analysis_data[
            (analysis_data['is_significant'] == True) & 
            (analysis_data['sufficient_trades'] == True)
        ].copy()
        
        print(f"Total Models: {len(analysis_data)} | Significant: {len(significant_models)}")
        
        excellent = significant_models[significant_models['total_return'] > 0].copy()
        excellent['grade'] = excellent.apply(lambda x: 
            'A+' if (x['annual_return_pct'] > 20 and x['sharpe_ratio'] >= 3.0) else
            'A'  if (x['annual_return_pct'] > 15 and x['sharpe_ratio'] >= 2.0) else
            'B+' if (x['annual_return_pct'] > 10 and x['sharpe_ratio'] >= 1.5) else
            'B'  if (x['annual_return_pct'] > 5 and x['sharpe_ratio'] >= 1.0) else 'C', axis=1)

        
        top_10 = excellent.nlargest(10, 'total_return')
        
        print(f"{'Rank':<6} {'Model':<22} {'Grade':<7} {'Profit(원)':<13} {'Return%':<9} {'Annual%':<9} {'Sharpe':<8} {'MaxDD%':<8} {'WinRate':<9} {'PF':<6} {'Exp':<8} {'Trades':<8} {'t-stat':<8} {'Acc%':<7}")
        print(f"{'-'*140}")
        
        for i, (idx, row) in enumerate(top_10.iterrows(), 1):
            print(f"{i:<6} {row['model']:<22} {row['grade']:<7} "
                  f"{row['total_return']:>10,.0f}  "
                  f"{row['total_return_pct']:>7.1f}%  "
                  f"{row['annual_return_pct']:>7.1f}%  "
                  f"{row['sharpe_ratio']:>6.2f}  "
                  f"{row['max_drawdown_pct']:>6.1f}%  "
                  f"{row['win_rate']:>7.1f}%  "
                  f"{row['profit_factor']:>4.2f}  "
                  f"{row['expectancy']:>6.2f}  "
                  f"{int(row['num_trades']):>6}  "
                  f"{row['t_statistic']:>7.2f}  "
                  f"{row['base_accuracy']:>5.1f}%")
        
        print(f"{'='*100}")
    
    return results_df

def print_fold_summary(data_dir, model_name, strategy_type, output_dir):
    file_path = os.path.join(data_dir, f"{model_name}_all_folds.csv")
    if not os.path.exists(file_path):
        return None
    
    df = pd.read_csv(file_path)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date').reset_index(drop=True)
    
    if 'fold' not in df.columns:
        return None
    
    print(f"{'-'*80}")
    print(f"FOLD ANALYSIS: {model_name} | STRATEGY: {strategy_type.upper()}")
    print(f"{'-'*80}")
    
    fold_results = []
    
    for fold in sorted(df['fold'].unique()):
        fold_data = df[df['fold'] == fold].copy()
        
        backtester = EnhancedCryptoBacktester(
            initial_capital=10000000,  
            trading_fee=0.0005,         
            slippage=0.0015,          
            min_order_size=5000,        
            risk_free_rate=0.0,
            min_trades_threshold=30
        )

        equity_curve, trades, daily_returns = backtester.backtest_strategy(
            fold_data, confidence_threshold=0.1, strategy_type=strategy_type
        )
        
        metrics = backtester.calculate_performance_metrics(equity_curve, trades, daily_returns)
        
        status = 'VERY_SIG' if metrics['is_very_significant'] else 'SIG' if metrics['is_significant'] else 'NOT_SIG'
        
        print(f"Fold {fold} | {fold_data['date'].min().date()} to {fold_data['date'].max().date()}")
        print(f"  {len(fold_data)} days | {int(metrics['num_trades'])} trades | "
              f"\{metrics['total_return']:>8,.0f} ({metrics['total_return_pct']:>5.1f}%) | "
              f"Sharpe {metrics['sharpe_ratio']:>5.2f} | Win {metrics['win_rate']:>5.1f}% | "
              f"{status} (t={metrics['t_statistic']:.2f})")
        
        fold_result = {
            'fold': fold,
            'date_start': fold_data['date'].min(),
            'date_end': fold_data['date'].max(),
            'num_days': len(fold_data),
            'accuracy': fold_data['correct'].mean() * 100,
            **metrics
        }
        fold_results.append(fold_result)
    
    if len(fold_results) > 1:
        returns = np.array([m['annual_return_pct'] for m in fold_results])
        mean_return = np.mean(returns)
        std_return = np.std(returns)
        consistency = (1 / (1 + abs(std_return / mean_return) if mean_return != 0 else 1)) * 100
        
        print(f"{'-'*80}")
        print(f"Walk-Forward Efficiency: {consistency:.1f}% | Avg Annual Return: {mean_return:.1f}% (±{std_return:.1f}%)")
        print(f"{'-'*80}")
    
    return fold_results

result = run_comprehensive_backtest(data_dir, output_dir)
if result is not None:
    results_df, used_output_dir = result
    print(f"\nAll results saved to: {used_output_dir}")
    print_summary_dashboard(results_df, used_output_dir)
    
    for strategy_type in ['kelly', 'aggressive', 'neutral', 'conservative', 'ultra_aggressive', 
                          'fixed_50', 'fixed_01', 'volatility_scaled', 'inverse_volatility', 
                          'trend_following', 'momentum', 'reinforcement_learning']:
        top_models = results_df[
            (results_df['confidence_threshold'] == 0.1) & 
            (results_df['strategy_type'] == strategy_type)
        ].nlargest(3, 'total_return')
        
        print(f"{'='*100}")
        print(f"TOP 3 MODELS FOLD-BY-FOLD ANALYSIS ({strategy_type.upper()})")
        print(f"{'='*100}")
        
        for idx, row in top_models.iterrows():
            print_fold_summary(data_dir, row['model'], strategy_type, used_output_dir)


Data directory: ../model_results/2025-10-24/predictions/direction_walk_forward
Output directory: ../model_results/2025-10-24/backtest_results
Processing 26 models...
Confidence thresholds: [0.0, 0.05, 0.1, 0.15, 0.2]
Strategy types: ['kelly', 'aggressive', 'neutral', 'conservative', 'ultra_aggressive', 'fixed_50', 'fixed_01', 'volatility_scaled', 'inverse_volatility', 'trend_following', 'momentum', 'reinforcement_learning']
Results saved: ../model_results/2025-10-24/backtest_results/backtest_results.csv

All results saved to: ../model_results/2025-10-24/backtest_results
STRATEGY: KELLY
Total Models: 26 | Significant: 14
Rank   Model                  Grade   Profit(원)     Return%   Annual%   Sharpe   MaxDD%   WinRate   PF     Exp      Trades   t-stat   Acc%   
--------------------------------------------------------------------------------------------------------------------------------------------
1      DTW_LSTM               A        5,631,134     56.3%     19.8%    3.96     3.1%    

Fold 1 | 2022-10-20 to 2022-12-18
  60 days | 43 trades | \ 559,494 (  5.6%) | Sharpe  3.74 | Win  53.5% | NOT_SIG (t=1.54)
Fold 2 | 2023-02-17 to 2023-04-17
  60 days | 9 trades | \ 108,511 (  1.1%) | Sharpe  4.97 | Win  55.6% | NOT_SIG (t=nan)
Fold 3 | 2023-06-17 to 2023-08-15
  60 days | 27 trades | \ -39,151 ( -0.4%) | Sharpe -1.47 | Win  25.9% | NOT_SIG (t=nan)
Fold 4 | 2023-10-15 to 2023-12-13
  60 days | 19 trades | \ 108,354 (  1.1%) | Sharpe  3.88 | Win  57.9% | NOT_SIG (t=nan)
Fold 5 | 2024-02-12 to 2024-04-11
  60 days | 30 trades | \ 727,245 (  7.3%) | Sharpe  7.21 | Win  66.7% | SIG (t=2.49)
Fold 6 | 2024-06-11 to 2024-08-09
  60 days | 43 trades | \ 134,983 (  1.3%) | Sharpe  2.06 | Win  53.5% | NOT_SIG (t=0.85)
Fold 7 | 2025-01-31 to 2025-10-19
  262 days | 158 trades | \3,101,990 ( 31.0%) | Sharpe  4.65 | Win  56.3% | VERY_SIG (t=3.68)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 52.2% | Avg Annual Return: 14.

Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 211 trades | \2,473,176 ( 24.7%) | Sharpe  3.83 | Win  55.0% | VERY_SIG (t=3.51)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 62.2% | Avg Annual Return: 11.3% (±6.9%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: Bagging | STRATEGY: NEUTRAL
--------------------------------------------------------------------------------
Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 47 trades | \ 529,948 (  5.3%) | Sharpe  5.43 | Win  68.1% | SIG (t=2.34)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 64 trades | \ 242,874 (  2.4%) | Sharpe  2.44 | Win  53.1% | NOT_SIG (t=1.23)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 70 trades | \ 322,340 (  3.2%) | Sharpe  4.11 | Win  61.4% | SIG (t=2.16)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 73 trades | \ 347,696 (  3.5%

Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 241 trades | \22,546,590 (225.5%) | Sharpe  2.77 | Win  52.7% | SIG (t=2.71)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 83.8% | Avg Annual Return: 144.8% (±28.0%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: Bagging | STRATEGY: ULTRA_AGGRESSIVE
--------------------------------------------------------------------------------
Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 47 trades | \4,921,402 ( 49.2%) | Sharpe  5.22 | Win  68.1% | SIG (t=2.25)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 64 trades | \1,971,088 ( 19.7%) | Sharpe  2.27 | Win  53.1% | NOT_SIG (t=1.14)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 70 trades | \2,847,408 ( 28.5%) | Sharpe  3.92 | Win  61.4% | SIG (t=2.07)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 73 trades | \2,959,

Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 241 trades | \ 162,832 (  1.6%) | Sharpe  2.77 | Win  52.7% | SIG (t=2.71)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 87.4% | Avg Annual Return: 1.2% (±0.2%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: Bagging | STRATEGY: FIXED_01
--------------------------------------------------------------------------------
Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 47 trades | \  52,372 (  0.5%) | Sharpe  5.22 | Win  68.1% | SIG (t=2.25)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 64 trades | \  24,302 (  0.2%) | Sharpe  2.27 | Win  53.1% | NOT_SIG (t=1.14)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 70 trades | \  32,376 (  0.3%) | Sharpe  3.92 | Win  61.4% | SIG (t=2.07)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 73 trades | \  34,039 (  0.3%) | Sh

Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 65 trades | \1,312,571 ( 13.1%) | Sharpe  2.33 | Win  61.5% | NOT_SIG (t=1.18)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 67 trades | \ 192,444 (  1.9%) | Sharpe  0.61 | Win  50.7% | NOT_SIG (t=0.32)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 1 trades | \-10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 1 trades | \-10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 5 | 2024-01-13 to 2024-04-11
  90 days | 1 trades | \-10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 6 | 2024-05-12 to 2024-08-09
  90 days | 1 trades | \-10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 1 trades | \-10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 31.9% | Avg An

Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 256 trades | \2,753,155 ( 27.5%) | Sharpe  2.27 | Win  52.7% | SIG (t=2.28)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 70.1% | Avg Annual Return: 17.2% (±7.4%)
--------------------------------------------------------------------------------
TOP 3 MODELS FOLD-BY-FOLD ANALYSIS (REINFORCEMENT_LEARNING)
--------------------------------------------------------------------------------
FOLD ANALYSIS: XGBoost | STRATEGY: REINFORCEMENT_LEARNING
--------------------------------------------------------------------------------
Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 65 trades | \ 714,167 (  7.1%) | Sharpe  2.32 | Win  61.5% | NOT_SIG (t=1.18)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 67 trades | \  17,725 (  0.2%) | Sharpe  0.20 | Win  50.7% | NOT_SIG (t=0.10)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 72 trades | \ 258,913 (  2.6%) | Sharpe  2.51 | Win  56.9% | NOT_SIG (t=1.3

In [5]:
import pandas as pd
import numpy as np
import os
from scipy import stats
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

timestamp = datetime.now().strftime("%Y-%m-%d")
RESULT_DIR = os.path.join("../model_results", timestamp)
data_dir = os.path.join(RESULT_DIR, "predictions", "direction_walk_forward")
output_dir = os.path.join(RESULT_DIR, "backtest_results")

# ==================== REGIME 분류 ====================
REGIME_MAP = {
    1: "bear_crash",     # 급락장 - FTX 파산
    2: "bull_steady",    # 급등장 - Shapella
    3: "sideways",       # 횡보장 - Summer
    4: "bull_steady",    # 급등장 - BTC ETF 기대
    5: "bull_strong",    # 급등장 - BTC ETF 승인
    6: "bear_volatile"   # 급락장 - 조정기
}

def validate_dataframe(df, model_name):
    required_cols = ['date', 'pred_direction', 'actual_return', 'correct', 'confidence', 'max_proba']
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        print(f"{model_name}: Missing columns {missing}")
        return False
    if len(df) < 10:
        print(f"{model_name}: Insufficient data ({len(df)} rows)")
        return False
    if df['actual_return'].isna().all() or df['pred_direction'].isna().all():
        print(f"{model_name}: Critical columns contain all NaN values")
        return False
    return True

class ReinforcementLearningPositionSizer:
    def __init__(self, learning_rate=0.1, epsilon=0.2, gamma=0.9):
        self.lr = learning_rate
        self.epsilon = epsilon
        self.gamma = gamma
        self.q_table = {}
        self.actions = [0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]
        
    def get_state(self, recent_returns, consecutive_wins, consecutive_losses, capital_ratio):
        recent_trend = 'up' if np.mean(recent_returns[-5:]) > 0 else 'down'
        momentum = 'strong' if consecutive_wins >= 3 else 'weak' if consecutive_losses >= 3 else 'neutral'
        capital_state = 'high' if capital_ratio > 1.2 else 'low' if capital_ratio < 0.9 else 'normal'
        return (recent_trend, momentum, capital_state)
    
    def choose_action(self, state):
        if state not in self.q_table:
            self.q_table[state] = {a: 0.0 for a in self.actions}
        
        if np.random.random() < self.epsilon:
            return np.random.choice(self.actions)
        else:
            return max(self.q_table[state], key=self.q_table[state].get)
    
    def update_q_value(self, state, action, reward, next_state):
        if state not in self.q_table:
            self.q_table[state] = {a: 0.0 for a in self.actions}
        if next_state not in self.q_table:
            self.q_table[next_state] = {a: 0.0 for a in self.actions}
        
        current_q = self.q_table[state][action]
        max_next_q = max(self.q_table[next_state].values())
        new_q = current_q + self.lr * (reward + self.gamma * max_next_q - current_q)
        self.q_table[state][action] = new_q

class EnhancedCryptoBacktester:
    def __init__(self, initial_capital=10000000, trading_fee=0.0005, slippage=0.0015, 
                 risk_free_rate=0.0, min_trades_threshold=30, min_order_size=5000):
        self.initial_capital = float(initial_capital)
        self.trading_fee = float(trading_fee)
        self.base_slippage = float(slippage)
        self.risk_free_rate = float(risk_free_rate)
        self.min_trades = min_trades_threshold
        self.min_order = min_order_size
        self.total_cost_rate = (self.trading_fee + self.base_slippage) * 2

    def calculate_kelly_fraction(self, profits):
        if len(profits) < 10:
            return 0.05
        profits = np.array(profits)
        wins = profits[profits > 0]
        losses = profits[profits < 0]
        if len(wins) == 0 or len(losses) == 0 or np.mean(losses) == 0:
            return 0.05
        win_rate = len(wins) / len(profits)
        avg_win = np.mean(wins)
        avg_loss = np.abs(np.mean(losses))
        win_loss_ratio = avg_win / avg_loss
        kelly = (win_rate * win_loss_ratio - (1 - win_rate)) / win_loss_ratio
        kelly_fraction = np.clip(kelly * 0.5, 0.01, 0.2)
        return kelly_fraction if not np.isnan(kelly_fraction) else 0.01

    def calculate_regime_adaptive_fraction(self, df, i, strategy_subtype='balanced'):
        """Fold 기반 Regime 감지 + 동적 포지션 조절"""
        if 'fold' not in df.columns:
            return 0.15
        
        fold = int(df.iloc[i]['fold']) if i < len(df) else 1
        regime = REGIME_MAP.get(fold, "sideways")
        confidence = float(df.iloc[i]['confidence']) if i < len(df) else 0.5
        
        # Regime별 기본 포지션
        if strategy_subtype == 'balanced':
            regime_fractions = {
                "bear_crash": 0.05, "bear_volatile": 0.08, "sideways": 0.15,
                "bull_steady": 0.30, "bull_strong": 0.40
            }
        elif strategy_subtype == 'defensive':
            regime_fractions = {
                "bear_crash": 0.03, "bear_volatile": 0.05, "sideways": 0.10,
                "bull_steady": 0.20, "bull_strong": 0.25
            }
        else:  # aggressive
            regime_fractions = {
                "bear_crash": 0.10, "bear_volatile": 0.15, "sideways": 0.25,
                "bull_steady": 0.50, "bull_strong": 0.70
            }
        
        base_fraction = regime_fractions.get(regime, 0.15)
        
        # Confidence 조정
        multiplier = 1.2 if confidence > 0.20 else 1.1 if confidence > 0.15 else 0.7 if confidence < 0.08 else 1.0
        
        # 최근 성과 확인
        if i >= 10:
            recent_returns = df.iloc[max(0, i-10):i]['actual_return'].values
            if np.mean(recent_returns) < -0.01:
                multiplier *= 0.8
        
        final_fraction = base_fraction * multiplier
        
        # 안전 범위
        if regime in ["bear_crash", "bear_volatile"]:
            return np.clip(final_fraction, 0.01, 0.12)
        else:
            return np.clip(final_fraction, 0.01, 0.8)

    def calculate_position_fractions_vectorized(self, df, strategy_type, kelly_lookback=50, 
                                                vol_lookback=20, mom_lookback=10):
        n = len(df)
        fractions = np.zeros(n)

        if strategy_type == 'aggressive':
            fractions[:] = 0.3
        elif strategy_type == 'neutral':
            fractions = np.clip(df['max_proba'].values, 0, 1.0) * 0.15
        elif strategy_type == 'conservative':
            fractions[:] = 0.05
        elif strategy_type == 'ultra_aggressive':
            fractions[:] = 0.8
        elif strategy_type == 'fixed_50':
            fractions[:] = 0.5
        elif strategy_type == 'fixed_01':
            fractions[:] = 0.01
        elif strategy_type in ['volatility_scaled', 'inverse_volatility']:
            returns = df['actual_return'].values
            vol = pd.Series(returns).rolling(vol_lookback, min_periods=1).std().values
            vol = np.where(vol == 0, 0.01, vol)
            if strategy_type == 'volatility_scaled':
                fractions = np.clip(0.1 / (vol + 1e-6), 0.01, 0.3)
            else:
                fractions = np.clip(0.2 / (vol + 1e-6), 0.01, 0.3)
        elif strategy_type in ['trend_following', 'momentum']:
            returns = df['actual_return'].values
            rolling_sum = pd.Series(returns).rolling(mom_lookback, min_periods=1).sum().values
            if strategy_type == 'trend_following':
                fractions = np.where(rolling_sum > 0, 0.3, np.where(rolling_sum < 0, 0.05, 0.1))
            else:
                fractions = np.clip(0.1 + 0.2 * (rolling_sum / (mom_lookback * 0.01)), 0.01, 0.3)
        elif strategy_type == 'reinforcement_learning':
            fractions[:] = 0.1
        elif strategy_type == 'regime_adaptive_balanced':
            fractions = np.array([self.calculate_regime_adaptive_fraction(df, i, 'balanced') for i in range(n)])
        elif strategy_type == 'regime_adaptive_defensive':
            fractions = np.array([self.calculate_regime_adaptive_fraction(df, i, 'defensive') for i in range(n)])
        elif strategy_type == 'regime_adaptive_aggressive':
            fractions = np.array([self.calculate_regime_adaptive_fraction(df, i, 'aggressive') for i in range(n)])
        elif strategy_type == 'kelly':
            fractions[:] = 0.1
        else:
            fractions[:] = 0.1

        return np.clip(fractions, 0.01, 0.9)

    def backtest_strategy(self, df, confidence_threshold=0.0, strategy_type='kelly', 
                         kelly_lookback=50, vol_lookback=20, mom_lookback=10):
        df = df.copy().sort_values('date').reset_index(drop=True)
        n = len(df)

        valid_mask = (df['confidence'].values >= confidence_threshold)
        pred_direction = df['pred_direction'].values.astype(int)
        actual_return = df['actual_return'].values.astype(float)

        if strategy_type == 'reinforcement_learning':
            rl_agent = ReinforcementLearningPositionSizer(
                learning_rate=0.1, epsilon=0.15, gamma=0.9
            )
            use_rl = True
        else:
            position_fractions = self.calculate_position_fractions_vectorized(
                df, strategy_type, kelly_lookback, vol_lookback, mom_lookback
            )
            use_rl = False

        capital = self.initial_capital
        equity_curve = np.zeros(n)
        daily_returns = np.zeros(n)
        trades = []
        recent_profits = []
        consecutive_wins = 0
        consecutive_losses = 0

        for i in range(n):
            if capital < self.min_order:
                equity_curve[i] = capital
                continue
                
            if not valid_mask[i]:
                equity_curve[i] = capital
                continue

            if use_rl:
                recent_returns = actual_return[max(0, i-10):i] if i > 0 else [0]
                capital_ratio = capital / self.initial_capital
                state = rl_agent.get_state(recent_returns, consecutive_wins, consecutive_losses, capital_ratio)
                position_fraction = rl_agent.choose_action(state)
            elif strategy_type == 'kelly' and len(recent_profits) >= 20:
                position_fraction = self.calculate_kelly_fraction(recent_profits[-kelly_lookback:])
            else:
                position_fraction = position_fractions[i]

            position_size = capital * position_fraction
            
            if position_size < self.min_order:
                equity_curve[i] = capital
                continue

            if i >= 20:
                recent_returns_vol = actual_return[i-20:i]
                volatility = np.std(recent_returns_vol)
                if volatility > 0.03:
                    dynamic_slippage = self.base_slippage * 2
                elif volatility > 0.02:
                    dynamic_slippage = self.base_slippage * 1.5
                else:
                    dynamic_slippage = self.base_slippage
                trading_cost = position_size * ((self.trading_fee + dynamic_slippage) * 2)
            else:
                trading_cost = position_size * self.total_cost_rate

            direction = pred_direction[i]
            ret = actual_return[i]

            gross_return = ret if direction == 1 else -ret
            gross_profit = position_size * gross_return
            net_profit = gross_profit - trading_cost

            prev_capital = capital
            capital = max(0, capital + net_profit)

            equity_curve[i] = capital
            daily_returns[i] = (capital - prev_capital) / prev_capital if prev_capital > 0 else 0

            if use_rl and i > 0:
                reward = net_profit / position_size if position_size > 0 else 0
                next_recent_returns = actual_return[max(0, i-9):i+1]
                next_capital_ratio = capital / self.initial_capital
                next_state = rl_agent.get_state(next_recent_returns, consecutive_wins, consecutive_losses, next_capital_ratio)
                rl_agent.update_q_value(state, position_fraction, reward, next_state)

            recent_profits.append(net_profit)

            if net_profit > 0:
                consecutive_wins += 1
                consecutive_losses = 0
            else:
                consecutive_losses += 1
                consecutive_wins = 0

            trades.append({
                'date': df.iloc[i]['date'],
                'pred_direction': direction,
                'actual_return': ret,
                'position_fraction': position_fraction,
                'position_size': position_size,
                'gross_profit': gross_profit,
                'trading_cost': trading_cost,
                'net_profit': net_profit,
                'capital': capital,
                'correct': bool(df.iloc[i]['correct']),
                'confidence': float(df.iloc[i]['confidence']),
                'max_proba': float(df.iloc[i]['max_proba'])
            })

            if capital <= 0:
                equity_curve[i+1:] = 0
                break

        return equity_curve, trades, daily_returns
    
    def calculate_statistical_significance(self, trades, daily_returns):
        if len(trades) < self.min_trades:
            return {
                'sufficient_trades': False,
                'num_trades': len(trades),
                't_statistic': np.nan,
                'p_value': np.nan,
                'is_significant': False,
                'is_very_significant': False,
                'confidence_95_lower': np.nan,
                'confidence_95_upper': np.nan,
                'warning': f'Insufficient trades: {len(trades)} < {self.min_trades}'
            }
        
        returns_array = np.array(daily_returns)
        returns_array = returns_array[returns_array != 0]
        
        if len(returns_array) < 2:
            return {
                'sufficient_trades': True,
                'num_trades': len(trades),
                't_statistic': np.nan,
                'p_value': np.nan,
                'is_significant': False,
                'is_very_significant': False,
                'confidence_95_lower': np.nan,
                'confidence_95_upper': np.nan,
                'warning': 'Insufficient return data'
            }
        
        t_stat, p_value = stats.ttest_1samp(returns_array, 0)
        mean_return = np.mean(returns_array)
        std_error = stats.sem(returns_array)
        confidence_interval = stats.t.interval(
            0.95, len(returns_array)-1, loc=mean_return, scale=std_error
        )
        
        is_very_significant = abs(t_stat) > 3.0 and p_value < 0.05
        is_significant = abs(t_stat) > 2.0 and p_value < 0.05
        
        return {
            'sufficient_trades': True,
            'num_trades': len(trades),
            't_statistic': float(t_stat),
            'p_value': float(p_value),
            'is_significant': is_significant,
            'is_very_significant': is_very_significant,
            'confidence_95_lower': float(confidence_interval[0]),
            'confidence_95_upper': float(confidence_interval[1]),
            'warning': None
        }

    def calculate_performance_metrics(self, equity_curve, trades, daily_returns):
        if len(trades) == 0:
            return self._empty_metrics()
        
        final_capital = equity_curve[-1] if len(equity_curve) > 0 else self.initial_capital
        total_return = final_capital - self.initial_capital
        total_return_pct = (total_return / self.initial_capital) * 100
        
        profits = np.array([t['net_profit'] for t in trades])
        winning_trades = profits[profits > 0]
        losing_trades = profits[profits < 0]
        
        num_trades = len(trades)
        num_wins = len(winning_trades)
        num_losses = len(losing_trades)
        win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0
        
        avg_profit_per_trade = np.mean(profits)
        total_wins = np.sum(winning_trades) if num_wins > 0 else 0
        total_losses = np.abs(np.sum(losing_trades)) if num_losses > 0 else 0
        profit_factor = (total_wins / total_losses) if total_losses > 0 else 0
        
        avg_win = np.mean(winning_trades) if num_wins > 0 else 0
        avg_loss = np.abs(np.mean(losing_trades)) if num_losses > 0 else 0
        
        if avg_loss > 0:
            expectancy = (win_rate/100 * avg_win) - ((100-win_rate)/100 * avg_loss)
        else:
            expectancy = avg_profit_per_trade
        
        equity_array = np.array(equity_curve)
        running_max = np.maximum.accumulate(equity_array)
        drawdown_array = equity_array - running_max
        drawdown_pct_array = np.divide(drawdown_array, running_max, 
                                       out=np.zeros_like(drawdown_array), 
                                       where=running_max!=0) * 100
        
        max_drawdown_abs = np.min(drawdown_array)
        max_drawdown_pct = np.abs(np.min(drawdown_pct_array))
        
        returns_array = np.array(daily_returns)
        returns_array = returns_array[returns_array != 0]
        
        if len(returns_array) > 1 and np.std(returns_array) > 0:
            mean_return = np.mean(returns_array)
            std_return = np.std(returns_array, ddof=1)
            annual_mean = mean_return * 252
            annual_std = std_return * np.sqrt(252)
            sharpe_ratio = (annual_mean - self.risk_free_rate) / annual_std if annual_std > 0 else 0
        else:
            sharpe_ratio = 0.0
        
        downside_returns = returns_array[returns_array < 0]
        if len(downside_returns) > 1:
            downside_std = np.std(downside_returns, ddof=1) * np.sqrt(252)
            sortino_ratio = ((np.mean(returns_array) * 252 - self.risk_free_rate) / downside_std) if downside_std > 0 else 0
        else:
            sortino_ratio = 0.0
        
        num_days = len(equity_curve)
        years = num_days / 252
        
        if years > 0 and final_capital > 0 and self.initial_capital > 0:
            annual_return_pct = (np.power(final_capital / self.initial_capital, 1/years) - 1) * 100
        else:
            annual_return_pct = 0.0
        
        calmar_ratio = (annual_return_pct / max_drawdown_pct) if max_drawdown_pct > 0 else 0
        recovery_factor = (total_return / abs(max_drawdown_abs)) if max_drawdown_abs < 0 else 0
        
        consecutive_wins = 0
        consecutive_losses = 0
        max_consecutive_wins = 0
        max_consecutive_losses = 0
        
        for profit in profits:
            if profit > 0:
                consecutive_wins += 1
                consecutive_losses = 0
                max_consecutive_wins = max(max_consecutive_wins, consecutive_wins)
            else:
                consecutive_losses += 1
                consecutive_wins = 0
                max_consecutive_losses = max(max_consecutive_losses, consecutive_losses)
        
        significance = self.calculate_statistical_significance(trades, daily_returns)
        
        return {
            'initial_capital': self.initial_capital,
            'final_capital': final_capital,
            'total_return': total_return,
            'total_return_pct': total_return_pct,
            'annual_return_pct': annual_return_pct,
            'num_trades': num_trades,
            'num_wins': num_wins,
            'num_losses': num_losses,
            'win_rate': win_rate,
            'avg_profit_per_trade': avg_profit_per_trade,
            'avg_win': avg_win,
            'avg_loss': avg_loss,
            'expectancy': expectancy,
            'profit_factor': profit_factor,
            'max_drawdown': max_drawdown_abs,
            'max_drawdown_pct': max_drawdown_pct,
            'sharpe_ratio': sharpe_ratio,
            'sortino_ratio': sortino_ratio,
            'calmar_ratio': calmar_ratio,
            'recovery_factor': recovery_factor,
            'max_consecutive_wins': max_consecutive_wins,
            'max_consecutive_losses': max_consecutive_losses,
            'num_days': num_days,
            'years': years,
            'sufficient_trades': significance['sufficient_trades'],
            't_statistic': significance['t_statistic'],
            'p_value': significance['p_value'],
            'is_significant': significance['is_significant'],
            'is_very_significant': significance.get('is_very_significant', False),
            'confidence_95_lower': significance['confidence_95_lower'],
            'confidence_95_upper': significance['confidence_95_upper'],
            'stat_warning': significance['warning']
        }

    def _empty_metrics(self):
        return {
            'initial_capital': self.initial_capital,
            'final_capital': self.initial_capital,
            'total_return': 0.0,
            'total_return_pct': 0.0,
            'annual_return_pct': 0.0,
            'num_trades': 0,
            'num_wins': 0,
            'num_losses': 0,
            'win_rate': 0.0,
            'avg_profit_per_trade': 0.0,
            'avg_win': 0.0,
            'avg_loss': 0.0,
            'expectancy': 0.0,
            'profit_factor': 0.0,
            'max_drawdown': 0.0,
            'max_drawdown_pct': 0.0,
            'sharpe_ratio': 0.0,
            'sortino_ratio': 0.0,
            'calmar_ratio': 0.0,
            'recovery_factor': 0.0,
            'max_consecutive_wins': 0,
            'max_consecutive_losses': 0,
            'num_days': 0,
            'years': 0.0,
            'sufficient_trades': False,
            't_statistic': np.nan,
            'p_value': np.nan,
            'is_significant': False,
            'is_very_significant': False,
            'confidence_95_lower': np.nan,
            'confidence_95_upper': np.nan,
            'stat_warning': 'No trades'
        }

def run_comprehensive_backtest(data_dir, output_dir, 
                               confidence_thresholds=None, strategy_types=None):
    os.makedirs(output_dir, exist_ok=True)
    
    if not os.path.exists(data_dir):
        print(f"ERROR: Directory not found: {data_dir}")
        return None
    
    csv_files = [f for f in os.listdir(data_dir) if f.endswith('_all_folds.csv')]
    if len(csv_files) == 0:
        print(f"ERROR: No CSV files found in {data_dir}")
        return None
    
    if confidence_thresholds is None:
        confidence_thresholds = [0.0, 0.05, 0.1, 0.15, 0.2]
    
    if strategy_types is None:
        strategy_types = [
            'kelly', 'aggressive', 'neutral', 'conservative',
            'ultra_aggressive', 'fixed_50', 'fixed_01',
            'volatility_scaled', 'inverse_volatility',
            'trend_following', 'momentum', 'reinforcement_learning',
            'regime_adaptive_balanced',   
            'regime_adaptive_defensive', 
            'regime_adaptive_aggressive' 
        ]
    
    print(f"Data directory: {data_dir}")
    print(f"Output directory: {output_dir}")
    print(f"Processing {len(csv_files)} models...")
    print(f"Confidence thresholds: {confidence_thresholds}")
    print(f"Strategy types: {strategy_types}")
    
    all_results = []
    failed_models = []
    
    for csv_file in sorted(csv_files):
        model_name = csv_file.replace('_all_folds.csv', '')
        
        try:
            file_path = os.path.join(data_dir, csv_file)
            df = pd.read_csv(file_path)
            
            if not validate_dataframe(df, model_name):
                failed_models.append(model_name)
                continue
            
            df['date'] = pd.to_datetime(df['date'])
            df = df.sort_values('date').reset_index(drop=True)
            
            base_accuracy = df['correct'].mean() * 100
            date_start = df['date'].min()
            date_end = df['date'].max()
            total_predictions = len(df)
            
            for strategy_type in strategy_types:
                for threshold in confidence_thresholds:
                    backtester = EnhancedCryptoBacktester(
                        initial_capital=10000000,  
                        trading_fee=0.0005,         
                        slippage=0.0015,          
                        min_order_size=5000,        
                        risk_free_rate=0.0,
                        min_trades_threshold=30
                    )
                    
                    equity_curve, trades, daily_returns = backtester.backtest_strategy(
                        df, 
                        confidence_threshold=threshold,
                        strategy_type=strategy_type,
                        kelly_lookback=50
                    )
                    
                    metrics = backtester.calculate_performance_metrics(
                        equity_curve, trades, daily_returns
                    )
                    
                    result = {
                        'model': model_name,
                        'strategy_type': strategy_type,
                        'confidence_threshold': threshold,
                        'total_predictions': total_predictions,
                        'base_accuracy': base_accuracy,
                        'date_start': date_start,
                        'date_end': date_end,
                        **metrics
                    }
                    all_results.append(result)
        
        except Exception as e:
            print(f"{model_name} failed: {e}")
            failed_models.append(model_name)
            continue
    
    if failed_models:
        print(f"Failed models: {', '.join(failed_models)}")
    
    if not all_results:
        print("ERROR: No successful backtests")
        return None
    
    results_df = pd.DataFrame(all_results)
    results_file = os.path.join(output_dir, 'backtest_results.csv')
    results_df.to_csv(results_file, index=False)
    print(f"Results saved: {results_file}")
    
    return results_df, output_dir

def print_summary_dashboard(results_df, output_dir):
    for strategy_type in results_df['strategy_type'].unique():
        print(f"{'='*100}")
        print(f"STRATEGY: {strategy_type.upper()}")
        print(f"{'='*100}")
        
        analysis_data = results_df[
            (results_df['confidence_threshold'] == 0.1) & 
            (results_df['strategy_type'] == strategy_type)
        ].copy()
        
        significant_models = analysis_data[
            (analysis_data['is_significant'] == True) & 
            (analysis_data['sufficient_trades'] == True)
        ].copy()
        
        print(f"Total Models: {len(analysis_data)} | Significant: {len(significant_models)}")
        
        excellent = significant_models[significant_models['total_return'] > 0].copy()
        excellent['grade'] = excellent.apply(lambda x: 
            'A+' if (x['annual_return_pct'] > 20 and x['sharpe_ratio'] >= 3.0) else
            'A'  if (x['annual_return_pct'] > 15 and x['sharpe_ratio'] >= 2.0) else
            'B+' if (x['annual_return_pct'] > 10 and x['sharpe_ratio'] >= 1.5) else
            'B'  if (x['annual_return_pct'] > 5 and x['sharpe_ratio'] >= 1.0) else 'C', axis=1)

        
        top_10 = excellent.nlargest(10, 'total_return')
        
        print(f"{'Rank':<6} {'Model':<22} {'Grade':<7} {'Profit(원)':<13} {'Return%':<9} {'Annual%':<9} {'Sharpe':<8} {'MaxDD%':<8} {'WinRate':<9} {'PF':<6} {'Exp':<8} {'Trades':<8} {'t-stat':<8} {'Acc%':<7}")
        print(f"{'-'*140}")
        
        for i, (idx, row) in enumerate(top_10.iterrows(), 1):
            print(f"{i:<6} {row['model']:<22} {row['grade']:<7} "
                  f"{row['total_return']:>10,.0f}  "
                  f"{row['total_return_pct']:>7.1f}%  "
                  f"{row['annual_return_pct']:>7.1f}%  "
                  f"{row['sharpe_ratio']:>6.2f}  "
                  f"{row['max_drawdown_pct']:>6.1f}%  "
                  f"{row['win_rate']:>7.1f}%  "
                  f"{row['profit_factor']:>4.2f}  "
                  f"{row['expectancy']:>6.2f}  "
                  f"{int(row['num_trades']):>6}  "
                  f"{row['t_statistic']:>7.2f}  "
                  f"{row['base_accuracy']:>5.1f}%")
        
        print(f"{'='*100}")
    
    return results_df

def print_fold_summary(data_dir, model_name, strategy_type, output_dir):
    file_path = os.path.join(data_dir, f"{model_name}_all_folds.csv")
    if not os.path.exists(file_path):
        return None
    
    df = pd.read_csv(file_path)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date').reset_index(drop=True)
    
    if 'fold' not in df.columns:
        return None
    
    print(f"{'-'*80}")
    print(f"FOLD ANALYSIS: {model_name} | STRATEGY: {strategy_type.upper()}")
    print(f"{'-'*80}")
    
    fold_results = []
    
    for fold in sorted(df['fold'].unique()):
        fold_data = df[df['fold'] == fold].copy()
        
        backtester = EnhancedCryptoBacktester(
            initial_capital=10000000,  
            trading_fee=0.0005,         
            slippage=0.0015,          
            min_order_size=5000,        
            risk_free_rate=0.0,
            min_trades_threshold=30
        )

        equity_curve, trades, daily_returns = backtester.backtest_strategy(
            fold_data, confidence_threshold=0.1, strategy_type=strategy_type
        )
        
        metrics = backtester.calculate_performance_metrics(equity_curve, trades, daily_returns)
        
        status = 'VERY_SIG' if metrics['is_very_significant'] else 'SIG' if metrics['is_significant'] else 'NOT_SIG'
        
        print(f"Fold {fold} | {fold_data['date'].min().date()} to {fold_data['date'].max().date()}")
        print(f"  {len(fold_data)} days | {int(metrics['num_trades'])} trades | "
              f"₩{metrics['total_return']:>8,.0f} ({metrics['total_return_pct']:>5.1f}%) | "
              f"Sharpe {metrics['sharpe_ratio']:>5.2f} | Win {metrics['win_rate']:>5.1f}% | "
              f"{status} (t={metrics['t_statistic']:.2f})")
        
        fold_result = {
            'fold': fold,
            'date_start': fold_data['date'].min(),
            'date_end': fold_data['date'].max(),
            'num_days': len(fold_data),
            'accuracy': fold_data['correct'].mean() * 100,
            **metrics
        }
        fold_results.append(fold_result)
    
    if len(fold_results) > 1:
        returns = np.array([m['annual_return_pct'] for m in fold_results])
        mean_return = np.mean(returns)
        std_return = np.std(returns)
        consistency = (1 / (1 + abs(std_return / mean_return) if mean_return != 0 else 1)) * 100
        
        print(f"{'-'*80}")
        print(f"Walk-Forward Efficiency: {consistency:.1f}% | Avg Annual Return: {mean_return:.1f}% (±{std_return:.1f}%)")
        print(f"{'-'*80}")
    
    return fold_results

# ==================== 실행 ====================
result = run_comprehensive_backtest(data_dir, output_dir)
if result is not None:
    results_df, used_output_dir = result
    print(f"\nAll results saved to: {used_output_dir}")
    print_summary_dashboard(results_df, used_output_dir)
    
    # 새로운 3개 전략 추가됨
    for strategy_type in ['kelly', 'aggressive', 'neutral', 'conservative', 'ultra_aggressive', 
                          'fixed_50', 'fixed_01', 'volatility_scaled', 'inverse_volatility', 
                          'trend_following', 'momentum', 'reinforcement_learning',
                          'regime_adaptive_balanced', 'regime_adaptive_defensive', 'regime_adaptive_aggressive']:
        top_models = results_df[
            (results_df['confidence_threshold'] == 0.1) & 
            (results_df['strategy_type'] == strategy_type)
        ].nlargest(3, 'total_return')
        
        print(f"{'='*100}")
        print(f"TOP 3 MODELS FOLD-BY-FOLD ANALYSIS ({strategy_type.upper()})")
        print(f"{'='*100}")
        
        for idx, row in top_models.iterrows():
            print_fold_summary(data_dir, row['model'], strategy_type, used_output_dir)


Data directory: ../model_results/2025-10-24/predictions/direction_walk_forward
Output directory: ../model_results/2025-10-24/backtest_results
Processing 26 models...
Confidence thresholds: [0.0, 0.05, 0.1, 0.15, 0.2]
Strategy types: ['kelly', 'aggressive', 'neutral', 'conservative', 'ultra_aggressive', 'fixed_50', 'fixed_01', 'volatility_scaled', 'inverse_volatility', 'trend_following', 'momentum', 'reinforcement_learning', 'regime_adaptive_balanced', 'regime_adaptive_defensive', 'regime_adaptive_aggressive']
Results saved: ../model_results/2025-10-24/backtest_results/backtest_results.csv

All results saved to: ../model_results/2025-10-24/backtest_results
STRATEGY: KELLY
Total Models: 26 | Significant: 14
Rank   Model                  Grade   Profit(원)     Return%   Annual%   Sharpe   MaxDD%   WinRate   PF     Exp      Trades   t-stat   Acc%   
--------------------------------------------------------------------------------------------------------------------------------------------
1 

Fold 7 | 2025-01-31 to 2025-10-19
  262 days | 154 trades | ₩3,399,446 ( 34.0%) | Sharpe  4.70 | Win  57.8% | VERY_SIG (t=3.68)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 54.6% | Avg Annual Return: 13.0% (±10.8%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: LSTM | STRATEGY: KELLY
--------------------------------------------------------------------------------
Fold 1 | 2022-10-20 to 2022-12-18
  60 days | 43 trades | ₩ 559,494 (  5.6%) | Sharpe  3.74 | Win  53.5% | NOT_SIG (t=1.54)
Fold 2 | 2023-02-17 to 2023-04-17
  60 days | 9 trades | ₩ 108,511 (  1.1%) | Sharpe  4.97 | Win  55.6% | NOT_SIG (t=nan)
Fold 3 | 2023-06-17 to 2023-08-15
  60 days | 27 trades | ₩ -39,151 ( -0.4%) | Sharpe -1.47 | Win  25.9% | NOT_SIG (t=nan)
Fold 4 | 2023-10-15 to 2023-12-13
  60 days | 19 trades | ₩ 108,354 (  1.1

Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 52 trades | ₩ 311,687 (  3.1%) | Sharpe  3.72 | Win  53.8% | NOT_SIG (t=1.69)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 48 trades | ₩ 130,975 (  1.3%) | Sharpe  3.07 | Win  54.2% | NOT_SIG (t=1.34)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 55 trades | ₩ 179,249 (  1.8%) | Sharpe  2.09 | Win  47.3% | NOT_SIG (t=0.97)
Fold 5 | 2024-01-13 to 2024-04-11
  90 days | 71 trades | ₩ 605,486 (  6.1%) | Sharpe  4.24 | Win  57.7% | SIG (t=2.25)
Fold 6 | 2024-05-12 to 2024-08-09
  90 days | 74 trades | ₩ 609,564 (  6.1%) | Sharpe  2.86 | Win  58.1% | NOT_SIG (t=1.55)
Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 211 trades | ₩2,473,176 ( 24.7%) | Sharpe  3.83 | Win  55.0% | VERY_SIG (t=3.51)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 62.2% | Avg Annual Return: 11.3% (±6.9%)
--------------------------------------------------------------------------------
----------------------------

Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 241 trades | ₩22,546,590 (225.5%) | Sharpe  2.77 | Win  52.7% | SIG (t=2.71)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 83.8% | Avg Annual Return: 144.8% (±28.0%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: Bagging | STRATEGY: ULTRA_AGGRESSIVE
--------------------------------------------------------------------------------
Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 47 trades | ₩4,921,402 ( 49.2%) | Sharpe  5.22 | Win  68.1% | SIG (t=2.25)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 64 trades | ₩1,971,088 ( 19.7%) | Sharpe  2.27 | Win  53.1% | NOT_SIG (t=1.14)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 70 trades | ₩2,847,408 ( 28.5%) | Sharpe  3.92 | Win  61.4% | SIG (t=2.07)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 73 trades | ₩2,959,

Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 241 trades | ₩ 162,832 (  1.6%) | Sharpe  2.77 | Win  52.7% | SIG (t=2.71)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 87.4% | Avg Annual Return: 1.2% (±0.2%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: Bagging | STRATEGY: FIXED_01
--------------------------------------------------------------------------------
Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 47 trades | ₩  52,372 (  0.5%) | Sharpe  5.22 | Win  68.1% | SIG (t=2.25)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 64 trades | ₩  24,302 (  0.2%) | Sharpe  2.27 | Win  53.1% | NOT_SIG (t=1.14)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 70 trades | ₩  32,376 (  0.3%) | Sharpe  3.92 | Win  61.4% | SIG (t=2.07)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 73 trades | ₩  34,039 (  0.3%) | Sh

Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 65 trades | ₩1,312,571 ( 13.1%) | Sharpe  2.33 | Win  61.5% | NOT_SIG (t=1.18)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 67 trades | ₩ 192,444 (  1.9%) | Sharpe  0.61 | Win  50.7% | NOT_SIG (t=0.32)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 1 trades | ₩-10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 1 trades | ₩-10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 5 | 2024-01-13 to 2024-04-11
  90 days | 1 trades | ₩-10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 6 | 2024-05-12 to 2024-08-09
  90 days | 1 trades | ₩-10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 1 trades | ₩-10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 31.9% | Avg An

Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 256 trades | ₩2,753,155 ( 27.5%) | Sharpe  2.27 | Win  52.7% | SIG (t=2.28)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 70.1% | Avg Annual Return: 17.2% (±7.4%)
--------------------------------------------------------------------------------
TOP 3 MODELS FOLD-BY-FOLD ANALYSIS (REINFORCEMENT_LEARNING)
--------------------------------------------------------------------------------
FOLD ANALYSIS: LogisticRegression | STRATEGY: REINFORCEMENT_LEARNING
--------------------------------------------------------------------------------
Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 39 trades | ₩ 402,286 (  4.0%) | Sharpe  4.04 | Win  48.7% | NOT_SIG (t=1.59)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 52 trades | ₩1,039,495 ( 10.4%) | Sharpe  4.45 | Win  53.8% | SIG (t=2.02)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 48 trades | ₩ 158,179 (  1.6%) | Sharpe  1.94 | Win  54.2% | NOT_SI

Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 48 trades | ₩ 160,025 (  1.6%) | Sharpe  4.41 | Win  68.8% | NOT_SIG (t=1.93)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 66 trades | ₩ 881,350 (  8.8%) | Sharpe  3.44 | Win  56.1% | NOT_SIG (t=1.76)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 68 trades | ₩ 345,814 (  3.5%) | Sharpe  4.05 | Win  54.4% | SIG (t=2.10)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 70 trades | ₩ 746,552 (  7.5%) | Sharpe  2.93 | Win  48.6% | NOT_SIG (t=1.54)
Fold 5 | 2024-01-13 to 2024-04-11
  90 days | 83 trades | ₩1,555,419 ( 15.6%) | Sharpe  3.23 | Win  54.2% | NOT_SIG (t=1.85)
Fold 6 | 2024-05-12 to 2024-08-09
  90 days | 82 trades | ₩ 300,906 (  3.0%) | Sharpe  2.85 | Win  57.3% | NOT_SIG (t=1.62)
Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 256 trades | ₩2,412,623 ( 24.1%) | Sharpe  3.09 | Win  52.7% | VERY_SIG (t=3.11)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 58.9% | Avg Annual Return

In [6]:
import pandas as pd
import numpy as np
import os
from scipy import stats
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

timestamp = datetime.now().strftime("%Y-%m-%d")
RESULT_DIR = os.path.join("../model_results", timestamp)
data_dir = os.path.join(RESULT_DIR, "predictions", "direction_walk_forward")
output_dir = os.path.join(RESULT_DIR, "backtest_results")

def validate_dataframe(df, model_name):
    required_cols = ['date', 'pred_direction', 'actual_return', 'correct', 'confidence', 'max_proba']
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        print(f"{model_name}: Missing {missing}")
        return False
    if len(df) < 10 or df['actual_return'].isna().all() or df['pred_direction'].isna().all():
        print(f"{model_name}: Insufficient data")
        return False
    return True

class AdaptivePositionSizer:
    def __init__(self):
        self.q_table = {}
        self.actions = [0.01, 0.05, 0.1, 0.15, 0.2, 0.3, 0.5]
        self.lr = 0.15
        self.epsilon = 0.1
        self.gamma = 0.95
        
    def get_state(self, volatility, trend, streak, drawdown):
        vol_level = 'high' if volatility > 0.025 else 'med' if volatility > 0.015 else 'low'
        trend_level = 'bull' if trend > 0.01 else 'bear' if trend < -0.01 else 'flat'
        streak_level = 'hot' if streak >= 3 else 'cold' if streak <= -3 else 'neutral'
        dd_level = 'severe' if drawdown < -0.15 else 'moderate' if drawdown < -0.08 else 'safe'
        return (vol_level, trend_level, streak_level, dd_level)
    
    def choose_action(self, state, confidence):
        if state not in self.q_table:
            self.q_table[state] = {a: 0.0 for a in self.actions}
        
        if np.random.random() < self.epsilon:
            action = np.random.choice(self.actions)
        else:
            action = max(self.q_table[state], key=self.q_table[state].get)
        
        conf_mult = 1.0 + (confidence - 0.5) * 0.4
        return np.clip(action * conf_mult, 0.01, 0.7)
    
    def update(self, state, action, reward, next_state):
        if state not in self.q_table:
            self.q_table[state] = {a: 0.0 for a in self.actions}
        if next_state not in self.q_table:
            self.q_table[next_state] = {a: 0.0 for a in self.actions}
        
        closest_action = min(self.actions, key=lambda x: abs(x - action))
        current_q = self.q_table[state][closest_action]
        max_next_q = max(self.q_table[next_state].values())
        self.q_table[state][closest_action] = current_q + self.lr * (reward + self.gamma * max_next_q - current_q)

class EnhancedBacktester:
    def __init__(self, initial_capital=10000000, fee=0.0005, slippage=0.0015, min_order=5000):
        self.initial_capital = float(initial_capital)
        self.fee = float(fee)
        self.slippage = float(slippage)
        self.min_order = min_order
        self.min_trades = 30

    def dynamic_strategy(self, df, confidence_threshold=0.1):
        df = df.copy().sort_values('date').reset_index(drop=True)
        n = len(df)
        
        capital = self.initial_capital
        equity_curve = np.zeros(n)
        trades = []
        
        streak = 0
        peak = capital
        recent_returns = []
        
        for i in range(n):
            if capital < self.min_order or df.iloc[i]['confidence'] < confidence_threshold:
                equity_curve[i] = capital
                continue
            
            recent_returns.append(df.iloc[i-1]['actual_return'] if i > 0 else 0)
            recent_returns = recent_returns[-30:]
            
            volatility = np.std(recent_returns[-20:]) if len(recent_returns) >= 20 else 0.02
            trend = np.mean(recent_returns[-10:]) if len(recent_returns) >= 10 else 0
            drawdown = (capital - peak) / peak if peak > 0 else 0
            confidence = df.iloc[i]['confidence']
            
            base_fraction = 0.15
            
            if volatility < 0.015:
                vol_mult = 1.5
            elif volatility < 0.025:
                vol_mult = 1.0
            else:
                vol_mult = 0.5
            
            if trend > 0.01:
                trend_mult = 1.3
            elif trend < -0.01:
                trend_mult = 0.6
            else:
                trend_mult = 1.0
            
            if streak >= 3:
                streak_mult = 1.2
            elif streak <= -3:
                streak_mult = 0.5
            else:
                streak_mult = 1.0
            
            if drawdown < -0.15:
                dd_mult = 0.3
            elif drawdown < -0.08:
                dd_mult = 0.6
            else:
                dd_mult = 1.0
            
            conf_mult = 0.7 + (confidence * 1.5)
            
            position_fraction = base_fraction * vol_mult * trend_mult * streak_mult * dd_mult * conf_mult
            position_fraction = np.clip(position_fraction, 0.01, 0.6)
            
            position_size = capital * position_fraction
            
            if position_size < self.min_order:
                equity_curve[i] = capital
                continue
            
            cost_rate = (self.fee + self.slippage * (1.5 if volatility > 0.025 else 1.0)) * 2
            trading_cost = position_size * cost_rate
            
            direction = int(df.iloc[i]['pred_direction'])
            ret = float(df.iloc[i]['actual_return'])
            
            gross_profit = position_size * (ret if direction == 1 else -ret)
            net_profit = gross_profit - trading_cost
            
            prev_capital = capital
            capital = max(0, capital + net_profit)
            peak = max(peak, capital)
            
            equity_curve[i] = capital
            
            if net_profit > 0:
                streak = max(0, streak) + 1
            else:
                streak = min(0, streak) - 1
            
            trades.append({
                'date': df.iloc[i]['date'],
                'position_fraction': position_fraction,
                'net_profit': net_profit,
                'capital': capital
            })
            
            if capital <= 0:
                equity_curve[i+1:] = 0
                break
        
        daily_returns = np.diff(equity_curve) / equity_curve[:-1]
        daily_returns = np.concatenate([[0], daily_returns])
        
        return equity_curve, trades, daily_returns

    def adaptive_rl_strategy(self, df, confidence_threshold=0.1):
        df = df.copy().sort_values('date').reset_index(drop=True)
        n = len(df)
        
        rl_agent = AdaptivePositionSizer()
        capital = self.initial_capital
        equity_curve = np.zeros(n)
        trades = []
        
        streak = 0
        peak = capital
        recent_returns = []
        
        for i in range(n):
            if capital < self.min_order or df.iloc[i]['confidence'] < confidence_threshold:
                equity_curve[i] = capital
                continue
            
            recent_returns.append(df.iloc[i-1]['actual_return'] if i > 0 else 0)
            recent_returns = recent_returns[-30:]
            
            volatility = np.std(recent_returns[-20:]) if len(recent_returns) >= 20 else 0.02
            trend = np.mean(recent_returns[-10:]) if len(recent_returns) >= 10 else 0
            drawdown = (capital - peak) / peak if peak > 0 else 0
            confidence = df.iloc[i]['confidence']
            
            state = rl_agent.get_state(volatility, trend, streak, drawdown)
            position_fraction = rl_agent.choose_action(state, confidence)
            position_size = capital * position_fraction
            
            if position_size < self.min_order:
                equity_curve[i] = capital
                continue
            
            cost_rate = (self.fee + self.slippage * (1.5 if volatility > 0.025 else 1.0)) * 2
            trading_cost = position_size * cost_rate
            
            direction = int(df.iloc[i]['pred_direction'])
            ret = float(df.iloc[i]['actual_return'])
            
            gross_profit = position_size * (ret if direction == 1 else -ret)
            net_profit = gross_profit - trading_cost
            
            prev_capital = capital
            capital = max(0, capital + net_profit)
            peak = max(peak, capital)
            
            equity_curve[i] = capital
            
            reward = net_profit / position_size if position_size > 0 else 0
            
            next_volatility = np.std(recent_returns[-20:]) if len(recent_returns) >= 20 else 0.02
            next_trend = np.mean(recent_returns[-10:]) if len(recent_returns) >= 10 else 0
            next_drawdown = (capital - peak) / peak if peak > 0 else 0
            next_state = rl_agent.get_state(next_volatility, next_trend, streak, next_drawdown)
            
            rl_agent.update(state, position_fraction, reward, next_state)
            
            if net_profit > 0:
                streak = max(0, streak) + 1
            else:
                streak = min(0, streak) - 1
            
            trades.append({
                'date': df.iloc[i]['date'],
                'position_fraction': position_fraction,
                'net_profit': net_profit,
                'capital': capital
            })
            
            if capital <= 0:
                equity_curve[i+1:] = 0
                break
        
        daily_returns = np.diff(equity_curve) / equity_curve[:-1]
        daily_returns = np.concatenate([[0], daily_returns])
        
        return equity_curve, trades, daily_returns

    def calculate_metrics(self, equity_curve, trades, daily_returns):
        if len(trades) == 0:
            return self._empty_metrics()
        
        final_capital = equity_curve[-1]
        total_return = final_capital - self.initial_capital
        total_return_pct = (total_return / self.initial_capital) * 100
        
        profits = np.array([t['net_profit'] for t in trades])
        num_trades = len(trades)
        num_wins = np.sum(profits > 0)
        win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0
        
        running_max = np.maximum.accumulate(equity_curve)
        drawdown = (equity_curve - running_max) / running_max * 100
        max_dd = np.abs(np.min(drawdown))
        
        returns = daily_returns[daily_returns != 0]
        
        if len(returns) > 1 and np.std(returns) > 0:
            annual_return = np.mean(returns) * 252
            annual_std = np.std(returns, ddof=1) * np.sqrt(252)
            sharpe = annual_return / annual_std if annual_std > 0 else 0
            
            downside = returns[returns < 0]
            if len(downside) > 1:
                downside_std = np.std(downside, ddof=1) * np.sqrt(252)
                sortino = annual_return / downside_std if downside_std > 0 else 0
            else:
                sortino = 0
        else:
            sharpe = sortino = 0
        
        years = len(equity_curve) / 252
        annual_return_pct = (np.power(final_capital / self.initial_capital, 1/years) - 1) * 100 if years > 0 else 0
        
        if len(returns) >= self.min_trades:
            t_stat, p_value = stats.ttest_1samp(returns, 0)
            is_sig = abs(t_stat) > 2.0 and p_value < 0.05
            sufficient = True
        else:
            t_stat = p_value = np.nan
            is_sig = sufficient = False
        
        return {
            'final_capital': final_capital,
            'total_return': total_return,
            'total_return_pct': total_return_pct,
            'annual_return_pct': annual_return_pct,
            'num_trades': num_trades,
            'win_rate': win_rate,
            'max_dd': max_dd,
            'sharpe': sharpe,
            'sortino': sortino,
            'calmar': annual_return_pct / max_dd if max_dd > 0 else 0,
            't_stat': t_stat,
            'p_value': p_value,
            'is_sig': is_sig,
            'sufficient': sufficient
        }
    
    def _empty_metrics(self):
        return {
            'final_capital': self.initial_capital, 'total_return': 0, 'total_return_pct': 0,
            'annual_return_pct': 0, 'num_trades': 0, 'win_rate': 0, 'max_dd': 0,
            'sharpe': 0, 'sortino': 0, 'calmar': 0, 't_stat': np.nan, 'p_value': np.nan,
            'is_sig': False, 'sufficient': False
        }

def calculate_composite_score(row):
    if not row['sufficient'] or not row['is_sig']:
        return -999
    
    sharpe_norm = np.clip(row['sharpe'] / 5.0, 0, 1)
    return_norm = np.clip(row['annual_return_pct'] / 50, 0, 1)
    dd_norm = np.clip(1 - (row['max_dd'] / 50), 0, 1)
    t_norm = np.clip(abs(row['t_stat']) / 10, 0, 1)
    
    score = (sharpe_norm * 0.35 + return_norm * 0.30 + dd_norm * 0.20 + t_norm * 0.15) * 100
    return score

def run_backtest(data_dir, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    
    csv_files = [f for f in os.listdir(data_dir) if f.endswith('_all_folds.csv')]
    
    strategies = ['dynamic', 'adaptive_rl']
    thresholds = [0.0, 0.05, 0.1, 0.15]
    
    all_results = []
    
    for csv_file in sorted(csv_files):
        model_name = csv_file.replace('_all_folds.csv', '')
        
        try:
            df = pd.read_csv(os.path.join(data_dir, csv_file))
            
            if not validate_dataframe(df, model_name):
                continue
            
            df['date'] = pd.to_datetime(df['date'])
            
            for strategy in strategies:
                for threshold in thresholds:
                    backtester = EnhancedBacktester()
                    
                    if strategy == 'dynamic':
                        equity, trades, returns = backtester.dynamic_strategy(df, threshold)
                    else:
                        equity, trades, returns = backtester.adaptive_rl_strategy(df, threshold)
                    
                    metrics = backtester.calculate_metrics(equity, trades, returns)
                    
                    result = {
                        'model': model_name,
                        'strategy': strategy,
                        'threshold': threshold,
                        **metrics
                    }
                    all_results.append(result)
        
        except Exception as e:
            print(f"{model_name} failed: {e}")
            continue
    
    results_df = pd.DataFrame(all_results)
    results_df['score'] = results_df.apply(calculate_composite_score, axis=1)
    
    results_file = os.path.join(output_dir, 'backtest_results.csv')
    results_df.to_csv(results_file, index=False)
    
    return results_df

def print_top_models(results_df):
    print("\n" + "="*120)
    print("TOP MODELS BY STRATEGY")
    print("="*120)
    
    for strategy in results_df['strategy'].unique():
        strategy_data = results_df[
            (results_df['strategy'] == strategy) & 
            (results_df['threshold'] == 0.1) &
            (results_df['sufficient'] == True) &
            (results_df['is_sig'] == True)
        ].copy()
        
        if len(strategy_data) == 0:
            continue
        
        top1 = strategy_data.nlargest(1, 'score')
        
        print(f"\nSTRATEGY: {strategy.upper()}")
        print("-"*120)
        
        for _, row in top1.iterrows():
            print(f"Model: {row['model']}")
            print(f"  Profit: ₩{row['total_return']:,.0f} ({row['total_return_pct']:.1f}%) | "
                  f"Annual: {row['annual_return_pct']:.1f}% | "
                  f"Sharpe: {row['sharpe']:.2f} | "
                  f"Sortino: {row['sortino']:.2f}")
            print(f"  MaxDD: {row['max_dd']:.1f}% | "
                  f"Calmar: {row['calmar']:.2f} | "
                  f"WinRate: {row['win_rate']:.1f}% | "
                  f"Trades: {int(row['num_trades'])}")
            print(f"  t-stat: {row['t_stat']:.2f} | "
                  f"p-value: {row['p_value']:.4f} | "
                  f"Score: {row['score']:.1f}")

result_df = run_backtest(data_dir, output_dir)
print_top_models(result_df)


TOP MODELS BY STRATEGY

STRATEGY: DYNAMIC
------------------------------------------------------------------------------------------------------------------------
Model: DTW_LSTM
  Profit: ₩4,106,035 (41.1%) | Annual: 15.0% | Sharpe: 4.57 | Sortino: 12.12
  MaxDD: 1.2% | Calmar: 12.41 | WinRate: 56.1% | Trades: 314
  t-stat: 5.09 | p-value: 0.0000 | Score: 68.1

STRATEGY: ADAPTIVE_RL
------------------------------------------------------------------------------------------------------------------------
Model: DTW_LSTM
  Profit: ₩6,327,741 (63.3%) | Annual: 22.0% | Sharpe: 4.23 | Sortino: 9.04
  MaxDD: 2.4% | Calmar: 9.30 | WinRate: 56.1% | Trades: 314
  t-stat: 4.72 | p-value: 0.0000 | Score: 68.9


In [12]:
import pandas as pd
import numpy as np
import os
from scipy import stats
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

timestamp = datetime.now().strftime("%Y-%m-%d")
RESULT_DIR = os.path.join("../model_results", timestamp)
data_dir = os.path.join(RESULT_DIR, "predictions", "direction_walk_forward")
output_dir = os.path.join(RESULT_DIR, "backtest_results")

def validate_dataframe(df, model_name):
    required_cols = ['date', 'pred_direction', 'actual_return', 'correct', 'confidence', 'max_proba']
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        return False
    if len(df) < 10 or df['actual_return'].isna().all():
        return False
    return True

class ReinforcementLearningPositionSizer:
    def __init__(self, learning_rate=0.1, epsilon=0.2, gamma=0.9):
        self.lr = learning_rate
        self.epsilon = epsilon
        self.gamma = gamma
        self.q_table = {}
        self.actions = [0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]
        
    def get_state(self, recent_returns, consecutive_wins, consecutive_losses, capital_ratio):
        recent_trend = 'up' if np.mean(recent_returns[-5:]) > 0 else 'down'
        momentum = 'strong' if consecutive_wins >= 3 else 'weak' if consecutive_losses >= 3 else 'neutral'
        capital_state = 'high' if capital_ratio > 1.2 else 'low' if capital_ratio < 0.9 else 'normal'
        return (recent_trend, momentum, capital_state)
    
    def choose_action(self, state):
        if state not in self.q_table:
            self.q_table[state] = {a: 0.0 for a in self.actions}
        if np.random.random() < self.epsilon:
            return np.random.choice(self.actions)
        return max(self.q_table[state], key=self.q_table[state].get)
    
    def update_q_value(self, state, action, reward, next_state):
        if state not in self.q_table:
            self.q_table[state] = {a: 0.0 for a in self.actions}
        if next_state not in self.q_table:
            self.q_table[next_state] = {a: 0.0 for a in self.actions}
        
        current_q = self.q_table[state][action]
        max_next_q = max(self.q_table[next_state].values())
        new_q = current_q + self.lr * (reward + self.gamma * max_next_q - current_q)
        self.q_table[state][action] = new_q

class EnhancedCryptoBacktester:
    def __init__(self, initial_capital=10000000, trading_fee=0.0005, slippage=0.0015, 
                 risk_free_rate=0.0, min_trades_threshold=30, min_order_size=5000):
        self.initial_capital = float(initial_capital)
        self.trading_fee = float(trading_fee)
        self.base_slippage = float(slippage)
        self.risk_free_rate = float(risk_free_rate)
        self.min_trades = min_trades_threshold
        self.min_order = min_order_size
        self.total_cost_rate = (self.trading_fee + self.base_slippage) * 2

    def detect_regime_realtime(self, df, i, lookback=30):
        """실시간 Regime 감지 (NO CHEATING!)"""
        if i < lookback:
            return "sideways", 0.15, 0.5
        
        window = df.iloc[i-lookback:i]
        returns = window['actual_return'].values
        
        cum_return = np.sum(returns)
        volatility = np.std(returns)
        
        if cum_return > 0.20 and volatility > 0.03:
            return "bull_strong", 0.40, min(cum_return / 0.3, 1.0)
        elif cum_return > 0.10 and volatility < 0.02:
            return "bull_steady", 0.30, min(cum_return / 0.2, 1.0)
        elif cum_return < -0.15 and volatility > 0.03:
            return "bear_crash", 0.05, min(abs(cum_return) / 0.3, 1.0)
        elif cum_return < -0.08:
            return "bear_volatile", 0.08, min(abs(cum_return) / 0.15, 1.0)
        else:
            return "sideways", 0.15, 1.0 - min(abs(cum_return) / 0.1, 1.0)

    def calculate_kelly_fraction(self, profits):
        if len(profits) < 10:
            return 0.05
        profits = np.array(profits)
        wins = profits[profits > 0]
        losses = profits[profits < 0]
        if len(wins) == 0 or len(losses) == 0 or np.mean(losses) == 0:
            return 0.05
        win_rate = len(wins) / len(profits)
        avg_win = np.mean(wins)
        avg_loss = np.abs(np.mean(losses))
        win_loss_ratio = avg_win / avg_loss
        kelly = (win_rate * win_loss_ratio - (1 - win_rate)) / win_loss_ratio
        return np.clip(kelly * 0.5, 0.01, 0.2)

    def calculate_regime_adaptive_fraction(self, df, i, strategy_subtype='balanced'):
        """실시간 Regime 감지 버전 """
        regime, base_position, regime_conf = self.detect_regime_realtime(df, i, 30)
        confidence = float(df.iloc[i]['confidence']) if i < len(df) else 0.5
        
        if strategy_subtype == 'balanced':
            regime_fractions = {
                "bear_crash": 0.05, "bear_volatile": 0.08, "sideways": 0.15,
                "bull_steady": 0.30, "bull_strong": 0.40
            }
        elif strategy_subtype == 'defensive':
            regime_fractions = {
                "bear_crash": 0.03, "bear_volatile": 0.05, "sideways": 0.10,
                "bull_steady": 0.20, "bull_strong": 0.25
            }
        else:  # aggressive
            regime_fractions = {
                "bear_crash": 0.10, "bear_volatile": 0.15, "sideways": 0.25,
                "bull_steady": 0.50, "bull_strong": 0.70
            }
        
        base_fraction = regime_fractions.get(regime, 0.15)
        multiplier = 1.3 if confidence > 0.20 else 1.1 if confidence > 0.15 else 0.8
        
        if i >= 10:
            recent_returns = df.iloc[max(0, i-10):i]['actual_return'].values
            if np.mean(recent_returns) < -0.01:
                multiplier *= 0.8
        
        final_fraction = base_fraction * multiplier * regime_conf
        
        if regime in ["bear_crash", "bear_volatile"]:
            return np.clip(final_fraction, 0.01, 0.15)
        else:
            return np.clip(final_fraction, 0.01, 1.0)

    def calculate_position_fractions_vectorized(self, df, strategy_type, kelly_lookback=50, 
                                                vol_lookback=20, mom_lookback=10):
        n = len(df)
        fractions = np.zeros(n)

        if strategy_type == 'aggressive':
            fractions[:] = 0.3
        elif strategy_type == 'neutral':
            fractions = np.clip(df['max_proba'].values, 0, 1.0) * 0.15
        elif strategy_type == 'conservative':
            fractions[:] = 0.05
        elif strategy_type == 'ultra_aggressive':
            fractions[:] = 0.8
        elif strategy_type == 'fixed_50':
            fractions[:] = 0.5
        elif strategy_type == 'fixed_01':
            fractions[:] = 0.01
        elif strategy_type in ['volatility_scaled', 'inverse_volatility']:
            returns = df['actual_return'].values
            vol = pd.Series(returns).rolling(vol_lookback, min_periods=1).std().values
            vol = np.where(vol == 0, 0.01, vol)
            if strategy_type == 'volatility_scaled':
                fractions = np.clip(0.1 / (vol + 1e-6), 0.01, 0.3)
            else:
                fractions = np.clip(0.2 / (vol + 1e-6), 0.01, 0.3)
        elif strategy_type in ['trend_following', 'momentum']:
            returns = df['actual_return'].values
            rolling_sum = pd.Series(returns).rolling(mom_lookback, min_periods=1).sum().values
            if strategy_type == 'trend_following':
                fractions = np.where(rolling_sum > 0, 0.3, np.where(rolling_sum < 0, 0.05, 0.1))
            else:
                fractions = np.clip(0.1 + 0.2 * (rolling_sum / (mom_lookback * 0.01)), 0.01, 0.3)
        elif strategy_type == 'reinforcement_learning':
            fractions[:] = 0.1
        elif strategy_type == 'regime_adaptive_balanced':
            fractions = np.array([self.calculate_regime_adaptive_fraction(df, i, 'balanced') for i in range(n)])
        elif strategy_type == 'regime_adaptive_defensive':
            fractions = np.array([self.calculate_regime_adaptive_fraction(df, i, 'defensive') for i in range(n)])
        elif strategy_type == 'regime_adaptive_aggressive':
            fractions = np.array([self.calculate_regime_adaptive_fraction(df, i, 'aggressive') for i in range(n)])
        elif strategy_type == 'kelly':
            fractions[:] = 0.1
        else:
            fractions[:] = 0.1

        return np.clip(fractions, 0.01, 1.0)

    def backtest_strategy(self, df, confidence_threshold=0.0, strategy_type='kelly', 
                         kelly_lookback=50, vol_lookback=20, mom_lookback=10):
        df = df.copy().sort_values('date').reset_index(drop=True)
        n = len(df)

        valid_mask = (df['confidence'].values >= confidence_threshold)
        pred_direction = df['pred_direction'].values.astype(int)
        actual_return = df['actual_return'].values.astype(float)

        if strategy_type == 'reinforcement_learning':
            rl_agent = ReinforcementLearningPositionSizer(
                learning_rate=0.1, epsilon=0.15, gamma=0.9
            )
            use_rl = True
        else:
            position_fractions = self.calculate_position_fractions_vectorized(
                df, strategy_type, kelly_lookback, vol_lookback, mom_lookback
            )
            use_rl = False

        capital = self.initial_capital
        equity_curve = np.zeros(n)
        daily_returns = np.zeros(n)
        trades = []
        recent_profits = []
        consecutive_wins = 0
        consecutive_losses = 0

        for i in range(n):
            if capital < self.min_order:
                equity_curve[i] = capital
                continue
                
            if not valid_mask[i]:
                equity_curve[i] = capital
                continue

            if use_rl:
                recent_returns = actual_return[max(0, i-10):i] if i > 0 else [0]
                capital_ratio = capital / self.initial_capital
                state = rl_agent.get_state(recent_returns, consecutive_wins, consecutive_losses, capital_ratio)
                position_fraction = rl_agent.choose_action(state)
            elif strategy_type == 'kelly' and len(recent_profits) >= 20:
                position_fraction = self.calculate_kelly_fraction(recent_profits[-kelly_lookback:])
            else:
                position_fraction = position_fractions[i]

            position_size = capital * position_fraction
            
            if position_size < self.min_order:
                equity_curve[i] = capital
                continue

            if i >= 20:
                recent_returns_vol = actual_return[i-20:i]
                volatility = np.std(recent_returns_vol)
                if volatility > 0.03:
                    dynamic_slippage = self.base_slippage * 2
                elif volatility > 0.02:
                    dynamic_slippage = self.base_slippage * 1.5
                else:
                    dynamic_slippage = self.base_slippage
                trading_cost = position_size * ((self.trading_fee + dynamic_slippage) * 2)
            else:
                trading_cost = position_size * self.total_cost_rate

            direction = pred_direction[i]
            ret = actual_return[i]

            gross_return = ret if direction == 1 else -ret
            gross_profit = position_size * gross_return
            net_profit = gross_profit - trading_cost

            prev_capital = capital
            capital = max(0, capital + net_profit)

            equity_curve[i] = capital
            daily_returns[i] = (capital - prev_capital) / prev_capital if prev_capital > 0 else 0

            if use_rl and i > 0:
                reward = net_profit / position_size if position_size > 0 else 0
                next_recent_returns = actual_return[max(0, i-9):i+1]
                next_capital_ratio = capital / self.initial_capital
                next_state = rl_agent.get_state(next_recent_returns, consecutive_wins, consecutive_losses, next_capital_ratio)
                rl_agent.update_q_value(state, position_fraction, reward, next_state)

            recent_profits.append(net_profit)

            if net_profit > 0:
                consecutive_wins += 1
                consecutive_losses = 0
            else:
                consecutive_losses += 1
                consecutive_wins = 0

            trades.append({
                'date': df.iloc[i]['date'],
                'pred_direction': direction,
                'actual_return': ret,
                'position_fraction': position_fraction,
                'position_size': position_size,
                'gross_profit': gross_profit,
                'trading_cost': trading_cost,
                'net_profit': net_profit,
                'capital': capital,
                'correct': bool(df.iloc[i]['correct']),
                'confidence': float(df.iloc[i]['confidence']),
                'max_proba': float(df.iloc[i]['max_proba'])
            })

            if capital <= 0:
                equity_curve[i+1:] = 0
                break

        return equity_curve, trades, daily_returns
    
    def calculate_statistical_significance(self, trades, daily_returns):
        if len(trades) < self.min_trades:
            return {
                'sufficient_trades': False,
                'num_trades': len(trades),
                't_statistic': np.nan,
                'p_value': np.nan,
                'is_significant': False,
                'is_very_significant': False,
                'confidence_95_lower': np.nan,
                'confidence_95_upper': np.nan,
                'warning': f'Insufficient trades: {len(trades)} < {self.min_trades}'
            }
        
        returns_array = np.array(daily_returns)
        returns_array = returns_array[returns_array != 0]
        
        if len(returns_array) < 2:
            return {
                'sufficient_trades': True,
                'num_trades': len(trades),
                't_statistic': np.nan,
                'p_value': np.nan,
                'is_significant': False,
                'is_very_significant': False,
                'confidence_95_lower': np.nan,
                'confidence_95_upper': np.nan,
                'warning': 'Insufficient return data'
            }
        
        t_stat, p_value = stats.ttest_1samp(returns_array, 0)
        mean_return = np.mean(returns_array)
        std_error = stats.sem(returns_array)
        confidence_interval = stats.t.interval(
            0.95, len(returns_array)-1, loc=mean_return, scale=std_error
        )
        
        is_very_significant = abs(t_stat) > 3.0 and p_value < 0.05
        is_significant = abs(t_stat) > 2.0 and p_value < 0.05
        
        return {
            'sufficient_trades': True,
            'num_trades': len(trades),
            't_statistic': float(t_stat),
            'p_value': float(p_value),
            'is_significant': is_significant,
            'is_very_significant': is_very_significant,
            'confidence_95_lower': float(confidence_interval[0]),
            'confidence_95_upper': float(confidence_interval[1]),
            'warning': None
        }

    def calculate_performance_metrics(self, equity_curve, trades, daily_returns):
        if len(trades) == 0:
            return self._empty_metrics()
        
        final_capital = equity_curve[-1] if len(equity_curve) > 0 else self.initial_capital
        total_return = final_capital - self.initial_capital
        total_return_pct = (total_return / self.initial_capital) * 100
        
        profits = np.array([t['net_profit'] for t in trades])
        winning_trades = profits[profits > 0]
        losing_trades = profits[profits < 0]
        
        num_trades = len(trades)
        num_wins = len(winning_trades)
        num_losses = len(losing_trades)
        win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0
        
        avg_profit_per_trade = np.mean(profits)
        total_wins = np.sum(winning_trades) if num_wins > 0 else 0
        total_losses = np.abs(np.sum(losing_trades)) if num_losses > 0 else 0
        profit_factor = (total_wins / total_losses) if total_losses > 0 else 0
        
        avg_win = np.mean(winning_trades) if num_wins > 0 else 0
        avg_loss = np.abs(np.mean(losing_trades)) if num_losses > 0 else 0
        
        if avg_loss > 0:
            expectancy = (win_rate/100 * avg_win) - ((100-win_rate)/100 * avg_loss)
        else:
            expectancy = avg_profit_per_trade
        
        equity_array = np.array(equity_curve)
        running_max = np.maximum.accumulate(equity_array)
        drawdown_array = equity_array - running_max
        drawdown_pct_array = np.divide(drawdown_array, running_max, 
                                       out=np.zeros_like(drawdown_array), 
                                       where=running_max!=0) * 100
        
        max_drawdown_abs = np.min(drawdown_array)
        max_drawdown_pct = np.abs(np.min(drawdown_pct_array))
        
        returns_array = np.array(daily_returns)
        returns_array = returns_array[returns_array != 0]
        
        if len(returns_array) > 1 and np.std(returns_array) > 0:
            mean_return = np.mean(returns_array)
            std_return = np.std(returns_array, ddof=1)
            annual_mean = mean_return * 252
            annual_std = std_return * np.sqrt(252)
            sharpe_ratio = (annual_mean - self.risk_free_rate) / annual_std if annual_std > 0 else 0
        else:
            sharpe_ratio = 0.0
        
        downside_returns = returns_array[returns_array < 0]
        if len(downside_returns) > 1:
            downside_std = np.std(downside_returns, ddof=1) * np.sqrt(252)
            sortino_ratio = ((np.mean(returns_array) * 252 - self.risk_free_rate) / downside_std) if downside_std > 0 else 0
        else:
            sortino_ratio = 0.0
        
        num_days = len(equity_curve)
        years = num_days / 252
        
        if years > 0 and final_capital > 0 and self.initial_capital > 0:
            annual_return_pct = (np.power(final_capital / self.initial_capital, 1/years) - 1) * 100
        else:
            annual_return_pct = 0.0
        
        calmar_ratio = (annual_return_pct / max_drawdown_pct) if max_drawdown_pct > 0 else 0
        recovery_factor = (total_return / abs(max_drawdown_abs)) if max_drawdown_abs < 0 else 0
        
        consecutive_wins = 0
        consecutive_losses = 0
        max_consecutive_wins = 0
        max_consecutive_losses = 0
        
        for profit in profits:
            if profit > 0:
                consecutive_wins += 1
                consecutive_losses = 0
                max_consecutive_wins = max(max_consecutive_wins, consecutive_wins)
            else:
                consecutive_losses += 1
                consecutive_wins = 0
                max_consecutive_losses = max(max_consecutive_losses, consecutive_losses)
        
        significance = self.calculate_statistical_significance(trades, daily_returns)
        
        return {
            'initial_capital': self.initial_capital,
            'final_capital': final_capital,
            'total_return': total_return,
            'total_return_pct': total_return_pct,
            'annual_return_pct': annual_return_pct,
            'num_trades': num_trades,
            'num_wins': num_wins,
            'num_losses': num_losses,
            'win_rate': win_rate,
            'avg_profit_per_trade': avg_profit_per_trade,
            'avg_win': avg_win,
            'avg_loss': avg_loss,
            'expectancy': expectancy,
            'profit_factor': profit_factor,
            'max_drawdown': max_drawdown_abs,
            'max_drawdown_pct': max_drawdown_pct,
            'sharpe_ratio': sharpe_ratio,
            'sortino_ratio': sortino_ratio,
            'calmar_ratio': calmar_ratio,
            'recovery_factor': recovery_factor,
            'max_consecutive_wins': max_consecutive_wins,
            'max_consecutive_losses': max_consecutive_losses,
            'num_days': num_days,
            'years': years,
            'sufficient_trades': significance['sufficient_trades'],
            't_statistic': significance['t_statistic'],
            'p_value': significance['p_value'],
            'is_significant': significance['is_significant'],
            'is_very_significant': significance.get('is_very_significant', False),
            'confidence_95_lower': significance['confidence_95_lower'],
            'confidence_95_upper': significance['confidence_95_upper'],
            'stat_warning': significance['warning']
        }

    def _empty_metrics(self):
        return {
            'initial_capital': self.initial_capital,
            'final_capital': self.initial_capital,
            'total_return': 0.0,
            'total_return_pct': 0.0,
            'annual_return_pct': 0.0,
            'num_trades': 0,
            'num_wins': 0,
            'num_losses': 0,
            'win_rate': 0.0,
            'avg_profit_per_trade': 0.0,
            'avg_win': 0.0,
            'avg_loss': 0.0,
            'expectancy': 0.0,
            'profit_factor': 0.0,
            'max_drawdown': 0.0,
            'max_drawdown_pct': 0.0,
            'sharpe_ratio': 0.0,
            'sortino_ratio': 0.0,
            'calmar_ratio': 0.0,
            'recovery_factor': 0.0,
            'max_consecutive_wins': 0,
            'max_consecutive_losses': 0,
            'num_days': 0,
            'years': 0.0,
            'sufficient_trades': False,
            't_statistic': np.nan,
            'p_value': np.nan,
            'is_significant': False,
            'is_very_significant': False,
            'confidence_95_lower': np.nan,
            'confidence_95_upper': np.nan,
            'stat_warning': 'No trades'
        }

def run_comprehensive_backtest(data_dir, output_dir, 
                               confidence_thresholds=None, strategy_types=None):
    os.makedirs(output_dir, exist_ok=True)
    
    if not os.path.exists(data_dir):
        print(f"ERROR: Directory not found: {data_dir}")
        return None
    
    csv_files = [f for f in os.listdir(data_dir) if f.endswith('_all_folds.csv')]
    if len(csv_files) == 0:
        print(f"ERROR: No CSV files found in {data_dir}")
        return None
    
    if confidence_thresholds is None:
        confidence_thresholds = [0.0, 0.05, 0.1, 0.15, 0.2]
    
    if strategy_types is None:
        strategy_types = ['kelly', 'aggressive', 'neutral', 'conservative', 'ultra_aggressive', 
                          'fixed_50', 'fixed_01', 'volatility_scaled', 'inverse_volatility', 
                          'trend_following', 'momentum', 'reinforcement_learning','regime_adaptive_balanced',
                          'regime_adaptive_defensive','regime_adaptive_aggressive']
    
    print(f"Data directory: {data_dir}")
    print(f"Output directory: {output_dir}")
    print(f"Processing {len(csv_files)} models...")
    print(f"Confidence thresholds: {confidence_thresholds}")
    print(f"Strategy types: {strategy_types}")
    
    all_results = []
    failed_models = []
    
    for csv_file in sorted(csv_files):
        model_name = csv_file.replace('_all_folds.csv', '')
        
        try:
            file_path = os.path.join(data_dir, csv_file)
            df = pd.read_csv(file_path)
            
            if not validate_dataframe(df, model_name):
                failed_models.append(model_name)
                continue
            
            df['date'] = pd.to_datetime(df['date'])
            df = df.sort_values('date').reset_index(drop=True)
            
            base_accuracy = df['correct'].mean() * 100
            date_start = df['date'].min()
            date_end = df['date'].max()
            total_predictions = len(df)
            
            for strategy_type in strategy_types:
                for threshold in confidence_thresholds:
                    backtester = EnhancedCryptoBacktester(
                        initial_capital=10000000,  
                        trading_fee=0.0005,         
                        slippage=0.0015,          
                        min_order_size=5000,        
                        risk_free_rate=0.0,
                        min_trades_threshold=30
                    )
                    
                    equity_curve, trades, daily_returns = backtester.backtest_strategy(
                        df, 
                        confidence_threshold=threshold,
                        strategy_type=strategy_type,
                        kelly_lookback=50
                    )
                    
                    metrics = backtester.calculate_performance_metrics(
                        equity_curve, trades, daily_returns
                    )
                    
                    result = {
                        'model': model_name,
                        'strategy_type': strategy_type,
                        'confidence_threshold': threshold,
                        'total_predictions': total_predictions,
                        'base_accuracy': base_accuracy,
                        'date_start': date_start,
                        'date_end': date_end,
                        **metrics
                    }
                    all_results.append(result)
        
        except Exception as e:
            print(f"{model_name} failed: {e}")
            failed_models.append(model_name)
            continue
    
    if failed_models:
        print(f"Failed models: {', '.join(failed_models)}")
    
    if not all_results:
        print("ERROR: No successful backtests")
        return None
    
    results_df = pd.DataFrame(all_results)
    results_file = os.path.join(output_dir, 'backtest_results.csv')
    results_df.to_csv(results_file, index=False)
    print(f"Results saved: {results_file}")
    
    return results_df, output_dir

def print_summary_dashboard(results_df, output_dir):
    for strategy_type in results_df['strategy_type'].unique():
        print(f"{'='*100}")
        print(f"STRATEGY: {strategy_type.upper()}")
        print(f"{'='*100}")
        
        analysis_data = results_df[
            (results_df['confidence_threshold'] == 0.1) & 
            (results_df['strategy_type'] == strategy_type)
        ].copy()
        
        significant_models = analysis_data[
            (analysis_data['is_significant'] == True) & 
            (analysis_data['sufficient_trades'] == True)
        ].copy()
        
        print(f"Total Models: {len(analysis_data)} | Significant: {len(significant_models)}")
        
        excellent = significant_models[significant_models['total_return'] > 0].copy()
        excellent['grade'] = excellent.apply(lambda x: 
            'A+' if (x['annual_return_pct'] > 20 and x['sharpe_ratio'] >= 3.0) else
            'A'  if (x['annual_return_pct'] > 15 and x['sharpe_ratio'] >= 2.0) else
            'B+' if (x['annual_return_pct'] > 10 and x['sharpe_ratio'] >= 1.5) else
            'B'  if (x['annual_return_pct'] > 5 and x['sharpe_ratio'] >= 1.0) else 'C', axis=1)

        
        top_10 = excellent.nlargest(10, 'total_return')
        
        print(f"{'Rank':<6} {'Model':<22} {'Grade':<7} {'Profit(원)':<13} {'Return%':<9} {'Annual%':<9} {'Sharpe':<8} {'MaxDD%':<8} {'WinRate':<9} {'PF':<6} {'Exp':<8} {'Trades':<8} {'t-stat':<8} {'Acc%':<7}")
        print(f"{'-'*140}")
        
        for i, (idx, row) in enumerate(top_10.iterrows(), 1):
            print(f"{i:<6} {row['model']:<22} {row['grade']:<7} "
                  f"{row['total_return']:>10,.0f}  "
                  f"{row['total_return_pct']:>7.1f}%  "
                  f"{row['annual_return_pct']:>7.1f}%  "
                  f"{row['sharpe_ratio']:>6.2f}  "
                  f"{row['max_drawdown_pct']:>6.1f}%  "
                  f"{row['win_rate']:>7.1f}%  "
                  f"{row['profit_factor']:>4.2f}  "
                  f"{row['expectancy']:>6.2f}  "
                  f"{int(row['num_trades']):>6}  "
                  f"{row['t_statistic']:>7.2f}  "
                  f"{row['base_accuracy']:>5.1f}%")
        
        print(f"{'='*100}")
    
    return results_df

def print_fold_summary(data_dir, model_name, strategy_type, output_dir):
    file_path = os.path.join(data_dir, f"{model_name}_all_folds.csv")
    if not os.path.exists(file_path):
        return None
    
    df = pd.read_csv(file_path)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date').reset_index(drop=True)
    
    if 'fold' not in df.columns:
        return None
    
    print(f"{'-'*80}")
    print(f"FOLD ANALYSIS: {model_name} | STRATEGY: {strategy_type.upper()}")
    print(f"{'-'*80}")
    
    fold_results = []
    
    for fold in sorted(df['fold'].unique()):
        fold_data = df[df['fold'] == fold].copy()
        
        backtester = EnhancedCryptoBacktester(
            initial_capital=10000000,  
            trading_fee=0.0005,         
            slippage=0.0015,          
            min_order_size=5000,        
            risk_free_rate=0.0,
            min_trades_threshold=30
        )

        equity_curve, trades, daily_returns = backtester.backtest_strategy(
            fold_data, confidence_threshold=0.1, strategy_type=strategy_type
        )
        
        metrics = backtester.calculate_performance_metrics(equity_curve, trades, daily_returns)
        
        status = 'VERY_SIG' if metrics['is_very_significant'] else 'SIG' if metrics['is_significant'] else 'NOT_SIG'
        
        print(f"Fold {fold} | {fold_data['date'].min().date()} to {fold_data['date'].max().date()}")
        print(f"  {len(fold_data)} days | {int(metrics['num_trades'])} trades | "
              f" {metrics['total_return']:>8,.0f} ({metrics['total_return_pct']:>5.1f}%) | "
              f"Sharpe {metrics['sharpe_ratio']:>5.2f} | Win {metrics['win_rate']:>5.1f}% | "
              f"{status} (t={metrics['t_statistic']:.2f})")
        
        fold_result = {
            'fold': fold,
            'date_start': fold_data['date'].min(),
            'date_end': fold_data['date'].max(),
            'num_days': len(fold_data),
            'accuracy': fold_data['correct'].mean() * 100,
            **metrics
        }
        fold_results.append(fold_result)
    
    if len(fold_results) > 1:
        returns = np.array([m['annual_return_pct'] for m in fold_results])
        mean_return = np.mean(returns)
        std_return = np.std(returns)
        consistency = (1 / (1 + abs(std_return / mean_return) if mean_return != 0 else 1)) * 100
        
        print(f"{'-'*80}")
        print(f"Walk-Forward Efficiency: {consistency:.1f}% | Avg Annual Return: {mean_return:.1f}% (±{std_return:.1f}%)")
        print(f"{'-'*80}")
    
    return fold_results

result = run_comprehensive_backtest(data_dir, output_dir)
if result is not None:
    results_df, used_output_dir = result
    print(f"\nAll results saved to: {used_output_dir}")
    print_summary_dashboard(results_df, used_output_dir)
    
    for strategy_type in ['kelly', 'aggressive', 'neutral', 'conservative', 'ultra_aggressive', 
                          'fixed_50', 'fixed_01', 'volatility_scaled', 'inverse_volatility', 
                          'trend_following', 'momentum', 'reinforcement_learning','regime_adaptive_balanced',
                          'regime_adaptive_defensive','regime_adaptive_aggressive']:
        top_models = results_df[
            (results_df['confidence_threshold'] == 0.1) & 
            (results_df['strategy_type'] == strategy_type)
        ].nlargest(3, 'total_return')
        
        print(f"{'='*100}")
        print(f"TOP 3 MODELS FOLD-BY-FOLD ANALYSIS ({strategy_type.upper()})")
        print(f"{'='*100}")
        
        for idx, row in top_models.iterrows():
            print_fold_summary(data_dir, row['model'], strategy_type, used_output_dir)



Data directory: ../model_results/2025-10-24/predictions/direction_walk_forward
Output directory: ../model_results/2025-10-24/backtest_results
Processing 26 models...
Confidence thresholds: [0.0, 0.05, 0.1, 0.15, 0.2]
Strategy types: ['kelly', 'aggressive', 'neutral', 'conservative', 'ultra_aggressive', 'fixed_50', 'fixed_01', 'volatility_scaled', 'inverse_volatility', 'trend_following', 'momentum', 'reinforcement_learning', 'regime_adaptive_balanced', 'regime_adaptive_defensive', 'regime_adaptive_aggressive']
Results saved: ../model_results/2025-10-24/backtest_results/backtest_results.csv

All results saved to: ../model_results/2025-10-24/backtest_results
STRATEGY: KELLY
Total Models: 26 | Significant: 14
Rank   Model                  Grade   Profit(원)     Return%   Annual%   Sharpe   MaxDD%   WinRate   PF     Exp      Trades   t-stat   Acc%   
--------------------------------------------------------------------------------------------------------------------------------------------
1 

Fold 7 | 2025-01-31 to 2025-10-19
  262 days | 154 trades |  3,399,446 ( 34.0%) | Sharpe  4.70 | Win  57.8% | VERY_SIG (t=3.68)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 54.6% | Avg Annual Return: 13.0% (±10.8%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: LSTM | STRATEGY: KELLY
--------------------------------------------------------------------------------
Fold 1 | 2022-10-20 to 2022-12-18
  60 days | 43 trades |   559,494 (  5.6%) | Sharpe  3.74 | Win  53.5% | NOT_SIG (t=1.54)
Fold 2 | 2023-02-17 to 2023-04-17
  60 days | 9 trades |   108,511 (  1.1%) | Sharpe  4.97 | Win  55.6% | NOT_SIG (t=nan)
Fold 3 | 2023-06-17 to 2023-08-15
  60 days | 27 trades |   -39,151 ( -0.4%) | Sharpe -1.47 | Win  25.9% | NOT_SIG (t=nan)
Fold 4 | 2023-10-15 to 2023-12-13
  60 days | 19 trades |   108,354 (  1.1

Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 256 trades |  2,281,417 ( 22.8%) | Sharpe  3.10 | Win  52.7% | VERY_SIG (t=3.12)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 77.0% | Avg Annual Return: 13.3% (±4.0%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: LogisticRegression | STRATEGY: NEUTRAL
--------------------------------------------------------------------------------
Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 39 trades |   159,211 (  1.6%) | Sharpe  1.23 | Win  48.7% | NOT_SIG (t=0.49)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 52 trades |   311,687 (  3.1%) | Sharpe  3.72 | Win  53.8% | NOT_SIG (t=1.69)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 48 trades |   130,975 (  1.3%) | Sharpe  3.07 | Win  54.2% | NOT_SIG (t=1.34)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 55 trades 

Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 256 trades |  28,116,435 (281.2%) | Sharpe  2.92 | Win  52.7% | SIG (t=2.94)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 77.4% | Avg Annual Return: 149.1% (±43.4%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: CatBoost | STRATEGY: ULTRA_AGGRESSIVE
--------------------------------------------------------------------------------
Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 21 trades |  2,765,742 ( 27.7%) | Sharpe  4.40 | Win  57.1% | NOT_SIG (t=nan)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 69 trades |  3,086,571 ( 30.9%) | Sharpe  3.10 | Win  52.2% | NOT_SIG (t=1.62)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 59 trades |  3,647,937 ( 36.5%) | Sharpe  5.96 | Win  62.7% | SIG (t=2.88)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 74 trades |  3,

Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 256 trades |   184,169 (  1.8%) | Sharpe  2.92 | Win  52.7% | SIG (t=2.94)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 82.2% | Avg Annual Return: 1.2% (±0.3%)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
FOLD ANALYSIS: CatBoost | STRATEGY: FIXED_01
--------------------------------------------------------------------------------
Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 21 trades |    33,191 (  0.3%) | Sharpe  4.40 | Win  57.1% | NOT_SIG (t=nan)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 69 trades |    35,624 (  0.4%) | Sharpe  3.10 | Win  52.2% | NOT_SIG (t=1.62)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 59 trades |    39,787 (  0.4%) | Sharpe  5.96 | Win  62.7% | SIG (t=2.88)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 74 trades |    42,702 (  0.4%) 

Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 65 trades |  1,312,571 ( 13.1%) | Sharpe  2.33 | Win  61.5% | NOT_SIG (t=1.18)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 67 trades |   192,444 (  1.9%) | Sharpe  0.61 | Win  50.7% | NOT_SIG (t=0.32)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 1 trades |  -10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 1 trades |  -10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 5 | 2024-01-13 to 2024-04-11
  90 days | 1 trades |  -10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 6 | 2024-05-12 to 2024-08-09
  90 days | 1 trades |  -10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 1 trades |  -10,000,000 (-100.0%) | Sharpe  0.00 | Win   0.0% | NOT_SIG (t=nan)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 31.9% | Avg An

Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 256 trades |  2,753,155 ( 27.5%) | Sharpe  2.27 | Win  52.7% | SIG (t=2.28)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 70.1% | Avg Annual Return: 17.2% (±7.4%)
--------------------------------------------------------------------------------
TOP 3 MODELS FOLD-BY-FOLD ANALYSIS (REINFORCEMENT_LEARNING)
--------------------------------------------------------------------------------
FOLD ANALYSIS: CatBoost | STRATEGY: REINFORCEMENT_LEARNING
--------------------------------------------------------------------------------
Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 21 trades |   526,827 (  5.3%) | Sharpe  4.36 | Win  57.1% | NOT_SIG (t=nan)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 69 trades |    52,467 (  0.5%) | Sharpe  0.59 | Win  52.2% | NOT_SIG (t=0.31)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 59 trades |   675,771 (  6.8%) | Sharpe  4.64 | Win  62.7% | SIG (t=2.25)
F

Fold 1 | 2022-09-20 to 2022-12-18
  90 days | 48 trades |   242,846 (  2.4%) | Sharpe  4.67 | Win  68.8% | SIG (t=2.04)
Fold 2 | 2023-01-18 to 2023-04-17
  90 days | 66 trades |   173,328 (  1.7%) | Sharpe  2.53 | Win  56.1% | NOT_SIG (t=1.29)
Fold 3 | 2023-05-18 to 2023-08-15
  90 days | 68 trades |   188,569 (  1.9%) | Sharpe  4.03 | Win  54.4% | SIG (t=2.09)
Fold 4 | 2023-09-15 to 2023-12-13
  90 days | 70 trades |   300,242 (  3.0%) | Sharpe  2.18 | Win  48.6% | NOT_SIG (t=1.15)
Fold 5 | 2024-01-13 to 2024-04-11
  90 days | 83 trades |   453,700 (  4.5%) | Sharpe  3.42 | Win  54.2% | NOT_SIG (t=1.96)
Fold 6 | 2024-05-12 to 2024-08-09
  90 days | 82 trades |   319,123 (  3.2%) | Sharpe  3.02 | Win  57.3% | NOT_SIG (t=1.72)
Fold 7 | 2025-01-01 to 2025-10-19
  292 days | 256 trades |  2,099,613 ( 21.0%) | Sharpe  2.62 | Win  52.7% | SIG (t=2.64)
--------------------------------------------------------------------------------
Walk-Forward Efficiency: 68.7% | Avg Annual Return: 9.5% (±4