# Monte Carlo Walk-Forward Validation - Statistical Arbitrage Strategy

**Strategy**: Hypothesis 5 - Statistical Arbitrage Pairs Trading  
**Project ID**: 26140717  
**Optimized Sharpe**: 1.829  
**Baseline Sharpe**: 0.127  

## Optimized Parameters to Validate:
- z_entry_threshold: 1.5
- z_exit_threshold: 1.0
- lookback_period: 30
- position_size_per_pair: 0.40
- max_holding_days: 30
- stop_loss_z: 4.0

## How to Use:
1. Upload this notebook to QuantConnect Research (free with subscription data)
2. Run all cells sequentially
3. The Monte Carlo runs test parameter robustness across randomly chosen time periods
4. The results show whether the optimized Sharpe of 1.829 is robust or overfit

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import random
from collections import Counter
import json
import time

# QuantConnect Research APIs
from QuantConnect import *
from QuantConnect.Research import QuantBook
from QuantConnect.Api import Api

# Initialize QuantBook
# `qb` is the research-environment handle; `api` is the client the later
# cells use for compile, optimization-status and backtest calls.
qb = QuantBook()
api = Api()  # Auto-authenticated in Research

# Confirmation that the imports and both constructors above succeeded.
print("✓ QuantConnect Research environment initialized")

In [None]:
# ==================== CONFIGURATION ====================
#
# Single source of truth for the walk-forward experiment. Later cells read
# this dict by key, so the key names and their order are part of the contract.
#
# NOTE(review): if the grid search steps upward from `min`, the
# z_exit_threshold grid below is 0.3, 0.5, 0.7, 0.9 and never reaches the
# documented optimum of 1.0 - confirm whether that is intended.

config = dict(
    # Your QuantConnect project ID
    project_id=26140717,  # StatArb_H5_PARTIAL_FIX

    # Algorithm file name
    algorithm_file='main.py',

    # Total period for analysis (from original backtest)
    total_period=dict(
        start=datetime(2022, 1, 1),
        end=datetime(2025, 10, 31),
    ),

    # Train/test split (70% train, 30% test)
    train_test_split=0.70,

    # Number of Monte Carlo runs (start with 5 for testing, increase to 10 for production)
    monte_carlo_runs=5,

    # Parameters to optimize (from Round 2 optimization)
    parameters=dict(
        z_entry_threshold=dict(min=1.5, max=2.5, step=0.5),
        z_exit_threshold=dict(min=0.3, max=1.0, step=0.2),
        lookback_period=dict(min=30, max=70, step=20),
    ),

    # Fixed parameters (not optimized in Monte Carlo)
    fixed_parameters=dict(
        position_size_per_pair=0.40,
        max_holding_days=30,
        stop_loss_z=4.0,
    ),

    # Target metric
    target_metric='SharpeRatio',

    # Random seed for reproducibility
    random_seed=42,

    # Baseline performance (from optimization)
    baseline_sharpe=1.829,
)

# Seed both the stdlib and NumPy generators so reruns draw the same samples.
if config['random_seed'] is not None:
    for seeder in (random.seed, np.random.seed):
        seeder(config['random_seed'])

# Echo the effective settings so the notebook output records what was run.
print("\n".join([
    "Configuration:",
    f"  Project ID: {config['project_id']}",
    f"  Period: {config['total_period']['start'].date()} to {config['total_period']['end'].date()}",
    f"  Train/Test: {config['train_test_split']*100:.0f}%/{(1-config['train_test_split'])*100:.0f}%",
    f"  Monte Carlo runs: {config['monte_carlo_runs']}",
    f"  Parameters to optimize: {list(config['parameters'])}",
    f"  Fixed parameters: {list(config['fixed_parameters'])}",
    f"  Baseline Sharpe: {config['baseline_sharpe']:.3f}",
]))

In [None]:
# ==================== HELPER FUNCTIONS ====================

def generate_random_split(start_date, end_date, train_pct, seed=None):
    """
    Generate random training and testing periods (Monte Carlo sampling).

    The training window has a fixed length of ``int(total_days * train_pct)``
    days and is shifted forward from ``start_date`` by a random offset. The
    test window starts the day after training ends and always finishes on
    ``end_date``, so no part of the split falls outside the requested range.

    Args:
        start_date: First date of the overall analysis period (datetime).
        end_date: Last date of the overall analysis period (datetime).
        train_pct: Fraction of the overall period used for training (0-1).
        seed: Optional seed; when given, the module-level ``random``
            generator is re-seeded so the split is reproducible.

    Returns:
        Tuple ``(train_start, train_end, test_start, test_end)``.

    Raises:
        ValueError: If the period left for testing is shorter than 120 days.
    """
    if seed is not None:
        random.seed(seed)

    total_days = (end_date - start_date).days
    train_days = int(total_days * train_pct)
    test_days = total_days - train_days

    # Ensure minimum test period (120 days ~ 4 months)
    min_test_days = 120
    if test_days < min_test_days:
        raise ValueError(f"Test period too short ({test_days} days). Need at least {min_test_days} days.")

    # Every day the training window is shifted forward is taken from the test
    # window, so cap the shift to keep at least `min_test_days` calendar days
    # (test_start..test_end inclusive) for testing.
    max_start_offset = test_days - min_test_days
    start_offset = random.randint(0, max_start_offset)

    train_start = start_date + timedelta(days=start_offset)
    train_end = train_start + timedelta(days=train_days)
    test_start = train_end + timedelta(days=1)
    # Anchor the test window at the end of the available data; extending it by
    # the offset would request dates beyond `end_date`.
    test_end = end_date

    return train_start, train_end, test_start, test_end


def format_optimization_params(params_config):
    """
    Turn the ``{name: {'min', 'max', 'step'}}`` mapping into the list of
    ``{'name', 'min', 'max', 'step'}`` dicts sent to the optimization API.

    Entries are emitted in the mapping's iteration order.
    """
    range_fields = ('min', 'max', 'step')
    return [
        {'name': param_name, **{field: bounds[field] for field in range_fields}}
        for param_name, bounds in params_config.items()
    ]


def wait_for_optimization(api, opt_id, timeout=1800, poll_interval=30):
    """
    Block until the optimization reports a final status.

    Calls ``api.ReadOptimization(opt_id)`` every ``poll_interval`` seconds and
    inspects ``.Optimization.Status`` on the response.

    Returns:
        The response whose status is "completed".

    Raises:
        RuntimeError: If the status is "error" or "cancelled".
        TimeoutError: If more than ``timeout`` seconds have elapsed when a new
            poll is about to start.
    """
    began = time.time()

    # The elapsed-time check runs before each poll, so an already expired
    # budget raises without contacting the API.
    while time.time() - began <= timeout:
        response = api.ReadOptimization(opt_id)
        state = response.Optimization.Status

        if state == "completed":
            return response
        if state == "error" or state == "cancelled":
            raise RuntimeError(f"Optimization {opt_id} failed with status: {state}")

        time.sleep(poll_interval)

    raise TimeoutError(f"Optimization {opt_id} exceeded timeout")


def wait_for_backtest(api, project_id, backtest_id, timeout=600, poll_interval=10):
    """
    Block until the backtest is reported as completed.

    Calls ``api.ReadBacktest(project_id, backtest_id)`` every
    ``poll_interval`` seconds. A response counts as finished only when both
    ``Success`` and ``Backtest.Completed`` are truthy; anything else is
    polled again.

    Returns:
        The first finished response.

    Raises:
        TimeoutError: If more than ``timeout`` seconds have elapsed when a new
            poll is about to start.
    """
    began = time.time()

    while time.time() - began <= timeout:
        response = api.ReadBacktest(project_id, backtest_id)

        finished = response.Success and response.Backtest.Completed
        if finished:
            return response

        time.sleep(poll_interval)

    raise TimeoutError(f"Backtest {backtest_id} exceeded timeout")


def extract_sharpe(backtest_result):
    """
    Extract Sharpe ratio from backtest result.

    Scans ``backtest_result.Backtest.Statistics`` for the entry whose ``Key``
    is 'Sharpe Ratio' and converts its ``Value`` to float.

    Returns:
        The Sharpe ratio, or 0.0 when the entry is missing or the result
        cannot be read or parsed.
    """
    try:
        for stat in backtest_result.Backtest.Statistics:
            if stat.Key == 'Sharpe Ratio':
                return float(stat.Value)
    except Exception:
        # Best-effort: a malformed result falls back to 0.0. Catching
        # Exception rather than using a bare `except` lets KeyboardInterrupt
        # and SystemExit propagate, so the notebook can still be interrupted.
        return 0.0
    return 0.0

# Confirmation that every helper definition in this cell executed.
print("✓ Helper functions loaded")

In [None]:
# ==================== MONTE CARLO WALK-FORWARD ====================
#
# For each Monte Carlo run this cell:
#   1. draws a random train/test split,
#   2. creates a grid-search optimization through the REST API and waits for it,
#   3. creates a backtest through the `api` client and waits for it,
#   4. records train/test Sharpe and the relative degradation in `results`.
# Any exception inside a run is recorded in `errors` and the loop moves on.
#
# NOTE(review): nothing in this cell sends train_start/train_end to the
# optimization request, nor test_start/test_end/best_params to the backtest
# request. Unless main.py obtains them some other way, every run evaluates the
# same period with the project's default parameters - TODO confirm and wire in.

import base64
import hashlib
import os

print("="*70)
print("MONTE CARLO WALK-FORWARD ANALYSIS - STATISTICAL ARBITRAGE")
print("="*70)
print()

results = []
errors = []

# Compile project once at start; a failure is reported but not fatal, and
# `compile_id` is then None for every subsequent request.
print("Compiling project...")
try:
    compile_response = api.CreateCompile(config['project_id'])
    if not compile_response.Success:
        raise RuntimeError(f"Compilation failed: {compile_response.Errors}")
    compile_id = compile_response.CompileId
    print(f"✓ Compilation successful: {compile_id}")
except Exception as e:
    print(f"✗ Compilation error: {e}")
    print("Note: If in QC Research, compilation may be automatic")
    compile_id = None

print()

for run in range(config['monte_carlo_runs']):
    print(f"\n{'='*70}")
    print(f"Monte Carlo Run {run + 1}/{config['monte_carlo_runs']}")
    print(f"{'='*70}")

    try:
        # 1. Generate random train/test split.
        # Derive the per-run seed from the configured seed so that changing
        # `random_seed` changes the splits, and so that a seed of 0 still
        # counts as "seeded" (only None disables seeding).
        base_seed = config['random_seed']
        run_seed = None if base_seed is None else base_seed + run

        train_start, train_end, test_start, test_end = generate_random_split(
            config['total_period']['start'],
            config['total_period']['end'],
            config['train_test_split'],
            seed=run_seed
        )

        print(f"Training:  {train_start.date()} to {train_end.date()} ({(train_end - train_start).days} days)")
        print(f"Testing:   {test_start.date()} to {test_end.date()} ({(test_end - test_start).days} days)")

        # 2. Run optimization on TRAINING period
        print(f"\nRunning optimization on training period...")

        # Format parameters for API (shared helper keeps one definition of the shape)
        opt_params = format_optimization_params(config['parameters'])

        # Build optimization config
        opt_config = {
            'projectId': config['project_id'],
            'compileId': compile_id,
            'name': f"MC_Train_Run{run+1}_{train_start.strftime('%Y%m%d')}",
            'target': 'TotalPerformance.PortfolioStatistics.SharpeRatio',
            'targetTo': 'max',
            'strategy': 'QuantConnect.Optimizer.Strategies.GridSearchOptimizationStrategy',
            'parameters': opt_params,
            'nodeType': 'O2-8',
            'parallelNodes': 2
        }

        # Create optimization using REST API directly.
        # Imported inside the try so that a missing `requests` package is
        # recorded as a run error instead of aborting the whole cell.
        import requests

        user_id = os.environ.get('QC_USER_ID')
        token = os.environ.get('QC_API_TOKEN')
        if not user_id or not token:
            raise RuntimeError("QC_USER_ID and QC_API_TOKEN environment variables must be set")

        # QuantConnect API v2 authentication: HTTP Basic credentials of the
        # form base64("<user_id>:<sha256(token:timestamp)>") plus a matching
        # Timestamp header. Sending the raw token is rejected.
        timestamp = str(int(time.time()))
        hashed_token = hashlib.sha256(f"{token}:{timestamp}".encode('utf-8')).hexdigest()
        credentials = base64.b64encode(f"{user_id}:{hashed_token}".encode('utf-8')).decode('ascii')

        headers = {
            'Authorization': f'Basic {credentials}',
            'Timestamp': timestamp,
            'Content-Type': 'application/json'
        }

        response = requests.post(
            'https://www.quantconnect.com/api/v2/optimizations/create',
            headers=headers,
            json=opt_config,
            timeout=60  # never hang the notebook on a stalled connection
        )

        opt_result = response.json()

        if not opt_result.get('success'):
            raise RuntimeError(f"Optimization creation failed: {opt_result.get('errors')}")

        opt_id = opt_result['optimizationId']
        print(f"  Optimization ID: {opt_id}")
        print(f"  Waiting for completion (this may take 10-20 minutes)...")

        # Wait for optimization to complete
        opt_complete = wait_for_optimization(api, opt_id, timeout=1800)

        # Extract best parameters and Sharpe
        best_params = {param.Key: param.Value for param in opt_complete.Optimization.ParameterSet}

        train_sharpe = opt_complete.Optimization.Statistics.SharpeRatio

        print(f"  ✓ Training Sharpe: {train_sharpe:.3f}")
        print(f"  Best parameters: {best_params}")

        # 3. Run backtest on TESTING period with best parameters
        print(f"\nRunning backtest on test period...")

        # Create backtest
        backtest_result = api.CreateBacktest(
            config['project_id'],
            compile_id,
            f"MC_Test_Run{run+1}_{test_start.strftime('%Y%m%d')}"
        )

        if not backtest_result.Success:
            raise RuntimeError(f"Backtest creation failed: {backtest_result.Errors}")

        backtest_id = backtest_result.BacktestId
        print(f"  Backtest ID: {backtest_id}")
        print(f"  Waiting for completion...")

        # Wait for backtest to complete
        test_complete = wait_for_backtest(api, config['project_id'], backtest_id)
        test_sharpe = extract_sharpe(test_complete)

        print(f"  ✓ Testing Sharpe: {test_sharpe:.3f}")

        # 4. Calculate degradation: relative drop from train to test Sharpe.
        # A non-positive training Sharpe makes the ratio meaningless, so it is
        # counted as full (100%) degradation.
        if train_sharpe > 0:
            degradation = (train_sharpe - test_sharpe) / train_sharpe
        else:
            degradation = 1.0

        print(f"  Degradation: {degradation*100:.1f}%")

        # 5. Store results
        results.append({
            'run': run + 1,
            'train_start': train_start,
            'train_end': train_end,
            'test_start': test_start,
            'test_end': test_end,
            'train_sharpe': float(train_sharpe),
            'test_sharpe': float(test_sharpe),
            'degradation': float(degradation),
            'best_params': dict(best_params),
            'optimization_id': opt_id,
            'backtest_id': backtest_id
        })

        print(f"  ✓ Run {run + 1} complete")

    except Exception as e:
        # One failed run must not stop the remaining runs; keep the message
        # so it can be reported alongside the results.
        error_msg = str(e)
        print(f"  ✗ Error in run {run + 1}: {error_msg}")
        errors.append({
            'run': run + 1,
            'error': error_msg
        })

print(f"\n{'='*70}")
print(f"Monte Carlo Walk-Forward Complete")
print(f"  Successful runs: {len(results)}/{config['monte_carlo_runs']}")
print(f"  Failed runs: {len(errors)}/{config['monte_carlo_runs']}")
print(f"{'='*70}")

In [None]:
# ==================== ANALYSIS ====================
#
# Aggregates the per-run results: mean/std of train and test Sharpe and of
# degradation, the share of runs above 30% / below 15% degradation, and how
# often each optimized parameter value was selected. The summary variables
# computed here are read by the cells that follow.

if results:
    df_results = pd.DataFrame(results)
    divider = "=" * 70

    print("\n" + divider)
    print("AGGREGATE RESULTS")
    print(divider)

    # Summary statistics (pandas' default sample standard deviation)
    train_col = df_results['train_sharpe']
    test_col = df_results['test_sharpe']
    degradation_col = df_results['degradation']

    mean_train, std_train = train_col.mean(), train_col.std()
    mean_test, std_test = test_col.mean(), test_col.std()
    mean_deg, std_deg = degradation_col.mean(), degradation_col.std()

    print(f"\nPerformance Metrics:")
    print(f"  Baseline Sharpe (original): {config['baseline_sharpe']:.3f}")
    print(f"  Mean Training Sharpe:        {mean_train:.3f} ± {std_train:.3f}")
    print(f"  Mean Testing Sharpe:         {mean_test:.3f} ± {std_test:.3f}")
    print(f"  Mean Degradation:            {mean_deg*100:.1f}% ± {std_deg*100:.1f}%")

    # Robustness analysis: count runs on either side of the thresholds
    run_total = len(df_results)
    overfit_count = (degradation_col > 0.30).sum()
    good_count = (degradation_col < 0.15).sum()
    overfit_pct = overfit_count / run_total
    good_pct = good_count / run_total

    print(f"\nRobustness Analysis:")
    print(f"  Runs with >30% degradation (OVERFIT): {overfit_count}/{len(df_results)} ({overfit_pct*100:.0f}%)")
    print(f"  Runs with <15% degradation (GOOD):    {good_count}/{len(df_results)} ({good_pct*100:.0f}%)")

    # Parameter stability: a value chosen in at least 70% of runs is "stable"
    print("\n" + divider)
    print("PARAMETER STABILITY")
    print(divider)

    for param_name in config['parameters']:
        values = [
            r['best_params'][param_name]
            for r in results
            if param_name in r['best_params']
        ]
        if not values:
            continue

        counter = Counter(values)
        most_common = counter.most_common(1)[0]
        top_share = most_common[1] / len(values)

        print(f"\n{param_name}:")
        for candidate, hits in counter.most_common():
            pct = hits / len(values) * 100
            print(f"  {candidate}: {hits}/{len(values)} ({pct:.0f}%)")

        if top_share >= 0.70:
            print(f"  ✓ STABLE: {most_common[0]} appears in {top_share*100:.0f}% of runs")
        else:
            print(f"  ⚠ UNSTABLE: No clear consensus (max {top_share*100:.0f}%)")
else:
    print("✗ No successful runs to analyze")

In [None]:
# ==================== ROBUSTNESS DECISION ====================
#
# Turns the aggregate statistics from the analysis cell into a single
# decision label plus explanation, then lists the most frequently selected
# value of each optimized parameter.

def _robustness_verdict(overfit_share, avg_degradation, degradation_spread):
    """Return (decision, reason, recommendation); rules are tried in order, first match wins."""
    if overfit_share > 0.50:
        return (
            "ABANDON_STRATEGY",
            f"Overfitting in {overfit_share*100:.0f}% of Monte Carlo runs",
            "Strategy does not generalize. Consider new hypothesis.",
        )
    if avg_degradation > 0.40:
        return (
            "HIGH_RISK",
            f"Average degradation {avg_degradation*100:.1f}% indicates poor generalization",
            "Strategy shows high degradation. Use with extreme caution.",
        )
    if degradation_spread > 0.25:
        return (
            "UNSTABLE_PARAMETERS",
            f"High variance ({degradation_spread*100:.1f}%) suggests parameter instability",
            "Parameters unstable across periods. Consider narrowing search space.",
        )
    if avg_degradation < 0.15 and degradation_spread < 0.10:
        return (
            "ROBUST_STRATEGY",
            f"Low degradation ({avg_degradation*100:.1f}%) with low variance ({degradation_spread*100:.1f}%)",
            "Strategy shows excellent generalization. Ready for paper trading.",
        )
    return (
        "PROCEED_WITH_CAUTION",
        f"Moderate degradation ({avg_degradation*100:.1f}%), acceptable stability",
        "Strategy shows reasonable generalization. Additional validation recommended.",
    )


if results:
    print("\n" + "="*70)
    print("ROBUSTNESS DECISION FRAMEWORK")
    print("="*70 + "\n")

    # Apply decision rules
    decision, reason, recommendation = _robustness_verdict(overfit_pct, mean_deg, std_deg)

    print(f"✓ Decision: {decision}")
    print(f"\n  Reason: {reason}")
    print(f"\n  Recommendation: {recommendation}")

    # Recommended parameters: the modal best value across successful runs
    print("\n" + "="*70)
    print("RECOMMENDED PARAMETERS FOR LIVE TRADING")
    print("="*70)

    recommended_params = {}
    for param_name in config['parameters']:
        values = [
            r['best_params'][param_name]
            for r in results
            if param_name in r['best_params']
        ]
        if not values:
            continue
        most_common = Counter(values).most_common(1)[0]
        recommended_params[param_name] = most_common[0]
        print(f"  {param_name}: {most_common[0]} (chosen {most_common[1]/len(values)*100:.0f}% of the time)")

    print(f"\n  Fixed parameters (unchanged):")
    for fixed_name, fixed_value in config['fixed_parameters'].items():
        print(f"    {fixed_name}: {fixed_value}")

    print("\n" + "="*70)

In [None]:
# ==================== SAVE RESULTS ====================
#
# Writes the configuration, summary, recommended parameters, per-run details
# and recorded errors to a timestamped JSON file in the working directory,
# then prints the follow-up steps for the decision that was reached.

if not results:
    print("\n✗ No results to save")
else:
    period_start = config['total_period']['start'].date()
    period_end = config['total_period']['end'].date()

    # One flat record per successful run, with dates rendered as text.
    detailed_results = []
    for r in results:
        detailed_results.append({
            'run': r['run'],
            'train_period': f"{r['train_start'].date()} to {r['train_end'].date()}",
            'test_period': f"{r['test_start'].date()} to {r['test_end'].date()}",
            'train_sharpe': r['train_sharpe'],
            'test_sharpe': r['test_sharpe'],
            'degradation': r['degradation'],
            'best_params': r['best_params'],
            'optimization_id': r['optimization_id'],
            'backtest_id': r['backtest_id'],
        })

    # Optimized values first; fixed parameters win on a name clash.
    recommended_with_fixed = dict(recommended_params)
    recommended_with_fixed.update(config['fixed_parameters'])

    output_data = {
        'strategy': 'Statistical Arbitrage Pairs Trading',
        'hypothesis_id': 5,
        'project_id': config['project_id'],
        'configuration': {
            'period': f"{period_start} to {period_end}",
            'train_test_split': config['train_test_split'],
            'monte_carlo_runs': config['monte_carlo_runs'],
            'parameters': config['parameters'],
            'fixed_parameters': config['fixed_parameters'],
            'baseline_sharpe': config['baseline_sharpe'],
        },
        'summary': {
            'successful_runs': len(results),
            'failed_runs': len(errors),
            'mean_train_sharpe': float(mean_train),
            'mean_test_sharpe': float(mean_test),
            'mean_degradation': float(mean_deg),
            'std_degradation': float(std_deg),
            'pct_overfit': float(overfit_pct),
            'decision': decision,
            'reason': reason,
            'recommendation': recommendation,
        },
        'recommended_parameters': recommended_with_fixed,
        'detailed_results': detailed_results,
        'errors': errors,
    }

    # Save to JSON; default=str stringifies any value json cannot encode natively
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_filename = f"walkforward_stat_arb_h5_{stamp}.json"
    with open(output_filename, 'w') as f:
        json.dump(output_data, f, indent=2, default=str)

    # The fourth step depends on the decision reached earlier.
    if decision == "ROBUST_STRATEGY":
        final_step = "  4. ✓ Strategy validated - ready for paper trading"
    elif decision in ("PROCEED_WITH_CAUTION", "UNSTABLE_PARAMETERS"):
        final_step = "  4. ⚠ Additional validation recommended before paper trading"
    else:
        final_step = "  4. ✗ Strategy failed validation - consider new hypothesis"

    print(f"\n✓ Results saved to: {output_filename}")
    print("\n" + "="*70)
    print("MONTE CARLO WALK-FORWARD ANALYSIS COMPLETE")
    print("="*70)
    print("\nNext Steps:")
    print(f"  1. Review decision: {decision}")
    print("  2. Download results JSON file")
    print("  3. Update iteration_state.json locally")
    print(final_step)