# Monte Carlo Walk-Forward Optimization

This notebook implements Monte Carlo walk-forward analysis to validate strategy robustness.

**Method:**
1. Randomly sample training/testing periods (Monte Carlo)
2. Optimize parameters on training data
3. Validate on out-of-sample testing data
4. Repeat N times and analyze distribution of results

**Advantages over traditional walk-forward:**
- More robust to period selection bias
- Tests strategy across diverse market conditions
- Provides a distribution of the expected out-of-sample performance degradation
- Shows whether the optimized parameters are stable across runs

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import random
from collections import Counter
import json

# QC API (available in QC Research environment)
from QuantConnect import *
from QuantConnect.Api import *

# Set random seed for reproducibility.
# Both generators are seeded because the notebook draws from Python's
# `random` module (random.randint / random.choice) as well as from NumPy's
# global RNG (np.random.uniform / np.random.randint).
random.seed(42)
np.random.seed(42)

In [None]:
# Configuration
# Single source of settings for the notebook: the QC project, the overall
# date range to sample from, the train/test proportion, the number of
# Monte Carlo runs and the {min, max, step} search grid of each parameter.
config = {
    'project_id': 26120873,  # Your QC project ID
    'total_period': {
        'start': datetime(2020, 1, 1),
        'end': datetime(2023, 12, 31),
    },
    'train_test_split': 0.60,  # 60% training, 40% testing
    'monte_carlo_runs': 10,
    'parameters': {
        'rsi_oversold': {'min': 30, 'max': 45, 'step': 5},
        'bb_distance_pct': {'min': 1.02, 'max': 1.10, 'step': 0.04},
        'use_trend_filter': {'min': 0, 'max': 1, 'step': 1},
    },
    'validation_metric': 'sharpe_ratio',
}

# Echo the key settings, one line each
print(
    "Configuration loaded:",
    f"  Period: {config['total_period']['start']} to {config['total_period']['end']}",
    f"  Train/Test: {config['train_test_split']*100:.0f}%/{(1-config['train_test_split'])*100:.0f}%",
    f"  Monte Carlo runs: {config['monte_carlo_runs']}",
    sep="\n",
)

In [None]:
# Helper function: Generate random train/test split
def generate_random_split(start_date, end_date, train_pct, window_pct=0.75):
    """
    Generate a randomly positioned training/testing split.

    A combined train+test window covering ``window_pct`` of the overall
    period is placed at a random offset inside ``[start_date, end_date]``.
    The leading ``train_pct`` of that window is the training period; the
    testing period starts one day after training ends and fills the rest of
    the window. Because the window is shorter than the overall period by
    default, the split differs between calls, and the testing period never
    extends past ``end_date``.

    Args:
        start_date: Overall start date
        end_date: Overall end date
        train_pct: Fraction of the window used for training (0.0-1.0,
            both bounds exclusive)
        window_pct: Fraction of the overall period covered by the combined
            train+test window (0.0 exclusive to 1.0 inclusive). With 1.0
            the window spans the whole period, so only one split is possible.

    Returns:
        tuple: (train_start, train_end, test_start, test_end)

    Raises:
        ValueError: If ``train_pct`` or ``window_pct`` is out of range, if
            ``end_date`` is not after ``start_date``, if the training period
            would be empty, or if the testing period would be shorter than
            90 days.
    """
    if not 0.0 < train_pct < 1.0:
        raise ValueError(f"train_pct must be between 0 and 1 (exclusive), got {train_pct}")
    if not 0.0 < window_pct <= 1.0:
        raise ValueError(f"window_pct must be in (0, 1], got {window_pct}")

    total_days = (end_date - start_date).days
    if total_days <= 0:
        raise ValueError("end_date must be after start_date")

    window_days = int(total_days * window_pct)
    train_days = int(window_days * train_pct)
    if train_days < 1:
        raise ValueError(f"Training period too short ({train_days} days).")

    # One day of the window is consumed by the gap between train_end and
    # test_start, so it must not be counted as a testing day; otherwise
    # test_end would land one day past the end of the window.
    test_days = window_days - train_days - 1

    # Ensure we have at least 3 months for testing
    min_test_days = 90
    if test_days < min_test_days:
        raise ValueError(f"Test period too short ({test_days} days). Need at least {min_test_days} days.")

    # Random start point for the window; the slack left over after fitting
    # the window into the overall period is the range of valid offsets.
    max_start_offset = total_days - window_days
    start_offset = random.randint(0, max_start_offset)

    train_start = start_date + timedelta(days=start_offset)
    train_end = train_start + timedelta(days=train_days)
    test_start = train_end + timedelta(days=1)
    test_end = test_start + timedelta(days=test_days)

    return train_start, train_end, test_start, test_end

In [None]:
# Helper functions: expand the search grid and run optimization on training period
def _parameter_grid(spec):
    """Expand a ``{'min', 'max', 'step'}`` spec into its list of grid values."""
    low, high, step = spec['min'], spec['max'], spec['step']
    if step <= 0:
        raise ValueError(f"Parameter step must be positive, got {step}")
    if high < low:
        raise ValueError(f"Parameter max ({high}) is below min ({low})")

    # The small tolerance keeps the last grid point when float error makes
    # the quotient land just below a whole number.
    n_steps = int((high - low) / step + 1e-9)
    values = [low + i * step for i in range(n_steps + 1)]

    if all(isinstance(v, int) for v in (low, high, step)):
        return values
    # Strip accumulated float noise (e.g. 1.0600000000000001 -> 1.06)
    return [round(v, 10) for v in values]


def run_optimization(api, project_id, start_date, end_date, parameters):
    """
    Simulate a parameter optimization on the specified period.

    No QC API call is made yet: one value per parameter is picked at random
    from its search grid and a random Sharpe ratio is attached. ``api`` and
    ``project_id`` are accepted for the future real implementation and are
    currently unused.

    Args:
        api: QuantConnect API instance (unused by the simulation)
        project_id: QC project ID (unused by the simulation)
        start_date: Training start date
        end_date: Training end date
        parameters: Dict mapping parameter name to a
            ``{'min': ..., 'max': ..., 'step': ...}`` spec. If empty or
            None, a built-in demo grid is used instead.

    Returns:
        dict: ``best_parameters`` (name -> chosen value), ``best_sharpe``
        (float drawn uniformly from [0.5, 1.5)) and ``period``
        (``[start_date, end_date]``)

    Raises:
        ValueError: If a parameter spec has a non-positive step or a max
            below its min.
    """
    print(f"  Optimizing: {start_date.date()} to {end_date.date()}")

    # TODO: Replace the simulation below with a real QC optimization run
    # targeting the Sharpe ratio over [start_date, end_date].

    if parameters:
        # Derive the candidates from the caller's search space so the
        # simulation cannot drift from the configuration.
        grid = {name: _parameter_grid(spec) for name, spec in parameters.items()}
    else:
        # Demo grid used when no search space is supplied
        grid = {
            'rsi_oversold': [30, 35, 40, 45],
            'bb_distance_pct': [1.02, 1.06, 1.10],
            'use_trend_filter': [0, 1],
        }

    # Simulate optimization result for demo
    best_params = {name: random.choice(candidates) for name, candidates in grid.items()}
    best_sharpe = np.random.uniform(0.5, 1.5)  # Simulated

    return {
        'best_parameters': best_params,
        'best_sharpe': best_sharpe,
        'period': [start_date, end_date]
    }

In [None]:
# Helper function: Run backtest with specific parameters
def run_backtest(api, project_id, start_date, end_date, parameters):
    """
    Simulate an out-of-sample backtest on the given period.

    No QC API call is made yet; every metric is drawn from NumPy's global
    RNG. The simulated Sharpe ratio is a base value scaled down by a random
    factor and is drawn independently of any training result. ``api``,
    ``project_id`` and ``parameters`` are accepted for the future real
    implementation and are not used by the simulation.

    Args:
        api: QuantConnect API instance
        project_id: QC project ID
        start_date: Test start date
        end_date: Test end date
        parameters: Dict of parameters to use

    Returns:
        dict: ``sharpe_ratio``, ``total_return``, ``max_drawdown``,
        ``total_trades`` and ``period`` (``[start_date, end_date]``)
    """
    print(f"  Testing: {start_date.date()} to {end_date.date()}")

    # TODO: Replace the simulation below with a real QC backtest that is
    # run with the supplied parameters.

    # Scaling factor standing in for out-of-sample degradation
    oos_scale = np.random.uniform(0.70, 0.95)
    base_sharpe = np.random.uniform(0.3, 1.2)

    simulated = {'sharpe_ratio': base_sharpe * oos_scale}
    simulated['total_return'] = np.random.uniform(0.05, 0.25)
    simulated['max_drawdown'] = np.random.uniform(0.05, 0.20)
    simulated['total_trades'] = np.random.randint(10, 100)
    simulated['period'] = [start_date, end_date]
    return simulated

In [None]:
# Main Monte Carlo Walk-Forward Loop
# Each run draws a train/test split, optimizes on the training period,
# backtests the chosen parameters on the testing period and records how
# much the Sharpe ratio dropped between the two.
print("Starting Monte Carlo Walk-Forward Analysis...\n")

api = None  # Would initialize QC API here: Api()
results = []

for run in range(config['monte_carlo_runs']):
    print("\n" + "=" * 60)
    print(f"Monte Carlo Run {run + 1}/{config['monte_carlo_runs']}")
    print("=" * 60)

    # Generate random train/test split
    period = config['total_period']
    train_start, train_end, test_start, test_end = generate_random_split(
        period['start'], period['end'], config['train_test_split']
    )

    train_span_days = (train_end - train_start).days
    test_span_days = (test_end - test_start).days
    print(f"Training:  {train_start.date()} to {train_end.date()} ({train_span_days} days)")
    print(f"Testing:   {test_start.date()} to {test_end.date()} ({test_span_days} days)")

    # Run optimization on training period
    opt_result = run_optimization(
        api, config['project_id'], train_start, train_end, config['parameters']
    )
    train_sharpe = opt_result['best_sharpe']
    chosen_params = opt_result['best_parameters']

    print(f"  Best parameters: {chosen_params}")
    print(f"  Training Sharpe: {train_sharpe:.3f}")

    # Run backtest on testing period with optimized parameters
    test_result = run_backtest(
        api, config['project_id'], test_start, test_end, chosen_params
    )
    test_sharpe = test_result['sharpe_ratio']

    print(f"  Testing Sharpe: {test_sharpe:.3f}")

    # Relative drop from training to testing Sharpe; reported as 0 when the
    # training Sharpe is not positive, since the ratio is undefined there.
    degradation = (train_sharpe - test_sharpe) / train_sharpe if train_sharpe > 0 else 0

    print(f"  Degradation: {degradation*100:.1f}%")

    # Store results
    run_record = {
        'run': run + 1,
        'train_start': train_start,
        'train_end': train_end,
        'test_start': test_start,
        'test_end': test_end,
        'train_sharpe': train_sharpe,
        'test_sharpe': test_sharpe,
        'degradation': degradation,
        'best_params': chosen_params,
        'test_trades': test_result['total_trades'],
        'test_return': test_result['total_return'],
        'test_drawdown': test_result['max_drawdown'],
    }
    results.append(run_record)

print("\n" + "=" * 60)
print("Monte Carlo Walk-Forward Analysis Complete")
print("=" * 60)

In [None]:
# Convert results to DataFrame for analysis
df_results = pd.DataFrame(results)

# Display summary statistics.
# The separator is built as "\n" + "=" * 60: multiplying "\n=" by 60 would
# repeat the newline as well and print 60 one-character lines.
print("\n" + "=" * 60)
print("AGGREGATE RESULTS")
print("=" * 60)

print("\nPerformance Metrics:")
print(f"  Mean Training Sharpe:  {df_results['train_sharpe'].mean():.3f} ± {df_results['train_sharpe'].std():.3f}")
print(f"  Mean Testing Sharpe:   {df_results['test_sharpe'].mean():.3f} ± {df_results['test_sharpe'].std():.3f}")
print(f"  Mean Degradation:      {df_results['degradation'].mean()*100:.1f}% ± {df_results['degradation'].std()*100:.1f}%")

# Share of runs above the 30% overfitting threshold and below the 15%
# good-generalization threshold, both expressed in percent
print("\nRobustness Analysis:")
overfit_pct = (df_results['degradation'] > 0.30).sum() / len(df_results) * 100
good_pct = (df_results['degradation'] < 0.15).sum() / len(df_results) * 100

print(f"  Runs with >30% degradation: {overfit_pct:.0f}% (overfitting indicator)")
print(f"  Runs with <15% degradation: {good_pct:.0f}% (good generalization)")

# Display full results table (degradation shown in percent)
print("\n\nDetailed Results:")
display_df = df_results[['run', 'train_sharpe', 'test_sharpe', 'degradation', 'test_trades']].copy()
display_df['degradation'] = display_df['degradation'] * 100
display_df = display_df.round(3)
print(display_df.to_string(index=False))

In [None]:
# Analyze parameter stability
# The separator is built as "\n" + "=" * 60: multiplying "\n=" by 60 would
# repeat the newline as well and print 60 one-character lines.
print("\n" + "=" * 60)
print("PARAMETER STABILITY ANALYSIS")
print("=" * 60)

# Count frequency of each parameter value
param_names = ['rsi_oversold', 'bb_distance_pct', 'use_trend_filter']

for param in param_names:
    values = [r['best_params'][param] for r in results]
    counter = Counter(values)
    most_common = counter.most_common(1)[0]
    # Fraction of runs that picked the most frequent value
    top_share = most_common[1] / len(results)

    print(f"\n{param}:")
    for value, count in counter.most_common():
        pct = count / len(results) * 100
        print(f"  {value}: {count}/{len(results)} ({pct:.0f}%)")

    # A parameter counts as stable when one value wins at least 70% of runs
    if top_share >= 0.70:
        print(f"  ✅ STABLE: {most_common[0]} chosen in {top_share*100:.0f}% of runs")
    else:
        print(f"  ⚠️  UNSTABLE: No clear consensus (max {top_share*100:.0f}%)")

In [None]:
# Visualization: Degradation Distribution
# Four panels: degradation histogram, train-vs-test scatter, Sharpe per run,
# and a heatmap of the parameter values chosen in each run.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Plot 1: Degradation distribution
mean_degradation_pct = df_results['degradation'].mean() * 100
axes[0, 0].hist(df_results['degradation'] * 100, bins=10, edgecolor='black', alpha=0.7)
axes[0, 0].axvline(x=30, color='r', linestyle='--', label='Overfitting threshold (30%)')
axes[0, 0].axvline(x=mean_degradation_pct, color='g', linestyle='--', label=f'Mean ({mean_degradation_pct:.1f}%)')
axes[0, 0].set_xlabel('Degradation (%)')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].set_title('Distribution of Performance Degradation')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# Plot 2: Training vs Testing Sharpe
axes[0, 1].scatter(df_results['train_sharpe'], df_results['test_sharpe'], alpha=0.6, s=100)
max_sharpe = max(df_results['train_sharpe'].max(), df_results['test_sharpe'].max())
# Points on the diagonal have identical training and testing Sharpe
axes[0, 1].plot([0, max_sharpe], [0, max_sharpe], 'r--', label='Perfect generalization')
axes[0, 1].set_xlabel('Training Sharpe Ratio')
axes[0, 1].set_ylabel('Testing Sharpe Ratio')
axes[0, 1].set_title('Training vs Testing Performance')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# Plot 3: Sharpe ratio over runs
axes[1, 0].plot(df_results['run'], df_results['train_sharpe'], marker='o', label='Training', linewidth=2)
axes[1, 0].plot(df_results['run'], df_results['test_sharpe'], marker='s', label='Testing', linewidth=2)
axes[1, 0].fill_between(df_results['run'], df_results['train_sharpe'], df_results['test_sharpe'], alpha=0.2, color='gray')
axes[1, 0].set_xlabel('Monte Carlo Run')
axes[1, 0].set_ylabel('Sharpe Ratio')
axes[1, 0].set_title('Performance Across Monte Carlo Runs')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# Plot 4: Parameter stability heatmap
# One row per parameter, one column per run. The matrix is passed to imshow
# as a 2-D array; wrapping it in another list would give it the shape
# (1, n_params, n_runs), which imshow rejects as image data.
param_matrix = np.array(
    [[r['best_params'][param] for r in results] for param in param_names],
    dtype=float,
)

# The parameters live on very different scales, so each row is min-max
# scaled to [0, 1]; otherwise the largest-valued parameter would dominate
# the colour map. A row with a single distinct value maps to 0.
row_min = param_matrix.min(axis=1, keepdims=True)
row_range = param_matrix.max(axis=1, keepdims=True) - row_min
row_range[row_range == 0] = 1.0
scaled_matrix = (param_matrix - row_min) / row_range

im = axes[1, 1].imshow(scaled_matrix, aspect='auto', cmap='YlOrRd', vmin=0, vmax=1)
axes[1, 1].set_yticks(range(len(param_names)))
axes[1, 1].set_yticklabels(param_names)
axes[1, 1].set_xticks(range(len(results)))
axes[1, 1].set_xticklabels(range(1, len(results) + 1))
axes[1, 1].set_xlabel('Monte Carlo Run')
axes[1, 1].set_title('Parameter Values Across Runs')
plt.colorbar(im, ax=axes[1, 1], label='Value (scaled per parameter)')

plt.tight_layout()
plt.savefig('monte_carlo_walkforward_results.png', dpi=150, bbox_inches='tight')
plt.show()

print("\nVisualization saved to: monte_carlo_walkforward_results.png")

In [None]:
# Final Decision Framework
def _robustness_verdict(mean_deg, std_deg, pct_overfit):
    """Map the degradation statistics to (decision, reason, recommendation).

    The rules are checked in order and the first match wins.
    """
    if pct_overfit > 0.50:
        return (
            "ABANDON_STRATEGY",
            f"Overfitting in {pct_overfit*100:.0f}% of Monte Carlo runs",
            "Strategy does not generalize well. Consider new hypothesis.",
        )
    if mean_deg > 0.40:
        return (
            "HIGH_RISK",
            f"Average degradation {mean_deg*100:.1f}% indicates poor generalization",
            "Strategy shows high out-of-sample degradation. Use with caution.",
        )
    if std_deg > 0.25:
        return (
            "UNSTABLE_PARAMETERS",
            f"High variance ({std_deg*100:.1f}%) suggests parameter instability",
            "Parameters not stable. Consider narrowing search space.",
        )
    if mean_deg < 0.15 and std_deg < 0.10:
        return (
            "ROBUST_STRATEGY",
            f"Low degradation ({mean_deg*100:.1f}%) with low variance ({std_deg*100:.1f}%)",
            "Strategy shows excellent generalization. Ready for live testing.",
        )
    return (
        "PROCEED_WITH_CAUTION",
        f"Moderate degradation ({mean_deg*100:.1f}%), acceptable stability",
        "Strategy shows reasonable generalization. Additional validation recommended.",
    )


# Degradation statistics across all runs; a run counts as overfit when its
# degradation exceeds 30%
degradation_series = df_results['degradation']
mean_deg = degradation_series.mean()
std_deg = degradation_series.std()
pct_overfit = (degradation_series > 0.30).sum() / len(df_results)

print("\n" + "=" * 60)
print("ROBUSTNESS DECISION")
print("=" * 60 + "\n")

decision, reason, recommendation = _robustness_verdict(mean_deg, std_deg, pct_overfit)

print(f"Decision: {decision}")
print(f"\nReason: {reason}")
print(f"\nRecommendation: {recommendation}")

# Recommended parameters (most frequently chosen)
print("\nRecommended Parameters for Live Trading:")
for param in param_names:
    top_value, top_count = Counter(r['best_params'][param] for r in results).most_common(1)[0]
    print(f"  {param}: {top_value} (chosen {top_count/len(results)*100:.0f}% of the time)")

print("\n" + "=" * 60)

In [None]:
# Save results to JSON
# Headline statistics plus the decision reached above
summary = {
    'mean_train_sharpe': float(df_results['train_sharpe'].mean()),
    'mean_test_sharpe': float(df_results['test_sharpe'].mean()),
    'mean_degradation': float(mean_deg),
    'std_degradation': float(std_deg),
    'pct_overfit': float(pct_overfit),
    'decision': decision,
    'reason': reason,
    'recommendation': recommendation,
}

# Most frequently chosen value of each parameter
recommended_parameters = {
    name: Counter(r['best_params'][name] for r in results).most_common(1)[0][0]
    for name in param_names
}

output_data = {
    'configuration': config,
    'summary': summary,
    'recommended_parameters': recommended_parameters,
    'detailed_results': results,
}

# Save to file; the name carries a timestamp so earlier exports are kept
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
output_filename = f"walkforward_results_{timestamp}.json"
with open(output_filename, 'w') as f:
    # default=str stringifies the datetime objects in config and results
    json.dump(output_data, f, indent=2, default=str)

print(f"\nResults saved to: {output_filename}")