# Monte Carlo Validation Demonstration

This notebook demonstrates how to use the Monte Carlo validation framework to assess the statistical robustness of trading strategies.

In [None]:
import os
import sys
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# The parent directory must be on the import path before the `src` imports below.
sys.path.append('..')

from src.strategies.examples.moving_average import MovingAverageCrossover
from src.backtesting.engines.vectorbt_engine import VectorBTEngine
from src.validation.monte_carlo import (
    MonteCarloValidator,
    ResamplingMethod,
    ConfidenceLevel
)

# Set style
plt.style.use('seaborn-v0_8-darkgrid')
%matplotlib inline

## 1. Generate Sample Data and Run Backtest

In [None]:
# Synthetic daily OHLCV bars from 2023-01-01 through 2024-01-01.
# The global NumPy RNG is seeded once, so the draws below must stay in this
# order (close walk, open, high, low, volume) to reproduce the same frame.
np.random.seed(42)
dates = pd.date_range('2023-01-01', '2024-01-01', freq='D')
n_bars = len(dates)

# Close price: linear drift from 100 to 120 plus a cumulative Gaussian walk
drift = np.linspace(100, 120, n_bars)
random_walk = np.random.normal(0, 2, n_bars).cumsum()
prices = drift + random_walk

open_px = prices + np.random.uniform(-0.5, 0.5, n_bars)
raw_high = prices + np.random.uniform(0, 1, n_bars)
raw_low = prices - np.random.uniform(0, 1, n_bars)
volume = np.random.uniform(1000000, 5000000, n_bars)

data = pd.DataFrame(
    {
        'open': open_px,
        # Widen each bar where needed so high/low always bracket open and close
        'high': np.maximum.reduce([open_px, prices, raw_high]),
        'low': np.minimum.reduce([open_px, prices, raw_low]),
        'close': prices,
        'volume': volume,
    },
    index=dates,
)

print(f"Data shape: {data.shape}")
print(f"Date range: {data.index[0]} to {data.index[-1]}")

In [None]:
# Backtest the moving-average crossover strategy on the sample data
ma_parameters = {
    'fast_period': 10,
    'slow_period': 30,
    'ma_type': 'sma',
}
strategy = MovingAverageCrossover(parameters=ma_parameters)

engine = VectorBTEngine()
backtest_result = engine.run_backtest(
    strategy=strategy,
    data=data,
    initial_capital=100000,
    commission=0.001,
)

# Baseline metrics of the unperturbed run; later cells compare the simulated
# distributions against these.
# NOTE(review): the ':10.4f' format assumes every metric value is numeric — confirm.
print("Original Backtest Metrics:")
for metric_name, metric_value in backtest_result.metrics.items():
    print(f"{metric_name:20s}: {metric_value:10.4f}")

## 2. Run Monte Carlo Validation

In [None]:
# Validator settings: 1000 simulations, 95% and 99% confidence levels,
# bootstrap resampling, fixed seed for repeatable runs
validator_settings = dict(
    n_simulations=1000,
    confidence_levels=[0.95, 0.99],
    resampling_method=ResamplingMethod.BOOTSTRAP,
    random_seed=42,
)
mc_validator = MonteCarloValidator(**validator_settings)

# Validate the baseline backtest; n_jobs=-1 is intended to use all CPUs
mc_result = mc_validator.run_validation(backtest_result=backtest_result, n_jobs=-1)

n_completed = len(mc_result.simulation_results)
print(f"\nCompleted {n_completed} Monte Carlo simulations")

## 3. Analyze Confidence Intervals

In [None]:
# Display confidence intervals for key metrics
metrics_to_show = ['total_return', 'sharpe_ratio', 'max_drawdown', 'win_rate']

print("\nConfidence Intervals:")
print("="*80)
for metric in metrics_to_show:
    # Metrics the Monte Carlo run did not produce are skipped rather than raising
    if metric in mc_result.confidence_intervals:
        print(f"\n{metric.upper()}:")
        print(f"  Original: {backtest_result.metrics[metric]:10.4f}")

        for cl in [0.95, 0.99]:
            ci = mc_result.confidence_intervals[metric][cl]
            # Label the level with ':g' rather than int(cl*100): int() truncates,
            # so a float product that lands just below a whole number would be
            # mislabelled (e.g. int(0.57 * 100) == 56). Output for 0.95 and 0.99
            # is unchanged ("95%" / "99%").
            print(f"  {cl * 100:g}% CI: [{ci['lower']:10.4f}, {ci['upper']:10.4f}]")
            print(f"  Mean: {ci['mean']:10.4f}, Median: {ci['median']:10.4f}")

## 4. Visualize Metric Distributions

In [None]:
# Plot distributions of key metrics
# The grid is sized from metrics_to_show instead of a fixed 2x2, so editing the
# metric list cannot index past the available axes. With four metrics this is
# still a 2x2 grid at figsize (12, 10).
n_cols = 2
n_rows = max(1, (len(metrics_to_show) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 5 * n_rows), squeeze=False)
axes = axes.flatten()

for ax, metric in zip(axes, metrics_to_show):
    # Same availability check the confidence-interval report uses: skip metrics
    # the run did not produce instead of failing with a KeyError mid-figure.
    if metric not in mc_result.confidence_intervals or metric not in backtest_result.metrics:
        print(f"Skipping {metric}: not present in the Monte Carlo results")
        ax.set_visible(False)
        continue

    # Extract metric values from simulations
    values = np.asarray([r[metric] for r in mc_result.simulation_results], dtype=float)
    # Drop non-finite entries: an infinite value makes the automatic bin range
    # non-finite and the histogram call would raise.
    values = values[np.isfinite(values)]

    # Plot histogram
    ax.hist(values, bins=50, alpha=0.7, color='blue', edgecolor='black')

    # Add original value line
    original_value = backtest_result.metrics[metric]
    ax.axvline(original_value, color='red', linestyle='--', linewidth=2,
               label=f'Original: {original_value:.3f}')

    # Add confidence interval lines (only the upper line carries the legend label)
    ci_95 = mc_result.confidence_intervals[metric][0.95]
    ax.axvline(ci_95['lower'], color='green', linestyle=':', linewidth=1.5)
    ax.axvline(ci_95['upper'], color='green', linestyle=':', linewidth=1.5,
               label=f"95% CI: [{ci_95['lower']:.3f}, {ci_95['upper']:.3f}]")

    ax.set_title(f'Distribution of {metric.replace("_", " ").title()}')
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')
    ax.legend()

# Hide panels left over when the metric count does not fill the grid
for unused_ax in axes[len(metrics_to_show):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

## 5. Risk Analysis

In [None]:
# Risk metrics computed from the Monte Carlo result
risk_metrics = mc_result.get_risk_metrics()

print("\nRisk Metrics:")
print("="*50)
for name, value in risk_metrics.items():
    # Entries whose name mentions 'probability' or 'risk' are shown as
    # percentages; everything else as a plain 4-decimal number.
    as_percentage = any(tag in name for tag in ('probability', 'risk'))
    line = f"{name:35s}: {value:6.2%}" if as_percentage else f"{name:35s}: {value:10.4f}"
    print(line)

# Outcomes at selected percentiles
percentiles = mc_result.get_percentile_outcomes([5, 10, 25, 50, 75, 90, 95])

print("\nPercentile Analysis for Total Return:")
print("="*40)
total_return_percentiles = percentiles['total_return']
for pct, outcome in total_return_percentiles.items():
    print(f"{pct:3.0f}th percentile: {outcome:8.2%}")

## 6. Compare Different Resampling Methods

In [None]:
# Repeat the validation once per resampling scheme, with identical settings
# (500 simulations, 95% level, seed 42) so only the scheme differs.
methods = [
    ResamplingMethod.BOOTSTRAP,
    ResamplingMethod.BLOCK,
    ResamplingMethod.STATIONARY_BOOTSTRAP,
]

method_results = {}
for resampling in methods:
    validator = MonteCarloValidator(
        n_simulations=500,
        confidence_levels=[0.95],
        resampling_method=resampling,
        random_seed=42,
    )
    scheme_result = validator.run_validation(
        backtest_result=backtest_result,
        n_simulations=500,
    )
    # Results are keyed by the enum member's value
    method_results[resampling.value] = scheme_result

# Side-by-side table of the Sharpe-ratio statistics per scheme
table_header = f"{'Method':20s} {'Sharpe Mean':>12s} {'Sharpe Std':>12s} {'95% CI Width':>15s}"
print("\nComparison of Resampling Methods:")
print("="*80)
print(table_header)
print("-"*80)

for method_name, result in method_results.items():
    sharpe_ci = result.confidence_intervals['sharpe_ratio'][0.95]
    lower, upper = sharpe_ci['lower'], sharpe_ci['upper']
    print(f"{method_name:20s} {sharpe_ci['mean']:12.4f} {sharpe_ci['std']:12.4f} {upper - lower:15.4f}")

## 7. Export Results

In [None]:
# Export results for further analysis
# The output directory is named once so the makedirs call and both export
# paths cannot drift apart. `os` comes from the notebook's import cell.
results_dir = '../results/monte_carlo'

# Create results directory if it doesn't exist
os.makedirs(results_dir, exist_ok=True)

# Export to CSV
mc_result.export_metrics_to_csv(os.path.join(results_dir, 'mc_simulation_results.csv'))
print("Exported simulation results to CSV")

# Export summary to JSON
mc_result.export_summary_to_json(os.path.join(results_dir, 'mc_summary.json'))
print("Exported summary statistics to JSON")

# Display summary
summary = mc_result.get_summary()
print("\nValidation Summary:")
print(f"- Simulations: {summary['n_simulations']}")
print(f"- Original Sharpe: {summary['original_metrics']['sharpe_ratio']:.4f}")
# Look the 95% Sharpe interval up once instead of repeating the nested indexing
sharpe_ci_95 = summary['confidence_intervals']['sharpe_ratio'][0.95]
print(f"- 95% CI Sharpe: [{sharpe_ci_95['lower']:.4f}, "
      f"{sharpe_ci_95['upper']:.4f}]")

# The risk section is reported only when the summary includes it
if 'risk_metrics' in summary:
    print(f"- Probability of negative Sharpe: {summary['risk_metrics']['sharpe_below_zero_probability']:.2%}")