# Advanced Monte Carlo Validation

**Project ID:** 26204235

This notebook implements advanced Monte Carlo validation metrics:
- **PSR (Probabilistic Sharpe Ratio):** ≥0.95 threshold
- **DSR (Deflated Sharpe Ratio):** Multiple testing correction
- **MinTRL (Minimum Track Record Length):** Required observations
- **Bootstrap Resampling:** 1,000 runs
- **Permutation Testing:** p < 0.05
- **MC Drawdown Distribution:** 99th percentile

In [None]:
# Initialize QuantBook
# NOTE(review): the two star imports below are presumably what brings `QuantBook`
# into scope (it is not imported by name anywhere in this cell) — confirm.
from QuantConnect import *
from QuantConnect.Research import *
import numpy as np
import pandas as pd
from scipy import stats
import json

# Single research-session handle; the data-loading cell below calls its
# ReadProject / ReadBacktest methods.
qb = QuantBook()

In [None]:
# Get latest backtest for this project
project_id = 26204235

# List all backtests and get the most recent
project = qb.ReadProject(project_id)
backtests = project['backtests']

if not backtests:
    raise ValueError("No backtests found for this project!")

# Get most recent backtest
# NOTE(review): this simply takes the first list entry; it assumes the API
# returns backtests newest-first — confirm against the QuantConnect API.
latest_backtest_id = backtests[0]['backtestId']
print(f'Using latest backtest: {latest_backtest_id}')

# Fetch backtest
backtest = qb.ReadBacktest(project_id, latest_backtest_id)

# Extract equity curve
equity_curve = backtest.Charts['Strategy Equity'].Series['Equity'].Values
# Simple point-to-point returns of the equity series; the leading NaN produced
# by pct_change() is dropped.
# NOTE(review): every metric below annualizes with sqrt(252), i.e. it treats
# each chart point as one trading day — confirm the chart's sampling frequency.
returns = pd.Series([p.y for p in equity_curve]).pct_change().dropna()

# Extract trades
# Only the count of this collection is used (printed below).
trades = backtest.Orders

print(f'Loaded {len(returns)} return observations')
print(f'Total trades: {len(trades)}')

## 1. Probabilistic Sharpe Ratio (PSR)

PSR calculates the probability that the true Sharpe ratio exceeds a benchmark.

**Threshold:** ≥0.95 (95% confidence)

In [None]:
def calculate_psr(returns, benchmark_sr=0.0, periods_per_year=252):
    """Calculate the Probabilistic Sharpe Ratio (PSR).

    PSR is the probability that the true Sharpe ratio exceeds ``benchmark_sr``
    given the sample length, skewness and kurtosis of ``returns``.

    The test statistic is built from the *per-period* Sharpe ratio, because the
    standard error divides by the number of per-period observations; feeding
    the annualized Sharpe into that formula overstates the z-score by roughly
    ``sqrt(periods_per_year)``.

    Args:
        returns: pandas Series of per-period simple returns (at least 2 values
            with non-zero dispersion).
        benchmark_sr: Annualized Sharpe ratio to beat (same units as the
            returned Sharpe ratio).
        periods_per_year: Number of return periods per year used for
            annualization (252 for daily data).

    Returns:
        Tuple ``(psr, sr, skew, kurt)`` where ``sr`` is the annualized Sharpe
        ratio, ``skew`` is the sample skewness and ``kurt`` is the sample
        *excess* (Fisher) kurtosis as returned by ``scipy.stats.kurtosis``.
    """
    n = len(returns)
    annualization = np.sqrt(periods_per_year)

    sr_period = returns.mean() / returns.std()  # Per-period Sharpe
    sr = sr_period * annualization  # Annualized, for reporting only
    skew = stats.skew(returns)
    kurt = stats.kurtosis(returns)  # Excess kurtosis (normal -> 0)

    # Express the benchmark in the same per-period units as the statistic.
    benchmark_period = benchmark_sr / annualization

    # Standard error of the Sharpe ratio, adjusted for non-normality:
    #   Var[SR] = (1 - g3*SR + (g4 - 1)/4 * SR^2) / (n - 1)
    # with g4 the Pearson kurtosis, i.e. excess kurtosis + 3, so
    # (g4 - 1)/4 == (kurt + 2)/4.
    se_sr = np.sqrt(
        (1 - skew * sr_period + ((kurt + 2) / 4) * (sr_period ** 2)) / (n - 1)
    )

    # PSR
    psr = stats.norm.cdf((sr_period - benchmark_period) / se_sr)

    return psr, sr, skew, kurt

# Run PSR with the default benchmark_sr=0.0 and keep the moments for the
# final JSON summary.
psr, sharpe, skew, kurt = calculate_psr(returns)

print(f'Sharpe Ratio: {sharpe:.3f}')
print(f'Skewness: {skew:.3f}')
# `kurt` comes from scipy.stats.kurtosis with its defaults, i.e. excess
# kurtosis: a normal distribution prints ~0, not 3.
print(f'Kurtosis: {kurt:.3f}')
print(f'PSR: {psr:.4f}')
print(f"Status: {'✅ PASS' if psr >= 0.95 else '❌ FAIL'} (threshold: 0.95)")

## 2. Deflated Sharpe Ratio (DSR)

DSR corrects for multiple testing bias (trying many strategies/parameters).

In [None]:
def calculate_dsr(returns, n_trials=10, benchmark_sr=0.0, periods_per_year=252):
    """Calculate the Deflated Sharpe Ratio (DSR).

    DSR is a PSR whose benchmark is the Sharpe ratio one would expect from the
    best of ``n_trials`` unskilled strategies, which corrects for selection
    bias after trying many variants.

    All quantities are computed on the per-period Sharpe ratio so that they
    are consistent with the ``n - 1`` observations in the variance term.

    Args:
        returns: pandas Series of per-period simple returns (at least 2 values
            with non-zero dispersion).
        n_trials: Number of strategy/parameter configurations that were tried.
            Values <= 1 mean no multiple-testing correction (threshold 0).
        benchmark_sr: Accepted for signature compatibility; the threshold is
            always the expected maximum Sharpe under the null, so this value
            is not used.
        periods_per_year: Accepted for consistency with the other metrics; the
            DSR itself is scale-free and does not depend on it.

    Returns:
        The DSR as a probability in [0, 1].
    """
    n = len(returns)
    sr_period = returns.mean() / returns.std()  # Per-period Sharpe
    skew = stats.skew(returns)
    kurt = stats.kurtosis(returns)  # Excess kurtosis (normal -> 0)

    # Variance of the Sharpe ratio estimator; (kurt + 2)/4 is (g4 - 1)/4 with
    # g4 the Pearson kurtosis (= excess kurtosis + 3).
    var_sr = (1 - skew * sr_period + ((kurt + 2) / 4) * (sr_period ** 2)) / (n - 1)
    se_sr = np.sqrt(var_sr)

    # Expected maximum Sharpe from n_trials (under null). The estimator's own
    # standard error is used as a stand-in for the dispersion of Sharpe ratios
    # across trials, which is not available here.
    if n_trials > 1:
        gamma = np.euler_gamma  # Euler-Mascheroni constant
        max_sr_expected = se_sr * (
            (1 - gamma) * stats.norm.ppf(1 - 1 / n_trials)
            + gamma * stats.norm.ppf(1 - 1 / (n_trials * np.e))
        )
    else:
        # With a single trial norm.ppf(0) is -inf, which would force DSR to 1;
        # no selection took place, so the threshold is simply zero.
        max_sr_expected = 0.0

    # DSR
    dsr = stats.norm.cdf((sr_period - max_sr_expected) / se_sr)

    return dsr

# Assume 10 trials (conservative estimate)
# NOTE(review): n_trials should be the number of strategy/parameter variants
# actually tried before picking this backtest; 10 is a placeholder — confirm.
dsr = calculate_dsr(returns, n_trials=10)

print(f'DSR: {dsr:.4f}')
# Three-way status: >= 0.95 pass, [0.90, 0.95) marginal, otherwise fail.
print(f"Status: {'✅ PASS' if dsr >= 0.95 else '⚠️ MARGINAL' if dsr >= 0.90 else '❌ FAIL'}")

## 3. Minimum Track Record Length (MinTRL)

Minimum number of return observations needed to conclude, at the chosen confidence level, that the true Sharpe ratio exceeds a target Sharpe ratio.

In [None]:
def calculate_min_trl(returns, target_sr=1.0, confidence=0.95, periods_per_year=252):
    """Calculate the Minimum Track Record Length (MinTRL).

    MinTRL is the number of per-period observations needed to state, with the
    given confidence, that the true Sharpe ratio exceeds ``target_sr``:

        MinTRL = 1 + (1 - g3*SR + (g4 - 1)/4 * SR^2) * (z / (SR - SR*))^2

    where SR and SR* are *per-period* Sharpe ratios, g3 is the skewness and g4
    the Pearson kurtosis.

    Args:
        returns: pandas Series of per-period simple returns.
        target_sr: Annualized Sharpe ratio the strategy must be shown to beat.
        confidence: One-sided confidence level (e.g. 0.95).
        periods_per_year: Number of return periods per year used to convert
            ``target_sr`` to per-period units (252 for daily data).

    Returns:
        Required number of observations as an int. If the observed Sharpe
        ratio is not finite or does not exceed ``target_sr``, no track record
        length can establish the claim, and the largest int64 value is
        returned as an "unreachable" sentinel, so callers comparing a track
        record length against it always see it as insufficient.
    """
    unreachable = int(np.iinfo(np.int64).max)

    sr_period = returns.mean() / returns.std()  # Per-period Sharpe
    target_period = target_sr / np.sqrt(periods_per_year)

    # Squaring (SR - SR*) would hide the sign, so an under-performing strategy
    # must be rejected explicitly (this also covers NaN / inf Sharpe ratios).
    if not np.isfinite(sr_period) or sr_period <= target_period:
        return unreachable

    skew = stats.skew(returns)
    kurt = stats.kurtosis(returns)  # Excess kurtosis (normal -> 0)

    z = stats.norm.ppf(confidence)

    # MinTRL formula; (kurt + 2)/4 is (g4 - 1)/4 with g4 = excess kurtosis + 3.
    min_trl = 1 + (1 - skew * sr_period + ((kurt + 2) / 4) * sr_period ** 2) * (z / (sr_period - target_period)) ** 2

    return int(np.ceil(min_trl))

# Uses the function defaults: target_sr=1.0 and confidence=0.95.
min_trl = calculate_min_trl(returns)
current_length = len(returns)

print(f'Current track record: {current_length} observations')
print(f'MinTRL required: {min_trl} observations')
# Sufficient when the number of return observations reaches MinTRL.
print(f"Status: {'✅ SUFFICIENT' if current_length >= min_trl else '❌ INSUFFICIENT'}")

## 4. Bootstrap Resampling (1,000 runs)

Generate alternative equity curves to assess robustness.

In [None]:
def bootstrap_returns(returns, n_simulations=1000, seed=None, periods_per_year=252):
    """Bootstrap-resample returns to get Sharpe and max-drawdown distributions.

    Each simulation draws ``len(returns)`` observations with replacement,
    then records the annualized Sharpe ratio and the maximum drawdown of the
    compounded equity curve (starting from an equity of 1.0).

    Args:
        returns: Sequence (e.g. pandas Series) of per-period simple returns.
        n_simulations: Number of bootstrap resamples.
        seed: Optional integer seed for reproducible results. When ``None``
            the global ``np.random`` state is used, so ``np.random.seed``
            still controls the draws.
        periods_per_year: Number of return periods per year used for
            annualization (252 for daily data).

    Returns:
        Tuple ``(sharpe_dist, drawdown_dist)`` of 1-D float arrays of length
        ``n_simulations``; drawdowns are positive fractions (0.25 == 25%).

    Raises:
        ValueError: If ``returns`` is empty.
    """
    values = np.asarray(returns, dtype=float)
    n = len(values)
    if n == 0:
        raise ValueError("returns must contain at least one observation")

    rng = np.random if seed is None else np.random.RandomState(seed)
    annualization = np.sqrt(periods_per_year)

    sharpe_dist = np.empty(n_simulations)
    drawdown_dist = np.empty(n_simulations)

    for i in range(n_simulations):
        # Resample with replacement
        resampled = rng.choice(values, size=n, replace=True)

        # Sample std (ddof=1) to match pandas' Series.std(), which is what the
        # observed Sharpe ratio is computed with; ndarray.std() defaults to 0.
        sharpe_dist[i] = resampled.mean() / resampled.std(ddof=1) * annualization

        # Drawdown is measured against the running peak *including* the
        # starting equity of 1.0, so a loss on the first bar is not ignored.
        cum_returns = np.cumprod(1 + resampled)
        running_max = np.maximum(np.maximum.accumulate(cum_returns), 1.0)
        drawdown = (cum_returns - running_max) / running_max
        drawdown_dist[i] = abs(drawdown.min())

    return sharpe_dist, drawdown_dist

# No random seed is set anywhere in this notebook, so these statistics change
# slightly from run to run.
print('Running 1,000 bootstrap simulations...')
sharpe_dist, drawdown_dist = bootstrap_returns(returns, n_simulations=1000)

# Spread of the annualized Sharpe ratio across resamples.
print(f'\nBootstrap Sharpe Distribution:')
print(f'  Mean: {sharpe_dist.mean():.3f}')
print(f'  Median: {np.median(sharpe_dist):.3f}')
print(f'  5th percentile: {np.percentile(sharpe_dist, 5):.3f}')
print(f'  95th percentile: {np.percentile(sharpe_dist, 95):.3f}')

# Drawdowns are stored as positive fractions, hence the percent formatting.
print(f'\nBootstrap Drawdown Distribution:')
print(f'  Mean: {drawdown_dist.mean():.1%}')
print(f'  Median: {np.median(drawdown_dist):.1%}')
print(f'  99th percentile (worst case): {np.percentile(drawdown_dist, 99):.1%}')

## 5. Permutation Testing

Test if results are statistically significant (p < 0.05).

In [None]:
def permutation_test(returns, n_permutations=1000, seed=None):
    """Randomization test for the significance of the Sharpe ratio.

    Re-ordering a return series changes neither its mean nor its standard
    deviation, so shuffling cannot produce a null distribution for the Sharpe
    ratio. Instead this uses the one-sample sign-flip randomization test:
    under the null hypothesis that returns are symmetric around zero (no
    edge), each return is equally likely to have had the opposite sign.

    Args:
        returns: Sequence (e.g. pandas Series) of per-period simple returns.
        n_permutations: Number of random sign assignments to draw.
        seed: Optional integer seed for reproducible results. When ``None``
            the global ``np.random`` state is used.

    Returns:
        One-sided p-value: the (add-one smoothed) share of sign-flipped series
        whose Sharpe ratio is at least the observed one. Returns 1.0 when the
        observed Sharpe ratio is undefined (fewer than 2 observations or zero
        dispersion), i.e. degenerate input is never reported as significant.
    """
    values = np.asarray(returns, dtype=float)
    n = len(values)

    def _sharpe(sample):
        # Annualization is omitted: a positive constant factor cannot change
        # the ordering of Sharpe ratios, hence not the p-value either.
        return sample.mean() / sample.std(ddof=1)

    if n < 2:
        return 1.0

    with np.errstate(divide='ignore', invalid='ignore'):
        observed_sr = _sharpe(values)
        if not np.isfinite(observed_sr):
            return 1.0

        rng = np.random if seed is None else np.random.RandomState(seed)

        # Randomly flip the sign of every return and recompute the Sharpe
        permuted_srs = np.empty(n_permutations)
        for i in range(n_permutations):
            signs = rng.choice([-1.0, 1.0], size=n)
            permuted_srs[i] = _sharpe(values * signs)

    # p-value: proportion of randomized SRs >= observed SR
    p_value = (np.sum(permuted_srs >= observed_sr) + 1) / (n_permutations + 1)

    return p_value

# Uses the function default of n_permutations=1000; no random seed is set in
# this notebook, so the p-value can vary slightly between runs.
print('Running permutation test (1,000 permutations)...')
p_value = permutation_test(returns)

print(f'\nPermutation Test:')
print(f'  p-value: {p_value:.4f}')
# Significance is defined strictly: p must be below 0.05.
print(f"  Status: {'✅ SIGNIFICANT' if p_value < 0.05 else '❌ NOT SIGNIFICANT'} (threshold: p < 0.05)")

## Final Decision

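The decision is gated, in order, on the PSR, the permutation-test p-value and the MinTRL check; the DSR and bootstrap statistics are included in the JSON output for reference but do not affect the decision.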

In [None]:
# Collect all results
# Values are cast to built-in float/int so json.dumps can serialize them
# (NumPy scalars are not JSON-serializable).
results = {
    'psr': float(psr),
    'dsr': float(dsr),
    'sharpe_ratio': float(sharpe),
    'min_trl': int(min_trl),
    'current_trl': int(len(returns)),
    'bootstrap_sharpe_mean': float(sharpe_dist.mean()),
    'bootstrap_sharpe_5th': float(np.percentile(sharpe_dist, 5)),
    'bootstrap_drawdown_99th': float(np.percentile(drawdown_dist, 99)),
    'permutation_pvalue': float(p_value),
    'skewness': float(skew),
    'kurtosis': float(kurt)
}

# Decision logic: gates are evaluated in order and the first failure wins.
# NOTE(review): DSR and the bootstrap statistics are reported but do not gate
# the decision — confirm this is intended.
if psr < 0.95:
    decision = 'FAILED_PSR'
    reason = f'PSR {psr:.3f} < 0.95 (insufficient statistical significance)'
elif p_value >= 0.05:
    # Significance is defined as p < 0.05 in the permutation-test cell, so a
    # p-value of exactly 0.05 must fail here as well.
    decision = 'FAILED_PERMUTATION'
    reason = f'p-value {p_value:.4f} >= 0.05 (not statistically significant)'
elif len(returns) < min_trl:
    decision = 'INSUFFICIENT_DATA'
    reason = f'Track record {len(returns)} < MinTRL {min_trl}'
else:
    decision = 'ROBUST_STRATEGY'
    reason = f'PSR {psr:.3f}, p-value {p_value:.4f}, all tests passed'

results['decision'] = decision
results['reason'] = reason

print('='*60)
print('FINAL DECISION')
print('='*60)
print(f'Decision: {decision}')
print(f'Reason: {reason}')
print('\nCopy the JSON below and paste when prompted by qc_validate collect-results:')
print('='*60)
print(json.dumps(results, indent=2))
print('='*60)