# XAUBOT Phase 2: Comprehensive Backtesting

This notebook runs all Phase 2 backtesting modules:
1. Walk-Forward Optimization (7 folds)
2. Monte Carlo Simulation (5K paths)
3. Historical Stress Testing (8 events)
4. Regime Analysis
5. Reality Gap Testing (8 friction levels)

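Run all cells sequentially to complete Phase 2 validation. The final summary cell reuses the result variables created by each earlier cell, so none of them can be skipped.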

In [None]:
import sys
import os
from pathlib import Path
from datetime import datetime

# Set project root
def find_project_root(start, home=None):
    """Locate the xaubot project directory, starting the search at *start*.

    Resolution order:

    1. The nearest directory named exactly ``xaubot`` at or above *start*.
       This makes the notebook work when it is launched from a subdirectory
       of the project; a plain substring test on the path would accept the
       subdirectory itself as the root.
    2. *start* unchanged when its path merely contains the text ``xaubot``
       (for example a renamed checkout).
    3. ``start / 'xaubot'`` if that path exists.
    4. ``home / 'xaubot'`` if that path exists.
    5. *start* unchanged as a last resort.

    Args:
        start: Directory to search from (normally ``Path.cwd()``).
        home: Directory used for the step 4 fallback; defaults to
            ``Path.home()``.

    Returns:
        The ``Path`` selected as the project root.
    """
    start = Path(start)

    # Walk from the start directory up to the filesystem root
    for candidate in (start, *start.parents):
        if candidate.name == 'xaubot':
            return candidate

    if 'xaubot' in str(start):
        return start

    nested = start / 'xaubot'
    if nested.exists():
        return nested

    home_dir = Path.home() if home is None else Path(home)
    in_home = home_dir / 'xaubot'
    if in_home.exists():
        return in_home

    return start


project_root = find_project_root(Path.cwd())

# The backtesting modules are imported relative to the project root, so make
# it both the working directory and the first entry on sys.path.
os.chdir(project_root)
sys.path.insert(0, str(project_root))

print(f"Project root: {project_root}")
print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

## 1. Walk-Forward Optimization (7 Folds)

In [None]:
from python_training.backtesting.walk_forward import WalkForwardOptimizer

# Walk-forward optimization: run the optimizer against the project root and
# report the mean accuracy stored under the result's 'wfo_score' entry.
rule = "=" * 70
print(rule)
print("WALK-FORWARD OPTIMIZATION")
print(rule)

wfo = WalkForwardOptimizer(project_root)
wfo_results = wfo.run_walk_forward()

# Scale by 100 so the value is displayed as a percentage
wfo_accuracy_pct = wfo_results['wfo_score']['mean_accuracy'] * 100
print(f"\nWFO Complete - Mean Accuracy: {wfo_accuracy_pct:.2f}%")

## 2. Monte Carlo Simulation (5K Paths)

In [None]:
from python_training.backtesting.monte_carlo import MonteCarloSimulator

# Monte Carlo simulation: run the full simulation and print the headline
# numbers of the 'shuffle' method.
rule = "=" * 70
print(rule, "MONTE CARLO SIMULATION", rule, sep="\n")

mc = MonteCarloSimulator(project_root)
mc_results = mc.run_full_simulation()

# `shuffle` is kept as a top-level name: the summary cell reads it again
shuffle_method = mc_results['methods']['shuffle']
shuffle = shuffle_method['metrics']

print("\nMonte Carlo Complete:")
mean_return_pct = shuffle['total_return']['mean'] * 100
print(f"  - Mean Return: {mean_return_pct:.1f}%")
mean_drawdown_pct = shuffle['max_drawdown']['mean'] * 100
print(f"  - Mean Max DD: {mean_drawdown_pct:.1f}%")
risk_of_ruin_pct = shuffle_method['risk_of_ruin'] * 100
print(f"  - Risk of Ruin: {risk_of_ruin_pct:.2f}%")

## 3. Historical Stress Testing (8 Events)

In [None]:
from python_training.backtesting.stress_test import StressTester

# Stress testing: run the stress tester and print how many events passed
# together with the survival rate from the result's 'summary' entry.
rule = "=" * 70
print(rule, "STRESS TESTING", rule, sep="\n")

st = StressTester(project_root)
st_results = st.run_stress_test()

print("\nStress Test Complete:")
stress_summary = st_results['summary']
passed = stress_summary['events_passed']
tested = stress_summary['events_tested']
print(f"  - Events Passed: {passed}/{tested}")
survival_pct = stress_summary['survival_rate'] * 100
print(f"  - Survival Rate: {survival_pct:.0f}%")

## 4. Regime Analysis

In [None]:
from python_training.backtesting.regime_analysis import RegimeAnalyzer

# Regime analysis: run the analyzer and print win rate and profit factor for
# every regime whose name does not start with 'VOL_'.
rule = "=" * 70
print(rule, "REGIME ANALYSIS", rule, sep="\n")

ra = RegimeAnalyzer(project_root)
ra_results = ra.run_regime_analysis()

print("\nRegime Analysis Complete:")
for regime_name, stats in ra_results['regime_results'].items():
    # Entries prefixed with 'VOL_' are left out of this printout
    if regime_name.startswith('VOL_'):
        continue
    win_rate_pct = stats['win_rate'] * 100
    print(f"  - {regime_name}: {win_rate_pct:.1f}% WR, {stats['profit_factor']:.2f} PF")

## 5. Reality Gap Testing (8 Friction Levels)

In [None]:
from python_training.backtesting.reality_gap import RealityGapTester

# Reality gap testing: run the tester and print the baseline return, the
# return at the final level, and whether that final level is still profitable.
rule = "=" * 70
print(rule, "REALITY GAP TESTING", rule, sep="\n")

rg = RealityGapTester(project_root)
rg_results = rg.run_reality_gap_test()

print("\nReality Gap Complete:")
baseline_pct = rg_results['baseline_return'] * 100
print(f"  - Baseline Return: {baseline_pct:.1f}%")
final_pct = rg_results['final_level_return'] * 100
print(f"  - Final Level Return: {final_pct:.1f}%")
if rg_results['final_level_profitable']:
    profitable_label = 'Yes'
else:
    profitable_label = 'No'
print(f"  - Still Profitable: {profitable_label}")

## Phase 2 Summary

In [None]:
import json

# Phase 2 summary: derive a PASS/FAIL status for each module from the result
# variables created by the earlier cells (wfo_results, shuffle, mc_results,
# st_results, rg_results), print them, and save the combined numbers as JSON.
print("="*70)
print("PHASE 2 BACKTESTING SUMMARY")
print("="*70)
print(f"Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print()

# Directory that receives the combined results file
results_dir = project_root / "python_training" / "backtesting" / "results"
# Create it when missing so the final write cannot fail with FileNotFoundError
results_dir.mkdir(parents=True, exist_ok=True)
output_path = results_dir / "phase2_combined_results.json"

# Values that feed both the PASS/FAIL decision and the displayed metric
wfo_accuracy = wfo_results['wfo_score']['mean_accuracy']
mc_mean_return = shuffle['total_return']['mean']
survival_rate = st_results['summary']['survival_rate']
final_profitable = rg_results['final_level_profitable']
final_return = rg_results['final_level_return']

summary = {
    "Walk-Forward": {
        # Passes when mean accuracy is strictly above 55%
        "status": "PASS" if wfo_accuracy > 0.55 else "FAIL",
        "metric": f"{wfo_accuracy*100:.1f}% accuracy"
    },
    "Monte Carlo": {
        # Passes when the mean return of the shuffle method is positive
        "status": "PASS" if mc_mean_return > 0 else "FAIL",
        "metric": f"{mc_mean_return*100:.1f}% mean return"
    },
    "Stress Test": {
        # Passes when the survival rate is at least 50%
        "status": "PASS" if survival_rate >= 0.50 else "FAIL",
        "metric": f"{survival_rate*100:.0f}% survival rate"
    },
    "Regime Analysis": {
        # NOTE: reported as PASS unconditionally; no threshold is applied
        "status": "PASS",
        "metric": "All regimes analyzed"
    },
    "Reality Gap": {
        "status": "PASS" if final_profitable else "FAIL",
        "metric": f"{final_return*100:.1f}% at full friction"
    }
}

for test, data in summary.items():
    # The status is already the literal "PASS" or "FAIL"; print it as the tag
    print(f"  [{data['status']}] {test}: {data['metric']}")

# Save combined results
combined = {
    "run_date": datetime.now().isoformat(),
    "summary": summary,
    "wfo": wfo_results['wfo_score'],
    "monte_carlo": {
        "mean_return": mc_mean_return,
        "mean_drawdown": shuffle['max_drawdown']['mean'],
        "risk_of_ruin": mc_results['methods']['shuffle']['risk_of_ruin']
    },
    "stress_test": st_results['summary'],
    "reality_gap": {
        "baseline_return": rg_results['baseline_return'],
        "final_return": final_return,
        "profitable": final_profitable
    }
}

with open(output_path, "w", encoding="utf-8") as f:
    json.dump(combined, f, indent=2)

print(f"\nResults saved to: {output_path}")
print("\n" + "="*70)
print("PHASE 2 BACKTESTING COMPLETE")
print("="*70)