# Phase 5: Backtesting & Simulation

Validate strategies before risking real capital.

This notebook covers:
- **Backtesting framework** — realistic simulation with slippage, fees, spread
- **Performance analytics** — equity curves, drawdown, trade-by-trade breakdown
- **Strategy comparison** — side-by-side evaluation
- **Walk-forward optimization** — prevent overfitting
- **Monte Carlo simulation** — stress testing

---

```bash
pip install pandas numpy matplotlib scipy
```

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from dataclasses import dataclass, field
from typing import Callable, Optional

# Use the seaborn-style dark grid theme for every figure drawn below.
plt.style.use('seaborn-v0_8-darkgrid')
# Seed NumPy's global RNG so the synthetic prices and the bootstrap
# simulations (both draw from np.random) are reproducible across runs.
np.random.seed(42)

def generate_ohlcv(n=750, mu=0.10, sigma=0.22, start=100.0):
    """Generate a synthetic daily OHLCV frame from a log-normal random walk.

    Close prices are ``start * exp(cumsum(r))`` where the ``n`` daily
    log-returns ``r`` are drawn from ``Normal(mu / 252, sigma / sqrt(252))``.
    Open is the close perturbed by a small random offset; high and low are
    wicks placed above ``max(open, close)`` and below ``min(open, close)``,
    so every bar satisfies ``low <= open, close <= high``.

    Args:
        n: Number of business-day candles to generate.
        mu: Annualised drift of the log-returns.
        sigma: Annualised volatility of the log-returns.
        start: Price level the walk is scaled from.

    Returns:
        DataFrame with columns ``open``, ``high``, ``low``, ``close`` and
        ``volume``, indexed by business days starting 2022-01-01.

    Note:
        Draws from the global ``np.random`` state; seed it for
        reproducible output.
    """
    dt = 1/252
    returns = np.random.normal(mu * dt, sigma * np.sqrt(dt), n)
    close = start * np.exp(np.cumsum(returns))
    noise = sigma * np.sqrt(dt) * 0.5
    dates = pd.date_range('2022-01-01', periods=n, freq='B')

    # Keep the draw order open -> high -> low -> volume so the random stream
    # (and therefore the close series) is consumed in a fixed sequence.
    open_ = close * (1 + np.random.normal(0, noise, n))
    upper_wick = np.abs(np.random.normal(0, noise, n))
    lower_wick = np.abs(np.random.normal(0, noise, n))
    volume = np.random.lognormal(10, 1, n).astype(int)

    # Anchor the wicks to the bar body; anchoring them to close alone lets
    # the open land outside the [low, high] range.
    high = np.maximum(open_, close) * (1 + upper_wick)
    low = np.minimum(open_, close) * (1 - lower_wick)

    return pd.DataFrame({
        'open': open_,
        'high': high,
        'low': low,
        'close': close,
        'volume': volume
    }, index=dates)

def ema(s, p):
    """Return the exponential moving average of series ``s`` with span ``p``.

    Uses the recursive (``adjust=False``) weighting, so the first output
    equals the first input value.
    """
    smoother = s.ewm(span=p, adjust=False)
    return smoother.mean()

# Build the synthetic price history with the generator defaults
# (750 business-day candles); later cells backtest against this frame.
df = generate_ohlcv()
print(f"Generated {len(df)} candles for backtesting")

---
## 5.1 Backtesting Engine

A proper backtesting engine accounts for:
- **Slippage**: You don't get the exact price you see
- **Fees**: Commissions eat into profits
- **Spread**: Bid-ask spread cost
- **Order delay**: Signal on bar N, fill on bar N+1
- **Position sizing**: Based on account equity, not fixed lots

In [None]:
@dataclass
class BacktestConfig:
    """Account size, cost assumptions and exposure settings for a backtest.

    All cost fields are expressed in basis points (1 bp = 0.01%).
    """
    # Starting account value in dollars.
    initial_equity: float = 100_000
    # Commission charged per trade.
    fee_bps: float = 10.0
    # Expected difference between the observed and the filled price.
    slippage_bps: float = 5.0
    # Bid-ask spread cost.
    spread_bps: float = 2.0
    # Multiplier applied to position returns.
    leverage: float = 1.0
    # Fraction of equity risked per trade (0.01 = 1%).
    risk_per_trade: float = 0.01

    @property
    def total_cost_bps(self):
        """Combined fee, slippage and spread cost in basis points."""
        fee_and_slippage = self.fee_bps + self.slippage_bps
        return fee_and_slippage + self.spread_bps


@dataclass
class BacktestResult:
    """Equity curve and trade log of a backtest, plus derived metrics.

    Ratios are annualised assuming 252 periods per year. ``daily_returns``
    is derived lazily from ``equity_curve`` the first time a ratio needs it
    unless the caller supplies it explicitly.
    """
    # Account value per bar; the first point is the starting equity.
    equity_curve: pd.Series
    # One row per closed trade; summary() reads the 'pnl' and 'fees' columns.
    trades: pd.DataFrame
    # Settings the run used; summary() reads initial_equity, leverage and
    # total_cost_bps from it.
    config: "BacktestConfig"
    # Per-period simple returns; filled in on first use when left as None.
    daily_returns: Optional[pd.Series] = None

    def _returns(self) -> pd.Series:
        """Return the per-period returns, computing and caching them once."""
        if self.daily_returns is None:
            self.daily_returns = self.equity_curve.pct_change().dropna()
        return self.daily_returns

    def total_return(self):
        """Return final equity over initial equity, minus one."""
        return self.equity_curve.iloc[-1] / self.equity_curve.iloc[0] - 1

    def annual_return(self):
        """Return the compound annual growth rate of the equity curve.

        A curve of N points spans N - 1 return periods, which is the
        horizon used for annualisation. Returns 0 when there is no period.
        """
        n_periods = len(self.equity_curve) - 1
        if n_periods <= 0:
            return 0
        n_years = n_periods / 252
        return (1 + self.total_return()) ** (1 / n_years) - 1

    def max_drawdown(self):
        """Return the deepest peak-to-trough decline as a fraction <= 0."""
        peak = self.equity_curve.expanding().max()
        dd = (self.equity_curve - peak) / peak
        return dd.min()

    def sharpe_ratio(self, rf=0.04):
        """Return annualised excess return over annualised volatility.

        ``rf`` is the annual risk-free rate. Returns 0 when volatility is
        zero or undefined.
        """
        returns = self._returns()
        excess = returns.mean() * 252 - rf
        vol = returns.std() * np.sqrt(252)
        return excess / vol if vol > 0 else 0

    def sortino_ratio(self, rf=0.04):
        """Return annualised excess return over downside deviation.

        Downside deviation is the root-mean-square of the below-zero
        returns taken over all periods (gains count as zero), so a single
        losing day still yields a finite ratio. Returns 0 when there is no
        downside.
        """
        returns = self._returns()
        excess = returns.mean() * 252 - rf
        shortfall = returns.clip(upper=0)
        downside = np.sqrt((shortfall ** 2).mean()) * np.sqrt(252)
        return excess / downside if downside > 0 else 0

    def calmar_ratio(self):
        """Return annual return over the absolute maximum drawdown (0 if none)."""
        worst = self.max_drawdown()
        return self.annual_return() / abs(worst) if worst != 0 else 0

    def summary(self):
        """Print headline metrics and, when trades exist, trade statistics."""
        n_trades = len(self.trades)
        print("="*50)
        print("         BACKTEST RESULTS")
        print("="*50)
        print(f"  Initial equity:    ${self.config.initial_equity:>12,.2f}")
        print(f"  Final equity:      ${self.equity_curve.iloc[-1]:>12,.2f}")
        print(f"  Total return:      {self.total_return():>12.2%}")
        print(f"  Annual return:     {self.annual_return():>12.2%}")
        print(f"  Max drawdown:      {self.max_drawdown():>12.2%}")
        print(f"  Sharpe ratio:      {self.sharpe_ratio():>12.2f}")
        print(f"  Sortino ratio:     {self.sortino_ratio():>12.2f}")
        print(f"  Calmar ratio:      {self.calmar_ratio():>12.2f}")
        print(f"  Leverage:          {self.config.leverage:>12.1f}x")
        print(f"  Cost per trade:    {self.config.total_cost_bps:>11.0f} bps")
        print("-"*50)
        if n_trades > 0:
            # Break-even trades (pnl == 0) are counted as losers.
            winners = self.trades[self.trades['pnl'] > 0]
            losers = self.trades[self.trades['pnl'] <= 0]
            win_rate = len(winners) / n_trades
            print(f"  Total trades:      {n_trades:>12}")
            print(f"  Win rate:          {win_rate:>12.1%}")
            if len(winners) > 0:
                print(f"  Avg winner:        ${winners['pnl'].mean():>12,.2f}")
            if len(losers) > 0:
                print(f"  Avg loser:         ${losers['pnl'].mean():>12,.2f}")
            print(f"  Total fees:        ${self.trades['fees'].sum():>12,.2f}")
        print("="*50)


def run_backtest(
    data: pd.DataFrame,
    signal_func: Callable,
    config: Optional[BacktestConfig] = None
) -> BacktestResult:
    """Run a bar-by-bar backtest with transaction costs charged to equity.

    signal_func(data, i) -> float: position signal at index i.
    Positive = long, negative = short, magnitude = conviction (clipped to [-1, 1]).

    On every bar the open position is marked to the close, then the signal
    is evaluated and any position change is executed at that same close.
    When a position is closed, the round-trip cost
    (``config.total_cost_bps`` scaled by position size and leverage) is
    recorded on the trade and deducted from equity. A position still open
    on the last bar is marked to market but not added to the trade log.

    Args:
        data: Price frame with a ``close`` column; its index labels the
            equity curve and the trade dates.
        signal_func: Callable returning the target position for bar ``i``.
        config: Cost and exposure settings; a fresh default
            ``BacktestConfig`` is used when omitted.

    Returns:
        BacktestResult holding the equity curve (one point per bar), the
        closed-trade log and the config used.

    Raises:
        ValueError: If ``data`` has no rows.
    """
    # Build the default per call rather than sharing one instance across calls.
    if config is None:
        config = BacktestConfig()
    if len(data) == 0:
        raise ValueError("data must contain at least one row")

    closes = data['close'].to_numpy()
    cost_rate = config.total_cost_bps / 10000

    equity = config.initial_equity
    position = 0.0  # signed exposure in [-1, 1]
    entry_price = 0.0
    entry_idx = 0

    equity_curve = [equity]
    trades = []

    for i in range(1, len(data)):
        price = closes[i]
        prev_price = closes[i - 1]

        # Mark the open position to this bar's close.
        if position != 0:
            price_change = (price - prev_price) / prev_price
            position_return = position * price_change * config.leverage
            # risk_per_trade is scaled against a 1% baseline: 0.01 -> 1x exposure.
            equity *= (1 + position_return * config.risk_per_trade / 0.01)
            equity = max(equity, 0)  # a wiped-out account stays at zero

        signal = np.clip(signal_func(data, i), -1, 1)

        if signal != position:
            # Close existing position
            if position != 0:
                cost = cost_rate * abs(position) * config.leverage
                pnl_pct = position * (price - entry_price) / entry_price * config.leverage
                notional = equity * config.risk_per_trade / 0.01
                fee_amount = notional * cost
                trades.append({
                    'entry_date': data.index[entry_idx],
                    'exit_date': data.index[i],
                    'side': 'long' if position > 0 else 'short',
                    'entry_price': entry_price,
                    'exit_price': price,
                    'pnl': notional * (pnl_pct - cost),
                    'pnl_pct': pnl_pct,
                    'fees': fee_amount
                })
                # Charge the reported fee to the account so the equity
                # curve reflects the configured costs.
                equity = max(equity - fee_amount, 0)

            # Open new position
            if signal != 0:
                entry_price = price
                entry_idx = i
            position = signal

        equity_curve.append(equity)

    return BacktestResult(
        equity_curve=pd.Series(equity_curve, index=data.index),
        trades=pd.DataFrame(trades),
        config=config
    )


# Example: MA crossover signal function
def ma_crossover_signal(data, i, fast=10, slow=30):
    """Return the moving-average crossover position for bar ``i``.

    Both averages are taken over closes strictly before bar ``i``, so the
    current bar's close is never used. Returns 0 until ``slow`` bars of
    history exist, then 1.0 when the fast average is above the slow one
    and -1.0 otherwise (including when they are equal).
    """
    if i < slow:
        return 0

    closes = data['close']

    def trailing_mean(window):
        first = max(0, i - window)
        return closes.iloc[first:i].mean()

    if trailing_mean(fast) > trailing_mean(slow):
        return 1.0
    return -1.0


# Run backtest
# 3x leverage with 10 bps fees and 5 bps slippage; spread_bps keeps its
# 2 bps default, so total_cost_bps is 17.
config = BacktestConfig(initial_equity=100_000, leverage=3.0, fee_bps=10, slippage_bps=5)
result = run_backtest(df, ma_crossover_signal, config)
result.summary()

In [None]:
def plot_backtest(result: BacktestResult, title: str = "Backtest Results"):
    """Draw a three-panel figure: equity curve, drawdown and per-trade P&L.

    The top panel shows equity against its running peak with underwater
    stretches shaded; the middle panel shows drawdown in percent; the
    bottom panel shows one bar per trade and is left empty when there are
    no trades.
    """
    fig, axes = plt.subplots(3, 1, figsize=(14, 12), gridspec_kw={'height_ratios': [3, 1, 1]})
    equity_ax, drawdown_ax, pnl_ax = axes

    equity = result.equity_curve
    running_peak = equity.expanding().max()

    # Panel 1: equity versus its high-water mark.
    equity_ax.plot(equity.index, equity, color='steelblue', linewidth=1.5)
    equity_ax.plot(running_peak.index, running_peak, color='gray', linestyle='--', alpha=0.5)
    equity_ax.fill_between(equity.index, equity, running_peak,
                           where=equity < running_peak, alpha=0.2, color='red')
    equity_ax.set_ylabel('Equity ($)')
    equity_ax.set_title(title, fontsize=14)

    # Panel 2: percentage distance below the high-water mark.
    drawdown_pct = (equity - running_peak) / running_peak * 100
    drawdown_ax.fill_between(drawdown_pct.index, drawdown_pct, 0, color='red', alpha=0.3)
    drawdown_ax.set_ylabel('Drawdown (%)')

    # Panel 3: one bar per closed trade, green for gains and red otherwise.
    trade_log = result.trades
    if len(trade_log) > 0:
        bar_colors = []
        for trade_pnl in trade_log['pnl']:
            bar_colors.append('green' if trade_pnl > 0 else 'red')
        pnl_ax.bar(range(len(trade_log)), trade_log['pnl'], color=bar_colors, alpha=0.6)
        pnl_ax.axhline(y=0, color='gray', linewidth=0.5)
        pnl_ax.set_ylabel('Trade P&L ($)')
        pnl_ax.set_xlabel('Trade #')

    plt.tight_layout()
    plt.show()

# Visualise the MA crossover run: equity curve, drawdown and per-trade P&L.
plot_backtest(result, "MA Crossover Strategy — 3x Leverage")

---
## 5.2 Cost Sensitivity Analysis

How sensitive is the strategy to transaction costs? This is critical for determining viability.

In [None]:
# Re-run the same MA crossover strategy at several total cost levels and
# collect one equity curve plus one summary row per level.
cost_levels = [0, 5, 10, 20, 30, 50]  # total cost in bps

fig, ax = plt.subplots(figsize=(14, 6))
results_table = []

for cost in cost_levels:
    # Split each total cost 60% fees / 30% slippage / 10% spread.
    cfg = BacktestConfig(initial_equity=100_000, leverage=3.0,
                         fee_bps=cost*0.6, slippage_bps=cost*0.3, spread_bps=cost*0.1)
    r = run_backtest(df, ma_crossover_signal, cfg)
    ax.plot(r.equity_curve.index, r.equity_curve, label=f'{cost} bps', linewidth=1.2)
    # Metrics are pre-formatted as strings, so the table is for display only.
    results_table.append({
        'Cost (bps)': cost,
        'Total Return': f"{r.total_return():.1%}",
        'Annual Return': f"{r.annual_return():.1%}",
        'Sharpe': f"{r.sharpe_ratio():.2f}",
        'Max DD': f"{r.max_drawdown():.1%}",
        'Total Fees': f"${r.trades['fees'].sum():,.0f}" if len(r.trades) > 0 else '$0'
    })

ax.set_title('Impact of Transaction Costs on Strategy Performance', fontsize=14)
ax.set_ylabel('Equity ($)')
ax.legend()
plt.tight_layout()
plt.show()

# Last expression of the cell: the notebook renders it as a table.
pd.DataFrame(results_table)

---
## 5.3 Walk-Forward Optimization

The biggest danger in backtesting is **overfitting**: tuning parameters so closely to past data that they stop working on future data.

Walk-forward optimization:
1. Optimize parameters on a training window
2. Test on the next out-of-sample window
3. Roll forward and repeat

This simulates real-world conditions where you never peek at future data.

In [None]:
def walk_forward_optimization(
    data: pd.DataFrame,
    train_size: int = 252,      # 1 year training
    test_size: int = 63,        # 3 months testing
    param_grid: Optional[dict] = None,
    config: Optional[BacktestConfig] = None
) -> dict:
    """Walk-forward optimization for MA crossover parameters.

    For each window, every (fast, slow) pair with fast < slow is backtested
    on the ``train_size`` bars before the window and the pair with the
    highest Sharpe ratio is kept (the first one wins ties). That pair is
    then traded on the next ``test_size`` bars, and the window advances by
    ``test_size``.

    The out-of-sample run is given up to ``slow`` bars of preceding price
    history so the moving averages are already defined on the first test
    bar. Without it the signal stays flat for the first ``slow`` test bars
    and never trades at all when ``slow >= test_size``. No position is
    taken during the warm-up bars, and they are dropped from the reported
    equity curve.

    Args:
        data: Price frame with a ``close`` column.
        train_size: Number of bars in each training window.
        test_size: Number of bars in each out-of-sample window.
        param_grid: Dict with ``'fast'`` and ``'slow'`` candidate lists;
            a built-in 5x5 grid is used when omitted.
        config: Backtest settings; a fresh default ``BacktestConfig`` is
            used when omitted.

    Returns:
        Dict with ``'equity_curve'`` (the out-of-sample curves chained
        end to end, each rescaled to start at the previous window's final
        equity) and ``'windows'`` (one row per window with the chosen
        parameters and in-sample / out-of-sample metrics).

    Raises:
        ValueError: If ``test_size`` is below 1 or the grid contains no
            pair with fast < slow.
    """
    if param_grid is None:
        param_grid = {
            'fast': [5, 10, 15, 20, 30],
            'slow': [20, 30, 50, 75, 100]
        }
    # Build the default per call rather than sharing one instance across calls.
    if config is None:
        config = BacktestConfig()
    if test_size < 1:
        # A zero step would never advance the window.
        raise ValueError("test_size must be at least 1")

    candidates = [
        (fast, slow)
        for fast in param_grid['fast']
        for slow in param_grid['slow']
        if fast < slow
    ]
    if not candidates:
        raise ValueError("param_grid must contain at least one pair with fast < slow")

    def make_signal(fast, slow):
        """Bind one parameter pair to the shared MA crossover signal."""
        def signal(d, idx):
            return ma_crossover_signal(d, idx, fast, slow)
        return signal

    oos_equity = []  # out-of-sample equity curve
    oos_dates = []
    window_results = []

    i = train_size
    while i + test_size <= len(data):
        train_data = data.iloc[i - train_size:i]
        test_data = data.iloc[i:i + test_size]

        # Optimize on training data
        best_sharpe = -np.inf
        best_fast, best_slow = candidates[0]
        for fast, slow in candidates:
            sharpe = run_backtest(train_data, make_signal(fast, slow), config).sharpe_ratio()
            if sharpe > best_sharpe:
                best_sharpe = sharpe
                best_fast, best_slow = fast, slow

        # Test on out-of-sample data with best params. The signal returns 0
        # for idx < slow, so nothing trades before the first test bar.
        warmup = min(best_slow, i)
        oos_run = run_backtest(
            data.iloc[i - warmup:i + test_size],
            make_signal(best_fast, best_slow),
            config
        )
        oos_result = BacktestResult(
            equity_curve=oos_run.equity_curve.iloc[warmup:],
            trades=oos_run.trades,
            config=config
        )

        window_results.append({
            'train_start': train_data.index[0],
            'test_start': test_data.index[0],
            'test_end': test_data.index[-1],
            'best_fast': best_fast,
            'best_slow': best_slow,
            'train_sharpe': best_sharpe,
            'oos_return': oos_result.total_return(),
            'oos_sharpe': oos_result.sharpe_ratio()
        })

        # Collect OOS equity, rescaled so this window continues from where
        # the previous one ended.
        curve = oos_result.equity_curve
        base = oos_equity[-1] if oos_equity else config.initial_equity
        scale = base / curve.iloc[0]
        oos_equity.extend((curve * scale).tolist())
        oos_dates.extend(curve.index)

        i += test_size

    return {
        'equity_curve': pd.Series(oos_equity, index=oos_dates),
        'windows': pd.DataFrame(window_results)
    }


# Walk forward over the synthetic data: 252-bar training windows, 63-bar
# test windows, 2x leverage and 10 bps fees (other costs at their defaults).
wf = walk_forward_optimization(
    df, train_size=252, test_size=63,
    config=BacktestConfig(leverage=2.0, fee_bps=10)
)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 8))

# Top panel: the chained out-of-sample equity curve.
ax1.plot(wf['equity_curve'].index, wf['equity_curve'], color='steelblue', linewidth=1.5)
ax1.set_ylabel('Equity ($)')
ax1.set_title('Walk-Forward Out-of-Sample Equity Curve', fontsize=14)

# Show in-sample vs out-of-sample Sharpe
# Bars are offset by +/-0.15 so each window's pair sits side by side.
x = range(len(wf['windows']))
ax2.bar([xi - 0.15 for xi in x], wf['windows']['train_sharpe'], width=0.3, label='In-Sample Sharpe', color='blue', alpha=0.6)
ax2.bar([xi + 0.15 for xi in x], wf['windows']['oos_sharpe'], width=0.3, label='OOS Sharpe', color='green', alpha=0.6)
ax2.axhline(y=0, color='red', linewidth=0.5)
ax2.set_ylabel('Sharpe Ratio')
ax2.set_xlabel('Window #')
ax2.legend()

plt.tight_layout()
plt.show()

print("Walk-forward window details:")
# Last expression of the cell: the notebook renders the per-window table.
wf['windows'][['best_fast', 'best_slow', 'train_sharpe', 'oos_return', 'oos_sharpe']].round(3)

### Exercise 5.3

1. Compare the in-sample Sharpe ratios to out-of-sample. Large drops indicate overfitting.
2. Increase the parameter grid (more options). Does it lead to more overfitting?
3. Reduce `train_size` to 126 (6 months). How does it affect stability?

In [None]:
# YOUR CODE HERE


---
## 5.4 Monte Carlo Simulation

Bootstrap the trade results to estimate the range of possible outcomes. This answers: "How bad could it get?" and "Am I just lucky?"

In [None]:
def monte_carlo_simulation(
    trade_returns: np.ndarray,
    n_simulations: int = 1000,
    n_trades: int = None,
    initial_equity: float = 100_000
) -> pd.DataFrame:
    """Bootstrap per-trade returns into simulated equity paths.

    Each path resamples ``n_trades`` returns with replacement (defaulting
    to the number of observed trades) and compounds them from
    ``initial_equity``. Draws come from the global ``np.random`` state.

    Returns:
        DataFrame with one column per simulation and one row per trade;
        row ``k`` holds the equity after the ``k + 1``-th sampled trade.
    """
    path_length = len(trade_returns) if n_trades is None else n_trades

    def simulate_path():
        # Sample with replacement, then compound into an equity path.
        draws = np.random.choice(trade_returns, size=path_length, replace=True)
        return initial_equity * np.cumprod(1 + draws)

    paths = [simulate_path() for _ in range(n_simulations)]
    return pd.DataFrame(paths).T


# Get trade returns from our backtest
if len(result.trades) > 0:
    trade_rets = result.trades['pnl_pct'].values
    mc = monte_carlo_simulation(trade_rets, n_simulations=500)

    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    # Equity paths
    for col in mc.columns[:100]:  # plot first 100 paths
        axes[0].plot(mc[col], color='steelblue', alpha=0.03, linewidth=0.5)

    # Percentile bands
    p5 = mc.quantile(0.05, axis=1)
    p25 = mc.quantile(0.25, axis=1)
    p50 = mc.quantile(0.50, axis=1)
    p75 = mc.quantile(0.75, axis=1)
    p95 = mc.quantile(0.95, axis=1)

    axes[0].fill_between(mc.index, p5, p95, alpha=0.2, color='orange', label='5th-95th percentile')
    axes[0].fill_between(mc.index, p25, p75, alpha=0.3, color='green', label='25th-75th percentile')
    axes[0].plot(p50, color='red', linewidth=2, label='Median')
    axes[0].axhline(y=100_000, color='gray', linestyle='--', alpha=0.5)
    axes[0].set_title('Monte Carlo Equity Paths', fontsize=14)
    axes[0].set_ylabel('Equity ($)')
    axes[0].set_xlabel('Trade #')
    axes[0].legend()

    # Distribution of final equity
    final_equity = mc.iloc[-1]
    axes[1].hist(final_equity, bins=50, color='steelblue', alpha=0.7, edgecolor='white')
    axes[1].axvline(x=100_000, color='red', linestyle='--', label='Starting equity')
    axes[1].axvline(x=final_equity.median(), color='green', linestyle='--', label=f'Median: ${final_equity.median():,.0f}')
    axes[1].set_title('Distribution of Final Equity', fontsize=14)
    axes[1].set_xlabel('Final Equity ($)')
    axes[1].set_ylabel('Count')
    axes[1].legend()

    plt.tight_layout()
    plt.show()

    print(f"Monte Carlo Results ({len(mc.columns)} simulations):")
    print(f"  Probability of profit: {(final_equity > 100_000).mean():.1%}")
    print(f"  Median final equity: ${final_equity.median():,.2f}")
    print(f"  5th percentile (worst case): ${final_equity.quantile(0.05):,.2f}")
    print(f"  95th percentile (best case): ${final_equity.quantile(0.95):,.2f}")

    # Max drawdown distribution
    max_dds = []
    for col in mc.columns:
        path = mc[col]
        peak = path.expanding().max()
        dd = ((path - peak) / peak).min()
        max_dds.append(dd)
    print(f"  Median max drawdown: {np.median(max_dds):.1%}")
    print(f"  95th percentile DD: {np.percentile(max_dds, 5):.1%}")
else:
    print("No trades to simulate.")

### Exercise 5.4

1. Run Monte Carlo with 5000 simulations. Does the distribution change meaningfully?
2. What's the probability of losing more than 30% of your capital? Is this acceptable?
3. Add a "time-based" Monte Carlo: shuffle daily returns instead of trade returns. How do the results differ?

In [None]:
# YOUR CODE HERE


---
## 5.5 Comprehension Check

1. Your backtest shows 200% returns over 5 years. But when you add 20 bps of fees, it drops to 30%. What does this tell you about the strategy?
2. In walk-forward optimization, the in-sample Sharpe is consistently 2.0+ but out-of-sample is 0.3. What's happening?
3. Monte Carlo shows a 15% probability of losing 40%+ of capital. Would you still trade this strategy? What could you change?
4. Why is "number of trades" important for backtest reliability? How many trades do you need for statistical significance?
5. Name three common backtesting pitfalls besides overfitting.

In [None]:
# YOUR ANSWERS HERE
