# Moving Average Crossover Strategy Backtest

This notebook demonstrates backtesting a moving average crossover strategy using our quantitative trading framework.

## Overview
- Load and preprocess minute-level SPY data (the first available monthly file of 2024, as a quick demo)
- Apply feature engineering to calculate technical indicators
- Run a moving average crossover strategy
- Analyze performance metrics and visualize results

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import sys
from pathlib import Path

# Add parent directory to path for imports
sys.path.append('..')

# Import our modules
from src.data.preprocessor import DataPreprocessor
from src.data.features import FeatureEngineer
from src.strategies.examples.moving_average import MovingAverageCrossover
from src.backtesting.engines.vectorbt_engine import VectorBTEngine
from src.backtesting.costs import TransactionCostEngine, CommissionModel, SpreadModel, MarketImpactModel
from src.utils.config import ConfigLoader

# Configure display: no column limit, no fixed display width, and a
# row limit of 100 for printed frames.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.width', None),
    ('display.max_rows', 100),
):
    pd.set_option(option_name, option_value)

# Plot settings: dark-grid style sheet, then a 12 x 6 default figure size.
# The style sheet is applied first and the size is set afterwards.
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## 1. Load and Preprocess Data

In [None]:
# Load the first SPY file in sorted filename order.
# NOTE(review): the rest of the notebook (e.g. the preprocessing cache key)
# assumes this file is January 2024 -- confirm against the file naming scheme.
spy_files = sorted(glob.glob('../data/raw/minute_aggs/by_symbol/SPY/*.csv.gz'))

# Fail fast: every later cell needs `raw_data`, so stopping here with a clear
# message is better than an unrelated NameError in the next cell.
if not spy_files:
    raise FileNotFoundError(
        "No SPY data found. Please ensure data has been downloaded and extracted."
    )

# Load first month for quick demo
raw_data = pd.read_csv(spy_files[0], compression='gzip')
print(f"Loaded {len(raw_data)} bars from {spy_files[0]}")
print(f"\nData shape: {raw_data.shape}")
print(f"\nColumns: {raw_data.columns.tolist()}")
print(f"\nFirst few rows:")

# Kept as the cell's top-level trailing expression so the notebook renders it;
# an expression nested inside an if/else branch is not displayed.
raw_data.head()

In [None]:
# Preprocess the raw bars into the project's standard format
preprocessor = DataPreprocessor()
preprocess_options = {'symbol': 'SPY', 'cache_key': 'spy_jan_2024_processed'}
clean_data = preprocessor.process_polygon_data(raw_data, **preprocess_options)

# Summary of the processed frame
print(f"Processed data shape: {clean_data.shape}")

first_bar = clean_data.index[0]
last_bar = clean_data.index[-1]
print(f"Date range: {first_bar} to {last_bar}")

print(f"\nData quality stats:")

# Total count of null cells across all columns
missing_value_count = clean_data.isnull().sum().sum()
print(f"- Missing values: {missing_value_count}")

# NOTE(review): this is simply the row-count difference between the raw and
# processed frames; whether every dropped row is an "outlier" depends on what
# process_polygon_data removes -- confirm.
dropped_row_count = len(raw_data) - len(clean_data)
print(f"- Outliers removed: {dropped_row_count}")

# Show sample
clean_data.head()

## 2. Feature Engineering

In [None]:
# Build the technical-indicator columns on top of the cleaned bars
feature_engineer = FeatureEngineer()

# Moving averages are computed first, directly from the cleaned frame
data_with_features = feature_engineer.add_moving_averages(clean_data, periods=[10, 20, 30, 50])

# The remaining indicators are layered on one after another; each call
# receives the frame returned by the previous one
for add_indicator, indicator_kwargs in (
    (feature_engineer.add_rsi, {'period': 14}),
    (feature_engineer.add_atr, {'period': 14}),
    (feature_engineer.add_volume_features, {}),
):
    data_with_features = add_indicator(data_with_features, **indicator_kwargs)

# Report only the columns that were not already present in the cleaned frame
original_columns = clean_data.columns
added_columns = [col for col in data_with_features.columns if col not in original_columns]
print(f"Features added: {added_columns}")
data_with_features.head()

## 3. Strategy Configuration and Signal Generation

In [None]:
# Strategy parameters, kept in one place so they are easy to tweak
strategy_parameters = dict(
    fast_period=10,
    slow_period=30,
    ma_type='ema',
    use_volume_filter=True,
    volume_threshold=1.2,
    position_sizing='fixed',
    risk_per_trade=0.02,
    stop_loss=0.02,
    take_profit=0.05,
    use_stops=False,  # Disable for initial test
)
strategy = MovingAverageCrossover(parameters=strategy_parameters)

# Run the strategy over the feature-enriched bars
signals = strategy.generate_signals(data_with_features)

# Signal statistics: any non-zero value counts as a signal,
# positive values as long and negative values as short
nonzero_mask = signals != 0
print(f"Total signals generated: {nonzero_mask.sum()}")
print(f"Long signals: {(signals > 0).sum()}")
print(f"Short signals: {(signals < 0).sum()}")
print(f"\nFirst 10 signals:")
signals[nonzero_mask].head(10)

## 4. Backtest Execution

In [None]:
# Cost and capital assumptions used by this backtest
COMMISSION_RATE = 0.0005  # Per share
BASE_MARKET_IMPACT = 0.0001
SLIPPAGE = 0.0001         # 0.01%
INITIAL_CAPITAL = 100000

# Individual transaction-cost components
commission_model = CommissionModel(model_type='per_share', rate=COMMISSION_RATE)
spread_model = SpreadModel()
impact_model = MarketImpactModel(model_type='linear', base_impact=BASE_MARKET_IMPACT)

# Combine the components into a single cost engine and hand it to the backtester
cost_engine = TransactionCostEngine(
    commission_model=commission_model,
    spread_model=spread_model,
    market_impact_model=impact_model,
)
engine = VectorBTEngine(transaction_costs=cost_engine)

# NOTE(review): the commission rate is supplied both through the cost engine
# and through run_backtest(commission=...) -- confirm the engine does not
# charge it twice.
run_options = {
    'initial_capital': INITIAL_CAPITAL,
    'commission': COMMISSION_RATE,
    'slippage': SLIPPAGE,
}
backtest_result = engine.run_backtest(
    strategy=strategy,
    data=data_with_features,
    **run_options,
)

# Keep a handle on the portfolio object for the analysis cells below
portfolio = backtest_result['portfolio']
print("Backtest completed successfully!")

## 5. Performance Analysis

In [None]:
# Performance metrics as reported by the backtest
metrics = backtest_result['metrics']

# (format template, metrics key) pairs, printed in this order.
# A leading "\n" in a template starts a new visual group in the output.
summary_rows = (
    ("Total Return: {:.2%}", 'total_return'),
    ("Annual Return: {:.2%}", 'annual_return'),
    ("Sharpe Ratio: {:.2f}", 'sharpe_ratio'),
    ("Sortino Ratio: {:.2f}", 'sortino_ratio'),
    ("Max Drawdown: {:.2%}", 'max_drawdown'),
    ("Win Rate: {:.2%}", 'win_rate'),
    ("\nTotal Trades: {}", 'total_trades'),
    ("Winning Trades: {}", 'winning_trades'),
    ("Losing Trades: {}", 'losing_trades'),
    ("\nAvg Win: ${:.2f}", 'avg_win'),
    ("Avg Loss: ${:.2f}", 'avg_loss'),
    ("Profit Factor: {:.2f}", 'profit_factor'),
)

print("=== Performance Summary ===")
for row_template, metric_key in summary_rows:
    print(row_template.format(metrics[metric_key]))

## 6. Visualization

In [None]:
# Three stacked panels sharing the x-axis: equity, drawdown, price + signals
fig, panels = plt.subplots(3, 1, figsize=(12, 10), sharex=True)
ax1, ax2, ax3 = panels

# Panel 1: equity curve
portfolio_value = portfolio.value()
portfolio_value.plot(ax=ax1, label='Portfolio Value', color='blue', linewidth=2)
ax1.set(
    ylabel='Portfolio Value ($)',
    title='Moving Average Crossover Strategy - Equity Curve',
)

# Panel 2: drawdown, scaled by 100 for the percent axis; the area between
# zero and the drawdown line is shaded
drawdown = portfolio.drawdown() * 100
drawdown.plot(ax=ax2, label='Drawdown', color='red', linewidth=1, alpha=0.8)
ax2.fill_between(drawdown.index, 0, drawdown, color='red', alpha=0.3)
ax2.set(ylabel='Drawdown (%)', title='Drawdown Analysis')

# Panel 3: close price with markers wherever the strategy emitted a signal
price_data = data_with_features['close']
price_data.plot(ax=ax3, label='SPY Price', color='black', linewidth=1)

long_entries = price_data[signals > 0]
short_entries = price_data[signals < 0]
marker_specs = (
    (long_entries, 'green', '^', 'Long Entry'),
    (short_entries, 'red', 'v', 'Short Entry'),
)
for entry_prices, marker_color, marker_shape, marker_label in marker_specs:
    ax3.scatter(entry_prices.index, entry_prices, color=marker_color,
                marker=marker_shape, s=100, label=marker_label, alpha=0.7)

ax3.set(
    xlabel='Date',
    ylabel='Price ($)',
    title='Price Action with Trade Signals',
)

# Legend and light grid on every panel, added once all artists are drawn
for panel in (ax1, ax2, ax3):
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Plot returns distribution: daily portfolio returns (left) and
# per-trade returns split into winners and losers (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Daily returns histogram.
# NOTE(review): portfolio.returns() is presumably one return per bar (the
# input data is minute-level), so plotting it directly under a "Daily Return"
# label would be wrong. Compound the per-bar returns within each calendar day
# first. Assumes the returns index is a DatetimeIndex -- confirm. If the data
# is already daily, each group holds one bar and the values are unchanged.
bar_returns = portfolio.returns()
daily_returns = (
    (1 + bar_returns).groupby(bar_returns.index.normalize()).prod() - 1
) * 100
daily_returns.hist(ax=ax1, bins=50, alpha=0.7, color='blue', edgecolor='black')
ax1.axvline(0, color='red', linestyle='--', linewidth=2)
ax1.set_xlabel('Daily Return (%)')
ax1.set_ylabel('Frequency')
ax1.set_title('Distribution of Daily Returns')
ax1.grid(True, alpha=0.3)

# Trade analysis: only drawn when the backtest produced at least one trade
trades_df = backtest_result['trades']
if len(trades_df) > 0:
    trade_returns = trades_df['return'] * 100
    # Break-even trades (return == 0) are counted with the losers
    positive_trades = trade_returns[trade_returns > 0]
    negative_trades = trade_returns[trade_returns <= 0]

    ax2.hist(positive_trades, bins=20, alpha=0.7, color='green',
             label=f'Winners ({len(positive_trades)})', edgecolor='black')
    ax2.hist(negative_trades, bins=20, alpha=0.7, color='red',
             label=f'Losers ({len(negative_trades)})', edgecolor='black')
    ax2.axvline(0, color='black', linestyle='--', linewidth=2)
    ax2.set_xlabel('Trade Return (%)')
    ax2.set_ylabel('Frequency')
    ax2.set_title('Trade Returns Distribution')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 7. Parameter Optimization

In [None]:
# Define parameter grid for optimization
param_grid = {
    'fast_period': [5, 10, 15],
    'slow_period': [20, 30, 40],
    'ma_type': ['sma', 'ema']
}

# Run optimization over every combination in the grid, scored by Sharpe ratio
optimization_result = engine.optimize_parameters(
    strategy_class=MovingAverageCrossover,
    data=data_with_features,
    param_grid=param_grid,
    metric='sharpe_ratio',
    initial_capital=100000
)

# Show top 5 parameter combinations.
# NOTE(review): assumes optimization_result['results'] is already ordered
# best-first by the chosen metric -- confirm against optimize_parameters.
#
# The loop variables are deliberately NOT named `metrics`: that name holds the
# main backtest's metrics at notebook level and is read again by later cells,
# so rebinding it here would silently swap in the last optimization result.
print("Top 5 Parameter Combinations by Sharpe Ratio:")
print("="*60)
for rank, (combo_params, combo_metrics) in enumerate(optimization_result['results'][:5], start=1):
    print(f"\n{rank}. Parameters: {combo_params}")
    print(f"   Sharpe Ratio: {combo_metrics['sharpe_ratio']:.2f}")
    print(f"   Total Return: {combo_metrics['total_return']:.2%}")
    print(f"   Max Drawdown: {combo_metrics['max_drawdown']:.2%}")

## 8. Risk Analysis

In [None]:
# Calculate additional risk metrics on DAILY returns.
# NOTE(review): portfolio.returns() is presumably one return per bar (the
# input data is minute-level). The figures below are labelled "(daily)" and
# annualised with sqrt(252), so the per-bar returns are first compounded
# within each calendar day. Assumes a DatetimeIndex -- confirm. If the data is
# already daily, each group holds one bar and the values are unchanged.
bar_returns = portfolio.returns()
returns = (
    (1 + bar_returns).groupby(bar_returns.index.normalize()).prod() - 1
).dropna()

# Value at Risk (VaR): the 5th / 1st percentile of daily returns
var_95_threshold = np.percentile(returns, 5)
var_99_threshold = np.percentile(returns, 1)
var_95 = var_95_threshold * 100
var_99 = var_99_threshold * 100

# Conditional Value at Risk (CVaR): mean return on days at or below the VaR level
cvar_95 = returns[returns <= var_95_threshold].mean() * 100
cvar_99 = returns[returns <= var_99_threshold].mean() * 100

# Read the main backtest's metrics explicitly rather than through the
# notebook-level `metrics` name, which the optimization cell's loop rebinds.
backtest_metrics = backtest_result['metrics']

print("=== Risk Metrics ===")
print(f"95% VaR: {var_95:.2f}% (daily)")
print(f"99% VaR: {var_99:.2f}% (daily)")
print(f"95% CVaR: {cvar_95:.2f}% (daily)")
print(f"99% CVaR: {cvar_99:.2f}% (daily)")
print(f"\nAnnualized Volatility: {returns.std() * np.sqrt(252) * 100:.2f}%")
print(f"Downside Deviation: {returns[returns < 0].std() * np.sqrt(252) * 100:.2f}%")
print(f"\nMax Consecutive Wins: {backtest_metrics.get('max_consecutive_wins', 'N/A')}")
print(f"Max Consecutive Losses: {backtest_metrics.get('max_consecutive_losses', 'N/A')}")

## 9. Conclusion and Next Steps

This notebook demonstrated:
1. Loading and preprocessing minute-level market data
2. Engineering technical features
3. Implementing and backtesting a moving average crossover strategy
4. Analyzing performance metrics and visualizing results
5. Optimizing strategy parameters

### Next Steps:
- Test with longer time periods (full year)
- Compare with buy-and-hold benchmark
- Implement more sophisticated strategies (e.g., ORB)
- Add walk-forward validation
- Test on multiple symbols