# Strategy Development and Backtesting

This notebook focuses on developing and testing trading strategies:

1. **Strategy Implementation** - Build and customize trading strategies
2. **Signal Generation** - Generate buy/sell signals
3. **Backtesting** - Test strategies on historical data
4. **Performance Analysis** - Analyze strategy performance
5. **Parameter Optimization** - Find optimal strategy parameters
6. **Strategy Comparison** - Compare different strategies

In [None]:
# Import required libraries
import sys
import os
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Import our custom modules
from api.data_collector import DataCollector
from strategies.strategies import (
    MovingAverageCrossover, 
    RSIMeanReversion, 
    BollingerBandsStrategy, 
    MomentumStrategy,
    MovingAverageConvergenceDivergence
)
from backtester import BacktestingEngine
from risk_manager import RiskManager

# Configure plotting
# NOTE(review): the 'seaborn-v0_8' style name presumably requires a recent
# matplotlib release — confirm against the pinned version.
plt.style.use('seaborn-v0_8')
# Default seaborn colour palette for the figures drawn after this point.
sns.set_palette("husl")
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

## 1. Data Preparation

In [None]:
# Load data (reuse from previous notebook or collect fresh data)
collector = DataCollector()

# Choose a symbol for strategy development
symbol = 'AAPL'
start_date = '2020-01-01'
end_date = '2024-01-01'

print(f"Loading data for {symbol} from {start_date} to {end_date}...")

data = collector.get_historical_data(symbol, start_date, end_date, 'yahoo')

# Later cells guard with `data is not None`, so a missing result is treated as
# possible here too. Normalise it to an empty frame so every `data.empty` /
# `len(data)` check downstream behaves the same way instead of raising on None.
if data is None:
    data = pd.DataFrame()

if not data.empty:
    print(f"✓ Loaded {len(data)} records")
    print(f"Date range: {data['datetime'].min()} to {data['datetime'].max()}")
    print(f"Price range: ${data['low'].min():.2f} - ${data['high'].max():.2f}")

    # Display first few rows (only meaningful when something was loaded)
    print("\nFirst 5 rows:")
    print(data.head())
else:
    print("❌ Failed to load data")

## 2. Strategy Implementation and Signal Generation

In [None]:
# Initialize different strategies.
# The catalogue is written as ordered (label, instance) pairs; the resulting
# dict keeps that order, which is the order the cells below iterate in.
strategies = dict([
    ('MA_Crossover_20_50', MovingAverageCrossover(short_window=20, long_window=50)),
    ('MA_Crossover_10_30', MovingAverageCrossover(short_window=10, long_window=30)),
    ('RSI_Mean_Reversion', RSIMeanReversion(rsi_period=14, oversold_level=30, overbought_level=70)),
    ('Bollinger_Bands', BollingerBandsStrategy(window=20, num_std=2.0)),
    ('Momentum', MomentumStrategy(lookback_period=20, momentum_threshold=0.02)),
    ('MACD', MovingAverageConvergenceDivergence(fast_period=12, slow_period=26, signal_period=9)),
])

print(f"Initialized {len(strategies)} strategies:")
for name, strategy in strategies.items():
    print(f"  • {name}: {strategy}")

In [None]:
# Generate signals for all strategies
strategy_results = {}

for name, strategy in strategies.items():
    print(f"\nGenerating signals for {name}...")
    try:
        # Every strategy receives its own copy of the price frame
        result = strategy.generate_signals(data.copy())

        # Guard clause: nothing usable came back, report it and move on
        if result.empty or 'signal' not in result.columns:
            print(f"  ❌ Failed to generate signals")
            continue

        # Returns first, then the summary statistics computed from them
        result = strategy.calculate_returns(result)
        stats = strategy.get_strategy_stats(result)

        strategy_results[name] = {
            'data': result,
            'strategy': strategy,
            'stats': stats
        }

        # Any non-zero entry in the signal column counts as one signal
        signals_count = result['signal'].ne(0).sum()
        print(f"  ✓ Generated {signals_count} signals")
        print(f"  ✓ Total Return: {stats.get('total_return', 0):.2%}")
        print(f"  ✓ Sharpe Ratio: {stats.get('sharpe_ratio', 0):.2f}")
    except Exception as e:
        print(f"  ❌ Error: {str(e)}")

print(f"\nSuccessfully processed {len(strategy_results)} strategies")

In [None]:
# Visualize signals for a specific strategy
# Draws a three-panel plotly figure for one entry of `strategy_results`:
# price with signal markers, per-row returns, and cumulative returns.
strategy_to_plot = 'MA_Crossover_20_50'  # Change this to visualize different strategies

if strategy_to_plot in strategy_results:
    result_data = strategy_results[strategy_to_plot]['data']
    
    # Create subplots
    # Three stacked rows sharing one x axis; the price panel gets half the height.
    fig = make_subplots(
        rows=3, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.03,
        subplot_titles=(
            f'{symbol} Price with {strategy_to_plot} Signals',
            'Strategy Returns vs Buy & Hold',
            'Cumulative Returns Comparison'
        ),
        row_heights=[0.5, 0.25, 0.25]
    )
    
    # Price chart with signals
    fig.add_trace(
        go.Scatter(x=result_data['datetime'], y=result_data['close'], 
                  name='Close Price', line=dict(color='black', width=1)),
        row=1, col=1
    )
    
    # Add moving averages if available
    # These columns are optional: the overlays are drawn only when the
    # strategy's output frame contains them.
    if 'ma_short' in result_data.columns:
        fig.add_trace(
            go.Scatter(x=result_data['datetime'], y=result_data['ma_short'], 
                      name='Short MA', line=dict(color='blue', width=1)),
            row=1, col=1
        )
    if 'ma_long' in result_data.columns:
        fig.add_trace(
            go.Scatter(x=result_data['datetime'], y=result_data['ma_long'], 
                      name='Long MA', line=dict(color='red', width=1)),
            row=1, col=1
        )
    
    # Mark buy and sell signals
    # Rows with signal == 1 are plotted as buys, rows with signal == -1 as sells.
    buy_signals = result_data[result_data['signal'] == 1]
    sell_signals = result_data[result_data['signal'] == -1]
    
    if not buy_signals.empty:
        fig.add_trace(
            go.Scatter(x=buy_signals['datetime'], y=buy_signals['close'],
                      mode='markers', marker=dict(color='green', size=8, symbol='triangle-up'),
                      name='Buy Signal'),
            row=1, col=1
        )
    
    if not sell_signals.empty:
        fig.add_trace(
            go.Scatter(x=sell_signals['datetime'], y=sell_signals['close'],
                      mode='markers', marker=dict(color='red', size=8, symbol='triangle-down'),
                      name='Sell Signal'),
            row=1, col=1
        )
    
    # Daily returns comparison
    # NOTE(review): 'strategy_return' / 'price_return' are presumably added by
    # strategy.calculate_returns — confirm; each is plotted only if present.
    if 'strategy_return' in result_data.columns:
        fig.add_trace(
            go.Scatter(x=result_data['datetime'], y=result_data['strategy_return'],
                      name='Strategy Returns', line=dict(color='blue')),
            row=2, col=1
        )
    
    if 'price_return' in result_data.columns:
        fig.add_trace(
            go.Scatter(x=result_data['datetime'], y=result_data['price_return'],
                      name='Buy & Hold Returns', line=dict(color='gray', dash='dash')),
            row=2, col=1
        )
    
    # Cumulative returns
    if 'cumulative_return' in result_data.columns:
        fig.add_trace(
            go.Scatter(x=result_data['datetime'], y=result_data['cumulative_return'],
                      name='Strategy Cumulative Return', line=dict(color='blue')),
            row=3, col=1
        )
    
    # Buy and hold cumulative return
    # Computed here as close relative to the first close in the frame, minus 1.
    buy_hold_return = (result_data['close'] / result_data['close'].iloc[0]) - 1
    fig.add_trace(
        go.Scatter(x=result_data['datetime'], y=buy_hold_return,
                  name='Buy & Hold Cumulative Return', line=dict(color='gray', dash='dash')),
        row=3, col=1
    )
    
    fig.update_layout(
        height=900,
        title=f'{strategy_to_plot} Strategy Analysis for {symbol}',
        showlegend=True
    )
    
    fig.show()
else:
    print(f"No results available for {strategy_to_plot}")

## 3. Comprehensive Backtesting

In [None]:
# Initialize backtesting engine
# commission=0.001 is 0.1% and slippage=0.0005 is 0.05% (see the printed config)
backtester = BacktestingEngine(initial_capital=100000, commission=0.001, slippage=0.0005)

# Echo the engine configuration, one setting per output line
print(
    "Backtesting Engine Configuration:",
    f"  Initial Capital: ${backtester.initial_capital:,}",
    f"  Commission: {backtester.commission:.3%}",
    f"  Slippage: {backtester.slippage:.3%}",
    sep="\n",
)

In [None]:
# Run backtests for all strategies
backtest_results = {}

print("Running comprehensive backtests...")
print("=" * 50)

for name in strategy_results:
    print(f"\nBacktesting {name}...")

    try:
        # Pull the strategy object and its signal-annotated frame for this entry
        strategy_data = strategy_results[name]
        strategy, data_with_signals = strategy_data['strategy'], strategy_data['data']

        # Run the backtest on the shared engine and record the outcome
        results = backtest_results[name] = backtester.run_backtest(data_with_signals, strategy)

        # Headline metrics for this strategy
        print(f"  ✓ Final Portfolio Value: ${results['final_portfolio_value']:,.2f}")
        print(f"  ✓ Total Return: {results['total_return']:.2%}")
        print(f"  ✓ Sharpe Ratio: {results['sharpe_ratio']:.2f}")
        print(f"  ✓ Max Drawdown: {results['max_drawdown']:.2%}")
        print(f"  ✓ Total Trades: {results['total_trades']}")
        print(f"  ✓ Win Rate: {results['win_rate']:.2%}")

    except Exception as e:
        print(f"  ❌ Backtest failed: {str(e)}")

print(f"\nCompleted backtests for {len(backtest_results)} strategies")

In [None]:
# Create performance comparison table
# One pre-formatted row per backtested strategy; missing metrics fall back to 0.
if backtest_results:
    performance_data = []
    
    for name, results in backtest_results.items():
        performance_data.append({
            'Strategy': name,
            'Total Return': f"{results.get('total_return', 0):.2%}",
            'Annualized Return': f"{results.get('annualized_return', 0):.2%}",
            'Volatility': f"{results.get('volatility', 0):.2%}",
            'Sharpe Ratio': f"{results.get('sharpe_ratio', 0):.2f}",
            'Max Drawdown': f"{results.get('max_drawdown', 0):.2%}",
            'Win Rate': f"{results.get('win_rate', 0):.2%}",
            'Total Trades': results.get('total_trades', 0),
            'Final Value': f"${results.get('final_portfolio_value', 0):,.0f}"
        })
    
    performance_df = pd.DataFrame(performance_data)
    
    print("\nSTRATEGY PERFORMANCE COMPARISON")
    print("=" * 80)
    print(performance_df.to_string(index=False))
    
    # Save to CSV
    # to_csv does not create missing parent directories, so make sure the
    # output folder exists first (a fresh checkout may not have it yet).
    performance_csv_path = '../data/processed/strategy_performance_comparison.csv'
    os.makedirs(os.path.dirname(performance_csv_path), exist_ok=True)
    performance_df.to_csv(performance_csv_path, index=False)
    print("\n✓ Performance comparison saved to CSV")

In [None]:
# Visualize equity curves for all strategies
if not backtest_results:
    print("No backtest results available for visualization")
else:
    fig = go.Figure()

    # Benchmark: close price rescaled so the first row equals the starting capital
    if data is not None and not data.empty:
        buy_hold_returns = (data['close'] / data['close'].iloc[0]) * backtester.initial_capital
        benchmark_trace = go.Scatter(
            x=data['datetime'],
            y=buy_hold_returns,
            mode='lines',
            name='Buy & Hold',
            line=dict(color='gray', dash='dash', width=2)
        )
        fig.add_trace(benchmark_trace)

    # One line per strategy; colours are picked by position and wrap around
    colors = ['blue', 'red', 'green', 'orange', 'purple', 'brown']

    for i, name in enumerate(backtest_results):
        results = backtest_results[name]

        # Skip results that carry no (or an empty) equity curve
        if 'equity_curve' not in results or results['equity_curve'].empty:
            continue

        equity_curve = results['equity_curve']
        strategy_trace = go.Scatter(
            x=equity_curve['date'],
            y=equity_curve['portfolio_value'],
            mode='lines',
            name=name,
            line=dict(color=colors[i % len(colors)], width=2)
        )
        fig.add_trace(strategy_trace)

    fig.update_layout(
        title=f'Strategy Performance Comparison - Equity Curves ({symbol})',
        xaxis_title='Date',
        yaxis_title='Portfolio Value ($)',
        height=600,
        hovermode='x unified'
    )

    fig.show()

## 4. Risk-Return Analysis

In [None]:
# Risk-return scatter plot
if not backtest_results:
    print("No backtest results available for risk-return analysis")
else:
    # One row per strategy; ratio-valued metrics are scaled to percentages and
    # the drawdown is taken as a magnitude.
    risk_return_df = pd.DataFrame([
        {
            'Strategy': name,
            'Volatility': results.get('volatility', 0) * 100,
            'Return': results.get('annualized_return', 0) * 100,
            'Sharpe': results.get('sharpe_ratio', 0),
            'Max_Drawdown': abs(results.get('max_drawdown', 0)) * 100
        }
        for name, results in backtest_results.items()
    ])

    # Bubble chart: x = volatility, y = return, bubble size = drawdown, colour = Sharpe
    axis_labels = {
        'Volatility': 'Volatility (%)',
        'Return': 'Annualized Return (%)',
        'Sharpe': 'Sharpe Ratio'
    }
    fig = px.scatter(
        risk_return_df,
        x='Volatility',
        y='Return',
        size='Max_Drawdown',
        color='Sharpe',
        hover_name='Strategy',
        title='Risk-Return Analysis (Bubble size = Max Drawdown)',
        labels=axis_labels,
        color_continuous_scale='RdYlGn'
    )

    fig.update_layout(height=600)
    fig.show()

    print("\nRisk-Return Metrics:")
    print(risk_return_df.round(2))

## 5. Parameter Optimization

Let's optimize the parameters for the Moving Average Crossover strategy.

In [None]:
# Parameter optimization for Moving Average Crossover
def optimize_ma_crossover(data, short_windows, long_windows):
    """Grid-search Moving Average Crossover window pairs.

    Every (short, long) pair with short < long is turned into a
    ``MovingAverageCrossover`` strategy, run on a copy of ``data`` and
    backtested with a fresh ``BacktestingEngine(initial_capital=100000)``.
    Pairs that raise, or whose signal frame has no ``'signal'`` column, are
    left out of the result and summarised in one warning line at the end.

    Args:
        data: Price frame handed (as a copy) to ``generate_signals``.
        short_windows: Candidate short windows (re-iterable, e.g. a range).
        long_windows: Candidate long windows (re-iterable, e.g. a range).

    Returns:
        pandas.DataFrame with one row per successfully tested pair and the
        columns short_window, long_window, total_return, sharpe_ratio,
        max_drawdown, win_rate, total_trades. The columns are present even
        when no pair could be tested (the frame is then empty).
    """
    result_columns = [
        'short_window', 'long_window', 'total_return', 'sharpe_ratio',
        'max_drawdown', 'win_rate', 'total_trades'
    ]

    # Build the list of valid pairs up front so the progress denominator
    # counts what will actually be tested, not the skipped short >= long pairs.
    candidate_pairs = [
        (short_window, long_window)
        for short_window in short_windows
        for long_window in long_windows
        if short_window < long_window
    ]
    total_combinations = len(candidate_pairs)

    optimization_results = []
    failures = []  # (short_window, long_window, reason) for every pair left out

    for current_combination, (short_window, long_window) in enumerate(candidate_pairs, start=1):
        if current_combination % 10 == 0:
            print(f"Progress: {current_combination}/{total_combinations}")

        try:
            # Create strategy
            strategy = MovingAverageCrossover(short_window=short_window, long_window=long_window)

            # Generate signals
            result = strategy.generate_signals(data.copy())

            if 'signal' not in result.columns:
                failures.append((short_window, long_window, "no 'signal' column produced"))
                continue

            # Run quick backtest on a fresh engine for every pair
            backtester_opt = BacktestingEngine(initial_capital=100000)
            backtest_result = backtester_opt.run_backtest(result, strategy)

            optimization_results.append({
                'short_window': short_window,
                'long_window': long_window,
                'total_return': backtest_result.get('total_return', 0),
                'sharpe_ratio': backtest_result.get('sharpe_ratio', 0),
                'max_drawdown': backtest_result.get('max_drawdown', 0),
                'win_rate': backtest_result.get('win_rate', 0),
                'total_trades': backtest_result.get('total_trades', 0)
            })

        except Exception as e:
            # Keep the search going, but remember why this pair was dropped
            # instead of discarding the error silently.
            failures.append((short_window, long_window, e))

    if failures:
        first_short, first_long, first_reason = failures[0]
        print(
            f"⚠ {len(failures)}/{total_combinations} combinations failed; "
            f"first failure (short={first_short}, long={first_long}): {first_reason}"
        )

    return pd.DataFrame(optimization_results, columns=result_columns)

# Define parameter ranges
short_windows = range(5, 25, 5)  # 5, 10, 15, 20
long_windows = range(25, 101, 25)  # 25, 50, 75, 100

# The optimizer skips every pair with short >= long, so count only the pairs
# it will really test rather than the full cross product.
valid_combinations = sum(
    1
    for short_candidate in short_windows
    for long_candidate in long_windows
    if short_candidate < long_candidate
)

print(f"Optimizing MA Crossover parameters...")
print(f"Short windows: {list(short_windows)}")
print(f"Long windows: {list(long_windows)}")
print(f"Total combinations to test: {valid_combinations}")

optimization_df = optimize_ma_crossover(data, short_windows, long_windows)

if not optimization_df.empty:
    print(f"\n✓ Optimization completed. Tested {len(optimization_df)} parameter combinations.")
    
    # Sort by Sharpe ratio
    best_params = optimization_df.nlargest(5, 'sharpe_ratio')
    
    print("\nTop 5 Parameter Combinations (by Sharpe Ratio):")
    print(best_params.round(4))
else:
    print("❌ Optimization failed")

In [None]:
# Visualize optimization results
# Two heatmaps over the (short_window x long_window) grid, then a CSV export.
if not optimization_df.empty:
    # Create heatmap of Sharpe ratios
    pivot_sharpe = optimization_df.pivot(index='short_window', columns='long_window', values='sharpe_ratio')
    
    plt.figure(figsize=(12, 8))
    sns.heatmap(pivot_sharpe, annot=True, cmap='RdYlGn', center=0, fmt='.2f')
    plt.title('MA Crossover Optimization - Sharpe Ratio Heatmap')
    plt.xlabel('Long Window')
    plt.ylabel('Short Window')
    plt.show()
    
    # Create heatmap of total returns
    pivot_returns = optimization_df.pivot(index='short_window', columns='long_window', values='total_return')
    
    plt.figure(figsize=(12, 8))
    sns.heatmap(pivot_returns, annot=True, cmap='RdYlGn', center=0, fmt='.2%')
    plt.title('MA Crossover Optimization - Total Return Heatmap')
    plt.xlabel('Long Window')
    plt.ylabel('Short Window')
    plt.show()
    
    # Save optimization results
    # to_csv does not create missing parent directories, so make sure the
    # output folder exists before writing.
    optimization_csv_path = '../data/processed/ma_crossover_optimization.csv'
    os.makedirs(os.path.dirname(optimization_csv_path), exist_ok=True)
    optimization_df.to_csv(optimization_csv_path, index=False)
    print("\n✓ Optimization results saved to CSV")
else:
    print("No optimization results to visualize")

## 6. Strategy Robustness Testing

In [None]:
# Test strategy robustness using different time periods
def test_strategy_robustness(strategy, data, test_periods):
    """Test strategy performance across different time periods."""
    results = []
    
    for period_name, (start_idx, end_idx) in test_periods.items():
        period_data = data.iloc[start_idx:end_idx].copy()
        
        if len(period_data) > 100:  # Minimum data requirement
            try:
                # Generate signals
                result = strategy.generate_signals(period_data)
                
                if 'signal' in result.columns:
                    # Calculate performance
                    result = strategy.calculate_returns(result)
                    stats = strategy.get_strategy_stats(result)
                    
                    results.append({
                        'period': period_name,
                        'start_date': period_data['datetime'].iloc[0].strftime('%Y-%m-%d'),
                        'end_date': period_data['datetime'].iloc[-1].strftime('%Y-%m-%d'),
                        'total_return': stats.get('total_return', 0),
                        'sharpe_ratio': stats.get('sharpe_ratio', 0),
                        'max_drawdown': stats.get('max_drawdown', 0),
                        'win_rate': stats.get('win_rate', 0),
                        'total_trades': stats.get('total_trades', 0)
                    })
                    
            except Exception as e:
                print(f"Error testing period {period_name}: {str(e)}")
    
    return pd.DataFrame(results)

# Define test periods (split data into quarters)
# Bounds are positional row indices into `data`; the last quarter absorbs any
# remainder rows left over by the integer division.
data_length = len(data)
quarter_size = data_length // 4

test_periods = {
    'Full Period': (0, data_length),
    'First Half': (0, data_length // 2),
    'Second Half': (data_length // 2, data_length),
    'Q1': (0, quarter_size),
    'Q2': (quarter_size, 2 * quarter_size),
    'Q3': (2 * quarter_size, 3 * quarter_size),
    'Q4': (3 * quarter_size, data_length)
}

# Test the best MA Crossover strategy
if not optimization_df.empty:
    # Single best row by Sharpe ratio from the optimization grid
    best_params = optimization_df.loc[optimization_df['sharpe_ratio'].idxmax()]
    best_strategy = MovingAverageCrossover(
        short_window=int(best_params['short_window']),
        long_window=int(best_params['long_window'])
    )
    
    print(f"Testing robustness of best MA Crossover strategy:")
    print(f"Parameters: Short={int(best_params['short_window'])}, Long={int(best_params['long_window'])}")
    
    robustness_results = test_strategy_robustness(best_strategy, data, test_periods)
    
    if not robustness_results.empty:
        print("\nRobustness Test Results:")
        print("=" * 60)
        print(robustness_results.round(4))
        
        # Calculate consistency metrics
        # NOTE(review): these are computed over every row, including the
        # full-period and half-period windows that overlap the quarters, so
        # the rows are not independent samples.
        sharpe_std = robustness_results['sharpe_ratio'].std()
        return_std = robustness_results['total_return'].std()
        positive_periods = (robustness_results['total_return'] > 0).sum()
        
        print(f"\nConsistency Metrics:")
        print(f"  Sharpe Ratio Std Dev: {sharpe_std:.3f}")
        print(f"  Return Std Dev: {return_std:.3f}")
        print(f"  Positive Periods: {positive_periods}/{len(robustness_results)}")
        
        # Save results
        # to_csv does not create missing parent directories, so make sure the
        # output folder exists before writing.
        robustness_csv_path = '../data/processed/strategy_robustness_test.csv'
        os.makedirs(os.path.dirname(robustness_csv_path), exist_ok=True)
        robustness_results.to_csv(robustness_csv_path, index=False)
        print("\n✓ Robustness test results saved to CSV")
    else:
        print("❌ Robustness testing failed")
else:
    print("No optimization results available for robustness testing")

## Summary and Recommendations

Based on the comprehensive analysis above (replace each bracketed placeholder below with the values produced by your own run):

### Key Findings:
1. **Best Performing Strategy**: [Strategy name and performance]
2. **Optimal Parameters**: [Best parameter combinations]
3. **Risk Characteristics**: [Risk metrics and drawdowns]
4. **Robustness**: [Consistency across time periods]

### Recommendations:
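1. **Implementation**: Use the best-performing strategy with optimized parameters, but only after validating it on out-of-sample data, because the optimization above is fitted and evaluated on the same period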
2. **Risk Management**: Implement proper position sizing and stop-losses
3. **Monitoring**: Regularly monitor performance and adjust as needed
4. **Diversification**: Consider combining multiple strategies

### Next Steps:
1. Test on additional symbols
2. Implement ensemble methods
3. Add more sophisticated risk management
4. Develop real-time monitoring dashboard

In [None]:
# Generate final summary report
print("STRATEGY DEVELOPMENT SUMMARY REPORT")
print("=" * 50)
print(f"Symbol Analyzed: {symbol}")
print(f"Data Period: {start_date} to {end_date}")
print(f"Total Data Points: {len(data)}")
print(f"Strategies Tested: {len(strategies)}")

if backtest_results:
    # Find best strategy by Sharpe ratio.
    # A missing or NaN Sharpe ranks last: NaN compares False against every
    # number, so a plain max() would keep a NaN entry as "best" whenever it
    # happened to come first in the dict.
    def _sharpe_or_worst(strategy_name):
        sharpe = backtest_results[strategy_name].get('sharpe_ratio')
        if sharpe is None or pd.isna(sharpe):
            return float('-inf')
        return sharpe

    best_strategy_name = max(backtest_results.keys(), key=_sharpe_or_worst)
    best_results = backtest_results[best_strategy_name]
    
    print(f"\nBEST STRATEGY: {best_strategy_name}")
    print(f"  Total Return: {best_results.get('total_return', 0):.2%}")
    print(f"  Sharpe Ratio: {best_results.get('sharpe_ratio', 0):.2f}")
    print(f"  Max Drawdown: {best_results.get('max_drawdown', 0):.2%}")
    print(f"  Win Rate: {best_results.get('win_rate', 0):.2%}")
    print(f"  Total Trades: {best_results.get('total_trades', 0)}")

if not optimization_df.empty:
    # Best grid row by Sharpe ratio
    best_opt = optimization_df.loc[optimization_df['sharpe_ratio'].idxmax()]
    print(f"\nOPTIMAL MA CROSSOVER PARAMETERS:")
    print(f"  Short Window: {int(best_opt['short_window'])}")
    print(f"  Long Window: {int(best_opt['long_window'])}")
    print(f"  Optimized Sharpe: {best_opt['sharpe_ratio']:.3f}")

print("\n" + "=" * 50)
# Only claim success when at least one stage actually produced results
if backtest_results or not optimization_df.empty:
    print("Analysis completed successfully!")
    print("Check the 'data/processed' folder for saved results.")
else:
    print("Analysis finished without usable results; review the errors reported above.")