In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import sys
from datetime import datetime, timedelta
import logging
from typing import Dict, List

import pandas as pd
import numpy as np
import yfinance as yf
import backtrader as bt
from pypfopt import expected_returns, risk_models, EfficientFrontier
from pypfopt.exceptions import OptimizationError
import quantstats as qs
import pandas_market_calendars as mcal

# Configure logging
logging.basicConfig(
    format='[%(asctime)s] %(levelname)s: %(message)s',
    level=logging.INFO,
    datefmt='%H:%M:%S'
)
logger = logging.getLogger(__name__)

## Logging Setup

In [3]:
# Setup logging configuration
def setup_logging():
    formatter = logging.Formatter(
        fmt='[%(asctime)s.%(msecs)03d]: %(levelname)s: %(message)s',
        datefmt='%H:%M:%S'
    )
    
    # Setup handlers for different log levels
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    console_handler.setLevel(logging.INFO)

    error_handler = logging.StreamHandler(sys.stderr)
    error_handler.setFormatter(formatter)
    error_handler.setLevel(logging.ERROR)

    # Debug handler for detailed logs
    debug_handler = logging.StreamHandler(sys.stdout)
    debug_handler.setFormatter(formatter)
    debug_handler.setLevel(logging.DEBUG)
    
    logger = logging.getLogger('Algorithm')
    logger.setLevel(logging.DEBUG)
    logger.handlers = []  # Remove existing handlers
    logger.addHandler(console_handler)
    logger.addHandler(error_handler)
    logger.addHandler(debug_handler)
    
    return logger

log = setup_logging()

In [4]:
# Market calendar for trading days
nyse = mcal.get_calendar('NYSE')
valid_dates = nyse.valid_days(start_date='2013-01-01', end_date='2017-01-01')

## Algo Settings

In [5]:
# Settings
MONTH = 21             # Trading days in a month
YEAR = 12 * MONTH      # Trading days in a year
N_LONGS = 50           # Number of long positions
N_SHORTS = 50          # Number of short positions
MIN_POS = 5            # Minimum positions required in each direction
VOL_SCREEN = 500       # Top 500 most liquid stocks
MAX_POS_SIZE = 0.10    # Maximum 10% in any single position
COMMISSION = 0.00075   # Commission rate
REBALANCE_FREQUENCY = 5  # Rebalance every 5 trading days
capital_base = 1e7

In [6]:
class FactorPipeline:
    def __init__(self, data):
        self.data = data

    def compute_mean_reversion(self):
        results = {}
        for symbol, hist in self.data.items():
            try:
                close_prices = hist['Close']
                monthly_returns = close_prices.resample('M').last().pct_change()
                if len(monthly_returns) >= 12:
                    latest_return = monthly_returns.iloc[-1]
                    mean_annual = monthly_returns.rolling(12).mean().iloc[-1]
                    std_annual = monthly_returns.rolling(12).std().iloc[-1]
                    if pd.notnull(std_annual) and std_annual != 0:
                        factor = (latest_return - mean_annual) / std_annual
                        results[symbol] = factor
            except Exception as e:
                logger.warning(f"Error calculating factor for {symbol}: {e}")
        return pd.Series(results)

    def filter_by_volume(self, lookback_days: int = 30):
        volume_data = {}
        for symbol, hist in self.data.items():
            avg_volume = hist['Volume'].tail(lookback_days).mean()
            volume_data[symbol] = avg_volume
        volume_series = pd.Series(volume_data)
        return volume_series.nlargest(VOL_SCREEN).index.tolist()

    def rank_stocks(self):
        factors = self.compute_mean_reversion()
        ranked = factors.sort_values(ascending=True)
        self.longs = ranked.head(N_LONGS).index.tolist()
        self.shorts = ranked.tail(N_SHORTS).index.tolist()


In [7]:
class YahooDataFeed(bt.feeds.PandasData):
    """Custom data feed for Yahoo Finance data"""
    params = (
        ('datetime', None),
        ('open', 'Open'),
        ('high', 'High'),
        ('low', 'Low'),
        ('close', 'Close'),
        ('volume', 'Volume'),
        ('openinterest', None),
    )

## Mean Reversion Factor

In [8]:
class MeanReversionSignals:
    """Calculate mean reversion signals for a universe of stocks"""
    
    def __init__(self, symbols: List[str], start_date: datetime, end_date: datetime):
        self.symbols = symbols
        self.start_date = start_date
        self.end_date = end_date
        self.data = None
        
    def fetch_data(self):
        """Fetch data for all symbols"""
        logger.info("Fetching data for %d symbols", len(self.symbols))
        all_data = {}
        
        # Fetch data for all symbols at once for efficiency
        try:
            data = yf.download(
                self.symbols,
                start=self.start_date,
                end=self.end_date,
                group_by='ticker',
                auto_adjust=True
            )
            
            # If only one symbol, data structure is different
            if len(self.symbols) == 1:
                symbol = self.symbols[0]
                all_data[symbol] = data
            else:
                # Multiple symbols
                for symbol in self.symbols:
                    if symbol in data.columns.levels[0]:
                        symbol_data = data[symbol].copy()
                        if not symbol_data.empty:
                            all_data[symbol] = symbol_data
                            
            logger.info(f"Successfully fetched data for {len(all_data)} symbols")
            
        except Exception as e:
            logger.error(f"Error fetching data: {e}")
            
        self.data = all_data
        if not self.data:
            raise ValueError("No data was fetched for any symbols")
            
        return self.data
    
    def calculate_mean_reversion_factor(self) -> pd.Series:
        """Calculate mean reversion factor for all stocks"""
        if self.data is None:
            self.fetch_data()
            
        if not self.data:
            raise ValueError("No data available for calculation")
            
        results = {}
        for symbol, hist in self.data.items():
            try:
                # Get the closing prices
                close_prices = hist['Close'] if 'Close' in hist.columns else hist['Adj Close']
                
                # Calculate monthly returns
                monthly_returns = close_prices.resample('M').last().pct_change()
                
                if len(monthly_returns) >= 12:
                    latest_return = monthly_returns.iloc[-1]
                    mean_annual = monthly_returns.rolling(12).mean().iloc[-1]
                    std_annual = monthly_returns.rolling(12).std().iloc[-1]
                    
                    if pd.notnull(std_annual) and std_annual != 0:
                        factor = (latest_return - mean_annual) / std_annual
                        results[symbol] = factor
                        
            except Exception as e:
                logger.warning(f"Error calculating factor for {symbol}: {e}")
        
        if not results:
            raise ValueError("Could not calculate factors for any symbols")
            
        return pd.Series(results)
    
    def filter_by_volume(self, lookback_days: int = 30) -> List[str]:
        """Filter stocks by trading volume"""
        volume_data = {}
        
        for symbol, hist in self.data.items():
            avg_volume = hist['Volume'].tail(lookback_days).mean()
            volume_data[symbol] = avg_volume
            
        volume_series = pd.Series(volume_data)
        return volume_series.nlargest(VOL_SCREEN).index.tolist()

In [9]:
class MeanReversionStrategy(bt.Strategy):
    params = (
        ('month_length', MONTH),
        ('year_length', YEAR),
        ('n_longs', N_LONGS),
        ('n_shorts', N_SHORTS),
        ('min_positions', MIN_POS),
        ('max_pos_size', MAX_POS_SIZE),
        ('rebalance_freq', REBALANCE_FREQUENCY)
    )

    def __init__(self):
        self.orders = {}
        self.current_weights = {}
        self.last_rebalance = 0
        self.tracked_metrics = []

        self.monthly_returns = {}
        self.volumes = {}
        for data in self.datas:
            self.monthly_returns[data._name] = bt.indicators.PctChange(
                data.close, period=self.p.month_length
            )
            self.volumes[data._name] = bt.indicators.SMA(
                data.volume, period=30
            )

    def before_trading_start(self):
        """Fetch and rank stocks before each trading session"""
        if self.data.datetime.date(0) not in valid_dates:
            return

        # Initialize the pipeline with the latest data
        self.pipeline = FactorPipeline(self.fetch_latest_data())
        self.pipeline.rank_stocks()

        # Store the long and short stocks for use in the next trading step
        self.longs = self.pipeline.longs
        self.shorts = self.pipeline.shorts


    def get_volume_filtered_universe(self):
        vol_data = {}
        for data in self.datas:
            vol = self.volumes[data._name][0]
            if not np.isnan(vol):
                vol_data[data._name] = vol
        if not vol_data:
            return []
        vol_series = pd.Series(vol_data)
        vol_cutoff = vol_series.quantile(0.25)
        return vol_series[vol_series >= vol_cutoff].index.tolist()

    def optimize_portfolio(self, prices: pd.DataFrame, short: bool = False) -> Dict[str, float]:
        try:
            returns = expected_returns.mean_historical_return(prices=prices, frequency=252)
            cov = risk_models.sample_cov(prices=prices, frequency=252)
            weight_bounds = (0, self.p.max_pos_size) if not short else (-self.p.max_pos_size, 0)
            ef = EfficientFrontier(expected_returns=returns, cov_matrix=cov, weight_bounds=weight_bounds, solver='SCS')
            ef.max_sharpe()
            weights = ef.clean_weights()
            if short:
                return {asset: -weight for asset, weight in weights.items()}
            return weights
        except OptimizationError as e:
            logger.warning(f"Portfolio optimization failed: {e}, using equal weights")
            if short:
                return {asset: -1 / len(prices.columns) for asset in prices.columns}
            return {asset: 1 / len(prices.columns) for asset in prices.columns}

    def next(self):
        if len(self) - self.last_rebalance < self.p.rebalance_freq:
            return
        self.last_rebalance = len(self)
        logger.info(f"Processing date: {self.data0.datetime.date(0)}")

        valid_universe = self.get_volume_filtered_universe()
        if not valid_universe:
            logger.warning("No stocks meet volume criteria")
            return

        factors = {}
        prices = pd.DataFrame()
        for data in self.datas:
            if data._name not in valid_universe:
                continue
            if len(data) > self.p.year_length:
                prices[data._name] = data.close.get(size=252)
                monthly_ret = self.monthly_returns[data._name][0]
                returns = [self.monthly_returns[data._name][-i] for i in range(12)]
                valid_returns = [r for r in returns if not np.isnan(r)]
                if len(valid_returns) >= 6:
                    mean_annual = np.nanmean(valid_returns)
                    std_annual = np.nanstd(valid_returns)
                    if std_annual > 0 and not np.isnan(monthly_ret):
                        factors[data._name] = float((monthly_ret - mean_annual) / std_annual)
                        logger.info(f"Factor for {data._name}: {factors[data._name]:.4f}")

        if len(factors) >= 2 * self.p.min_positions:
            sorted_factors = sorted(factors.items(), key=lambda x: x[1])
            longs = [item[0] for item in sorted_factors[:self.p.n_longs]]
            shorts = [item[0] for item in sorted_factors[-self.p.n_shorts:]]
            logger.info(f"Long positions: {longs}")
            logger.info(f"Short positions: {shorts}")
            if len(longs) >= self.p.min_positions and len(shorts) >= self.p.min_positions:
                try:
                    long_prices = prices[longs]
                    short_prices = prices[shorts]
                    long_weights = self.optimize_portfolio(long_prices)
                    short_weights = self.optimize_portfolio(short_prices, short=True)
                    logger.info(f"Long weights: {long_weights}")
                    logger.info(f"Short weights: {short_weights}")
                    for data in self.datas:
                        symbol = data._name
                        if self.getposition(data).size and self.broker.get_orders_open(data):
                            continue
                        target_weight = long_weights.get(symbol, 0) + short_weights.get(symbol, 0)
                        target_weight = np.clip(target_weight, -self.p.max_pos_size, self.p.max_pos_size)
                        current_weight = self.current_weights.get(symbol, 0)
                        if abs(target_weight - current_weight) > 0.01:
                            logger.info(f"Placing order for {symbol}: target weight = {target_weight:.4f}")
                            self.order_target_percent(data, target_weight)
                            self.current_weights[symbol] = target_weight
                except Exception as e:
                    logger.warning(f"Portfolio optimization failed: {e}")
            else:
                logger.warning("Insufficient positions meet criteria")
        else:
            logger.warning(f"Not enough factors ({len(factors)}) for minimum positions ({2 * self.p.min_positions})")

## Run Algorithm

In [10]:
def run_backtest(symbols: List[str], 
                 start_date: datetime,
                 end_date: datetime,
                 initial_capital: float = 1e7):
    """Run backtest with the strategy"""

    if not symbols:
        raise ValueError("No symbols provided for backtest")

    logger.info("Fetching data for the pipeline...")

    # Step 1: Fetch data for all symbols
    all_data = {}
    for symbol in symbols:
        df = yf.download(symbol, start=start_date, end=end_date)
        if not df.empty:
            all_data[symbol] = df
            logger.info(f"Fetched data for {symbol}")

    # Step 2: Create a FactorPipeline instance
    pipeline = FactorPipeline(all_data)

    # Step 3: Filter by volume and compute rankings
    filtered_symbols = pipeline.filter_by_volume(lookback_days=30)
    pipeline.rank_stocks()

    # Get the final list of symbols to trade: longs and shorts
    symbols_to_trade = set(pipeline.longs + pipeline.shorts)

    # Step 4: Setup Backtrader
    cerebro = bt.Cerebro()
    cerebro.addstrategy(MeanReversionStrategy)
    cerebro.addanalyzer(bt.analyzers.Returns, _name='returns')
    cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
    cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe')
    cerebro.addobserver(bt.observers.Value)

    # Step 5: Add filtered data to Backtrader
    logger.info("Adding filtered data to the backtest...")

    for symbol in symbols_to_trade:
        if symbol in all_data:
            df = all_data[symbol]
            feed = YahooDataFeed(
                dataname=df,
                name=symbol,
                fromdate=start_date,
                todate=end_date
            )
            cerebro.adddata(feed)
            logger.info(f"Added {symbol} to backtest")

    # Step 6: Set Broker Parameters
    cerebro.broker.setcash(initial_capital)
    cerebro.broker.setcommission(commission=COMMISSION, margin=False, mult=1.0)
    cerebro.broker.set_slippage_perc(0.0005)

    # Step 7: Run Backtest
    logger.info("Starting backtest...")
    results = cerebro.run()
    strat = results[0]

    # Step 8: Extract Backtest Results
    final_value = cerebro.broker.getvalue()
    returns = (final_value - initial_capital) / initial_capital

    logger.info(f"Final Portfolio Value: ${final_value:,.2f}")
    logger.info(f"Return: {returns:.2%}")

    # Step 9: Extract Values for Analysis
    values = strat.observers.value.lines.value.array[::2]
    dates = [bt.num2date(x) for x in strat.datas[0].datetime.array]

    # Ensure lengths match
    min_length = min(len(values), len(dates))
    values = values[:min_length]
    dates = dates[:min_length]

    # Create DataFrame with daily values
    df_values = pd.DataFrame({
        'portfolio_value': values
    }, index=pd.DatetimeIndex(dates))

    # Calculate returns properly for quantstats
    df_values = df_values.resample('D').last().fillna(method='ffill')
    portfolio_returns = df_values['portfolio_value'].pct_change().dropna()

    # Step 10: Save Basic Metrics to File
    metrics_data = {
        'Total Return': f"{returns:.2%}",
        'Final Portfolio Value': f"${final_value:,.2f}",
        'Sharpe Ratio': strat.analyzers.sharpe.get_analysis()['sharperatio'],
        'Max Drawdown': f"{strat.analyzers.drawdown.get_analysis()['max']['drawdown']:.2%}"
    }

    with open('backtest_results.txt', 'w') as f:
        for metric, value in metrics_data.items():
            f.write(f"{metric}: {value}\n")

    return results, cerebro, start_date, end_date


In [16]:
import pandas as pd
import yfinance as yf
from datetime import datetime
import requests

def get_sp500_symbols():
    """Get current S&P 500 constituents using Wikipedia"""
    try:
        # Get SP500 list from Wikipedia
        url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
        tables = pd.read_html(url)
        sp500_table = tables[0]
        symbols = sp500_table['Symbol'].tolist()
        
        # Clean symbols (remove special characters, etc.)
        symbols = [sym.replace('.', '-') for sym in symbols]
        logger.info(f"Retrieved {len(symbols)} S&P 500 symbols")
        return symbols
        
    except Exception as e:
        logger.error(f"Error fetching S&P 500 symbols: {e}")
        return []

def validate_symbols(symbols, start_date, end_date):
    """Validate which symbols have data for the entire period"""
    valid_symbols = []
    
    logger.info("Validating symbols...")
    total = len(symbols)
    
    for i, symbol in enumerate(symbols, 1):
        try:
            # Try to get a small amount of data to verify the symbol exists
            df = yf.download(symbol, start=start_date, end=end_date, progress=False)
            if not df.empty and len(df) > 50:  # Require at least 50 days of data
                valid_symbols.append(symbol)
            
            if i % 50 == 0:  # Log progress every 50 symbols
                logger.info(f"Validated {i}/{total} symbols. Found {len(valid_symbols)} valid symbols.")
                
        except Exception as e:
            logger.warning(f"Error validating {symbol}: {e}")
            continue
    
    logger.info(f"Found {len(valid_symbols)} valid symbols out of {total}")
    return valid_symbols

def main():
    """Main function to run the backtest"""
    start_date = datetime(2013, 1, 1)
    end_date = datetime(2017, 1, 1)

    # Step 1: Get and validate symbols
    logger.info("Fetching S&P 500 symbols for the backtest...")
    sp500_symbols = get_sp500_symbols()

    if not sp500_symbols:
        logger.error("No symbols found for the backtest.")
        return

    # Validate the symbols to make sure they have sufficient data
    valid_symbols = validate_symbols(sp500_symbols, start_date, end_date)

    if not valid_symbols:
        logger.error("No valid symbols found after validation.")
        return

    # Step 2: Run the backtest with validated symbols
    try:
        logger.info("Starting the backtest...")
        results, cerebro, start, end = run_backtest(
            symbols=valid_symbols,
            start_date=start_date,
            end_date=end_date,
            initial_capital=1e7
        )
        logger.info("Backtest completed successfully.")

        return results, cerebro, start, end

    except Exception as e:
        logger.error(f"An error occurred during the backtest: {e}")

results, cerebro, start, end = main()


[00:42:01] INFO: Fetching S&P 500 symbols for the backtest...
[00:42:02] INFO: Retrieved 503 S&P 500 symbols
[00:42:02] INFO: Validating symbols...
[00:42:04] ERROR: 
1 Failed download:
[00:42:04] ERROR: ['ABNB']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2013-01-01 00:00:00 -> 2017-01-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1357016400, endDate = 1483246800")')
[00:42:07] ERROR: 
1 Failed download:
[00:42:07] ERROR: ['AMTM']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2013-01-01 00:00:00 -> 2017-01-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1357016400, endDate = 1483246800")')
[00:42:10] INFO: Validated 50/503 symbols. Found 48 valid symbols.
[00:42:16] ERROR: 
1 Failed download:
[00:42:16] ERROR: ['CARR']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2013-01-01 00:00:00 -> 2017-01-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startD

## Persist Results for use with `pyfolio`

In [61]:
def analyze_backtest_results(cerebro, results):
    """Analyze backtest results and create visualizations"""

    # Extract strategy instance
    strat = results[0]

    # Get portfolio values and associated datetimes from Backtrader
    portfolio_values = strat.observers.value.lines.value.array
    dates = [bt.num2date(d) for d in strat.datas[0].datetime.array]

    # Ensure lengths match: truncate the longer one if needed
    min_length = min(len(portfolio_values), len(dates))
    portfolio_values = portfolio_values[:min_length]
    dates = dates[:min_length]

    # Create the portfolio value series using the correct length and date range
    portfolio_series = pd.Series(portfolio_values, index=pd.to_datetime(dates))

    # Drop duplicate dates to ensure we have unique entries
    portfolio_series = portfolio_series[~portfolio_series.index.duplicated(keep='first')]

    # Ensure the index is a regular datetime index with no missing dates (continuous time series)
    complete_index = pd.date_range(start=portfolio_series.index.min(), end=portfolio_series.index.max(), freq='D')
    portfolio_series = portfolio_series.reindex(complete_index).ffill()

    # Calculate daily returns
    returns = portfolio_series.pct_change().dropna()

    # Extract orders and create a transactions DataFrame
    transactions = []
    for order in strat._orders:
        if order.status == bt.Order.Completed:
            transactions.append({
                'dt': bt.num2date(order.executed.dt),
                'symbol': order.data._name,
                'amount': order.executed.size,
                'price': order.executed.price,
                'txn_dollars': order.executed.size * order.executed.price
            })
    transactions_df = pd.DataFrame(transactions)

    if not transactions_df.empty:
        transactions_df.set_index('dt', inplace=True)

    # Save results to HDF5
    with pd.HDFStore('backtests.h5') as store:
        store.put('returns/mean_reversion', returns)
        if not transactions_df.empty:
            store.put('transactions/mean_reversion', transactions_df)

    # Create visualization
    fig, axes = plt.subplots(nrows=2, figsize=(14, 8))

    # Plot cumulative returns
    cum_returns = (1 + returns).cumprod() - 1
    cum_returns.plot(ax=axes[0], title='Cumulative Returns')
    axes[0].set_ylabel('Return (%)')

    # Plot cumulative transactions
    if not transactions_df.empty:
        transactions_df['txn_dollars'].cumsum().plot(
            ax=axes[1], title='Cumulative Transactions'
        )
        axes[1].set_ylabel('Transaction Value ($)')

    sns.despine()
    plt.tight_layout()

    # Save the analysis plots to a file
    plt.savefig('mean_reversion_analysis.png')
    plt.show()

    # Generate basic statistics
    stats = {
        "Total Return": f"{(cum_returns[-1] * 100):.2f}%",
        "Mean Daily Return": f"{(returns.mean() * 100):.4f}%",
        "Volatility (Std Dev)": f"{(returns.std() * 100):.4f}%",
        "Max Drawdown": f"{(cum_returns.min() * 100):.2f}%",
        "Sharpe Ratio": f"{(returns.mean() / returns.std()):.4f}" if returns.std() != 0 else "N/A"
    }

    # Print the statistics to console
    print("\nBacktest Analysis Statistics:")
    for key, value in stats.items():
        print(f"{key}: {value}")

    return returns, transactions_df

In [54]:
def compare_strategies(returns_dict, transactions_dict):
    """Compare multiple strategy results"""
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16, 10))
    
    # Plot returns
    for name, returns in returns_dict.items():
        cum_returns = (1 + returns).cumprod() - 1
        cum_returns.plot(ax=axes[0][0], label=name)
    axes[0][0].set_title('Cumulative Returns Comparison')
    axes[0][0].legend()
    
    # Plot transactions
    for name, txns in transactions_dict.items():
        if not txns.empty:
            txns.groupby(txns.index.date)['txn_dollars'].sum().cumsum().plot(
                ax=axes[0][1], label=name)
    axes[0][1].set_title('Cumulative Transactions Comparison')
    axes[0][1].legend()
    
    # Add performance metrics
    metrics = []
    for name, returns in returns_dict.items():
        sharpe = qs.stats.sharpe(returns)
        max_dd = qs.stats.max_drawdown(returns)
        total_return = (cum_returns.iloc[-1]) if not returns.empty else 0.0
        metrics.append({
            'Strategy': name,
            'Sharpe Ratio': sharpe,
            'Max Drawdown': max_dd,
            'Total Return': total_return
        })
    
    metrics_df = pd.DataFrame(metrics).set_index('Strategy')
    
    # Plot metrics
    metrics_df[['Sharpe Ratio']].plot(kind='bar', ax=axes[1][0])
    axes[1][0].set_title('Sharpe Ratio Comparison')
    metrics_df[['Max Drawdown']].plot(kind='bar', ax=axes[1][1])
    axes[1][1].set_title('Max Drawdown Comparison')
    
    plt.tight_layout()
    
    return metrics_df

In [55]:
# Example usage:
def run_analysis(cerebro, results, start_date, end_date):
    """Analyze and compare strategy results"""
    
    # Analyze mean reversion strategy
    returns, transactions = analyze_backtest_results(cerebro, results)
    
    # Load equal weight strategy results (if available)
    try:
        with pd.HDFStore('backtests.h5') as store:
            returns_ew = store['returns/equal_weight']
            tx_ew = store['transactions/equal_weight']
            
            # Compare strategies
            returns_dict = {
                'Mean Reversion': returns,
                'Equal Weight': returns_ew
            }
            
            transactions_dict = {
                'Mean Reversion': transactions,
                'Equal Weight': tx_ew
            }
            
            metrics_df = compare_strategies(returns_dict, transactions_dict)
            print("\nStrategy Comparison:")
            print(metrics_df)
            
    except KeyError:
        print("Equal weight strategy results not found. Showing only mean reversion results.")
    
    return returns, transactions

In [62]:
import matplotlib.pyplot as plt
import seaborn as sns

returns, transactions = run_analysis(cerebro, results, start, end)


<IPython.core.display.Javascript object>


Backtest Analysis Statistics:
Total Return: -16.99%
Mean Daily Return: -0.0124%
Volatility (Std Dev): 0.2786%
Max Drawdown: -16.99%
Sharpe Ratio: -0.0444
Equal weight strategy results not found. Showing only mean reversion results.
