In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import sys
from datetime import datetime, timedelta
import logging
from typing import Dict, List

import pandas as pd
import numpy as np
import yfinance as yf
import backtrader as bt
from pypfopt import expected_returns, risk_models, EfficientFrontier
from pypfopt.exceptions import OptimizationError
import quantstats as qs
import pandas_market_calendars as mcal

# Configure logging
logging.basicConfig(
    format='[%(asctime)s] %(levelname)s: %(message)s',
    level=logging.INFO,
    datefmt='%H:%M:%S'
)
logger = logging.getLogger(__name__)

## Logging Setup

In [3]:
# Setup logging configuration
def setup_logging():
    formatter = logging.Formatter(
        fmt='[%(asctime)s.%(msecs)03d]: %(levelname)s: %(message)s',
        datefmt='%H:%M:%S'
    )
    
    # Setup handlers for different log levels
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    console_handler.setLevel(logging.INFO)

    error_handler = logging.StreamHandler(sys.stderr)
    error_handler.setFormatter(formatter)
    error_handler.setLevel(logging.ERROR)

    # Debug handler for detailed logs
    debug_handler = logging.StreamHandler(sys.stdout)
    debug_handler.setFormatter(formatter)
    debug_handler.setLevel(logging.DEBUG)
    
    logger = logging.getLogger('Algorithm')
    logger.setLevel(logging.DEBUG)
    logger.handlers = []  # Remove existing handlers
    logger.addHandler(console_handler)
    logger.addHandler(error_handler)
    logger.addHandler(debug_handler)
    
    return logger

log = setup_logging()

In [4]:
# Market calendar for trading days
nyse = mcal.get_calendar('NYSE')
valid_dates = nyse.valid_days(start_date='2013-01-01', end_date='2017-01-01')

## Algo Settings

In [5]:
# Settings
MONTH = 21             # Trading days in a month
YEAR = 12 * MONTH      # Trading days in a year
N_LONGS = 50           # Number of long positions
N_SHORTS = 50          # Number of short positions
MIN_POS = 5            # Minimum positions required in each direction
VOL_SCREEN = 500       # Top 500 most liquid stocks
MAX_POS_SIZE = 0.10    # Maximum 10% in any single position
COMMISSION = 0.00075   # Commission rate
REBALANCE_FREQUENCY = 5  # Rebalance every 5 trading days
capital_base = 1e7

In [6]:
class FactorPipeline:
    def __init__(self, data):
        self.data = data
        self.longs = []
        self.shorts = []

    def compute_mean_reversion(self):
        results = {}
        for symbol, hist in self.data.items():
            try:
                close_prices = hist['Close']
                monthly_returns = close_prices.resample('M').last().pct_change()
                if len(monthly_returns) >= 12:
                    # Use .iloc[-1] to get the last value as a scalar
                    latest_return = monthly_returns.iloc[-1]
                    mean_annual = monthly_returns.rolling(12).mean().iloc[-1]
                    std_annual = monthly_returns.rolling(12).std().iloc[-1]
                    
                    # Make sure we have valid numbers
                    if (pd.notnull(latest_return) and 
                        pd.notnull(mean_annual) and 
                        pd.notnull(std_annual) and 
                        std_annual > 0):
                        factor = float((latest_return - mean_annual) / std_annual)
                        results[symbol] = factor
            except Exception as e:
                logger.warning(f"Error calculating factor for {symbol}: {e}")
        return pd.Series(results)

    def filter_by_volume(self, lookback_days: int = 30):
        volume_data = {}
        for symbol, hist in self.data.items():
            try:
                # Make sure to get a scalar value
                avg_volume = float(hist['Volume'].tail(lookback_days).mean())
                if pd.notnull(avg_volume):
                    volume_data[symbol] = avg_volume
            except Exception as e:
                logger.warning(f"Error calculating volume for {symbol}: {e}")
                
        volume_series = pd.Series(volume_data)
        if len(volume_series) > 0:
            return volume_series.nlargest(min(VOL_SCREEN, len(volume_series))).index.tolist()
        return []

    def rank_stocks(self):
        try:
            factors = self.compute_mean_reversion()
            if len(factors) > 0:
                ranked = factors.sort_values(ascending=True)
                n_longs = min(N_LONGS, len(ranked))
                n_shorts = min(N_SHORTS, len(ranked))
                
                self.longs = ranked.head(n_longs).index.tolist()
                self.shorts = ranked.tail(n_shorts).index.tolist()
            else:
                self.longs = []
                self.shorts = []
                
        except Exception as e:
            logger.error(f"Error in rank_stocks: {e}")
            self.longs = []
            self.shorts = []

In [7]:
class YahooDataFeed(bt.feeds.PandasData):
    """Custom data feed for Yahoo Finance data"""
    params = (
        ('datetime', None),
        ('open', 'Open'),
        ('high', 'High'),
        ('low', 'Low'),
        ('close', 'Close'),
        ('volume', 'Volume'),
        ('openinterest', None),
    )

## Mean Reversion Factor

In [8]:
class MeanReversionSignals:
    """Calculate mean reversion signals for a universe of stocks"""
    
    def __init__(self, symbols: List[str], start_date: datetime, end_date: datetime):
        self.symbols = symbols
        self.start_date = start_date
        self.end_date = end_date
        self.data = None
        
    def fetch_data(self):
        """Fetch data for all symbols"""
        logger.info("Fetching data for %d symbols", len(self.symbols))
        all_data = {}
        
        # Fetch data for all symbols at once for efficiency
        try:
            data = yf.download(
                self.symbols,
                start=self.start_date,
                end=self.end_date,
                group_by='ticker',
                auto_adjust=True
            )
            
            # If only one symbol, data structure is different
            if len(self.symbols) == 1:
                symbol = self.symbols[0]
                all_data[symbol] = data
            else:
                # Multiple symbols
                for symbol in self.symbols:
                    if symbol in data.columns.levels[0]:
                        symbol_data = data[symbol].copy()
                        if not symbol_data.empty:
                            all_data[symbol] = symbol_data
                            
            logger.info(f"Successfully fetched data for {len(all_data)} symbols")
            
        except Exception as e:
            logger.error(f"Error fetching data: {e}")
            
        self.data = all_data
        if not self.data:
            raise ValueError("No data was fetched for any symbols")
            
        return self.data
    
    def calculate_mean_reversion_factor(self) -> pd.Series:
        """Calculate mean reversion factor for all stocks"""
        if self.data is None:
            self.fetch_data()
            
        if not self.data:
            raise ValueError("No data available for calculation")
            
        results = {}
        for symbol, hist in self.data.items():
            try:
                # Get the closing prices
                close_prices = hist['Close'] if 'Close' in hist.columns else hist['Adj Close']
                
                # Calculate monthly returns
                monthly_returns = close_prices.resample('M').last().pct_change()
                
                if len(monthly_returns) >= 12:
                    latest_return = monthly_returns.iloc[-1]
                    mean_annual = monthly_returns.rolling(12).mean().iloc[-1]
                    std_annual = monthly_returns.rolling(12).std().iloc[-1]
                    
                    if pd.notnull(std_annual) and std_annual != 0:
                        factor = (latest_return - mean_annual) / std_annual
                        results[symbol] = factor
                        
            except Exception as e:
                logger.warning(f"Error calculating factor for {symbol}: {e}")
        
        if not results:
            raise ValueError("Could not calculate factors for any symbols")
            
        return pd.Series(results)
    
    def filter_by_volume(self, lookback_days: int = 30) -> List[str]:
        """Filter stocks by trading volume"""
        volume_data = {}
        
        for symbol, hist in self.data.items():
            avg_volume = hist['Volume'].tail(lookback_days).mean()
            volume_data[symbol] = avg_volume
            
        volume_series = pd.Series(volume_data)
        return volume_series.nlargest(VOL_SCREEN).index.tolist()

In [9]:
class MeanReversionStrategy(bt.Strategy):
    params = (
        ('month_length', MONTH),
        ('year_length', YEAR),
        ('n_longs', N_LONGS),
        ('n_shorts', N_SHORTS),
        ('min_positions', MIN_POS),
        ('max_pos_size', MAX_POS_SIZE),
        ('rebalance_freq', REBALANCE_FREQUENCY)
    )

    def __init__(self):
        self.orders = {}
        self.current_weights = {}
        self.last_rebalance = 0
        self.tracked_metrics = []

        self.monthly_returns = {}
        self.volumes = {}
        for data in self.datas:
            self.monthly_returns[data._name] = bt.indicators.PctChange(
                data.close, period=self.p.month_length
            )
            self.volumes[data._name] = bt.indicators.SMA(
                data.volume, period=30
            )

    def before_trading_start(self):
        """Fetch and rank stocks before each trading session"""
        if self.data.datetime.date(0) not in valid_dates:
            return

        # Initialize the pipeline with the latest data
        self.pipeline = FactorPipeline(self.fetch_latest_data())
        self.pipeline.rank_stocks()

        # Store the long and short stocks for use in the next trading step
        self.longs = self.pipeline.longs
        self.shorts = self.pipeline.shorts


    def get_volume_filtered_universe(self):
        vol_data = {}
        for data in self.datas:
            vol = self.volumes[data._name][0]
            if not np.isnan(vol):
                vol_data[data._name] = vol
        if not vol_data:
            return []
        vol_series = pd.Series(vol_data)
        vol_cutoff = vol_series.quantile(0.25)
        return vol_series[vol_series >= vol_cutoff].index.tolist()

    def optimize_portfolio(self, prices: pd.DataFrame, short: bool = False) -> Dict[str, float]:
        try:
            returns = expected_returns.mean_historical_return(prices=prices, frequency=252)
            cov = risk_models.sample_cov(prices=prices, frequency=252)
            weight_bounds = (0, self.p.max_pos_size) if not short else (-self.p.max_pos_size, 0)
            ef = EfficientFrontier(expected_returns=returns, cov_matrix=cov, weight_bounds=weight_bounds, solver='SCS')
            ef.max_sharpe()
            weights = ef.clean_weights()
            if short:
                return {asset: -weight for asset, weight in weights.items()}
            return weights
        except OptimizationError as e:
            logger.warning(f"Portfolio optimization failed: {e}, using equal weights")
            if short:
                return {asset: -1 / len(prices.columns) for asset in prices.columns}
            return {asset: 1 / len(prices.columns) for asset in prices.columns}

    def next(self):
        if len(self) - self.last_rebalance < self.p.rebalance_freq:
            return
        self.last_rebalance = len(self)
        logger.info(f"Processing date: {self.data0.datetime.date(0)}")

        valid_universe = self.get_volume_filtered_universe()
        if not valid_universe:
            logger.warning("No stocks meet volume criteria")
            return

        factors = {}
        prices = pd.DataFrame()
        for data in self.datas:
            if data._name not in valid_universe:
                continue
            if len(data) > self.p.year_length:
                prices[data._name] = data.close.get(size=252)
                monthly_ret = self.monthly_returns[data._name][0]
                returns = [self.monthly_returns[data._name][-i] for i in range(12)]
                valid_returns = [r for r in returns if not np.isnan(r)]
                if len(valid_returns) >= 6:
                    mean_annual = np.nanmean(valid_returns)
                    std_annual = np.nanstd(valid_returns)
                    if std_annual > 0 and not np.isnan(monthly_ret):
                        factors[data._name] = float((monthly_ret - mean_annual) / std_annual)
                        logger.info(f"Factor for {data._name}: {factors[data._name]:.4f}")

        if len(factors) >= 2 * self.p.min_positions:
            sorted_factors = sorted(factors.items(), key=lambda x: x[1])
            longs = [item[0] for item in sorted_factors[:self.p.n_longs]]
            shorts = [item[0] for item in sorted_factors[-self.p.n_shorts:]]
            logger.info(f"Long positions: {longs}")
            logger.info(f"Short positions: {shorts}")
            if len(longs) >= self.p.min_positions and len(shorts) >= self.p.min_positions:
                try:
                    long_prices = prices[longs]
                    short_prices = prices[shorts]
                    long_weights = self.optimize_portfolio(long_prices)
                    short_weights = self.optimize_portfolio(short_prices, short=True)
                    logger.info(f"Long weights: {long_weights}")
                    logger.info(f"Short weights: {short_weights}")
                    for data in self.datas:
                        symbol = data._name
                        if self.getposition(data).size and self.broker.get_orders_open(data):
                            continue
                        target_weight = long_weights.get(symbol, 0) + short_weights.get(symbol, 0)
                        target_weight = np.clip(target_weight, -self.p.max_pos_size, self.p.max_pos_size)
                        current_weight = self.current_weights.get(symbol, 0)
                        if abs(target_weight - current_weight) > 0.01:
                            logger.info(f"Placing order for {symbol}: target weight = {target_weight:.4f}")
                            self.order_target_percent(data, target_weight)
                            self.current_weights[symbol] = target_weight
                except Exception as e:
                    logger.warning(f"Portfolio optimization failed: {e}")
            else:
                logger.warning("Insufficient positions meet criteria")
        else:
            logger.warning(f"Not enough factors ({len(factors)}) for minimum positions ({2 * self.p.min_positions})")

## Run Algorithm

In [10]:
def run_backtest(symbols: List[str], 
                 start_date: datetime,
                 end_date: datetime,
                 initial_capital: float = 1e7):
    """Run backtest with the strategy"""

    if not symbols:
        raise ValueError("No symbols provided for backtest")

    logger.info(f"Starting backtest with {len(symbols)} symbols...")

    # Step 1: Fetch data for all symbols
    all_data = {}
    for symbol in symbols:
        try:
            df = yf.download(symbol, start=start_date, end=end_date, progress=False)
            if not df.empty and len(df) > 50:  # Ensure minimum data points
                all_data[symbol] = df
                logger.debug(f"Fetched data for {symbol}")
        except Exception as e:
            logger.warning(f"Error fetching data for {symbol}: {e}")
            continue

    if not all_data:
        raise ValueError("No valid data fetched for any symbols")

    # Step 2: Create a FactorPipeline instance
    pipeline = FactorPipeline(all_data)

    # Step 3: Filter by volume and compute rankings
    filtered_symbols = pipeline.filter_by_volume(lookback_days=30)
    if not filtered_symbols:
        raise ValueError("No symbols passed volume filter")
    
    pipeline.rank_stocks()
    
    # Get the final list of symbols to trade
    symbols_to_trade = list(set(pipeline.longs + pipeline.shorts))
    if not symbols_to_trade:
        raise ValueError("No symbols to trade after ranking")

    logger.info(f"Trading {len(symbols_to_trade)} symbols after filtering")

    # Step 4: Setup Backtrader
    cerebro = bt.Cerebro()
    cerebro.addstrategy(MeanReversionStrategy)
    cerebro.addanalyzer(bt.analyzers.Returns, _name='returns')
    cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
    cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe')
    cerebro.addobserver(bt.observers.Value)

    # Step 5: Add filtered data to Backtrader
    data_added = False
    for symbol in symbols_to_trade:
        if symbol in all_data:
            try:
                df = all_data[symbol]
                if len(df) > 0:  # Verify we have data
                    feed = YahooDataFeed(
                        dataname=df,
                        name=symbol,
                        fromdate=start_date,
                        todate=end_date
                    )
                    cerebro.adddata(feed)
                    data_added = True
                    logger.debug(f"Added {symbol} to backtest")
            except Exception as e:
                logger.warning(f"Error adding data for {symbol}: {e}")
                continue

    if not data_added:
        raise ValueError("No data could be added to backtest")

    # Step 6: Set Broker Parameters
    cerebro.broker.setcash(initial_capital)
    cerebro.broker.setcommission(commission=COMMISSION, margin=False, mult=1.0)
    cerebro.broker.set_slippage_perc(0.0005)

    # Step 7: Run Backtest
    logger.info("Starting backtest execution...")
    try:
        results = cerebro.run()
        if not results or len(results) == 0:
            raise ValueError("Backtest returned no results")
            
        strat = results[0]

        # Step 8: Extract Backtest Results
        final_value = cerebro.broker.getvalue()
        returns = (final_value - initial_capital) / initial_capital

        logger.info(f"Final Portfolio Value: ${final_value:,.2f}")
        logger.info(f"Return: {returns:.2%}")

        # Step 9: Extract Values for Analysis
        if hasattr(strat, 'observers') and hasattr(strat.observers, 'value'):
            values = strat.observers.value.get(size=0)  # Get all values
            if len(values) > 0 and len(strat.datas) > 0:
                dates = [bt.num2date(x) for x in strat.datas[0].datetime.get(size=0)]
                
                # Ensure lengths match
                min_length = min(len(values), len(dates))
                values = values[:min_length]
                dates = dates[:min_length]

                # Create DataFrame with daily values
                df_values = pd.DataFrame({
                    'portfolio_value': values
                }, index=pd.DatetimeIndex(dates))

                df_values = df_values.resample('D').last().fillna(method='ffill')
                portfolio_returns = df_values['portfolio_value'].pct_change().dropna()

        return results, cerebro, start_date, end_date

    except Exception as e:
        logger.error(f"Error during backtest execution: {e}")
        raise

In [11]:
def create_symbol_availability_db():
    """Create a database of symbol availability periods"""
    # Get SP500 symbols
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    tables = pd.read_html(url)
    sp500_table = tables[0]
    symbols = [sym.replace('.', '-') for sym in sp500_table['Symbol'].tolist()]
    
    # Create availability database
    availability_db = {}
    total = len(symbols)
    
    for i, symbol in enumerate(symbols, 1):
        try:
            # Get full history to determine data availability
            df = yf.download(symbol, progress=False)
            if not df.empty:
                availability_db[symbol] = {
                    'start_date': df.index[0].date(),
                    'end_date': df.index[-1].date(),
                    'total_days': len(df)
                }
            
            if i % 50 == 0:
                logger.info(f"Processed {i}/{total} symbols")
                
        except Exception as e:
            logger.warning(f"Error checking {symbol}: {e}")
            continue
    
    return availability_db

# Create and save the availability database
symbol_availability_db = create_symbol_availability_db()

# Print summary
print("\nAvailability Database Summary:")
print(f"Total symbols tracked: {len(symbol_availability_db)}")

[14:48:42] ERROR: 
1 Failed download:
[14:48:42] ERROR: ['AEE']: ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out. (read timeout=10)"))
[14:48:51] INFO: Processed 50/503 symbols
[14:49:13] ERROR: 
1 Failed download:
[14:49:13] ERROR: ['CAH']: ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out. (read timeout=10)"))
[14:49:17] INFO: Processed 100/503 symbols
[14:50:21] ERROR: 
1 Failed download:
[14:50:21] ERROR: ['CVS']: ConnectionError(ReadTimeoutError("HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out."))
[14:50:28] INFO: Processed 150/503 symbols
[14:54:15] ERROR: 
1 Failed download:
[14:54:15] ERROR: ['EVRG']: ConnectionError(ReadTimeoutError("HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out."))
[14:54:15] ERROR: 
1 Failed download:
[14:54:15] ERROR: ['ES']: ConnectionError(MaxRetryError('HTTPSConnectionPool(


Availability Database Summary:
Total symbols tracked: 178


In [12]:
def validate_symbols_from_db(availability_db, start_date, end_date, min_days=50):
    """
    Validate symbols using the availability database
    
    Parameters:
    - availability_db: Dictionary containing symbol availability data
    - start_date: datetime, start of backtest period
    - end_date: datetime, end of backtest period
    - min_days: int, minimum number of trading days required
    
    Returns:
    - List of valid symbols
    """
    valid_symbols = []
    
    # Convert datetime to date if necessary
    if isinstance(start_date, datetime):
        start_date = start_date.date()
    if isinstance(end_date, datetime):
        end_date = end_date.date()
    
    for symbol, availability in availability_db.items():
        if (availability['start_date'] <= start_date and 
            availability['end_date'] >= end_date and 
            availability['total_days'] >= min_days):
            valid_symbols.append(symbol)
    
    return valid_symbols

def main():
    """Main function to run the backtest"""
    try:
        # User-defined backtest period
        start_date = datetime(2013, 1, 1)
        end_date = datetime(2017, 1, 1)

        # Validate symbols using the availability database
        valid_symbols = validate_symbols_from_db(
            symbol_availability_db, 
            start_date, 
            end_date, 
            min_days=50
        )
        
        if not valid_symbols:
            logger.error("No valid symbols found for the specified period.")
            return None, None, None, None

        # Run the backtest with validated symbols
        logger.info(f"Starting backtest with {len(valid_symbols)} symbols...")
        results, cerebro, start, end = run_backtest(
            symbols=valid_symbols,
            start_date=start_date,
            end_date=end_date,
            initial_capital=1e7
        )
        logger.info("Backtest completed successfully.")

        return results, cerebro, start, end

    except Exception as e:
        logger.error(f"An error occurred during the backtest: {e}")
        return None, None, None, None

# Run the backtest
results, cerebro, start, end = main()
if results is None:
    logger.error("Backtest failed to complete successfully")
else:
    print("Backtest completed successfully")

[14:54:20] INFO: Starting backtest with 162 symbols...
[14:54:20] INFO: Starting backtest with 162 symbols...
[14:54:20] ERROR: 
1 Failed download:
[14:54:20] ERROR: ['MMM']: ConnectionError(MaxRetryError('HTTPSConnectionPool(host=\'query2.finance.yahoo.com\', port=443): Max retries exceeded with url: /v8/finance/chart/%ticker%?period1=1357016400&period2=1483246800&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains&crumb=FII%2FOTMD0ny (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x13acc8100>: Failed to resolve \'query2.finance.yahoo.com\' ([Errno 8] nodename nor servname provided, or not known)"))'))
[14:54:20] ERROR: 
1 Failed download:
[14:54:20] ERROR: ['AOS']: ConnectionError(MaxRetryError('HTTPSConnectionPool(host=\'query2.finance.yahoo.com\', port=443): Max retries exceeded with url: /v8/finance/chart/%ticker%?period1=1357016400&period2=1483246800&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains&crumb=FII%2FOTMD

## Persist Results for use with `pyfolio`

In [13]:
def analyze_backtest_results(cerebro, results):
    """Analyze backtest results and create visualizations"""

    # Extract strategy instance
    strat = results[0]

    # Get portfolio values and associated datetimes from Backtrader
    portfolio_values = strat.observers.value.lines.value.array
    dates = [bt.num2date(d) for d in strat.datas[0].datetime.array]

    # Ensure lengths match: truncate the longer one if needed
    min_length = min(len(portfolio_values), len(dates))
    portfolio_values = portfolio_values[:min_length]
    dates = dates[:min_length]

    # Create the portfolio value series using the correct length and date range
    portfolio_series = pd.Series(portfolio_values, index=pd.to_datetime(dates))

    # Drop duplicate dates to ensure we have unique entries
    portfolio_series = portfolio_series[~portfolio_series.index.duplicated(keep='first')]

    # Ensure the index is a regular datetime index with no missing dates (continuous time series)
    complete_index = pd.date_range(start=portfolio_series.index.min(), end=portfolio_series.index.max(), freq='D')
    portfolio_series = portfolio_series.reindex(complete_index).ffill()

    # Calculate daily returns
    returns = portfolio_series.pct_change().dropna()

    # Extract orders and create a transactions DataFrame
    transactions = []
    for order in strat._orders:
        if order.status == bt.Order.Completed:
            transactions.append({
                'dt': bt.num2date(order.executed.dt),
                'symbol': order.data._name,
                'amount': order.executed.size,
                'price': order.executed.price,
                'txn_dollars': order.executed.size * order.executed.price
            })
    transactions_df = pd.DataFrame(transactions)

    if not transactions_df.empty:
        transactions_df.set_index('dt', inplace=True)

    # Save results to HDF5
    with pd.HDFStore('backtests.h5') as store:
        store.put('returns/mean_reversion', returns)
        if not transactions_df.empty:
            store.put('transactions/mean_reversion', transactions_df)

    # Create visualization
    fig, axes = plt.subplots(nrows=2, figsize=(14, 8))

    # Plot cumulative returns
    cum_returns = (1 + returns).cumprod() - 1
    cum_returns.plot(ax=axes[0], title='Cumulative Returns')
    axes[0].set_ylabel('Return (%)')

    # Plot cumulative transactions
    if not transactions_df.empty:
        transactions_df['txn_dollars'].cumsum().plot(
            ax=axes[1], title='Cumulative Transactions'
        )
        axes[1].set_ylabel('Transaction Value ($)')

    sns.despine()
    plt.tight_layout()

    # Save the analysis plots to a file
    plt.savefig('mean_reversion_analysis.png')
    plt.show()

    # Generate basic statistics
    stats = {
        "Total Return": f"{(cum_returns[-1] * 100):.2f}%",
        "Mean Daily Return": f"{(returns.mean() * 100):.4f}%",
        "Volatility (Std Dev)": f"{(returns.std() * 100):.4f}%",
        "Max Drawdown": f"{(cum_returns.min() * 100):.2f}%",
        "Sharpe Ratio": f"{(returns.mean() / returns.std()):.4f}" if returns.std() != 0 else "N/A"
    }

    # Print the statistics to console
    print("\nBacktest Analysis Statistics:")
    for key, value in stats.items():
        print(f"{key}: {value}")

    return returns, transactions_df

In [14]:
def compare_strategies(returns_dict, transactions_dict):
    """Compare multiple strategy results"""
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16, 10))
    
    # Plot returns
    for name, returns in returns_dict.items():
        cum_returns = (1 + returns).cumprod() - 1
        cum_returns.plot(ax=axes[0][0], label=name)
    axes[0][0].set_title('Cumulative Returns Comparison')
    axes[0][0].legend()
    
    # Plot transactions
    for name, txns in transactions_dict.items():
        if not txns.empty:
            txns.groupby(txns.index.date)['txn_dollars'].sum().cumsum().plot(
                ax=axes[0][1], label=name)
    axes[0][1].set_title('Cumulative Transactions Comparison')
    axes[0][1].legend()
    
    # Add performance metrics
    metrics = []
    for name, returns in returns_dict.items():
        sharpe = qs.stats.sharpe(returns)
        max_dd = qs.stats.max_drawdown(returns)
        total_return = (cum_returns.iloc[-1]) if not returns.empty else 0.0
        metrics.append({
            'Strategy': name,
            'Sharpe Ratio': sharpe,
            'Max Drawdown': max_dd,
            'Total Return': total_return
        })
    
    metrics_df = pd.DataFrame(metrics).set_index('Strategy')
    
    # Plot metrics
    metrics_df[['Sharpe Ratio']].plot(kind='bar', ax=axes[1][0])
    axes[1][0].set_title('Sharpe Ratio Comparison')
    metrics_df[['Max Drawdown']].plot(kind='bar', ax=axes[1][1])
    axes[1][1].set_title('Max Drawdown Comparison')
    
    plt.tight_layout()
    
    return metrics_df

In [15]:
# Example usage:
def run_analysis(cerebro, results, start_date, end_date):
    """Analyze and compare strategy results"""
    
    # Analyze mean reversion strategy
    returns, transactions = analyze_backtest_results(cerebro, results)
    
    # Load equal weight strategy results (if available)
    try:
        with pd.HDFStore('backtests.h5') as store:
            returns_ew = store['returns/equal_weight']
            tx_ew = store['transactions/equal_weight']
            
            # Compare strategies
            returns_dict = {
                'Mean Reversion': returns,
                'Equal Weight': returns_ew
            }
            
            transactions_dict = {
                'Mean Reversion': transactions,
                'Equal Weight': tx_ew
            }
            
            metrics_df = compare_strategies(returns_dict, transactions_dict)
            print("\nStrategy Comparison:")
            print(metrics_df)
            
    except KeyError:
        print("Equal weight strategy results not found. Showing only mean reversion results.")
    
    return returns, transactions

In [16]:
import matplotlib.pyplot as plt
import seaborn as sns

returns, transactions = run_analysis(cerebro, results, start, end)


TypeError: 'NoneType' object is not subscriptable