In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from Backtest import Backtest

In [11]:
import pandas as pd
from datetime import datetime
import os
from pathlib import Path
import logging
import yaml
import sys

# Set up logging with more detailed format
logging.basicConfig(
    level=logging.DEBUG,  # Changed to DEBUG level
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    stream=sys.stdout  # Ensure output goes to notebook
)
logger = logging.getLogger(__name__)

def setup_directories():
    """Create necessary directories for the backtest system."""
    try:
        # Define base directories
        __file__ = os.path.abspath('main.ipynb')
        base_dir = os.path.abspath(os.path.dirname(__file__))
        data_dir = os.path.join(base_dir, 'data')
        db_dir = os.path.join(data_dir, 'db')
        cache_dir = os.path.join(data_dir, 'cache')
        
        logger.debug(f"Setting up directories:")
        logger.debug(f"Base dir: {base_dir}")
        logger.debug(f"Data dir: {data_dir}")
        logger.debug(f"DB dir: {db_dir}")
        logger.debug(f"Cache dir: {cache_dir}")
        
        # Create directories
        Path(data_dir).mkdir(exist_ok=True)
        Path(db_dir).mkdir(exist_ok=True)
        Path(cache_dir).mkdir(exist_ok=True)
        
        # Verify directories were created
        for dir_path in [data_dir, db_dir, cache_dir]:
            if not os.path.exists(dir_path):
                raise RuntimeError(f"Failed to create directory: {dir_path}")
            else:
                logger.debug(f"Verified directory exists: {dir_path}")
        
        # Create default config if it doesn't exist
        config_path = os.path.join(base_dir, 'config.yaml')
        if not os.path.exists(config_path):
            logger.debug("Creating default config.yaml")
            default_config = {
                'cache': {
                    'max_memory_cache_size': 1000,
                    'cache_expiry_days': 1,
                    'update_frequency': '1d',
                    'compression_type': 'parquet'
                },
                'download': {
                    'max_retries': 3,
                    'retry_delay': 5,
                    'batch_size': 100,
                    'timeout': 30
                },
                'validation': {
                    'min_data_points': 50,
                    'max_missing_pct': 0.1,
                    'price_threshold': 0.01
                }
            }
            with open(config_path, 'w') as f:
                yaml.dump(default_config, f)
            logger.debug("Created config.yaml successfully")
        else:
            logger.debug("config.yaml already exists")
        
        return {
            'base_dir': base_dir,
            'data_dir': data_dir,
            'db_dir': db_dir,
            'cache_dir': cache_dir,
            'config_path': config_path
        }
    except Exception as e:
        logger.error(f"Error in setup_directories: {str(e)}")
        raise

def test_backtest():
    """Test the Backtest class and its data fetching capabilities."""
    logger.info("Starting backtest test")
    
    # Set up test parameters
    symbols = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'META']
    start_date = '2023-01-01'
    end_date = '2024-01-01'
    
    try:
        # Set up directories first
        logger.info("Setting up directories...")
        dirs = setup_directories()
        
        logger.info("Testing with directories:")
        for key, value in dirs.items():
            logger.info(f"{key}: {value}")
            
        # Verify config file
        if not os.path.exists(dirs['config_path']):
            raise RuntimeError(f"Config file not found at {dirs['config_path']}")
        
        # Initialize Backtest with proper config path
        logger.info("Initializing Backtest class...")
        backtest = Backtest(config_path=dirs['config_path'])
        
        # Fetch historical data
        logger.info(f"Fetching historical data for symbols: {symbols}")
        historical_data = backtest.fetch_historical_data(symbols, start_date, end_date)
        
        # Verify data
        if not historical_data:
            logger.warning("No historical data was returned!")
        else:
            logger.info(f"Retrieved data for {len(historical_data)} symbols")
            
            # Print summary statistics for each symbol
            for symbol, df in historical_data.items():
                logger.info(f"\nSummary for {symbol}:")
                logger.info(f"Date Range: {df.index.min()} to {df.index.max()}")
                logger.info(f"Number of trading days: {len(df)}")
                logger.info("\nPrice Statistics:")
                logger.info(df['Close'].describe())
        
        return historical_data
        
    except Exception as e:
        logger.error(f"Error during testing: {str(e)}")
        logger.error("Stack trace:", exc_info=True)
        raise

if __name__ == "__main__":
    historical_data = test_backtest()

2024-12-18 14:27:31,262 - __main__ - INFO - Starting backtest test
2024-12-18 14:27:31,262 - __main__ - INFO - Setting up directories...
2024-12-18 14:27:31,264 - __main__ - INFO - Testing with directories:
2024-12-18 14:27:31,264 - __main__ - INFO - base_dir: /Users/calvinseamons/Repositories/NivlacSignals/CS470_Project/ML_Backtesting
2024-12-18 14:27:31,264 - __main__ - INFO - data_dir: /Users/calvinseamons/Repositories/NivlacSignals/CS470_Project/ML_Backtesting/data
2024-12-18 14:27:31,265 - __main__ - INFO - db_dir: /Users/calvinseamons/Repositories/NivlacSignals/CS470_Project/ML_Backtesting/data/db
2024-12-18 14:27:31,265 - __main__ - INFO - cache_dir: /Users/calvinseamons/Repositories/NivlacSignals/CS470_Project/ML_Backtesting/data/cache
2024-12-18 14:27:31,265 - __main__ - INFO - config_path: /Users/calvinseamons/Repositories/NivlacSignals/CS470_Project/ML_Backtesting/config.yaml
2024-12-18 14:27:31,266 - __main__ - INFO - Initializing Backtest class...
2024-12-18 14:27:31,266 -


=== Starting get_data ===
Input symbols: ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'META']
Date range: 2023-01-01 00:00:00 to 2024-01-01 00:00:00

Validating symbols...

=== Starting symbol validation ===
Validating 5 symbols: ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'META']

Checking symbol: AAPL
Database query result for AAPL: (1, '2023-01-03', '2023-12-29')
Date range in DB: 2023-01-03 to 2023-12-29
Adding AAPL to valid symbols - date range acceptable

Checking symbol: GOOGL
Database query result for GOOGL: (1, '2023-01-03', '2023-12-29')
Date range in DB: 2023-01-03 to 2023-12-29
Adding GOOGL to valid symbols - date range acceptable

Checking symbol: MSFT
Database query result for MSFT: (1, '2023-01-03', '2023-12-29')
Date range in DB: 2023-01-03 to 2023-12-29
Adding MSFT to valid symbols - date range acceptable

Checking symbol: AMZN
Database query result for AMZN: (1, '2023-01-03', '2023-12-29')
Date range in DB: 2023-01-03 to 2023-12-29
Adding AMZN to valid symbols - date range acceptable

Ch