In [9]:
import yfinance as yf
import pandas as pd
import numpy as np

def calculate_features(df):
    """Engineer technical-indicator features from SPY price data.

    The input is copied first, so the caller's DataFrame is never modified.

    Args:
        df: DataFrame holding at least the columns ``'spy_close'``,
            ``'High'`` and ``'Low'``.

    Returns:
        A copy of ``df`` with these columns added: ``rsi``, ``sma_20``,
        ``std_20``, ``bollinger_upper``, ``bollinger_lower``,
        ``z_score_20`` and ``atr``. Rows inside an indicator's warm-up
        window hold NaN for that indicator.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    df = df.copy()
    close = df['spy_close']

    # 1. Relative Strength Index (RSI), 14-period simple averages.
    delta = close.diff()
    # clip() keeps the leading NaN produced by diff(), so the first RSI value
    # is only emitted once 14 real price changes are available.
    avg_gain = delta.clip(lower=0).rolling(window=14).mean()
    avg_loss = (-delta).clip(lower=0).rolling(window=14).mean()

    # Blank out zero denominators so the division never produces inf.
    no_losses = avg_loss == 0
    rs = avg_gain / avg_loss.mask(no_losses)
    rsi = 100 - (100 / (1 + rs))
    # A window with gains but no losses has unbounded RS, whose RSI limit is
    # 100. A completely flat window (no gains, no losses) stays NaN because
    # RSI is undefined there.
    df['rsi'] = rsi.mask(no_losses & (avg_gain > 0), 100.0)

    # 2. Bollinger Bands: 20-period mean +/- two standard deviations.
    window_20 = close.rolling(window=20)
    df['sma_20'] = window_20.mean()
    df['std_20'] = window_20.std()
    df['bollinger_upper'] = df['sma_20'] + (df['std_20'] * 2)
    df['bollinger_lower'] = df['sma_20'] - (df['std_20'] * 2)

    # 3. Rolling Z-Score of the close relative to its 20-period window.
    df['z_score_20'] = (close - df['sma_20']) / df['std_20']

    # 4. Average True Range (ATR), 14-period simple average of the true range.
    prev_close = close.shift()
    true_range = pd.concat(
        [
            df['High'] - df['Low'],
            (df['High'] - prev_close).abs(),
            (df['Low'] - prev_close).abs(),
        ],
        axis=1,
    ).max(axis=1)  # max() skips NaN, so the first row falls back to High - Low
    df['atr'] = true_range.rolling(window=14).mean()

    return df

def get_market_data(start_date="2005-01-01"):
    """Fetch SPY and ^VIX history and return one feature-enriched table.

    Both tickers are downloaded through ``yf.download`` with
    ``auto_adjust=True``, restricted to the dates present in both series,
    and combined into a single DataFrame. The SPY close is exposed as
    ``spy_close`` and the VIX close as ``vix``.

    Args:
        start_date: First date to request from the data provider.

    Returns:
        DataFrame with ``Open``, ``High``, ``Low``, ``spy_close``,
        ``Volume``, ``vix``, the columns added by ``calculate_features``
        and ``daily_return``. Rows containing any NaN are dropped.
    """
    raw_spy = yf.download('SPY', start=start_date, auto_adjust=True)
    raw_vix = yf.download('^VIX', start=start_date, auto_adjust=True)

    # Flatten two-level column headers by discarding the second level.
    for frame in (raw_spy, raw_vix):
        if isinstance(frame.columns, pd.MultiIndex):
            frame.columns = frame.columns.droplevel(1)

    # Keep only the fields used downstream.
    spy = raw_spy[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
    vix = raw_vix[['Close']].copy().rename(columns={'Close': 'vix'})

    # Restrict both tables to the dates they share.
    shared_dates = spy.index.intersection(vix.index)
    spy = spy.loc[shared_dates]
    vix = vix.loc[shared_dates]

    # Attach the VIX close to the SPY table and give the SPY close a clearer name.
    combined = spy.copy()
    combined['vix'] = vix['vix']
    combined = combined.rename(columns={'Close': 'spy_close'})

    combined = calculate_features(combined)

    # Day-over-day percentage change of the SPY close, used by the backtest.
    combined['daily_return'] = combined['spy_close'].pct_change()

    return combined.dropna()

# Example usage:
if __name__ == '__main__':
    # Build the feature table, then print a short sanity-check summary:
    # the last rows, the column names and the overall shape.
    market_data = get_market_data()
    print("Data loaded and features engineered:", market_data.tail(), sep="\n")
    print("\nColumn names:", market_data.columns.tolist(), sep="\n")
    print(f"\nShape: {market_data.shape}")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Data loaded and features engineered:
Price             Open        High         Low   spy_close    Volume  \
Date                                                                   
2025-09-29  664.359985  665.280029  661.859985  663.679993  73499000   
2025-09-30  662.929993  666.650024  661.609985  666.179993  86288000   
2025-10-01  663.169983  669.369995  663.059998  668.450012  72545400   
2025-10-02  670.450012  670.570007  666.780029  669.219971  56896000   
2025-10-03  669.989990  672.679993  668.159973  669.210022  70419500   

Price             vix        rsi      sma_20    std_20  bollinger_upper  \
Date                                                                      
2025-09-29  16.120001  70.684255  655.202975  8.053631       671.310238   
2025-09-30  16.280001  71.176374  656.586984  7.382584       671.352152   
2025-10-01  16.290001  68.527029  657.911475  6.985894       671.883263   
2025-10-02  16.629999  69.660078  659.006207  6.955112       672.916432   
2025-10-


