In [None]:
import datetime as dt
import warnings
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import statsmodels.api as sm
import ta
import yfinance as yf
from statsmodels.regression.rolling import RollingOLS
warnings.filterwarnings("ignore")

# Scrape the S&P 500 constituents table from Wikipedia
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(url, headers=headers, timeout=30)
# Stop on an HTTP error instead of trying to parse an error page as a table.
response.raise_for_status()
# read_html wants a file-like object; passing a literal HTML string is deprecated.
sp500 = pd.read_html(StringIO(response.text))[0]

# Standardize ticker symbols and extract a unique list of S&P 500 tickers.
# regex=False makes '.' a literal dot; as a regex it would match every character.
sp500['Symbol'] = sp500['Symbol'].str.replace('.', '-', regex=False)
symbols_list = sp500['Symbol'].unique().tolist()

# Look-back window: 8 * 365 calendar days ending at end_date.
end_date = '2025-12-15'
start_date = pd.to_datetime(end_date) - pd.DateOffset(days=365 * 8)

# Download data. auto_adjust is spelled out so 'close' is the adjusted close
# regardless of the installed yfinance version's default.
df = yf.download(
    tickers=symbols_list,
    start=start_date,
    end=end_date,
    auto_adjust=True,
).stack()
# Long format: one row per (date, ticker), lower-case column names.
df.index.names = ['date', 'ticker']
df.columns = df.columns.str.lower()

df

In [None]:
# Garman-Klass volatility from the high/low range and the open-to-close move
# (all in log prices).
log_high_low = np.log(df['high']) - np.log(df['low'])
log_close_open = np.log(df['close']) - np.log(df['open'])
df['garman_klass_vol'] = log_high_low**2 / 2 - (2 * np.log(2) - 1) * log_close_open**2

# 20-period RSI, computed separately for every ticker (index level 1)
df['rsi'] = df.groupby(level=1)['close'].transform(
    lambda prices: ta.momentum.rsi(close=prices, window=20)
)

# 20-period Bollinger Bands on log1p(close), one column per band, per ticker
bollinger_columns = (
    ('bb_low', 'bollinger_lband'),
    ('bb_mid', 'bollinger_mavg'),
    ('bb_high', 'bollinger_hband'),
)
for band_column, band_method in bollinger_columns:
    df[band_column] = df.groupby(level=1)['close'].transform(
        # band_method is bound as a default so each lambda keeps its own band
        lambda prices, band_method=band_method: getattr(
            ta.volatility.BollingerBands(close=np.log1p(prices), window=20),
            band_method,
        )()
    )
# Compute a z‑score normalized 14‑day ATR(Average True Range) to capture each stock’s relative volatility profile
def compute_atr(stock_data, window=14):
    """Return one ticker's Average True Range as a z-score over its own history.

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Rows for a single ticker; the 'high', 'low' and 'close' columns are read.
    window : int, default 14
        Look-back length handed to ``ta.volatility.AverageTrueRange``.

    Returns
    -------
    pandas.Series
        ``(atr - atr.mean()) / atr.std()`` on the input's index, with NaN for
        the first ``window - 1`` rows where the indicator is not yet defined.
    """
    atr = ta.volatility.AverageTrueRange(
        high=stock_data['high'],
        low=stock_data['low'],
        close=stock_data['close'],
        window=window,
    ).average_true_range()

    # The indicator emits 0.0 rather than NaN while it is still warming up.
    # Left in place, those zeros drag the mean down, inflate the std and show up
    # as spurious strongly negative z-scores on each ticker's earliest dates.
    atr = atr.copy()
    atr.iloc[:max(window - 1, 0)] = np.nan

    # mean() and std() skip NaN, so the statistics use valid ATR values only.
    return atr.sub(atr.mean()).div(atr.std())

# Run compute_atr once per ticker (index level 1); group_keys=False keeps the
# original (date, ticker) index so the result aligns back onto df.
atr_by_ticker = df.groupby(level=1, group_keys=False).apply(compute_atr)
df['atr'] = atr_by_ticker

# Compute a z‑score normalized MACD(Moving Average Convergence Divergence) signal to capture momentum shifts in each stock’s trend
def compute_macd(close):
    """Return the MACD line (12/26 windows, 9-period signal) as a z-score.

    `close` is one ticker's close-price series; the result is
    ``(macd - macd.mean()) / macd.std()`` on the same index.
    """
    macd_line = ta.trend.MACD(
        close=close,
        window_slow=26,
        window_fast=12,
        window_sign=9,
    ).macd()
    centered = macd_line - macd_line.mean()
    return centered / macd_line.std()

# Normalized MACD, computed ticker by ticker on the close series
macd_by_ticker = df.groupby(level=1, group_keys=False)['close'].apply(compute_macd)
df['macd'] = macd_by_ticker

# Dollar volume (price × shares traded), scaled to millions of dollars as a liquidity measure
df['dollar_volume'] = df['close'].mul(df['volume']).div(1e6)
df

Unnamed: 0_level_0,Price,close,high,low,open,volume,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar_volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2017-12-18,A,63.697105,64.497319,63.527648,63.913630,2306300.0,0.000110,,,,,-2.656965,,146.904634
2017-12-18,AAPL,41.316124,41.498793,40.950785,40.955470,117684400.0,0.000059,,,,,-1.819492,,4862.263259
2017-12-18,ABBV,69.745361,70.391741,69.290763,69.375997,4768300.0,0.000113,,,,,-2.172871,,332.566806
2017-12-18,ABT,49.003815,49.108077,48.152327,48.239215,6053900.0,0.000098,,,,,-2.741538,,296.664194
2017-12-18,ACGL,28.961201,29.338389,28.923166,29.217944,1212300.0,0.000071,,,,,-2.030007,,35.109664
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-12-12,XYZ,64.750000,65.120003,62.799999,63.610001,6642300.0,0.000536,47.124784,4.078423,4.145287,4.212151,-0.669111,-0.379975,430.088925
2025-12-12,YUM,151.059998,151.169998,148.570007,148.899994,1894500.0,0.000070,55.902247,4.964153,5.006037,5.047920,1.343385,-0.208794,286.183165
2025-12-12,ZBH,91.589996,93.160004,90.570000,92.599998,2734200.0,0.000351,43.820179,4.484522,4.540548,4.596574,-0.415566,-0.266276,250.425368
2025-12-12,ZBRA,268.390015,274.269989,265.970001,273.630005,543200.0,0.000328,51.744682,5.418711,5.530683,5.642655,-0.375786,0.082978,145.789456
