In [1]:
import yfinance as yf
import pandas as pd
import datetime

# Date range for the download. yfinance documents `end` as exclusive, so
# passing today's date requests bars up to (not including) today.
start_date = '2003-01-01'
end_date = datetime.datetime.today().strftime('%Y-%m-%d')

# Example subset of 50 S&P 500 tickers (full list can be obtained elsewhere)
sp500_tickers = ["AAPL", "NVDA", "MSFT", "GOOG", "GOOGL", "AMZN", "META", "AVGO", "LLY", "TSLA",
                 "WMT", "JPM", "V", "XOM", "UNH", "ORCL", "MA", "HD", "PG", "COST", "JNJ",
                 "NFLX", "ABBV", "BAC", "KO", "CRM", "CVX", "MRK", "TMUS", "AMD", "PEP",
                 "ACN", "LIN", "TMO", "MCD", "CSCO", "ADBE", "WFC", "IBM", "GE", "ABT",
                 "DHR", "AXP", "MS", "CAT", "NOW", "QCOM", "PM", "ISRG", "VZ"]

# Download data for all tickers at once.
# auto_adjust is pinned to False so the result keeps separate 'Close' and
# 'Adj Close' columns instead of depending on the installed yfinance default.
data = yf.download(
    sp500_tickers,
    start=start_date,
    end=end_date,
    group_by='ticker',
    auto_adjust=False,
)

# With group_by='ticker' the columns are a MultiIndex whose FIRST level is the
# ticker and whose SECOND level is the price field (Open, High, Low, Close,
# Adj Close, Volume). Tickers with no history yet on a given date hold NaN.
#
# Take the 'Close' cross-section of the second column level (selected by
# position so it does not depend on the level's name) to get a flat frame.
# 'close_data' is a dataframe with dates as index and tickers as columns.
close_data = data.xs('Close', axis=1, level=1)

print(data.head())


[*********************100%***********************]  50 of 50 completed


Ticker       AMD                                       TSLA                 \
Price       Open  High   Low Close Adj Close    Volume Open High Low Close   
Date                                                                         
2003-01-02  6.56  7.11  6.47  7.01      7.01  17474800  NaN  NaN NaN   NaN   
2003-01-03  7.01  7.19  6.86  6.94      6.94   6235700  NaN  NaN NaN   NaN   
2003-01-06  7.05  7.23  7.00  7.16      7.16   6833200  NaN  NaN NaN   NaN   
2003-01-07  7.25  7.48  7.10  7.17      7.17   9785100  NaN  NaN NaN   NaN   
2003-01-08  7.06  7.10  6.63  6.69      6.69  13952300  NaN  NaN NaN   NaN   

Ticker      ...         VZ                                      AAPL  \
Price       ...        Low      Close  Adj Close    Volume      Open   
Date        ...                                                        
2003-01-02  ...  35.136539  36.125793  12.283099   7294044  0.256429   
2003-01-03  ...  35.847004  36.377605  12.368718   5745323  0.264286   
2003-01-06  ...

In [2]:
# Show the last row of the downloaded frame to check how recent the data is.
latest_row = data.tail(1)
print(latest_row)

Ticker             AMD                                                  \
Price             Open        High         Low       Close   Adj Close   
Date                                                                     
2024-11-26  142.550003  142.800003  136.619995  137.720001  137.720001   

Ticker                 TSLA                                      ...     VZ  \
Price         Volume   Open        High         Low       Close  ...    Low   
Date                                                             ...          
2024-11-26  32092400  341.0  346.959991  335.660004  338.230011  ...  43.75   

Ticker                                            AAPL              \
Price           Close  Adj Close    Volume        Open        High   
Date                                                                 
2024-11-26  44.369999  44.369999  17247000  233.330002  235.570007   

Ticker                                                    
Price              Low       Close   Adj Close

In [3]:
# Persist the full downloaded frame (all tickers, all price fields) to a
# Parquet file in the current working directory.
parquet_path = 'sp500_50stocks_data.parquet'
data.to_parquet(parquet_path)

In [4]:
# Inspect the column MultiIndex of the downloaded frame; the bare final
# expression makes the notebook display it.
column_index = data.columns
column_index

MultiIndex([( 'AMD',      'Open'),
            ( 'AMD',      'High'),
            ( 'AMD',       'Low'),
            ( 'AMD',     'Close'),
            ( 'AMD', 'Adj Close'),
            ( 'AMD',    'Volume'),
            ('TSLA',      'Open'),
            ('TSLA',      'High'),
            ('TSLA',       'Low'),
            ('TSLA',     'Close'),
            ...
            (  'VZ',       'Low'),
            (  'VZ',     'Close'),
            (  'VZ', 'Adj Close'),
            (  'VZ',    'Volume'),
            ('AAPL',      'Open'),
            ('AAPL',      'High'),
            ('AAPL',       'Low'),
            ('AAPL',     'Close'),
            ('AAPL', 'Adj Close'),
            ('AAPL',    'Volume')],
           names=['Ticker', 'Price'], length=300)