In [2]:
from pathlib import Path

import pandas as pd
import yfinance as yf

In [3]:
# Scrape the S&P 500 constituents list from Wikipedia. read_html returns one
# DataFrame per HTML table on the page; the first one is used here.
snp_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
snp_table = pd.read_html(snp_url)[0]

# The table is picked by position, so verify that it carries the columns the
# following cells index into; otherwise a page layout change would only show
# up later as an opaque KeyError.
assert {"Symbol", "GICS Sector"} <= set(snp_table.columns), (
    "First table on the Wikipedia page does not look like the constituents list"
)

snp_table.head()

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
0,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1957-03-04,66740,1902
1,AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
2,ABT,Abbott,Health Care,Health Care Equipment,"North Chicago, Illinois",1957-03-04,1800,1888
3,ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ACN,Accenture,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


In [4]:
# Symbols containing "." are rewritten with "-" before being handed to
# yfinance. The rewrite is applied to the sector table as well, so that the
# symbols saved alongside each sector are spelled exactly like the ticker
# list used for the price download (previously only the list was rewritten,
# leaving the two out of sync for dotted symbols).
snp_tickers_by_sector = snp_table[["Symbol", "GICS Sector"]].assign(
    Symbol=lambda frame: frame["Symbol"].str.replace(".", "-", regex=False)
)
snp_tickers = snp_tickers_by_sector["Symbol"].to_list()

snp_tickers_by_sector.head()

Unnamed: 0,Symbol,GICS Sector
0,MMM,Industrials
1,AOS,Industrials
2,ABT,Health Care
3,ABBV,Health Care
4,ACN,Information Technology


In [5]:
# Date window requested from yfinance for every download in this notebook.
start_date = "1990-01-01"
end_date = "2023-09-12"

# auto_adjust=False is passed explicitly: the "Adj Close" field selected below
# is only returned when prices are not auto-adjusted, and the library default
# for this flag differs between yfinance releases.
snp_prices = yf.download(
    snp_tickers,
    start=start_date,
    end=end_date,
    auto_adjust=False,
)["Adj Close"]

[*********************100%***********************]  503 of 503 completed


In [6]:
# Preview the first three rows of the downloaded constituent prices.
snp_prices.head(n=3)

Unnamed: 0_level_0,A,AAL,AAPL,ABBV,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1990-01-02,,,0.263761,,1.837212,,,1.18834,1.021221,4.656209,...,,4.213068,4.034966,0.855866,,,,,1.818957,
1990-01-03,,,0.265531,,1.843845,,,1.247023,0.96817,4.656209,...,,4.200066,3.99462,0.855866,,,,,1.851439,
1990-01-04,,,0.266417,,1.840528,,,1.305707,0.954908,4.630907,...,,4.096035,3.95427,0.821631,,,,,1.851439,


In [7]:
# Number of rows (one per date) in the constituent price frame.
snp_prices.shape[0]

8488

In [8]:
# Ticker -> label mapping; the labels replace the tickers as column names.
indices = {
    "VTI": "STOCKS",
    "AGG": "BONDS",
    "DBC": "COMMODITIES",
    "^VIX": "VOLATILITY"
}

# auto_adjust=False keeps the "Adj Close" field available irrespective of the
# yfinance default. dropna() removes every date on which at least one of the
# series has no value, and the rename is chained (rather than done in place)
# so that re-running the cell always rebuilds the frame from scratch.
indices_prices = (
    yf.download(
        list(indices),
        start=start_date,
        end=end_date,
        auto_adjust=False,
    )["Adj Close"]
    .dropna()
    .rename(columns=indices)
)

indices_prices.head(3)

[*********************100%***********************]  4 of 4 completed


Unnamed: 0_level_0,BONDS,COMMODITIES,STOCKS,VOLATILITY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2006-02-06,58.718147,21.916014,45.468452,13.04
2006-02-07,58.677162,21.28208,45.025742,13.59
2006-02-08,58.64785,21.191517,45.349682,12.83


In [9]:
# Number of rows (one per date) left after dropping incomplete dates.
indices_prices.shape[0]

4429

In [10]:
# Put the constituent prices on the same date index as indices_prices, then
# discard every ticker column that has a missing value on any of those dates.
snp_prices_reindexed = (
    snp_prices
    .reindex(index=indices_prices.index)
    .dropna(axis="columns", how="any")
)
snp_prices_reindexed.head(n=3)

Unnamed: 0_level_0,A,AAL,AAPL,ABT,ACGL,ACN,ADBE,ADI,ADM,ADP,...,WTW,WY,WYNN,XEL,XOM,XRAY,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-02-06,21.155682,27.594042,2.040143,13.528105,6.003333,23.419563,39.52,25.989521,19.771849,22.702063,...,60.795563,13.990589,34.024216,9.861901,33.966381,23.323494,13.171959,58.793865,43.349998,58.16637
2006-02-07,20.705034,27.339506,2.049237,13.429254,5.937778,23.639191,39.259998,26.118898,19.871237,22.846333,...,61.587666,13.916323,33.315826,9.714868,33.188068,23.336805,13.014937,59.287476,44.0,57.862595
2006-02-08,21.033888,28.272818,2.085917,13.413301,5.885556,23.749002,38.799999,26.455246,20.288673,23.258522,...,62.731754,13.990589,33.167553,9.615091,33.276222,23.394472,12.865635,59.613647,46.18,58.307129


In [11]:
# to_csv does not create missing folders, so make sure the output directory
# exists before the first file is written (no-op when it is already there).
Path("data").mkdir(parents=True, exist_ok=True)
indices_prices.to_csv("data/indices.csv")

In [12]:
# Persist the date-aligned constituent prices (index is written as the first column).
snp_prices_reindexed.to_csv(path_or_buf="data/snp_prices.csv")

In [14]:
# Persist the symbol/sector table; the frame's index is written as the first
# column because to_csv is called with its default index handling.
snp_tickers_by_sector.to_csv(path_or_buf="data/stocks_by_sector.csv")