In [1]:
# Import libraries
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

  _empty_series = pd.Series()


In [2]:
# Scrape the S&P 500 company table from Wikipedia and collect its ticker symbols
sp500url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
sp500list = pd.read_html(sp500url)  # one DataFrame per HTML table found on the page
sp500df = sp500list[0]              # the first table on the page is the one used here
symbols = sp500df['Symbol'].tolist()

In [6]:
# Fetch daily price history for every symbol, beginning 2000-01-01
df = yf.download(tickers=symbols, start='2000-01-01')

[*********************100%%**********************]  503 of 503 completed

2 Failed downloads:
['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2000-01-01 -> 2024-02-16)')


### Adjusting for failed downloads
As the output above shows, the tickers 'BRK.B' and 'BF.B' could not be downloaded, because yfinance and Wikipedia use different separators for these share-class tickers (yfinance uses '-' instead of '.'). Therefore, we replace the tickers for these companies in the code cell below.

In [20]:
# Fix the names
# yfinance writes these share-class tickers with '-' where Wikipedia uses '.'
# ('BRK.B' -> 'BRK-B', 'BF.B' -> 'BF-B'). Rewrite by value rather than by
# hard-coded row position: positional indices point at the wrong companies as
# soon as the Wikipedia table gains, loses, or reorders a row.
sp500df['Symbol'] = sp500df['Symbol'].str.replace('.', '-', regex=False)
symbols = sp500df['Symbol'].tolist()

In [21]:
# Repeat the download with the corrected BRK-B and BF-B tickers
all_sp_500 = yf.download(tickers=symbols, start='2000-01-01')

[*********************100%%**********************]  503 of 503 completed


In [52]:
# Pick the 'Adj Close' group out of the multi-level columns; the resulting
# frame has a single column level keyed by ticker
adj_close = all_sp_500["Adj Close"]
adj_close

Ticker,A,AAL,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03,43.683865,,0.846127,,,9.037449,1.277778,,16.274673,28.572287,...,,7.053044,18.328703,6.669167,,4.703721,,25.027779,35.769436,
2000-01-04,40.346893,,0.774790,,,8.779236,1.270833,,14.909400,27.126852,...,,7.215448,17.977634,6.669167,,4.609174,,24.666668,34.037354,
2000-01-05,37.844173,,0.786128,,,8.763100,1.388889,,15.204173,27.522863,...,,7.493861,18.957693,6.784459,,4.632809,,25.138889,33.997089,
2000-01-06,36.403221,,0.718097,,,9.069728,1.375000,,15.328291,26.770441,...,,7.424255,19.937767,6.793329,,4.593416,,23.777779,34.480446,
2000-01-07,39.436817,,0.752113,,,9.166556,1.451389,,16.072987,27.522863,...,,7.424255,19.879238,6.775589,,4.490988,,23.513889,34.561012,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-12,132.309998,14.930,187.149994,173.070007,153.800003,112.529999,83.099998,367.429993,611.840027,194.050003,...,105.720001,58.970001,102.220001,33.520000,123.300003,131.360001,125.849998,259.399994,40.810200,196.690002
2024-02-13,129.759995,14.610,185.039993,173.289993,150.820007,111.339996,84.059998,362.809998,601.890015,184.690002,...,104.199997,58.279999,101.339996,32.450001,121.730003,131.210007,123.160004,245.800003,38.930000,183.490005
2024-02-14,132.240005,14.860,184.149994,174.419998,148.199997,112.059998,84.870003,370.980011,604.659973,186.320007,...,105.209999,58.680000,100.839996,32.669998,124.160004,134.009995,121.839996,252.960007,40.419998,184.080002
2024-02-15,134.750000,14.900,183.860001,176.589996,157.690002,113.639999,87.400002,371.510010,590.440002,187.539993,...,105.730003,59.009998,103.730003,33.860001,124.419998,135.399994,124.400002,283.750000,41.520000,189.649994


In [None]:
# Take the 'Adj Close' selection from the quarterly frame.
# NOTE(review): `all_sp_500_quarterly` is not assigned in any cell visible in this
# notebook, so this line raises NameError on a fresh kernel unless it is defined
# elsewhere — TODO confirm where (and how) the quarterly frame is built.
adj_close_quarterly = all_sp_500_quarterly['Adj Close']


In [None]:
# Assuming all_sp_500_quarterly has a datetime index
# Inclusive year bounds for the rows to keep
START_YEAR, END_YEAR = 1992, 2023

# Build the year mask straight from the index rather than writing a temporary
# 'Year' column into all_sp_500_quarterly: that column was only dropped from the
# filtered copy, so the upstream frame stayed mutated for every later cell.
index_years = all_sp_500_quarterly.index.year
selected_rows = all_sp_500_quarterly[(index_years >= START_YEAR) & (index_years <= END_YEAR)]

# Select the 'Adj Close' column from the filtered rows
adj_close_quarterly = selected_rows['Adj Close']

In [None]:
# Swap the rows and columns of the quarterly adjusted-close table, then preview
# the first rows of the result
filtered_adjusted_close = adj_close_quarterly
transposed_and_filtered_adjusted_close = filtered_adjusted_close.T
transposed_and_filtered_adjusted_close.head()


In [None]:
# Write the transposed table to disk as CSV, index included
transposed_and_filtered_adjusted_close.to_csv(
    path_or_buf='transposed_and_filtered_adjusted_close.csv',
    index=True,
)
