In [27]:
import bs4 as bs
import datetime as dt
import os
import pandas as pd
import numpy as np
import yfinance as yf
import pickle 
import requests

def save_sp500_tickers():
    """Scrape the S&P 500 constituent list from Wikipedia and cache it on disk.

    Downloads the "List of S&P 500 companies" page, takes the first table
    whose class is ``wikitable sortable``, and reads the first two cells of
    every data row as (ticker, company name). Dots in tickers are replaced
    with dashes (e.g. ``BRK.B`` -> ``BRK-B``), presumably to match the symbol
    format expected by the Yahoo Finance download step.

    The resulting table is pickled to ``sp500tickers.pickle`` in the current
    working directory so it can be reloaded without scraping again.

    Returns:
        numpy.ndarray: 2-D string array with one row per company; column 0 is
        the ticker and column 1 is the company name.

    Raises:
        requests.HTTPError: if Wikipedia answers with an error status.
        ValueError: if the constituents table cannot be found in the page.
    """
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    resp.raise_for_status()
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise ValueError("Could not find the 'wikitable sortable' table on the Wikipedia page")

    tickers = []
    names = []
    # The first <tr> is the header row, so skip it.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) < 2:
            # Rows without data cells (e.g. extra header rows) carry no ticker.
            continue
        # strip() removes the trailing newline that the cell text carries,
        # without chopping a real character when no newline is present.
        tickers.append(cells[0].text.strip().replace('.', '-'))
        names.append(cells[1].text.strip())

    symbols = np.column_stack((tickers, names))

    # Persist the table only after it has been built, so the cache holds the
    # actual tickers rather than an empty placeholder.
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(symbols, f)
    return symbols
# Fetch the constituent table once at notebook level; later cells read this global.
symbols=save_sp500_tickers()


In [28]:
# Show the number of rows followed by the ticker/name table itself.
print(len(symbols), symbols, sep='\n')

502
[['MMM' '3M']
 ['AOS' 'A. O. Smith']
 ['ABT' 'Abbott']
 ...
 ['ZBH' 'Zimmer Biomet']
 ['ZION' 'Zions Bancorporation']
 ['ZTS' 'Zoetis']]


In [29]:

def get_data_from_yahoo(reload_sp500=True, start_date='2023-1-1', end_date='2023-6-30'):
    """Download daily price history for every S&P 500 ticker into ``stock_dfs/``.

    One CSV per ticker is written to ``stock_dfs/<ticker>.csv``. Tickers whose
    CSV already exists are skipped, so the function can be re-run to resume an
    interrupted download.

    Args:
        reload_sp500: When True, scrape the constituent list again via
            ``save_sp500_tickers()``; otherwise load it from
            ``sp500tickers.pickle``.
        start_date: First date of the history window, passed to
            ``yfinance.Ticker.history`` as ``start``.
        end_date: End date of the history window, passed as ``end``.

    Returns:
        None. Results are written to disk.
    """
    if reload_sp500:
        symbols = save_sp500_tickers()
    else:
        # NOTE: pickle.load can execute arbitrary code embedded in the file;
        # only load a cache that this notebook wrote itself.
        with open("sp500tickers.pickle", "rb") as f:
            symbols = pickle.load(f)

    os.makedirs('stock_dfs', exist_ok=True)

    # Each row of the table is (ticker, company name).
    for ticker, name in symbols:
        csv_path = 'stock_dfs/{}.csv'.format(ticker)
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue

        df = yf.Ticker(ticker).history(interval='1d', start=start_date, end=end_date)
        if df.empty:
            # Surface tickers Yahoo returned nothing for instead of silently
            # caching an empty file.
            print('No data returned for {}'.format(ticker))
        # Make sure the index is written under the "Date" header that the
        # compile step reads back, even when the frame came back empty.
        df = df.rename_axis('Date')
        df['Name'] = name
        df['Symbol'] = ticker
        df.to_csv(csv_path)


# Runs with the default reload_sp500=True, so the Wikipedia table is scraped again here.
get_data_from_yahoo()

Already have MMM
Already have AOS
Already have ABT
Already have ABBV
Already have ACN
Already have ADM
Already have ADBE
Already have ADP
Already have AES
Already have AFL
Already have A
Already have ABNB
Already have APD
Already have AKAM
Already have ALK
Already have ALB
Already have ARE
Already have ALGN
Already have ALLE
Already have LNT
Already have ALL
Already have GOOGL
Already have GOOG
Already have MO
Already have AMZN
Already have AMCR
Already have AMD
Already have AEE
Already have AAL
Already have AEP
Already have AXP
Already have AIG
Already have AMT
Already have AWK
Already have AMP
Already have AME
Already have AMGN
Already have APH
Already have ADI
Already have ANSS
Already have AON
Already have APA
Already have AAPL
Already have AMAT
Already have APTV
Already have ACGL
Already have ANET
Already have AJG
Already have AIZ
Already have T
Already have ATO
Already have ADSK
Already have AZO
Already have AVB
Already have AVY
Already have AXON
Already have BKR
Already have BAL

In [30]:
def compile_data(ticker_table=None):
    """Stack the per-ticker CSVs from ``stock_dfs/`` into one long DataFrame.

    Args:
        ticker_table: Optional sequence of rows whose first element is the
            ticker symbol. When omitted, the notebook-level ``symbols`` table
            is used, which keeps a bare ``compile_data()`` call working.

    Returns:
        pandas.DataFrame: all rows of ``stock_dfs/<ticker>.csv`` for every
        ticker, in table order, indexed by the ``Date`` column. An empty
        DataFrame is returned when there is nothing to combine.

    Raises:
        FileNotFoundError: if a ticker's CSV has not been downloaded yet.
    """
    if ticker_table is None:
        # Backward-compatible default: fall back to the global table.
        ticker_table = symbols

    frames = []
    for row in ticker_table:
        ticker = row[0]
        df = pd.read_csv('stock_dfs/{}.csv'.format(ticker)).set_index('Date')
        # Skip frames with no rows: they add nothing to the result.
        if not df.empty:
            frames.append(df)

    if not frames:
        return pd.DataFrame()

    # Concatenate once at the end; concatenating inside the loop re-copies the
    # accumulated frame on every iteration.
    return pd.concat(frames)

# Display the combined frame as the cell output (the result is not stored here).
compile_data()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Name,Symbol,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-03 00:00:00-05:00,116.395598,117.468371,115.294099,117.305542,2612800,0.0,0.0,3M,MMM,
2023-01-04 00:00:00-05:00,118.148422,120.006616,117.535411,119.872520,2769700,0.0,0.0,3M,MMM,
2023-01-05 00:00:00-05:00,118.972160,119.316980,117.295956,117.774872,2606600,0.0,0.0,3M,MMM,
2023-01-06 00:00:00-05:00,119.403191,121.769027,118.531561,121.376320,2417000,0.0,0.0,3M,MMM,
2023-01-09 00:00:00-05:00,121.644503,124.000773,120.792034,121.443359,2871300,0.0,0.0,3M,MMM,
...,...,...,...,...,...,...,...,...,...,...
2023-06-23 00:00:00-04:00,168.728928,169.307642,166.823116,168.000519,2412100,0.0,0.0,Zoetis,ZTS,
2023-06-26 00:00:00-04:00,167.541546,168.000529,164.188919,167.172348,1729200,0.0,0.0,Zoetis,ZTS,
2023-06-27 00:00:00-04:00,168.429595,170.714563,166.783206,170.295486,1452300,0.0,0.0,Zoetis,ZTS,
2023-06-28 00:00:00-04:00,170.016090,170.425194,168.379690,168.728928,1686800,0.0,0.0,Zoetis,ZTS,


In [31]:
# Keep the combined per-ticker frame for the cells below.
main_df=compile_data()

In [32]:
# Preview the first rows of the combined frame.
main_df.head()


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Name,Symbol,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-03 00:00:00-05:00,116.395598,117.468371,115.294099,117.305542,2612800,0.0,0.0,3M,MMM,
2023-01-04 00:00:00-05:00,118.148422,120.006616,117.535411,119.87252,2769700,0.0,0.0,3M,MMM,
2023-01-05 00:00:00-05:00,118.97216,119.31698,117.295956,117.774872,2606600,0.0,0.0,3M,MMM,
2023-01-06 00:00:00-05:00,119.403191,121.769027,118.531561,121.37632,2417000,0.0,0.0,3M,MMM,
2023-01-09 00:00:00-05:00,121.644503,124.000773,120.792034,121.443359,2871300,0.0,0.0,3M,MMM,


In [33]:
# Today's date as a "YYYY-MM-DD" string; used below to build the export file name.
ver = dt.date.today().isoformat()
ver

'2023-10-17'

In [34]:
# Export the combined frame as a zip archive holding a single CSV member named
# "<ver>_SP500.csv".
# NOTE: the file written to disk keeps the ".csv" name but is a zip archive.
compression_opts = dict(method='zip',
                        archive_name=str(ver)+'_SP500.csv')

# The trading dates live in the DataFrame index, so the index must be written;
# with index=False the exported rows would carry no date at all.
main_df.to_csv(str(ver)+'_SP500.csv', index=True,
               compression=compression_opts)
