In [1]:
# Imports
from pathlib import Path

import pandas as pd
import pandas_datareader.data as web
import yfinance as yf

### Getting the data

In [None]:
# Getting list of S&P500 stocks
# read_html returns a list of tables; `match` keeps only tables containing the
# text "Symbol", so the constituents table is selected by content rather than
# purely by its position on the page.
sp_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
sp500_constituents = pd.read_html(sp_url, match='Symbol', header=0)[0]
# Later cells iterate over `sp500_constituents.Symbol`; fail here, with a clear
# message, if the page layout changed and that column is missing.
assert 'Symbol' in sp500_constituents.columns, "Expected a 'Symbol' column in the S&P 500 table"

In [None]:
# Replacing '.' with '-' in the ticker symbols to match the format Yahoo Finance expects
# Vectorized over the 'Symbol' column (the one later cells read) instead of a
# row-by-row positional loop. regex=False is required so '.' is treated as a
# literal dot and not as the "any character" regex wildcard.
sp500_constituents['Symbol'] = sp500_constituents['Symbol'].str.replace('.', '-', regex=False)

In [None]:
# Making directory for storing data
# `data_path` is the folder where the CSV files produced by this notebook are stored.
data_path = Path('./data/')
if data_path.exists():
    print('Directory exists')
else:
    print('Creating directory')
    # parents/exist_ok make the call safe to re-run and tolerant of a missing parent folder
    data_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Getting company details from Yahoo Finance
# One dict per company is collected and converted to a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration. Building from the records also removes the extra request
# that was made only to seed the column names.
info_records = []
for symbol in sp500_constituents.Symbol:
    try:
        print(symbol)
        ticker = yf.Ticker(symbol)
        record = dict(ticker.info)
        # Guarantee the key used as the index below, even if the response omits it.
        record.setdefault("symbol", symbol)
        info_records.append(record)
    except Exception as err:  # not a bare except, so an interrupt still stops the download
        print(symbol, "didn't load! Try again.", repr(err))
# If nothing loaded, fall back to an empty frame that still has a "symbol" column
# so the indexing step below does not fail.
info = pd.DataFrame(info_records or {"symbol": []})
info = info.set_index("symbol").rename_axis("Ticker")
info.to_csv('./data/info.csv')

In [None]:
# Getting quarterly financials from Yahoo Finance
# Per-ticker frames are gathered in a list and concatenated once at the end
# (DataFrame.append was removed in pandas 2.0 and copied the frame on every call).
financial_frames = []
for symbol in sp500_constituents.Symbol:
    try:
        ticker = yf.Ticker(symbol)
        # Transpose, label the resulting index "Date", tag every row with the
        # ticker and index the rows by (Ticker, Date).
        financial = (
            ticker.quarterly_financials
            .transpose()
            .rename_axis("Date")
            .assign(Ticker=symbol)
            .reset_index()
            .set_index(["Ticker", "Date"])
        )
        if not financial.empty:  # nothing to add for tickers without data
            financial_frames.append(financial)
    except Exception as err:  # not a bare except, so an interrupt still stops the download
        print(symbol, "didn't load! Try again.", repr(err))
# pd.concat raises on an empty list, so fall back to an empty frame.
financials = pd.concat(financial_frames) if financial_frames else pd.DataFrame()
financials.to_csv('./data/financials.csv')

In [None]:
# Getting quarterly balance sheets from Yahoo Finance
# Per-ticker frames are gathered in a list and concatenated once at the end
# (DataFrame.append was removed in pandas 2.0 and copied the frame on every call).
balance_frames = []
for symbol in sp500_constituents.Symbol:
    try:
        ticker = yf.Ticker(symbol)
        # Transpose, label the resulting index "Date", tag every row with the
        # ticker and index the rows by (Ticker, Date).
        financial = (
            ticker.quarterly_balance_sheet
            .transpose()
            .rename_axis("Date")
            .assign(Ticker=symbol)
            .reset_index()
            .set_index(["Ticker", "Date"])
        )
        if not financial.empty:  # nothing to add for tickers without data
            balance_frames.append(financial)
    except Exception as err:  # not a bare except, so an interrupt still stops the download
        print(symbol, "didn't load! Try again.", repr(err))
# pd.concat raises on an empty list, so fall back to an empty frame.
balances = pd.concat(balance_frames) if balance_frames else pd.DataFrame()
balances.to_csv('./data/balances.csv')

In [None]:
# Getting quarterly cashflow statements from Yahoo Finance
# Per-ticker frames are gathered in a list and concatenated once at the end
# (DataFrame.append was removed in pandas 2.0 and copied the frame on every call).
cashflow_frames = []
for symbol in sp500_constituents.Symbol:
    try:
        ticker = yf.Ticker(symbol)
        # Transpose, label the resulting index "Date", tag every row with the
        # ticker and index the rows by (Ticker, Date).
        financial = (
            ticker.quarterly_cashflow
            .transpose()
            .rename_axis("Date")
            .assign(Ticker=symbol)
            .reset_index()
            .set_index(["Ticker", "Date"])
        )
        if not financial.empty:  # nothing to add for tickers without data
            cashflow_frames.append(financial)
    except Exception as err:  # not a bare except, so an interrupt still stops the download
        print(symbol, "didn't load! Try again.", repr(err))
# pd.concat raises on an empty list, so fall back to an empty frame.
cashflow = pd.concat(cashflow_frames) if cashflow_frames else pd.DataFrame()
cashflow.to_csv('./data/cashflow.csv')

In [None]:
# Getting quarterly earnings from Yahoo Finance
# Per-ticker frames are gathered in a list and concatenated once at the end
# (DataFrame.append was removed in pandas 2.0 and copied the frame on every call).
earnings_frames = []
for symbol in sp500_constituents.Symbol:
    try:
        ticker = yf.Ticker(symbol)
        # .assign returns a new frame, so the object handed back by yfinance is
        # not modified. Rows end up indexed by (Ticker, Quarter).
        financial = (
            ticker.quarterly_earnings
            .assign(Ticker=symbol)
            .reset_index()
            .set_index(["Ticker", "Quarter"])
        )
        if not financial.empty:  # nothing to add for tickers without data
            earnings_frames.append(financial)
    except Exception as err:  # not a bare except, so an interrupt still stops the download
        print(symbol, "didn't load! Try again.", repr(err))
# pd.concat raises on an empty list, so fall back to an empty frame.
earnings = pd.concat(earnings_frames) if earnings_frames else pd.DataFrame()
earnings.to_csv('./data/earnings.csv')

In [None]:
# Getting market data from Yahoo Finance
# Per-ticker frames are gathered in a list and concatenated once at the end
# (DataFrame.append was removed in pandas 2.0 and copied the frame on every call).
ohlcv_frames = []
for symbol in sp500_constituents.Symbol:
    try:
        ticker = yf.Ticker(symbol)
        # Tag every row with the ticker, order rows newest-first and index
        # them by (Ticker, Date).
        market = (
            ticker.history(
                start='2014-01-02',
                end='2020-12-31',
                actions=True,
                auto_adjust=True,
                back_adjust=False,
            )
            .assign(Ticker=symbol)
            .reset_index()
            .sort_values(by=["Date"], ascending=False)
            .set_index(["Ticker", "Date"])
        )
        # The corporate-action columns requested via actions=True are not stored.
        prices = market.drop(columns=["Dividends", "Stock Splits"])
        if not prices.empty:  # nothing to add for tickers without data
            ohlcv_frames.append(prices)
    except Exception as err:  # not a bare except, so an interrupt still stops the download
        print(symbol, "didn't load! Try again.", repr(err))
# pd.concat raises on an empty list, so fall back to an empty frame.
ohlcv = pd.concat(ohlcv_frames) if ohlcv_frames else pd.DataFrame()
ohlcv.to_csv('./data/ohlcv.csv')