## Get Stock Symbols from S&P 500

In [48]:
import yfinance as yf
import pandas as pd
import datetime as dt
import json

In [49]:
# Download the Wikipedia page listing the S&P 500 companies and parse its
# HTML tables; the result is indexed by table position below.
data = pd.read_html(
    io='https://en.wikipedia.org/wiki/List_of_S%26P_500_companies',
)

In [50]:
# Take the first table parsed from the page and keep only the columns used
# by the rest of the notebook.
# NOTE(review): the column labels come from the live page; if one of them is
# renamed upstream this selection raises KeyError.
sp500 = data[0].loc[:, [
    'Symbol',
    'Security',
    'GICS Sector',
    'GICS Sub-Industry',
    'Date first added',
]]
# Notebook display: preview the first ten rows.
sp500.head(n=10)

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Date first added
0,MMM,3M Company,Industrials,Industrial Conglomerates,1976-08-09
1,ABT,Abbott Laboratories,Health Care,Health Care Equipment,1964-03-31
2,ABBV,AbbVie Inc.,Health Care,Pharmaceuticals,2012-12-31
3,ABMD,Abiomed,Health Care,Health Care Equipment,2018-05-31
4,ACN,Accenture,Information Technology,IT Consulting & Other Services,2011-07-06
5,ATVI,Activision Blizzard,Communication Services,Interactive Home Entertainment,2015-08-31
6,ADBE,Adobe Inc.,Information Technology,Application Software,1997-05-05
7,AMD,Advanced Micro Devices,Information Technology,Semiconductors,2017-03-20
8,AAP,Advance Auto Parts,Consumer Discretionary,Automotive Retail,2015-07-09
9,AES,AES Corp,Utilities,Independent Power Producers & Energy Traders,1998-10-02


In [51]:
# Ticker symbols as a plain list, plus the distinct GICS sectors.
sp500_symbols = sp500['Symbol'].tolist()
# unique() keeps first-appearance order, so the sector list is identical on
# every run (a set of strings has no stable order across interpreter runs).
sp500_sectors = sp500['GICS Sector'].unique().tolist()

# Replace the dot in share-class tickers with a dash before the symbols are
# handed to yfinance. str.replace is already a no-op for symbols without a
# dot, so no membership test is needed.
# NOTE(review): presumably Yahoo Finance spells these tickers with a dash
# (e.g. BRK-B rather than BRK.B) -- confirm.
sp500_symbols_new = [ticker.replace('.', '-') for ticker in sp500_symbols]

sp500_symbols = sp500_symbols_new

## Retrieve Dividend Data

In [None]:
# Collect, for every symbol in sp500_symbols, the yearly dividend totals of
# its run of dividend-paying years, keeping only symbols whose run is at
# least year_threshold years long.
#
#   div_data[symbol][0] -> {year: total dividends paid in that year}
#   div_data[symbol][1] -> {'consecutive_yrs': number of years in the run}
div_data = {}
year_threshold = 40  # minimum run length, in years, for a symbol to be kept

# The current calendar year is excluded from the yearly totals (the range
# below stops before it). It is the same for every symbol, so compute it once.
current_year = dt.datetime.today().year

for symbol in sp500_symbols:
    prices = yf.Ticker(symbol).history(period='max')
    # Guard against a download that returned no rows or lacks the
    # 'Dividends' column, so one bad ticker does not abort the whole loop.
    if prices.empty or 'Dividends' not in prices.columns:
        continue

    payouts = prices.loc[prices['Dividends'] > 0, 'Dividends']
    if payouts.empty:  # no dividend on record for this symbol
        continue

    # Sum the payouts per calendar year in a single pass. Keys and values are
    # converted to plain int/float so the result holds no numpy scalars.
    yearly_totals = {
        int(year): float(total)
        for year, total in payouts.groupby(payouts.index.year).sum().items()
    }

    # Walk forward from the first dividend year and stop at the first year
    # without a payout.
    # NOTE(review): the run is therefore anchored at the FIRST dividend year,
    # not at the most recent one; a symbol that paused and later resumed is
    # measured only up to the pause. Confirm this is the intended screen.
    annual_div = {}
    for year in range(min(yearly_totals), current_year):
        div_sum = yearly_totals.get(year, 0.0)
        if div_sum == 0:  # dividend discontinued
            break
        annual_div[year] = div_sum

    if len(annual_div) >= year_threshold:
        div_data[symbol] = [annual_div, {'consecutive_yrs': len(annual_div)}]

In [None]:
# Notebook display: show the dividend data collected for the symbols that
# met year_threshold.
div_data

## Calculate 5Y, 10Y, 25Y Average Dividend Growth Rate

In [None]:
# The year before the current one is the end of every growth window; the
# three window start years lie 5, 10 and 25 years before it.
last_year = dt.date.today().year - 1
five_year, ten_year, twenty_five_year = (
    last_year - span for span in (5, 10, 25)
)

In [None]:
# Add the average year-over-year dividend growth, in percent, for the three
# look-back windows to each symbol's summary dict (div_data[symbol][1]).
def _avg_growth_pct(annual_div, start_year, end_year):
    """Return the mean yearly dividend change in percent, or None.

    For every year in ``start_year..end_year`` (inclusive) the change is
    measured against the year before it. None is returned when a needed year
    is missing from ``annual_div`` or a prior-year total is zero, so that a
    symbol with incomplete history does not abort the whole run.
    """
    changes = []
    for year in range(start_year, end_year + 1):
        current = annual_div.get(year)
        previous = annual_div.get(year - 1)
        if current is None or not previous:
            return None
        changes.append(100 * (current - previous) / previous)
    return sum(changes) / len(changes) if changes else None


# Iterate over div_data itself: it only holds the symbols that passed the
# year threshold, so looping over every S&P 500 symbol raises KeyError.
# NOTE(review): with five_year = last_year - 5 (and likewise for 10 and 25),
# each window averages N + 1 yearly changes, not N -- confirm this is intended.
for yearly, summary in div_data.values():
    summary['5yrsGrowthAvg'] = _avg_growth_pct(yearly, five_year, last_year)
    summary['10yrsGrowthAvg'] = _avg_growth_pct(yearly, ten_year, last_year)
    summary['25yrsGrowthAvg'] = _avg_growth_pct(
        yearly, twenty_five_year, last_year)

In [None]:
# Notebook display: show div_data again, now including the growth averages
# written by the previous cell.
div_data

## Export & Save Dividend Data to JSON

In [None]:
# Serialise the collected dividend data and write it to the export file.
# The integer year keys end up as strings, as JSON object keys always do.
with open('data/historical_div_sp500.json', mode='w') as json_file:
    json_file.write(json.dumps(div_data))